In [None]:
from dotenv import load_dotenv
import os

load_dotenv()

# Input/output file locations, read from environment variables (load_dotenv() is called above).
# os.environ[...] is used on purpose: a missing variable raises KeyError right here.
(
  db_existing_pathfile,
  consolidated_original_mailing_batch_path_file,
  consolidated_original_sms_batch_path_file,
  consolidated_original_people_not_contacted_batch_path_file,
  ditp_analysis_output_path_file,
) = (
  os.environ[env_key]
  for env_key in (
    'DB_CURRENT_WITH_PASS_STATUS',
    'CONSOLIDATED_ORIGINAL_MAILING_BATCH_PATH_FILE',
    'CONSOLIDATED_ORIGINAL_SMS_BATCH_PATH_FILE',
    'CONSOLIDATED_ORIGINAL_PEOPLE_NOT_CONTACTED_BATCH_PATH_FILE',
    # NOTE: 'OUPUT' (sic) - the key is looked up verbatim, so it must match the spelling in the environment.
    'DITP_ANALYSIS_EXPORT_OUPUT_PATH_FILE',
  )
)

# Column names of the CSV exported to DITP.
# Where a column holds a coded value, the encoding is given just above its name.

# 1=parent, 2=direct
type_benef_col = 'type_benef'
# 1=mail, 2=sms
type_canal_col = 'type_canal'
# 0=control, 1=test
object_type_col = 'objet_recu'
# 0=control, 1=test
mail_type_col = 'mail_recu'
# 0=no, 1=yes
activated_col = 'recours'
# 1=female, 2=male
benef_gender_col = 'beneficiaire_genre'
benef_age_col = 'beneficiaire_age'
# 1=female, 2=male
parents_gender_col = 'parents_genre'
parents_age_col = 'parents_age'
postal_code_col = 'code_postal'
child_age_col = 'age_enfant'
qpv_col = 'qpv'
zrr_col = 'zrr'
residential_area_col = 'zone_habitation'
activation_date_col = 'date_recours'

# Integer codes written into the additional DITP columns.
type_benef = dict(parent=1, benef=2)
object_type = dict(control=0, test=1)
mail_type = dict(control=0, test=1)
canal_type = dict(mail=1, sms=2)
gender_type = dict(female=1, man=2)

In [None]:
import pandas as pd

# Read the three consolidated batch CSVs (mailing, SMS, people not contacted) into dataframes.
# NOTE(review): the previous comment spoke of "the 8 files that were initially sent to Link Mobility";
# only three consolidated files are read here - presumably the consolidation happened upstream, confirm.
df_mailing, df_sms, df_people_not_contacted = map(
  pd.read_csv,
  (
    consolidated_original_mailing_batch_path_file,
    consolidated_original_sms_batch_path_file,
    consolidated_original_people_not_contacted_batch_path_file,
  ),
)

In [None]:
# Tag every row of each batch with its beneficiary type and contact channel.
# All three batches are flagged as 'parent'; the channel depends on the batch.
df_mailing[type_benef_col] = type_benef['parent']
df_mailing[type_canal_col] = canal_type['mail']

df_sms[type_benef_col] = type_benef['parent']
df_sms[type_canal_col] = canal_type['sms']

# People who were not contacted have no channel: the column is filled with pd.NA.
df_people_not_contacted[type_benef_col] = type_benef['parent']
df_people_not_contacted[type_canal_col] = pd.NA

In [None]:
# Load the existing database export holding the pass Sport activation status.
# Identifier, code, area, postal-code and date columns are read as strings;
# the activation flag, ages and genders use the nullable 'Int64' dtype so missing values are allowed.
df_db_existing = pd.read_csv(
  db_existing_pathfile,
  dtype={
    **dict.fromkeys(
      ['id', 'code', 'zone_habitation', 'code_postal', 'date_recours'],
      'str',
    ),
    **dict.fromkeys(
      ['recours', 'allocataire_age', 'beneficiaire_age', 'beneficiaire_genre', 'allocataire_genre'],
      'Int64',
    ),
  },
)

In [None]:
# Attach the database columns to the mailing batch, matching on 'id_psp'.
# Left join: every mailing row is kept, matched or not. Column names present on both
# sides keep the batch version unchanged; the database version gets the '_new' suffix.
df_mailing_activated = df_mailing.merge(
  df_db_existing,
  on=['id_psp'],
  how='left',
  suffixes=(None, '_new'),
)

In [None]:
# Attach the database columns to the SMS batch, matching on 'id_psp'.
# Left join: every SMS row is kept, matched or not. Column names present on both
# sides keep the batch version unchanged; the database version gets the '_new' suffix.
df_sms_activated = df_sms.merge(
  df_db_existing,
  on=['id_psp'],
  how='left',
  suffixes=(None, '_new'),
)

In [None]:
# Attach the database columns to the batch of people who were not contacted, matching on 'id_psp'.
# Left join: every batch row is kept, matched or not. Column names present on both
# sides keep the batch version unchanged; the database version gets the '_new' suffix.
df_people_not_contacted_activated = df_people_not_contacted.merge(
  df_db_existing,
  on=['id_psp'],
  how='left',
  suffixes=(None, '_new'),
)

In [None]:
# Stack the three enriched batches (mailing, SMS, not contacted) into a single dataframe.
# ignore_index=True already produces a fresh 0..n-1 index, so no further reset_index()
# is applied: it would only copy that index into a redundant 'index' column (and would
# raise if one of the batches already had a column with that name).
merged_df = pd.concat(
  [
    df_mailing_activated,
    df_sms_activated,
    df_people_not_contacted_activated,
  ],
  ignore_index=True,
)

In [None]:
# Order rows by activation date, most recent first, rows without a date last.
# Steps: parse the 'dd/mm/YYYY' strings (unparseable values become NaT), sort on the
# parsed dates, then write the dates back in the original 'dd/mm/YYYY' text format.
merged_df = (
  merged_df
  .assign(**{
    activation_date_col: lambda frame: pd.to_datetime(
      frame[activation_date_col],
      format='%d/%m/%Y',
      errors='coerce',
    )
  })
  .sort_values(by=activation_date_col, ascending=False, na_position='last')
  .assign(**{
    activation_date_col: lambda frame: frame[activation_date_col].dt.strftime('%d/%m/%Y')
  })
)

In [None]:
# Cast the gender and age columns to the nullable integer dtype after the merge.
merged_df = merged_df.astype(
  dict.fromkeys(
    ['allocataire_genre', 'allocataire_age', 'beneficiaire_genre', 'beneficiaire_age'],
    'Int64',
  )
)

In [None]:
# Columns written to the DITP export, in output order.
columns_to_keep = [
  type_benef_col,
  type_canal_col,
  activated_col,
  activation_date_col,
  parents_age_col,
  parents_gender_col,
  benef_gender_col,
  benef_age_col,
  postal_code_col,
  residential_area_col,
]

# Rename the database column names to the DITP export names, then keep only the export columns.
final_df = (
  merged_df
  .rename(columns={
    'allocataire_genre': parents_gender_col,
    'allocataire_age': parents_age_col,
    'beneficiaire_genre': benef_gender_col,
    'beneficiaire_age': benef_age_col,
  })
  .loc[:, columns_to_keep]
)

In [None]:
# Write the DITP analysis dataframe to CSV; the dataframe index is not exported.
final_df.to_csv(
  path_or_buf=ditp_analysis_output_path_file,
  index=False,
)

In [None]:
# Number of activated pass Sport in the batch of people who were not contacted
# (rows with no channel and a non-missing activation date).
int((final_df[type_canal_col].isna() & final_df['date_recours'].notna()).sum())

In [None]:
# Number of activated pass Sport in the mailing batch
# (rows whose channel is 'mail' and whose activation date is not missing).
int(((final_df[type_canal_col] == canal_type['mail']) & final_df['date_recours'].notna()).sum())

In [None]:
# Number of activated pass Sport in the SMS batch
# (rows whose channel is 'sms' and whose activation date is not missing).
int(((final_df[type_canal_col] == canal_type['sms']) & final_df['date_recours'].notna()).sum())