# DITP experimentation n°2 - part 1
# Notebook for the LM campaign of 19 September, which starts on 26 September
Analysis deadlines: 10 october, 10 november, 31 december

- 8 files are to be generated by this script
  - 4 files for parents
  - 4 files for direct beneficiaries
- The CSV format is similar to the previous campaign, except that there is a new column named "pronom" whose value can be "il" or "elle"
- 1 additional file containing exhaustive information about these 8 files is also generated, for further analysis with DITP later on (step 3)

In [None]:
import os
from dotenv import load_dotenv
import pandas as pd
import json
import numpy as np
from datetime import date

# Load the variables declared in the .env file before reading os.environ.
load_dotenv()

# Paths of the two input CSV batches.
mailing_batch_pathfile = os.environ["BATCH_MAILING_CAMPAIGN_PATHFILE"]
sms_batch_pathfile = os.environ["BATCH_SMS_CAMPAIGN_PATHFILE"]

# Paths the LM-formatted CSV files are written to at the end of the notebook.
sms_batch_lm_formatted_pathfile = os.environ["BATCH_SMS_LM_FORMATTED_CAMPAIGN_PATHFILE"]
mailing_batch_lm_formatted_pathfile = os.environ["BATCH_MAILING_LM_FORMATTED_CAMPAIGN_PATHFILE"]

# QR code settings: base URL and base64-encoded secret key.
qr_code_base_url = os.environ["BENEF_2024_QR_CODE_BASE_URL"]
qr_code_secret_key = os.environ["BENEF_2024_QR_CODE_URL_SECRET"]

In [None]:
# Load the SMS and mailing batches (same subset of columns for both) and
# stack them into a single frame; they are split back into their respective
# batches at the end of the notebook.
columns = [
    'id', 'nom', 'prenom', 'genre',
    'allocataire', 'adresse_allocataire',
    'id_psp', 'date_naissance',
    'zrr', 'qpv',
]

df_sms_batch, df_mailing_batch = (
    pd.read_csv(batch_path, sep=',', usecols=columns)
    for batch_path in (sms_batch_pathfile, mailing_batch_pathfile)
)

# ignore_index=True gives the combined frame a fresh 0..n-1 index.
merged_df = pd.concat([df_sms_batch, df_mailing_batch], ignore_index=True)

In [None]:
# Flatten the JSON-encoded 'allocataire' column into 'allocataire-*' columns.
df_json_allocataire = pd.json_normalize(
    merged_df['allocataire'].apply(json.loads)
).add_prefix('allocataire-')

# Both frames are joined row by row on a plain 0..n-1 index.
merged_df.index = pd.RangeIndex(len(merged_df))

# Attach only the allocataire fields used later on, then drop the raw JSON column.
merged_df_unwrapped = merged_df.merge(
    df_json_allocataire[[
        'allocataire-courriel',
        'allocataire-qualite',
        'allocataire-nom',
        'allocataire-prenom',
        'allocataire-telephone',
        'allocataire-date_naissance',
    ]],
    left_index=True,
    right_index=True,
).drop(columns=['allocataire'])

In [None]:
# Flatten the JSON-encoded 'adresse_allocataire' column; only 'code_postal' is kept.
df_json_adresse_allocataire = pd.json_normalize(
    merged_df['adresse_allocataire'].apply(json.loads)
)

# Join row by row on a plain 0..n-1 index.
merged_df_unwrapped.index = pd.RangeIndex(len(merged_df))

df_db_unwrapped = merged_df_unwrapped.merge(
    df_json_adresse_allocataire[['code_postal']],
    left_index=True,
    right_index=True,
).drop(columns=['adresse_allocataire'])

In [None]:
# Rename columns to the names used by the campaign files.
# Columns that are not listed here keep their current name.
column_mapping = {
    # beneficiary columns
    'nom': 'beneficiaire_nom',
    'prenom': 'beneficiaire_prenom',
    'genre': 'beneficiaire_genre',
    'date_naissance': 'beneficiaire_date_naissance',
    'id_psp': 'code',
    # allocataire columns (flattened from JSON)
    'allocataire-nom': 'allocataire_nom',
    'allocataire-prenom': 'allocataire_prenom',
    'allocataire-qualite': 'allocataire_qualite',
    'allocataire-date_naissance': 'allocataire_date_naissance',
    'allocataire-courriel': 'email',
    'allocataire-telephone': 'telephone',
}

df_db_unwrapped.columns = [
    column_mapping.get(current_name, current_name)
    for current_name in df_db_unwrapped.columns
]

In [None]:
# Only keep the columns needed for the campaign.
# The explicit .copy() makes df_campaign an independent frame: the following
# cells add and overwrite columns on it, and assigning into a selection of
# another DataFrame would otherwise trigger pandas' SettingWithCopyWarning.
df_campaign = df_db_unwrapped[['email',
                               'allocataire_nom',
                               'allocataire_prenom',
                               'allocataire_date_naissance',
                               'allocataire_qualite',
                               'beneficiaire_prenom',
                               'beneficiaire_nom',
                               'beneficiaire_genre',
                               'beneficiaire_date_naissance',
                               'code',
                               'telephone',
                               'zrr',
                               'qpv',
                               'code_postal',
                               'id']].copy()

In [None]:
# Convert the beneficiary and allocataire birth dates to datetimes.
# errors='coerce' turns values that cannot be parsed into NaT instead of raising.
# NOTE(review): no explicit format is given, so parsing relies on pandas'
# inference - confirm the input date format is unambiguous.
for birth_date_column in ('beneficiaire_date_naissance', 'allocataire_date_naissance'):
    df_campaign[birth_date_column] = pd.to_datetime(
        df_campaign[birth_date_column], errors='coerce'
    )

In [None]:
# Gendered "born on" label for the beneficiary:
# 'Née le' when the gender is 'F', 'Né le' for every other value.
df_campaign['neele'] = np.where(
    df_campaign['beneficiaire_genre'] == 'F', 'Née le', 'Né le'
)

In [None]:
# Derive the allocataire gender from the title: 'Mme' -> 'F', anything else -> 'M'.
is_madame = df_campaign['allocataire_qualite'].eq('Mme')
df_campaign['allocataire_genre'] = is_madame.map({True: 'F', False: 'M'})

In [None]:
# Capitalize first names and last names (first letter upper-case, rest lower-case).
# Missing values are kept missing: casting them with astype(str) alone would
# turn them into the literal text "Nan"/"None" in the exported files.
for name_column in (
    'allocataire_prenom',
    'allocataire_nom',
    'beneficiaire_prenom',
    'beneficiaire_nom',
):
    raw_names = df_campaign[name_column]
    capitalized = raw_names.astype(str).str.capitalize()
    # Series.where keeps the capitalized value where the original is present
    # and leaves NaN everywhere else.
    df_campaign[name_column] = capitalized.where(raw_names.notna())

In [None]:
# Switch phone numbers to international format: a leading "0" becomes "+33".
df_campaign['telephone'] = df_campaign['telephone'].replace(
    to_replace=r'^0',
    value='+33',
    regex=True,
)

In [None]:
# "il" / "elle" values for column "pronom".
# 'elle' only when the beneficiary gender is 'F'; every other value falls back
# to 'il', so the fallback matches the 'neele' column (which defaults to the
# masculine 'Né le') instead of mixing a masculine label with a feminine pronoun.
df_campaign['pronom'] = np.where(df_campaign['beneficiaire_genre'] == 'F', 'elle', 'il')

In [None]:
# Render the beneficiary birth date as DD/MM/YYYY text.
# NOTE(review): this column was already passed through pd.to_datetime in an
# earlier cell, so the format argument below probably has no effect - confirm.
df_campaign['beneficiaire_date_naissance'] = (
    pd.to_datetime(df_campaign['beneficiaire_date_naissance'], format='%d-%m-%Y')
    .dt.strftime('%d/%m/%Y')
)

In [None]:
# Generate the URLs for the QR code
import urllib.parse
import base64

from Crypto.Cipher import AES
from Crypto.Util.Padding import pad

# Raw AES key bytes, decoded from the base64-encoded secret.
base_64_key = base64.b64decode(qr_code_secret_key)

# DataFrame column -> short query-parameter name used inside the QR code URL.
key_mapping = dict(
    beneficiaire_prenom='bp',
    beneficiaire_nom='bn',
    beneficiaire_genre='bg',
    beneficiaire_date_naissance='bdn',
    code='c',
)

def encrypt(data):
    """Encrypt a text with AES-CBC and return it as a base64 string.

    The text is UTF-8 encoded and padded to the AES block size. The IV of the
    cipher object is prepended to the ciphertext before base64 encoding, so
    the returned value is base64(iv + ciphertext).

    No IV is passed to AES.new; presumably the library generates a fresh
    random one per call - see the PyCryptodome CBC documentation.
    """
    padded_plaintext = pad(data.encode('utf-8'), AES.block_size)
    cbc_cipher = AES.new(base_64_key, AES.MODE_CBC)
    encrypted = cbc_cipher.encrypt(padded_plaintext)
    return base64.b64encode(cbc_cipher.iv + encrypted).decode('utf-8')

def generate_encrypted_url_column(row):
    """Build the QR code URL for one row.

    The row values whose label appears in ``key_mapping`` are URL-encoded as a
    query string under their short names, the query string is encrypted with
    ``encrypt``, and the result is appended (percent-quoted) as the fragment
    of ``qr_code_base_url``.

    The labels are read from ``row`` itself rather than from the module-level
    ``df_campaign``, so the function does not depend on which frame it is
    applied to. Labels missing from ``key_mapping`` are ignored.

    Args:
        row: a mapping-like row (e.g. the Series passed by
            ``DataFrame.apply(..., axis=1)``).

    Returns:
        The full URL as a string.
    """
    short_params = {
        key_mapping[column]: row[column]
        for column in row.keys()
        if column in key_mapping
    }
    query_string = urllib.parse.urlencode(short_params)
    encrypted_query = encrypt(query_string)
    return f"{qr_code_base_url}#{urllib.parse.quote_plus(encrypted_query)}"

In [None]:
# Generate the QR code URLs.
# Drop any column left over from a previous run so the new one is appended last.
if 'url_qr_code' in df_campaign.columns:
    df_campaign.drop(columns=['url_qr_code'], inplace=True)

df_campaign['url_qr_code'] = df_campaign.apply(generate_encrypted_url_column, axis=1)

In [None]:
# Full snapshot with every column, kept to update information for DITP
# later on in October/November/December.
df_campaign_consolidated = df_campaign.copy()

# Narrow df_campaign down to the columns sent to Link Mobility.
df_campaign = df_campaign.loc[:, [
    'email',
    'allocataire_nom',
    'allocataire_prenom',
    'beneficiaire_prenom',
    'beneficiaire_nom',
    'beneficiaire_genre',
    'beneficiaire_date_naissance',
    'code',
    'telephone',
    'neele',
    'pronom',
    'url_qr_code',
]]

In [None]:
# Split the formatted rows back into their original batches:
# a row belongs to a batch when its 'code' is one of that batch's 'id_psp' values.
in_sms_batch = df_campaign['code'].isin(df_sms_batch['id_psp'])
in_mailing_batch = df_campaign['code'].isin(df_mailing_batch['id_psp'])

df_sms_batch_formatted_for_lm = df_campaign[in_sms_batch]
df_mailing_batch_formatted_for_lm = df_campaign[in_mailing_batch]

In [None]:
# Write each formatted batch to its CSV file, without the index column.
for formatted_batch, output_pathfile in (
    (df_sms_batch_formatted_for_lm, sms_batch_lm_formatted_pathfile),
    (df_mailing_batch_formatted_for_lm, mailing_batch_lm_formatted_pathfile),
):
    formatted_batch.to_csv(output_pathfile, index=False)