## Summary
Emailing campaign for the first wave

## Process

- Load list of eligible beneficiaries from 2025
- JSON extraction for the allocataire field
- Data mapping and deletion of unusable data
- URL generation containing the code
- Output two separate files
   - One where the benef is the same as the allocataire (direct benef)
   - The other where the benef is different from the allocataire (indirect benef)



In [None]:
import time
import pandas as pd
from dotenv import load_dotenv
import os
import json

from data.utils.emailing_utils import format_allocataire_benef_names_in_place, format_born_text_in_place, \
    format_benef_birth_date_in_place, get_indirect_beneficiaries, get_direct_beneficiaries

# Load the dotenv configuration before reading any setting from the environment.
load_dotenv()

# Wall-clock reference used by the last cell to report the total run time.
start_time = time.time()

# All settings are mandatory: a missing variable raises KeyError right here.
environ = os.environ
benef_2025 = environ['BENEF_2025_PARQUET_PATHFILE']  # input parquet path
qr_code_secret_key = environ['BENEF_2025_QR_CODE_URL_SECRET']  # base64-encoded key
qr_code_base_url = environ['BENEF_2025_QR_CODE_BASE_URL']  # URL prefix of the QR code link
pathfile_campaign_csv_output_b = environ['CAMPAIGN_CSV_OUTPUT_B']  # CSV output path
pathfile_campaign_csv_output_b_and_a = environ['CAMPAIGN_CSV_OUTPUT_B_AND_A']  # CSV output path

In [None]:
# Load the beneficiaries parquet file whose path comes from BENEF_2025_PARQUET_PATHFILE.
df_main = pd.read_parquet(benef_2025)

In [None]:
# Give df_main a clean 0..n-1 index *before* expanding the JSON, so that both
# frames are guaranteed to share the same row labels when joined below.
df_main = df_main.reset_index(drop=True)

# Each `allocataire` cell holds a JSON document: decode it, then flatten it into
# one `allocataire_<field>` column per field. A plain list is passed on purpose:
# json_normalize then always builds a fresh 0..n-1 index, whereas newer pandas
# releases may keep the index of a Series input and break the positional join.
allocataire_records = df_main['allocataire'].apply(json.loads).tolist()
df_json_normalized = pd.json_normalize(allocataire_records).add_prefix('allocataire_')

# Row-by-row join (inner, on the index) of the original columns with the
# flattened allocataire fields.
df_unwrapped_alloc = df_main.merge(df_json_normalized, left_index=True, right_index=True)

print(f"Number of beneficiaries : {len(df_unwrapped_alloc)}")

In [None]:
# Drop the rows whose allocataire email is missing (NaN/None) or an empty string.
allocataire_emails = df_unwrapped_alloc['allocataire_courriel']
mask_not_existing_email = allocataire_emails.isna() | (allocataire_emails == '')

df_unwrapped_alloc = df_unwrapped_alloc.loc[~mask_not_existing_email]

print(f"Number of beneficiaries with existing email : {len(df_unwrapped_alloc)}")

In [None]:
# Source column name -> name used in the campaign files.
# Identity entries are kept so the mapping lists every column of interest.
column_mapping = {
    # technical identifier
    'id': 'id',
    # allocataire fields (flattened from the JSON document)
    'allocataire_courriel': 'email',
    'allocataire_qualite': 'allocataire_qualite',
    'allocataire_nom': 'allocataire_nom',
    'allocataire_prenom': 'allocataire_prenom',
    # beneficiary fields
    'prenom': 'beneficiaire_prenom',
    'nom': 'beneficiaire_nom',
    'genre': 'beneficiaire_genre',
    'date_naissance': 'beneficiaire_date_naissance',
    'id_psp': 'code',
}

# Columns absent from the mapping keep their current name.
df_unwrapped_alloc = df_unwrapped_alloc.rename(columns=column_mapping)

In [None]:
# Only keep the columns needed to build the campaign files.
campaign_columns = [
    'id',
    'email',
    'allocataire_nom',
    'allocataire_prenom',
    'beneficiaire_prenom',
    'beneficiaire_nom',
    'beneficiaire_genre',
    'beneficiaire_date_naissance',
    'code',
]

# Take an explicit copy: df_campaign is modified in place afterwards (the
# `*_in_place` formatting helpers and the `url_qr_code` column assignment).
# Doing that on a slice of df_unwrapped_alloc raises SettingWithCopyWarning
# and leaves it unclear which frame actually receives the changes.
df_campaign = df_unwrapped_alloc[campaign_columns].copy()

In [None]:
# Run the in-place formatting helpers from emailing_utils, in this order:
# gender ("born") text, allocataire/beneficiary names, then date-of-birth text.
for format_in_place in (
    format_born_text_in_place,
    format_allocataire_benef_names_in_place,
    format_benef_birth_date_in_place,
):
    format_in_place(df_campaign)

In [None]:
# Generate the URLs for the QR code
import hmac
import hashlib
import urllib.parse
import base64

from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad
from Crypto.Random import get_random_bytes

# AES key bytes, obtained by base64-decoding the secret read from the environment.
base_64_key = base64.b64decode(qr_code_secret_key)

# Campaign column -> short query-parameter name embedded in the encrypted URL.
# Columns that are not listed here are left out of the URL.
key_mapping = {
    'beneficiaire_prenom': 'bp',
    'beneficiaire_nom': 'bn',
    'beneficiaire_genre': 'bg',
    'beneficiaire_date_naissance': 'bdn',
    'code': 'c',
    'situation': 'situation',
    'organisme': 'organisme',
    'id': 'id',
}

def encrypt(data):
    """Encrypt ``data`` with AES in CBC mode and return it as base64 text.

    A new cipher is built on every call with the module-level key and no
    explicit IV, so the IV is taken from the cipher object once it is created.
    The IV is prepended to the ciphertext so the reader can recover it.

    Args:
        data: Text to encrypt; it is UTF-8 encoded and padded to the AES
            block size.

    Returns:
        The base64 encoding (as ``str``) of ``iv + ciphertext``.
    """
    padded_plaintext = pad(data.encode('utf-8'), AES.block_size)
    cipher = AES.new(base_64_key, AES.MODE_CBC)
    payload = cipher.iv + cipher.encrypt(padded_plaintext)
    return base64.b64encode(payload).decode('utf-8')


def generate_encrypted_url_column(row):
    """Build the encrypted QR-code URL for one campaign row.

    The row's columns listed in ``key_mapping`` are renamed to their short
    parameter names, URL-encoded as a query string, encrypted with
    ``encrypt`` and appended to ``qr_code_base_url`` as the URL fragment.

    Args:
        row: One row of the campaign frame (a ``pandas.Series`` indexed by
            column name), as passed by ``DataFrame.apply(..., axis=1)``.

    Returns:
        The full URL string ``<base url>#<url-quoted encrypted parameters>``.
    """
    # Read the columns from the row itself rather than from the global
    # df_campaign, so the function works on whatever frame it is applied to.
    # Unmapped columns are skipped up front instead of sharing a None key.
    params = {
        key_mapping[column]: row[column]
        for column in row.index
        if column in key_mapping
    }
    encoded_params = urllib.parse.urlencode(params)
    encoded_encrypted_params = encrypt(encoded_params)
    # The base64 payload may contain '+', '/' and '=', hence the extra quoting.
    return f"{qr_code_base_url}#{urllib.parse.quote_plus(encoded_encrypted_params)}"


# Remove any URL column left over from a previous run of this cell, then
# rebuild it row by row.
url_column = 'url_qr_code'
if url_column in df_campaign.columns:
    del df_campaign[url_column]

df_campaign[url_column] = df_campaign.apply(generate_encrypted_url_column, axis=1)

In [None]:
# # Debugging purposes
# # # AES decryption test
# def generate_decrypted_url_column(row):
#     encrypted_part = urllib.parse.unquote_plus(row['url_qr_code'].replace(qr_code_base_url+'#', ''))
#     return decrypt(encrypted_part)

# def decrypt(data):
#     enc_data_bytes = base64.b64decode(data)
#     iv = enc_data_bytes[:AES.block_size]
#     ct = enc_data_bytes[AES.block_size:]
#     decrypt_cipher = AES.new(base_64_key, AES.MODE_CBC, iv)
#     decrypted_ct = decrypt_cipher.decrypt(ct)
#     pt = unpad(decrypted_ct, AES.block_size)
#     url_param = pt.decode('utf-8')
#     return url_param

# df_campaign['query_params_decrypted'] = df_campaign.apply(generate_decrypted_url_column, axis=1)

In [None]:
# Split the campaign rows into the two target audiences using the emailing_utils helpers.
# Per the notebook summary: "indirect" = beneficiary differs from the allocataire,
# "direct" = beneficiary is the allocataire (exact matching rule lives in the helpers).
df_alloc_diff_benef = get_indirect_beneficiaries(df_campaign)
df_alloc_eq_benef = get_direct_beneficiaries(df_campaign)

In [None]:
# Columns written to the campaign CSV files; the birth date and the raw code
# are not exported.
columns_to_keep = [
    'id',
    'email',
    'allocataire_nom',
    'allocataire_prenom',
    'beneficiaire_prenom',
    'beneficiaire_nom',
    'beneficiaire_genre',
    'url_qr_code'
]

# Direct beneficiaries go to CAMPAIGN_CSV_OUTPUT_B, indirect ones to
# CAMPAIGN_CSV_OUTPUT_B_AND_A; the index is not written.
exports = (
    (df_alloc_eq_benef, pathfile_campaign_csv_output_b),
    (df_alloc_diff_benef, pathfile_campaign_csv_output_b_and_a),
)
for frame, destination in exports:
    frame[columns_to_keep].to_csv(destination, index=False)

# Report the elapsed wall-clock time since the first cell.
end_time = time.time()
print(f"Notebook executed in {end_time - start_time:.2f} seconds")