# Génération d'un export de données pour les campagnes de mails

## Traitements 

- Load list of eligible beneficiaries from 2025
- JSON extraction for the allocataire field
- Data mapping and deletion of unusable data
- URL generation containing the code
- Output two separate files
   - One where the beneficiary is the same as the allocataire (direct beneficiary)
   - Another one where the beneficiary is different from the allocataire (indirect beneficiary)



In [None]:
import time
import pandas as pd
from dotenv import load_dotenv
import os
import json

from data.utils.emailing_utils import clean_phone_number_in_place, format_benef_birth_date_in_place, \
  format_born_text_in_place, format_allocataire_benef_names_in_place, internationalize_phone_number_in_place, \
  get_indirect_beneficiaries, get_direct_beneficiaries

# Load variables from a .env file into the process environment so the
# os.environ lookups below can find them.
load_dotenv()

# Wall-clock start; the last cell subtracts it to report the total run time.
start_time = time.time()

# Required settings: each lookup raises KeyError when the variable is not set.
# Path of the parquet file read into df_main.
benef_2025 = os.environ['BENEF_2025_PARQUET_PATHFILE']
# QR-code URL settings.
# NOTE(review): not referenced by any cell visible in this notebook — confirm they are still needed.
qr_code_secret_key = os.environ['BENEF_2025_QR_CODE_URL_SECRET']
qr_code_base_url = os.environ['BENEF_2025_QR_CODE_BASE_URL']
# Destination CSV paths: "B" receives the direct beneficiaries,
# "B_AND_A" receives the indirect ones (see the export cell).
pathfile_campaign_csv_output_b = os.environ['CAMPAIGN_CSV_OUTPUT_B']
pathfile_campaign_csv_output_b_and_a = os.environ['CAMPAIGN_CSV_OUTPUT_B_AND_A']

In [None]:
# Read the beneficiaries parquet file (path taken from the environment) into the main DataFrame.
df_main = pd.read_parquet(benef_2025)

In [None]:
# Give df_main a positional 0..n-1 index BEFORE expanding the JSON, so that both
# frames merged below are guaranteed to carry the same row labels.
df_main.index = pd.RangeIndex(start=0, stop=len(df_main), step=1)

# Parse every JSON string of the 'allocataire' column and expand the parsed
# objects into one column per key, each prefixed with 'allocataire_'.
# A plain list is passed on purpose: the result is then labelled by position,
# whatever json_normalize does with the index of a Series input (newer pandas
# releases retain it, which would break the index-on-index merge below).
df_json_normalized = pd.json_normalize(df_main['allocataire'].apply(json.loads).tolist())
df_json_normalized = df_json_normalized.add_prefix('allocataire_')

# Row-by-row (index-on-index) join of the original columns with the expanded ones.
df_unwrapped_alloc = pd.merge(df_main, df_json_normalized, left_index=True, right_index=True)

print(f"Number of beneficiaries : {len(df_unwrapped_alloc)}")

In [None]:
# Users with only a phone number: the allocataire has no e-mail address
# (missing or empty string) AND has a phone number that is actually present
# (neither missing nor an empty string).
mask_phone_only = (
    (
        df_unwrapped_alloc['allocataire_courriel'].isna()
        | df_unwrapped_alloc['allocataire_courriel'].eq('')
    )
    & (
        df_unwrapped_alloc['allocataire_telephone'].notna()
        & df_unwrapped_alloc['allocataire_telephone'].ne('')
    )
)

# Explicit copy: later cells modify this frame in place, so it must not be a
# selection that still refers back to the unfiltered frame.
df_unwrapped_alloc = df_unwrapped_alloc[mask_phone_only].copy()

print(f"Number of beneficiaries with phone number only: {len(df_unwrapped_alloc)}")

In [None]:
# Clean the phone numbers when possible.
# NOTE(review): project-local helper; its name and the ignored return value
# suggest it modifies the DataFrame in place — confirm in data.utils.emailing_utils.
clean_phone_number_in_place(df_unwrapped_alloc)

In [None]:
# Source column name -> column name used in the campaign export.
# Entries mapping a name to itself leave that column untouched.
column_mapping = {
    # kept as-is
    'id': 'id',
    'allocataire_qualite': 'allocataire_qualite',
    'allocataire_nom': 'allocataire_nom',
    'allocataire_prenom': 'allocataire_prenom',
    # beneficiary fields get an explicit 'beneficiaire_' prefix
    'prenom': 'beneficiaire_prenom',
    'nom': 'beneficiaire_nom',
    'genre': 'beneficiaire_genre',
    'date_naissance': 'beneficiaire_date_naissance',
    # shortened names
    'id_psp': 'code',
    'allocataire_telephone': 'telephone',
}

# Relabel the columns on the existing frame: a label found in the mapping takes
# its new name, every other label is kept unchanged.
df_unwrapped_alloc.columns = df_unwrapped_alloc.columns.map(
    lambda label: column_mapping.get(label, label)
)

In [None]:
# Only keep the columns needed for the campaign. A KeyError is raised if any of
# them is missing after the renaming step.
# The explicit copy matters: the following cells format this frame in place, so
# it has to be an independent DataFrame rather than a selection derived from
# df_unwrapped_alloc (which would trigger SettingWithCopyWarning on write).
df_campaign = df_unwrapped_alloc[
    [
        'id',
        'allocataire_nom',
        'allocataire_prenom',
        'beneficiaire_prenom',
        'beneficiaire_nom',
        'beneficiaire_genre',
        'beneficiaire_date_naissance',
        'code',
        'telephone',
    ]
].copy()

In [None]:
# Format date of birth, gender text, capitalize names and internationalize
# phone numbers in place.
# NOTE(review): all four helpers are project-local and their return values are
# ignored, so they are expected to mutate df_campaign directly; whether the call
# order matters cannot be told from this notebook — keep it as is.
format_benef_birth_date_in_place(df_campaign)
format_born_text_in_place(df_campaign)
format_allocataire_benef_names_in_place(df_campaign)
internationalize_phone_number_in_place(df_campaign)

In [None]:
# Split the campaign rows into two frames using the project helpers:
# indirect beneficiaries (allocataire differs from the beneficiary, per the
# notebook header) and direct beneficiaries (allocataire is the beneficiary).
# NOTE(review): the exact comparison rule lives in data.utils.emailing_utils.
df_alloc_diff_benef = get_indirect_beneficiaries(df_campaign)
df_alloc_eq_benef = get_direct_beneficiaries(df_campaign)

In [None]:
# Columns written to both campaign files, in output order.
columns_to_keep = [
    'id',
    'telephone',
    'allocataire_nom',
    'allocataire_prenom',
    'beneficiaire_prenom',
    'beneficiaire_nom',
]

# Direct beneficiaries go to the "B" file, indirect ones to the "B and A" file.
# The DataFrame index is not written to either CSV.
df_alloc_eq_benef.loc[:, columns_to_keep].to_csv(
    path_or_buf=pathfile_campaign_csv_output_b,
    index=False,
)
df_alloc_diff_benef.loc[:, columns_to_keep].to_csv(
    path_or_buf=pathfile_campaign_csv_output_b_and_a,
    index=False,
)

# Report the wall-clock duration since start_time was taken in the setup cell.
end_time = time.time()
elapsed_seconds = end_time - start_time
print(f"Notebook executed in {elapsed_seconds:.2f} seconds")