In [None]:
import pandas as pd
from dotenv import load_dotenv
import os

# Load environment variables through python-dotenv before any path is read.
load_dotenv()

# Inputs: the enriched INJEP query exports, one per source (CAF, CNOUS, MSA).
# os.environ[...] raises KeyError when a variable is not defined.
injep_caf_pathfile, injep_cnous_pathfile, injep_msa_pathfile = (
    os.environ['INJEP_QUERY_ENRICHED_CAF_OUTPUT_PATHFILE_2025'],
    os.environ['INJEP_QUERY_ENRICHED_CNOUS_OUTPUT_PATHFILE_2025'],
    os.environ['INJEP_QUERY_ENRICHED_MSA_OUTPUT_PATHFILE_2025'],
)

# Input: the Osiris extraction that is joined onto the three exports above.
osiris_extraction_pathfile = os.environ['OSIRIS_EXTRACTION_OUTPUT_PATHFILE_2025']

# Outputs: where the INJEP exports enriched with Osiris columns are written.
(
    injep_enriched_with_osiris_caf_output_pathfile,
    injep_enriched_with_osiris_msa_output_pathfile,
    injep_enriched_with_osiris_cnous_output_pathfile,
) = (
    os.environ['INJEP_OSIRIS_QUERY_ENRICHED_CAF_OUTPUT_PATHFILE_2025'],
    os.environ['INJEP_OSIRIS_QUERY_ENRICHED_MSA_OUTPUT_PATHFILE_2025'],
    os.environ['INJEP_OSIRIS_QUERY_ENRICHED_CNOUS_OUTPUT_PATHFILE_2025'],
)

# Osiris export header -> snake_case column name used in the rest of the notebook.
renamed_cols = {
    'NumeroPassSport': 'id_psp',
    'Siret': 'structure_siret',
    'NomStructure': 'structure_nom',
    'SiegeCodePostal': 'structure_siege_code_postal',
    'SiegeCommune': 'structure_siege_commune',
    'SiegeCodeInsee': 'structure_siege_code_insee',
    'Departement': 'structure_departement',
    'DepartementCode': 'structure_departement_code',
    'Region': 'structure_region',
    'RegionCode': 'structure_region_code',
    'FederationNom': 'federation_nom',
    'FederationSiren': 'federation_siren',
    'DateModification': 'date_modification',
}

# Renamed Osiris columns, in mapping order; reused later to select what gets merged.
osiris_cols = [*renamed_cols.values()]

# Read every Osiris column as text, then apply the renaming in the same expression.
df_osiris = (
    pd.read_csv(osiris_extraction_pathfile, sep=';', encoding='utf-8', dtype=str)
    .rename(columns=renamed_cols)
)

In [None]:
# Explicit dtypes for the columns shared by the CAF, CNOUS and MSA exports.
# Identifiers, codes and dates are read as text so pandas does not reinterpret them;
# flags use the nullable 'boolean' dtype so missing values stay missing.
base_dtype_cols = {
    'id': str,
    'id_psp': str,
    'dossier_id': str,
    'is_eligible': 'boolean',
    # Nullable integer: with a plain `int`, read_csv raises as soon as one age is empty.
    'age': 'Int64',
    'date_naissance': str,
    'genre': str,
    'situation': str,
    'email': str,
    'telephone': str,
    'allocataire_prenom': str,
    'allocataire_nom': str,
    'allocataire_matricule': str,
    'pass_statut': str,
    'date_recours': str,
    'code_insee': str,
    'commune': str,
    'code_commune': str,
    'region': str,
    'code_region': str,
    'departement': str,
    'code_departement': str,
    'zrr': 'boolean',
    'qpv': 'boolean',
    'drom_com': 'boolean',
    'millesime': str,
}

# The CAF export carries three extra sibling-group ("fratrie") flags on top of the base columns.
caf_cols = {
    **base_dtype_cols,
    'fratrie_mixte': 'boolean',
    'fratrie_avec_que_des_eligibles': 'boolean',
    'fratrie_avec_que_des_non_eligibles': 'boolean',
}

# Load the CAF export with the dtypes declared above.
df_caf_injep = pd.read_csv(injep_caf_pathfile, sep=';', dtype=caf_cols)

In [None]:
# CNOUS and MSA exports share the same reader settings and the base dtype mapping.
df_cnous_injep, df_msa_injep = (
    pd.read_csv(pathfile, dtype=base_dtype_cols, sep=';')
    for pathfile in (injep_cnous_pathfile, injep_msa_pathfile)
)

In [None]:
# Align the registration-state column with the snake_case naming of the other columns.
# Reassigning instead of using inplace=True keeps the cell safe to re-run.
df_osiris = df_osiris.rename(columns={'EtatInscription': 'etat_inscription'})

# Registration states that are kept for the merge.
# The first label used to be spelled with a mis-encoded 'é' (mojibake); such a label can
# never equal the value decoded as UTF-8 from the Osiris CSV, so those rows were excluded.
mask_etat = df_osiris['etat_inscription'].isin(['Remboursé', 'En cours de paiement', 'En cours de traitement'])

# Cell output: number of Osiris rows per retained state.
df_osiris.loc[mask_etat, 'etat_inscription'].value_counts()

In [None]:
# Keep only the Osiris rows whose registration state passed the filter.
df_osiris_filtered = df_osiris[mask_etat]

# Reduce Osiris to at most one row per pass before it is used as the right side of a merge:
# - rows without an id_psp are removed first, because pandas.merge matches null keys with
#   each other, which would attach that row to every INJEP row that has no id_psp;
# - keep='first' then retains, for each id_psp, the first row in the current row order.
# NOTE(review): the row order is whatever the CSV provides; confirm that "first" is the
# intended registration when a pass appears several times.
df_osiris_without_duplicates = (
    df_osiris_filtered
    .dropna(subset=['id_psp'])
    .drop_duplicates(subset='id_psp', keep='first')
)

In [None]:
def _left_join_osiris(df_injep, df_osiris_unique):
    """Attach the Osiris columns to one INJEP frame through a left join on 'id_psp'.

    Args:
        df_injep: INJEP frame containing an 'id_psp' column.
        df_osiris_unique: Osiris columns to attach, expected to hold one row per 'id_psp'.

    Returns:
        A new frame in which Osiris columns are null for rows without a match. Columns
        present on both sides keep their name on the INJEP side and get the '_osiris'
        suffix on the Osiris side.

    Raises:
        pandas.errors.MergeError: if 'id_psp' is duplicated in df_osiris_unique, which
            would otherwise silently multiply INJEP rows.
    """
    return pd.merge(
        df_injep,
        df_osiris_unique,
        on='id_psp',
        how='left',
        suffixes=('', '_osiris'),
        validate='many_to_one',
    )


df_caf_merged = _left_join_osiris(df_caf_injep, df_osiris_without_duplicates[osiris_cols])
df_msa_merged = _left_join_osiris(df_msa_injep, df_osiris_without_duplicates[osiris_cols])
df_cnous_merged = _left_join_osiris(df_cnous_injep, df_osiris_without_duplicates[osiris_cols])

In [None]:
import numpy as np


def _apply_osiris_activation(df_merged):
    """Overwrite the activation columns of a merged frame from the Osiris data.

    A row is considered activated when its 'date_modification' (coming from Osiris) is
    not null. For such rows 'pass_statut' becomes 'utilise' and 'date_recours' takes the
    Osiris date; every other row gets 'pass_statut' = 'non_utilise' and keeps its
    existing 'date_recours'.

    Args:
        df_merged: frame holding 'date_modification', 'pass_statut' and 'date_recours'.
            It is modified in place.

    Returns:
        None.
    """
    has_osiris_date = df_merged['date_modification'].notna()
    df_merged['pass_statut'] = np.where(has_osiris_date, 'utilise', 'non_utilise')
    # NOTE(review): rows without an Osiris match are forced to 'non_utilise' but keep
    # whatever 'date_recours' they already had; confirm this combination is intended.
    df_merged['date_recours'] = np.where(has_osiris_date, df_merged['date_modification'], df_merged['date_recours'])


_apply_osiris_activation(df_caf_merged)
_apply_osiris_activation(df_cnous_merged)
_apply_osiris_activation(df_msa_merged)

In [None]:
# Sanity check: no row may carry an Osiris date while still being flagged as unused.
# Context: the Osiris data (obtained on 12/12/2025) is more recent than the initial data
# (12/04/2025), so Osiris takes precedence for everything related to activation.
# Whenever date_modification is present, the pass must be marked as used.
assert not (df_caf_merged['date_modification'].notna() & df_caf_merged['pass_statut'].eq('non_utilise')).any()
assert not (df_cnous_merged['date_modification'].notna() & df_cnous_merged['pass_statut'].eq('non_utilise')).any()
assert not (df_msa_merged['date_modification'].notna() & df_msa_merged['pass_statut'].eq('non_utilise')).any()

In [None]:
# Osiris columns that go to the export: all of them except the join key and date_modification.
final_osiris_cols = [col for col in osiris_cols if col not in ('id_psp', 'date_modification')]

# Columns exported for every source, in output order.
base_cols_to_keep = [
    'id',
    'millesime',
    'genre',
    'situation',
    'age',
    'date_naissance',
    'code_insee',
    'commune',
    'code_commune',
    'departement',
    'code_departement',
    'region',
    'code_region',
    'drom_com',
    'qpv',
    'zrr',
    'pass_statut',
    'date_recours',
    'email',
    'telephone',
    'type_contact',
]

# CAF additionally exports its three "fratrie" flags, placed before the Osiris columns.
caf_cols_to_keep = (
    base_cols_to_keep
    + ['fratrie_mixte', 'fratrie_avec_que_des_eligibles', 'fratrie_avec_que_des_non_eligibles']
    + final_osiris_cols
)
cnous_msa_cols_to_keep = base_cols_to_keep + final_osiris_cols

# The merge must not have added or removed rows; selecting the columns here also
# raises KeyError early if one of the expected columns is missing.
assert df_caf_merged[caf_cols_to_keep].shape[0] == df_caf_injep.shape[0]
assert df_msa_merged[cnous_msa_cols_to_keep].shape[0] == df_msa_injep.shape[0]
assert df_cnous_merged[cnous_msa_cols_to_keep].shape[0] == df_cnous_injep.shape[0]

In [None]:
# Write each enriched frame, restricted to its export columns, as a ';'-separated CSV
# without the index.
df_caf_merged.loc[:, caf_cols_to_keep].to_csv(
    injep_enriched_with_osiris_caf_output_pathfile,
    sep=';',
    index=False,
)
df_cnous_merged.loc[:, cnous_msa_cols_to_keep].to_csv(
    injep_enriched_with_osiris_cnous_output_pathfile,
    sep=';',
    index=False,
)
df_msa_merged.loc[:, cnous_msa_cols_to_keep].to_csv(
    injep_enriched_with_osiris_msa_output_pathfile,
    sep=';',
    index=False,
)