# Traitement des données de financements 

In [1]:
#importer les packages
import pandas as pd
import numpy as np
from code_utils.utils import extract_first_name,replace_all,get_scanR_structure
from code_utils.cached_data_handler import get_structure, get_person
from code_utils.features_into_dictionnary import persons, projects, address
from tqdm import tqdm
import pprint as pp
tqdm.pandas()
from code_utils.pickle import load_cache,write_cache
import os
from dotenv import load_dotenv
import requests

# Load the variables declared in the local .env file into the process
# environment, then expose the ones this notebook needs as module-level names.
# Each value is None when the corresponding variable is not set.
load_dotenv()

Authorization = os.environ.get('Authorization_access_185')
Authorization_ORCID = os.environ.get('Authorization_cluster_BSO_ORCID')
url_cluster = os.environ.get('url_cluster')

In [2]:
# Per-source configuration (URLs, column names, ...) read from sources.json.
# Later cells index it as sources[source][<key>].
sources=pd.read_json('sources.json')

In [4]:
# Funding source processed by this run: the key at position 3 of `sources`
# ('SIRANO' in the recorded output). Change the index to process another source.
source=list(sources.keys())[3]
source

'SIRANO'

In [5]:
# Caches for structures, persons and ORCID lookups, one pickle file per data source.
def _load_or_init_cache(cache_path):
    """Return the cache stored at ``cache_path``, creating an empty one if it cannot be loaded.

    Parameters
    ----------
    cache_path : str
        Path of the pickle file handled by ``load_cache`` / ``write_cache``.

    Returns
    -------
    The value returned by ``load_cache``; when loading fails, a new empty dict
    that has just been written to ``cache_path``.
    """
    cache = {}
    try:
        cache = load_cache(cache, cache_path)
    except Exception:
        # `except Exception` rather than a bare `except:` so that KeyboardInterrupt
        # and SystemExit still stop the notebook instead of being mistaken for a
        # missing cache.
        # NOTE(review): any load failure (not only a missing file) still replaces
        # the file with an empty cache, as before -- narrow the exception type once
        # the errors raised by load_cache are confirmed.
        os.makedirs(os.path.dirname(cache_path), exist_ok=True)
        write_cache(cache, cache_path)
    return cache


cached_data = _load_or_init_cache(f"./DATA/{source}/caches/cached_{source.lower()}_data.pkl")
cached_data_persons = _load_or_init_cache(f"./DATA/{source}/caches/cached_{source.lower()}_data_persons.pkl")
cached_data_orcid = _load_or_init_cache(f"./DATA/{source}/caches/cached_{source.lower()}_data_orcid.pkl")

# Données partenaires

In [6]:
# Fetch the partners table from the provider's site / file.
# Each branch builds `df_partenaires` from the location(s) configured in `sources` for the selected source.
if source=='ANR':
    # JSON payload exposing the column names under 'columns' and the rows under 'data'.
    page_partenaires_10 = requests.get(sources[source]['url_partenaires']).json()
    colonnes_partenaires_10 = page_partenaires_10['columns']
    donnees_partenaires_10 = page_partenaires_10['data']
    df_partenaires=pd.DataFrame(data=donnees_partenaires_10,columns=colonnes_partenaires_10)
elif source=='ANSES':
    # Excel sheet: the first data row holds the labels used as column names; data starts on the next row.
    df_from_anses=pd.read_excel(sources[source]['url_partenaires'])
    df=df_from_anses.iloc[1:,:]
    df.columns=list(df_from_anses.iloc[0,:])
    # Map every original column label containing 'Équipe' (two label variants are normalised) to its position.
    # Only the number of entries is used below.
    dict_equipe={list(df_from_anses.columns)[k].replace('Équipe 10 ','Équipe 10').replace('Équipe13','Équipe 13'):k for k in range (len(list(df_from_anses.columns))) if list(df_from_anses.columns)[k].find('Équipe')>=0}
    list_df=[]
    # Position of the first team-specific column; each team occupies 6 consecutive columns.
    number=3
    for n in range(1,len(dict_equipe)+1):
        # One frame per team: the 3 leading columns plus that team's 6 columns.
        equipe_n=pd.concat([df.iloc[:,0:3],df.iloc[:,number:number+6]], axis=1)
        list_df.append(equipe_n)
        number+=6
    # Stack the per-team frames, dropping rows where all the listed columns are missing
    # (the 'nom' column is listed twice in `subset`).
    df_partenaires=pd.concat([list_df[k].dropna(subset=[sources[source]['nom'], sources[source]['prenom'],sources[source]['nom_structure'], sources[source]['nom'], 'Pays'], how='all') for k in range(len(list_df))])
    
elif source=='IRESP':
    # Two semicolon-separated UTF-8 CSV files, stacked one after the other.
    df_partenaires1=pd.read_csv(sources[source]['url_partenaires1'] ,sep=";", encoding='UTF-8')
    df_partenaires2=pd.read_csv(sources[source]['url_partenaires2'] ,sep=";", encoding='UTF-8')
    df_partenaires=pd.concat([df_partenaires1,df_partenaires2])
elif source=='ADEME':
    # Comma-separated CSV decoded as ISO-8859-1; malformed lines are skipped.
    df_partenaires=pd.read_csv(sources[source]['url_partenaires'] ,sep=",", encoding='ISO-8859-1', on_bad_lines='skip')
else:    
    # Default: semicolon-separated CSV decoded as ISO-8859-1.
    df_partenaires=pd.read_csv(sources[source]['url_partenaires'] ,sep=";", encoding='ISO-8859-1')

# Renumber the rows from 0 and discard the previous index.
df_partenaires=df_partenaires.reset_index()
del df_partenaires['index']

# Matcher établissement

In [6]:
# Pairs (text to find, replacement) applied in this order to the lower-cased name:
# " d e" becomes " d'e", " l a" becomes " l'a", and so on.
_ELISIONS = (
    (" d e", " d'e"), (" d a", " d'a"), (" d i", " d'i"), (" d o", " d'o"),
    (" d u", " d'u"), (" d y", " d'y"), (" d h", " d'h"),
    (" l e", " l'e"), (" l a", " l'a"), (" l i", " l'i"), (" l o", " l'o"),
    (" l u", " l'u"), (" l y", " l'y"), (" l h", " l'h"),
)


def _normaliser_nom_structure(valeur):
    """Return the comparison key of a structure name.

    The value is converted to ``str`` and lower-cased, each pair of ``_ELISIONS``
    is applied in order, and the result is passed through ``replace_all``.
    """
    texte = str(valeur).lower()
    for sans_apostrophe, avec_apostrophe in _ELISIONS:
        texte = texte.replace(sans_apostrophe, avec_apostrophe)
    return replace_all(texte)


# `id_struct` starts as the same object as `df_partenaires`, so the key column
# added here is also visible on `df_partenaires`.
id_struct = df_partenaires
_colonne_nom_structure = sources[source]['nom_structure']
_colonne_cle = f"{_colonne_nom_structure}2"
id_struct[_colonne_cle] = id_struct.loc[:, _colonne_nom_structure].apply(_normaliser_nom_structure)
# Keep one row per distinct normalised name.
id_struct = id_struct.drop_duplicates(subset=[_colonne_cle])

In [None]:
# Look up every distinct structure. The Series returned by progress_apply is
# discarded: get_structure is presumably called for the entries it adds to
# `cached_data`, which the following cells read back by structure name.
id_struct.progress_apply(lambda row: get_structure(row,source,cached_data,sources[source]['nom_structure'],sources[source]['ville'],sources[source]['pays'],sources[source]['code_projet'],False), axis=1)
# Persist to the file the cache is loaded from at the top of the notebook
# (lower-cased source name). The previous name used `source` as-is, so on a
# case-sensitive filesystem the saved cache was never reloaded.
write_cache(cached_data,f"./DATA/{source}/caches/cached_{source.lower()}_data.pkl")

In [None]:
# Number of entries currently held in the structures cache.
len(cached_data)

In [None]:
# Read back, for every structure name, the value stored under that name in
# `cached_data` (raises KeyError when a name has no cache entry).
_colonne_nom_structure = sources[source]['nom_structure']
id_struct['id_structure_matcher'] = id_struct[_colonne_nom_structure].apply(cached_data.__getitem__)

In [None]:
# Visual check of the deduplicated structures and their matched identifiers.
id_struct

In [11]:
# Renumber the rows from 0 (the previous index is discarded), then persist the
# matched structures in both Excel and JSON form.
id_struct = id_struct.reset_index().drop(columns='index')
id_struct.to_excel(f"./DATA/{source}/df_partenaires.xlsx")
id_struct.to_json(f"./DATA/{source}/df_partenaires.json")

In [None]:
# Pairs (text to find, replacement) applied in this order to the lower-cased name:
# " d e" becomes " d'e", " l a" becomes " l'a", and so on.
_ELISIONS = (
    (" d e", " d'e"), (" d a", " d'a"), (" d i", " d'i"), (" d o", " d'o"),
    (" d u", " d'u"), (" d y", " d'y"), (" d h", " d'h"),
    (" l e", " l'e"), (" l a", " l'a"), (" l i", " l'i"), (" l o", " l'o"),
    (" l u", " l'u"), (" l y", " l'y"), (" l h", " l'h"),
)


def _normaliser_nom_structure(valeur):
    """Return the comparison key of a structure name.

    The value is converted to ``str`` and lower-cased, each pair of ``_ELISIONS``
    is applied in order, and the result is passed through ``replace_all``.
    """
    texte = str(valeur).lower()
    for sans_apostrophe, avec_apostrophe in _ELISIONS:
        texte = texte.replace(sans_apostrophe, avec_apostrophe)
    return replace_all(texte)


_colonne_nom_structure = sources[source]['nom_structure']
_colonne_cle = f"{_colonne_nom_structure}2"

# Reload the matched structures saved earlier and rebuild their comparison key.
id_struct = pd.read_json(f"./DATA/{source}/df_partenaires.json")
id_struct = id_struct[[_colonne_nom_structure, 'id_structure_matcher']]
id_struct[_colonne_cle] = id_struct.loc[:, _colonne_nom_structure].apply(_normaliser_nom_structure)

# Same key on the full partners table, then a left join so every partner row is kept.
df_partenaires[_colonne_cle] = df_partenaires.loc[:, _colonne_nom_structure].apply(_normaliser_nom_structure)
df_partenaires_struct = df_partenaires.merge(
    id_struct[[_colonne_cle, 'id_structure_matcher']],
    how='left',
    on=_colonne_cle,
)
df_partenaires_struct

In [None]:
# Complete the data with the structures referenced in the scanR projects dump.
url_scanr='https://storage.gra.cloud.ovh.net/v1/AUTH_32c5d10cb0fe4519b957064a111717e3/scanR/projects.json'
requete_scanR = requests.get(url_scanr)
page_scanR = requete_scanR.json()
df_scanR = pd.DataFrame(page_scanR)


def _id_structure_participant(participant):
    """Return the 'structure' entry of a participant dict, or None for non-dict values."""
    if not isinstance(participant, dict):
        return None
    return participant.get('structure')


# One row per (project, participant), renumbered from 0 under an index named 'index'.
scanR = (
    df_scanR.explode('participants')
    .loc[:, ['id', 'participants']]
    .rename(columns={'id': 'id_anr'})
)
scanR['index'] = list(range(len(scanR)))
scanR = scanR.set_index('index')

# Structure identifier and structure name of each participant.
scanR['id_structure_scanr'] = scanR['participants'].apply(_id_structure_participant)
scanR['nom_struct'] = scanR['participants'].apply(get_scanR_structure)
scanR = scanR.drop(columns='participants')

# Keep one row per structure name, then one row per normalised key
# (lower-cased name passed through replace_all).
_colonne_cle = f"{sources[source]['nom_structure']}2"
scanR_nettoye = scanR.drop_duplicates(subset='nom_struct')
scanR_nettoye[_colonne_cle] = scanR_nettoye.loc[:, 'nom_struct'].apply(lambda nom: replace_all(str(nom).lower()))
scanR_nettoye = scanR_nettoye[['id_structure_scanr', _colonne_cle]]
scanR_nettoye = scanR_nettoye.drop_duplicates(subset=_colonne_cle)

In [None]:
# Attach the scanR structure identifier to every partner through the normalised
# name key (left join: partners without a scanR match are kept).
_colonne_cle = f"{sources[source]['nom_structure']}2"
df_partenaires_struct = df_partenaires_struct.merge(scanR_nettoye, how='left', on=_colonne_cle)
df_partenaires_struct

In [None]:
####### File holding the structure identifiers found by hand by Emmanuel ==> 'code' column
# Pairs (text to find, replacement) applied in this order to the lower-cased name:
# " d e" becomes " d'e", " l a" becomes " l'a", and so on.
_ELISIONS = (
    (" d e", " d'e"), (" d a", " d'a"), (" d i", " d'i"), (" d o", " d'o"),
    (" d u", " d'u"), (" d y", " d'y"), (" d h", " d'h"),
    (" l e", " l'e"), (" l a", " l'a"), (" l i", " l'i"), (" l o", " l'o"),
    (" l u", " l'u"), (" l y", " l'y"), (" l h", " l'h"),
)


def _normaliser_nom_structure(valeur):
    """Return the comparison key of a structure name.

    The value is converted to ``str`` and lower-cased, each pair of ``_ELISIONS``
    is applied in order, and the result is passed through ``replace_all``.
    """
    texte = str(valeur).lower()
    for sans_apostrophe, avec_apostrophe in _ELISIONS:
        texte = texte.replace(sans_apostrophe, avec_apostrophe)
    return replace_all(texte)


_colonne_cle = f"{sources[source]['nom_structure']}2"

scanr_structures = pd.read_excel('scanr_partenaires_non_identifies.xlsx')
scanr_structures[_colonne_cle] = scanr_structures.loc[:, 'Nom'].apply(_normaliser_nom_structure)
# Keep only complete (key, code) pairs, one per key.
scanr_structures = (
    scanr_structures[[_colonne_cle, 'code']]
    .dropna()
    .drop_duplicates(subset=_colonne_cle)
)
# Left join: every partner row is kept, 'code' is filled where a manual match exists.
df_partenaires_complet = df_partenaires_struct.merge(scanr_structures, how='left', on=_colonne_cle)
df_partenaires_complet

In [21]:
# When the partners table carries a 'finess' column, add the columns of the
# finess/siret reference file through a left join on 'finess'.
if 'finess' in df_partenaires.columns:
    finess_siret = pd.read_json("finess_siret-siege.json")
    df_partenaires_complet = df_partenaires_complet.merge(finess_siret, on='finess', how='left')

In [None]:
# Show the column(s) configured under 'identifiants_preferes_structure' for this source.
df_partenaires_complet[sources[source]['identifiants_preferes_structure']]

In [None]:
# List the columns available after the successive merges.
df_partenaires_complet.columns

In [None]:
# Compute one 'id_structure' value per row from the columns configured under
# 'identifiants_preferes_structure' (presumably the first usable identifier; the
# selection rule lives in identifiant_prefere and is not visible here).
# NOTE(review): identifiant_prefere is not imported in the import cell at the top
# of this notebook -- confirm where it is defined, otherwise this cell raises NameError.
df_partenaires_complet['id_structure']=df_partenaires_complet.apply(lambda row: identifiant_prefere(row,sources[source]['identifiants_preferes_structure']), axis=1)
df_partenaires_complet

In [None]:
# Rows whose structure identifier is still missing: a real NaN/None, or the
# strings 'None' / 'nan'.
# The string test is done element-wise. Calling str() on the whole column
# returns the printed representation of the Series, which never equals 'None'
# or 'nan', so the previous textual checks were always False.
_id_structure = df_partenaires_complet['id_structure']
df_partenaires_complet.loc[pd.isna(_id_structure) | _id_structure.astype(str).isin(['None', 'nan'])]

In [21]:
# Persist the partners with their structure identifiers (Excel for manual review, JSON for reloading).
df_partenaires_complet.to_excel(f"./DATA/{source}/df_partenaires_id_structures.xlsx")
df_partenaires_complet.to_json(f"./DATA/{source}/df_partenaires_id_structures.json")

In [22]:
# Reload the saved table so the following cells can start from the JSON export.
df_partenaires_complet=pd.read_json(f"./DATA/{source}/df_partenaires_id_structures.json")

In [23]:
######## Collect the structures without identifier so they can be handed to Emmanuel
# A structure is unidentified when 'id_structure' is a real NaN/None or one of
# the strings 'None' / 'nan'. The string test is done element-wise: str() on the
# whole column returns the printed representation of the Series, so the previous
# textual checks never matched.
_id_structure = df_partenaires_complet['id_structure']
_sans_identifiant = pd.isna(_id_structure) | _id_structure.astype(str).isin(['None', 'nan'])
identifiants_a_remplir = df_partenaires_complet.loc[_sans_identifiant]
# One row per distinct normalised structure name, renumbered from 0.
identifiants_a_remplir = identifiants_a_remplir.drop_duplicates(subset=f"{sources[source]['nom_structure']}2")
identifiants_a_remplir = identifiants_a_remplir.reset_index()
del identifiants_a_remplir['index']

In [24]:
### For ADEME, look for candidate structures from the researchers' names
if source=='ADEME':
    # 'Organisme du porteur' is treated here as a person's name: the family name is its
    # first space-separated token (capitalised), the first name comes from extract_first_name.
    identifiants_a_remplir['nom']=identifiants_a_remplir['Organisme du porteur'].apply(lambda x: str(x).split(' ')[0].capitalize())
    identifiants_a_remplir['prenom']=identifiants_a_remplir['Organisme du porteur'].apply(lambda x: extract_first_name(x))
    # Parallel lists: one entry is appended to each of them for every retained (person, affiliation) pair.
    nom=[]
    prenom=[]
    id_structure=[]
    nom_structure=[]
    annees=[]
    annees_projet=[]
    idref=[]
    for i in range(len(identifiants_a_remplir)):
        # Search the cluster for "<first name> <family name>".
        # NOTE(review): get_data_from_elastic is not imported in the import cell at the top of this notebook -- confirm where it is defined.
        r=get_data_from_elastic(url_cluster,f"{identifiants_a_remplir.iloc[i,:].prenom} {identifiants_a_remplir.iloc[i,:].nom}")
        # Only a result with exactly one hit is used.
        if len(r['hits']['hits'])==1:
            if 'affiliations' in list(r['hits']['hits'][0]['_source'].keys()):
                for j in range(len(r['hits']['hits'][0]['_source']['affiliations'])):
                    # Years of the project as strings, from the first 4 characters of the start date to those of the end date (inclusive).
                    annee_proj=[str(y) for y in range(int(identifiants_a_remplir.iloc[i,:]['Date de dÃ©but du projet'][:4]),int(identifiants_a_remplir.iloc[i,:]['Date de fin du projet'][:4])+1,1)]
                    # Keep the affiliation when at least one of the years listed in its sources is a project year.
                    if len([z for z in annee_proj if z in sorted(list(pd.Series([x['year'] for x in r['hits']['hits'][0]['_source']['affiliations'][j]['sources']]).drop_duplicates()))])>0:
                        nom.append(identifiants_a_remplir.iloc[i,:].nom)
                        prenom.append(identifiants_a_remplir.iloc[i,:].prenom)
                        idref.append(r['hits']['hits'][0]['_source']['id'])
                        annees_projet.append(annee_proj)
                        id_structure.append(r['hits']['hits'][0]['_source']['affiliations'][j]['structure']['id'])
                        nom_structure.append(r['hits']['hits'][0]['_source']['affiliations'][j]['structure']['label']['default'])
                        annees.append(sorted(list(pd.Series([x['year'] for x in r['hits']['hits'][0]['_source']['affiliations'][j]['sources']]).drop_duplicates())))
    # One row per retained (person, affiliation) pair.
    df=pd.DataFrame(data={'nom':nom,'prenom':prenom,'idref':idref,'id_structure':id_structure,'nom_structure':nom_structure,'annees':annees,'annees_projet':annees_projet})
    df['structures_potentielles']=df.apply(lambda row: {"id" : row['id_structure'], "nom_structure": row['nom_structure']},axis=1)
    # Gather the candidate structures of each person into a single list.
    df=df.groupby(['nom','prenom','idref']).agg({'structures_potentielles':lambda x: list(x)}).reset_index()
    # Left join: every unidentified structure is kept, with its candidates when some were found.
    df2=pd.merge(identifiants_a_remplir,df, on=['nom','prenom'],how='left')
    df2.columns=['index', 'CatÃ©gorie', 'RÃ©fÃ©rence du projet', 'Acronyme du projet',
        'Titre du projet', "RÃ©fÃ©rence de l'appel Ã  projet",
        "Titre de l'appel Ã  projet", "Acronyme de l'appel Ã  projet",
        "AnnÃ©e de l'appel", 'Organisme du porteur', 'SIRET du porteur',
        'Identifiant ROR (Research Organization Registry) de lâorganisme ou RNSR',
        'Date de dÃ©but du projet', 'Date de fin du projet',
        'Organisme du porteur2', 'id_structure_matcher', 'id_structure_scanr',
        'code', 'id_structure', 'nom', 'prenom', 'idref','structures_potentielles']
    df2[['CatÃ©gorie', 'RÃ©fÃ©rence du projet', 'Acronyme du projet',
        'Titre du projet', "RÃ©fÃ©rence de l'appel Ã  projet",
        'Organisme du porteur',
        'Date de dÃ©but du projet', 'Date de fin du projet',
        'nom', 'prenom','idref','structures_potentielles']].to_excel(f"./structures_manquantes/partenaires_non_identifies_{source}.xlsx", index=False)

In [25]:
# For every source but ADEME, keep only the structure name plus the location
# columns that are actually present, ready to be exported for manual identification.
if source != 'ADEME':
    _colonnes_presentes = list(identifiants_a_remplir.columns)

    def _colonne_disponible(cle):
        """Tell whether the column configured under ``cle`` for this source exists in the frame."""
        return sources[source][cle] in _colonnes_presentes

    # Configuration keys of the columns to keep, in output order (None: leave the frame untouched).
    _cles_a_garder = None
    if _colonne_disponible('ville') and _colonne_disponible('pays'):
        if _colonne_disponible('adresse'):
            _cles_a_garder = ['nom_structure', 'adresse', 'ville', 'pays']
        else:
            _cles_a_garder = ['nom_structure', 'ville', 'pays']
    elif _colonne_disponible('region'):
        _cles_a_garder = ['nom_structure', 'region']
    elif _colonne_disponible('ville'):
        # Reached only when the city column exists and the country column does not.
        _cles_a_garder = ['nom_structure', 'ville']

    if _cles_a_garder is not None:
        identifiants_a_remplir = identifiants_a_remplir[[sources[source][cle] for cle in _cles_a_garder]]

In [26]:
# Export the unidentified structures for manual completion (ADEME writes its own file in the ADEME-specific cell).
if source != 'ADEME':
    identifiants_a_remplir.to_excel(f"./structures_manquantes/partenaires_non_identifies_{source}.xlsx", index=False)

# Matcher des chercheurs

In [29]:
# Reload the partners enriched with structure identifiers. The file name now
# matches the export written by this notebook (df_partenaires_id_structures.json);
# the previous name, df_partners_id_structures.json, is not produced by any cell here.
df_partenaires=pd.read_json(f"./DATA/{source}/df_partenaires_id_structures.json")

In [None]:
# Match researchers only when this source really configures both name columns.
# `sources` comes from pd.read_json, so each source exposes the union of all
# configuration keys and a key the source does not define is present but holds
# NaN. The values are therefore tested, not the mere presence of the keys
# (which was true for every source).
_colonnes_identite = [sources[source].get(cle) for cle in ('nom', 'prenom')]
if all(pd.notna(colonne) for colonne in _colonnes_identite):
    df_partenaires['id_personne']=df_partenaires.progress_apply(lambda row: get_person(row, cached_data_persons,sources[source]['nom'],sources[source]['prenom']), axis=1)
    # Persist to the file the persons cache is loaded from at the top of the notebook;
    # the previous name (cached_data_persons.pkl) was never read back.
    write_cache(cached_data_persons,f"./DATA/{source}/caches/cached_{source.lower()}_data_persons.pkl")
    df_partenaires.to_excel(f"./DATA/{source}/df_partenaires_id_personne.xlsx")
    df_partenaires.to_json(f"./DATA/{source}/df_partenaires_id_personne.json")

In [30]:
# Same person matching as in the previous cell, run here without the 'nom'/'prenom'
# check and without saving the cache or exporting the result.
# get_person is called once per partner row with the configured family-name and first-name columns.
df_partenaires['id_personne']=df_partenaires.progress_apply(lambda row: get_person(row, cached_data_persons,sources[source]['nom'],sources[source]['prenom']), axis=1)

  1%|          | 21/1737 [00:04<11:56,  2.40it/s]

no exact fullname match for Morin Jean-Paul vs ['trevanian', 'trevanian']


  1%|▏         | 26/1737 [00:05<09:17,  3.07it/s]

no exact fullname match for Clavel Jacqueline vs ['jacqueline clavel marinacce', 'clavel marinacce jacqueline']


  2%|▏         | 27/1737 [00:06<08:29,  3.35it/s]

no exact fullname match for Gilot-Fromont Emmanuelle vs ['emmanuelle fromont', 'fromont emmanuelle']


  2%|▏         | 30/1737 [00:06<06:55,  4.11it/s]

no exact fullname match for Schneider Dominique vs ['dominique jung schneider', 'jung schneider dominique']


  2%|▏         | 35/1737 [00:08<07:55,  3.58it/s]

no exact fullname match for Forestier Christiane vs ['christiane nativel forestier', 'nativel forestier christiane']


  2%|▏         | 37/1737 [00:09<06:45,  4.19it/s]

no exact fullname match for Paris Christophe vs ['jean christophe paris', 'paris jean christophe']
no exact fullname match for Paris Christophe vs ['jean christophe paris', 'paris jean christophe']
no exact fullname match for Paris Christophe vs ['christophe de paris', 'christophe de paris']
no exact fullname match for Paris Christophe vs ['jean christophe coffin', 'coffin jean christophe']


  2%|▏         | 38/1737 [00:10<13:27,  2.10it/s]

no exact fullname match for Paris Christophe vs ['christophe greffet', 'greffet christophe']
no exact fullname match for Paris Christophe vs ['christophe de beaumont du repaire', 'beaumont du repaire christophe de']


  2%|▏         | 40/1737 [00:10<09:24,  3.01it/s]

no exact fullname match for Bénédetti Marc vs ['jean marc benedetti', 'benedetti jean marc']
no exact fullname match for Bénédetti Marc vs ['jean marc benedetti', 'benedetti jean marc']


  3%|▎         | 57/1737 [00:14<07:02,  3.98it/s]

no exact fullname match for Fenet Hélène vs ['helene buisson fenet', 'buisson fenet helene']


  3%|▎         | 59/1737 [00:15<07:25,  3.76it/s]

no exact fullname match for Baeza Armelle vs ['armelle baeza squiban', 'baeza squiban armelle']


  4%|▍         | 68/1737 [00:16<04:58,  5.60it/s]

Error in getting notice 091300940
skipping birth date 1817-01-01T00:00:00


  4%|▍         | 70/1737 [00:17<05:12,  5.34it/s]

no exact fullname match for Krishnan Kannan vs ['kannan m  krishnan', 'krishnan kannan m']
no exact fullname match for Carrière Marie vs ['marie j  carriere', 'carriere marie j']
no exact fullname match for Carrière Marie vs ['anne marie carriere', 'carriere anne marie']
no exact fullname match for Carrière Marie vs ['lucien eugene marie carriere', 'carriere lucien eugene marie']
no exact fullname match for Carrière Marie vs ['jean marie carriere', 'carriere jean marie']
no exact fullname match for Carrière Marie vs ['martin barthelemy carriere', 'carriere martin barthelemy']
no exact fullname match for Carrière Marie vs ['jean marie carriere', 'carriere jean marie']
no exact fullname match for Carrière Marie vs ['marie joseph henri carriere', 'carriere marie joseph henri']


  4%|▍         | 72/1737 [00:18<09:00,  3.08it/s]

no exact fullname match for Carrière Marie vs ['marie paule carriere hervagault', 'carriere hervagault marie paule']


  4%|▍         | 73/1737 [00:18<08:17,  3.34it/s]

no exact fullname match for Zmirou Denis vs ['denis zmirou navier', 'zmirou navier denis']


  6%|▌         | 103/1737 [00:24<04:29,  6.07it/s]

no exact fullname match for Le Moual Nicole vs ['nicole lincot le moual', 'lincot le moual nicole']


  6%|▌         | 108/1737 [00:25<05:35,  4.86it/s]

no exact fullname match for Raherison Chantal vs ['chantal raherison semjen', 'raherison semjen chantal']
no exact fullname match for Pons Françoise vs ['francoise pons lebeau', 'pons lebeau francoise']
no exact fullname match for Pons Françoise vs ['francoise bernard pons', 'bernard pons francoise']
no exact fullname match for Pons Françoise vs ['anne francoise pons', 'pons anne francoise']


  6%|▋         | 110/1737 [00:26<09:54,  2.74it/s]

no exact fullname match for Calvez Marcel vs ['marcel le calvez', 'le calvez marcel']


  7%|▋         | 117/1737 [00:28<06:42,  4.02it/s]

no exact fullname match for Paris Christophe vs ['jean christophe paris', 'paris jean christophe']
no exact fullname match for Paris Christophe vs ['jean christophe paris', 'paris jean christophe']
no exact fullname match for Paris Christophe vs ['christophe de paris', 'christophe de paris']
no exact fullname match for Paris Christophe vs ['jean christophe coffin', 'coffin jean christophe']


  7%|▋         | 118/1737 [00:29<13:13,  2.04it/s]

no exact fullname match for Paris Christophe vs ['christophe greffet', 'greffet christophe']
no exact fullname match for Paris Christophe vs ['christophe de beaumont du repaire', 'beaumont du repaire christophe de']


  7%|▋         | 127/1737 [00:31<05:08,  5.22it/s]

no exact fullname match for Bernard Louis vs ['bernard thobois', 'thobois bernard']
no exact fullname match for Bernard Louis vs ['louis bernard robitaille', 'robitaille louis bernard']
no exact fullname match for Bernard Louis vs ['bernard louis dumont', 'dumont bernard louis']
no exact fullname match for Bernard Louis vs ['bernard louis roques', 'roques bernard louis']


  8%|▊         | 133/1737 [00:32<06:06,  4.38it/s]

no exact fullname match for Garric Jeanne vs ['marie jeanne garric', 'garric marie jeanne']


  8%|▊         | 134/1737 [00:33<07:01,  3.80it/s]

no exact fullname match for Rose Jerome vs ['jerome c  rose', 'rose jerome c']
no exact fullname match for Rose Jerome vs ['jerome g  rose', 'rose jerome g']


  9%|▊         | 150/1737 [00:36<05:53,  4.49it/s]

skipping birth date 1890-01-01T00:00:00


  9%|▊         | 151/1737 [00:36<06:04,  4.35it/s]

no exact fullname match for Lepage Nadege vs ['nadege lepage druetto', 'lepage druetto nadege']


  9%|▉         | 154/1737 [00:37<08:34,  3.08it/s]

no exact fullname match for Allemand Isabelle vs ['isabelle meutelet allemand', 'meutelet allemand isabelle']
no exact fullname match for Allemand Isabelle vs ['isabelle martine', 'martine isabelle']


  9%|▉         | 157/1737 [00:38<06:36,  3.99it/s]

no exact fullname match for Thomas Olivier vs ['olivier paul thomas', 'thomas olivier paul']


  9%|▉         | 162/1737 [00:40<06:34,  3.99it/s]

no exact fullname match for Bertrand Xavier vs ['bertrand xavier soret', 'soret bertrand xavier']
no exact fullname match for Bertrand Xavier vs ['bertrand delafosse', 'delafosse bertrand']
no exact fullname match for Bertrand Xavier vs ['francois xavier bernard bertrand lesage', 'lesage francois xavier bernard bertrand']


  9%|▉         | 163/1737 [00:41<11:17,  2.32it/s]

no exact fullname match for Bertrand Xavier vs ['etienne bertaud du chazaud', 'bertaud du chazaud etienne']


 10%|█         | 174/1737 [00:44<11:25,  2.28it/s]

no exact fullname match for Morin Jean-Paul vs ['trevanian', 'trevanian']


 11%|█         | 186/1737 [00:46<04:13,  6.13it/s]

no exact fullname match for Langouet Sophie vs ['sophie langouet prigent', 'langouet prigent sophie']


 11%|█         | 194/1737 [00:48<04:22,  5.88it/s]

no exact fullname match for Petit Audrey vs ['audrey petit trigg', 'petit trigg audrey']
no exact fullname match for Petit Audrey vs ['audrey francoise petit', 'petit audrey francoise']
no exact fullname match for Petit Audrey vs ['audrey petit bessy', 'petit bessy audrey']
no exact fullname match for Petit Audrey vs ['marie audrey barrat petit', 'barrat petit marie audrey']


 11%|█▏        | 197/1737 [00:50<10:37,  2.42it/s]

no exact fullname match for Hamel Jean-François vs ['jean francois hamel broza', 'hamel broza jean francois']


 12%|█▏        | 200/1737 [00:51<10:35,  2.42it/s]

no exact fullname match for Schmich Isabelle vs ['isabelle schmich yamane', 'schmich yamane isabelle']


 12%|█▏        | 207/1737 [00:53<07:25,  3.43it/s]

no exact fullname match for Legrand Emilie vs ['emilie legrand collin', 'legrand collin emilie']
no exact fullname match for Legrand Emilie vs ['emilie legrand pluquet', 'legrand pluquet emilie']


 12%|█▏        | 216/1737 [00:55<07:15,  3.49it/s]

no exact fullname match for Bouvier Ghislaine vs ['ghislaine briot', 'briot ghislaine']


 13%|█▎        | 218/1737 [00:56<07:10,  3.53it/s]

no exact fullname match for Mir Luis vs ['luis jerez mir', 'jerez mir luis']


 13%|█▎        | 220/1737 [00:56<08:01,  3.15it/s]

no exact fullname match for Mir Luis vs ['luis m  mir', 'm  mir luis']


 13%|█▎        | 227/1737 [00:58<06:15,  4.02it/s]

no exact fullname match for Poinsignon Anne vs ['anne chantal mazingue desailly', 'mazingue desailly anne chantal']


 13%|█▎        | 230/1737 [00:59<06:19,  3.97it/s]

no exact fullname match for Luce Danièle vs ['daniele luce alet', 'alet daniele luce']


 13%|█▎        | 231/1737 [01:00<09:48,  2.56it/s]

no exact fullname match for Morin Didier vs ['gilles andre didier morin', 'morin gilles andre didier']


 13%|█▎        | 233/1737 [01:00<06:26,  3.90it/s]

no exact fullname match for Langouet Sophie vs ['sophie langouet prigent', 'langouet prigent sophie']


 14%|█▎        | 236/1737 [01:01<07:56,  3.15it/s]

skipping Animateur à Radio-Canada, docteur en sémiologie, journaliste (en 2003)


 14%|█▎        | 237/1737 [01:02<10:54,  2.29it/s]

no exact fullname match for Auger Jacques vs ['jean jacques  auger', 'auger jean jacques']


 14%|█▎        | 238/1737 [01:02<10:56,  2.28it/s]

skipping birth date 1890-01-01T00:00:00


 14%|█▍        | 239/1737 [01:02<11:15,  2.22it/s]

no exact fullname match for Riviere Emmanuel vs ['marie emmanuel riviere', 'riviere marie emmanuel']


 14%|█▍        | 248/1737 [01:04<04:27,  5.56it/s]

no exact fullname match for Lacour Brigitte vs ['brigitte lacour schillig', 'lacour schillig brigitte']


 14%|█▍        | 250/1737 [01:04<04:22,  5.67it/s]

no exact fullname match for De Sèze René vs ['rene de seze', 'seze rene de']


 15%|█▍        | 259/1737 [01:06<04:23,  5.60it/s]

no exact fullname match for Fournier Thierry vs ['thierry fournier', 'fournier  thierry']


 15%|█▌        | 266/1737 [01:08<06:12,  3.95it/s]

no exact fullname match for Garrigou Alain vs ['alain garrigou torchy', 'garrigou torchy alain']


 15%|█▌        | 267/1737 [01:09<07:22,  3.32it/s]

no exact fullname match for Maître Anne vs ['anne le maitre', 'le maitre anne']
no exact fullname match for Maître Anne vs ['anne le maitre', 'le maitre anne']
no exact fullname match for Maître Anne vs ['marie anne prioux maitre', 'prioux maitre marie anne']
no exact fullname match for Maître Anne vs ['anne marie peau', 'peau anne marie']
no exact fullname match for Maître Anne vs ['anne laure bourdel', 'bourdel anne laure']
no exact fullname match for Maître Anne vs ['anne waugh', 'waugh anne']
no exact fullname match for Maître Anne vs ['anne claire prince', 'prince anne claire']


 15%|█▌        | 269/1737 [01:10<11:25,  2.14it/s]

no exact fullname match for Maître Anne vs ['anne sophie ledonne', 'ledonne anne sophie']


 16%|█▌        | 271/1737 [01:11<08:54,  2.74it/s]

no exact fullname match for Schroeder Henri vs ['henri christian schroeder', 'schroeder henri christian']


 16%|█▌        | 275/1737 [01:12<06:52,  3.54it/s]

no exact fullname match for Perrin Anne vs ['anne perrin khelissa', 'perrin khelissa anne']
no exact fullname match for Perrin Anne vs ['anne marie perrin', 'perrin anne marie']


 16%|█▌        | 277/1737 [01:13<10:54,  2.23it/s]

no exact fullname match for Perrin Anne vs ['anne gaelle perrin', 'perrin anne gaelle']


 16%|█▌        | 280/1737 [01:14<08:53,  2.73it/s]

no exact fullname match for Bories Serge vs ['serge charles bories', 'bories serge charles']
no exact fullname match for Pons Françoise vs ['francoise pons lebeau', 'pons lebeau francoise']
no exact fullname match for Pons Françoise vs ['francoise bernard pons', 'bernard pons francoise']
no exact fullname match for Pons Françoise vs ['anne francoise pons', 'pons anne francoise']


 16%|█▌        | 282/1737 [01:15<10:37,  2.28it/s]

no exact fullname match for Guzylack Laurence vs ['laurence piriou guzylack', 'piriou guzylack laurence']
no exact fullname match for Martin Perrine vs ['perrine j  martin', 'martin perrine j']


 16%|█▋        | 285/1737 [01:16<07:51,  3.08it/s]

no exact fullname match for Martin Perrine vs ['perrine saint martin', 'saint martin perrine']
no exact fullname match for Barthélémy Philippe vs ['charles philippe barthelemy', 'barthelemy charles philippe']


 17%|█▋        | 287/1737 [01:18<12:01,  2.01it/s]

no exact fullname match for Gomez Elena vs ['elena lopez gomez', 'lopez gomez elena']
no exact fullname match for Gomez Elena vs ['elena gomez ugarte', 'gomez ugarte elena']
no exact fullname match for Gomez Elena vs ['elena gomez perez', 'gomez perez elena']
no exact fullname match for Gomez Elena vs ['elena merino gomez', 'merino gomez elena']
no exact fullname match for Gomez Elena vs ['elena vasquez gomez', 'vasquez gomez elena']
no exact fullname match for Gomez Elena vs ['elena gomez diaz', 'gomez diaz elena']
no exact fullname match for Gomez Elena vs ['maria elena gomez moreno', 'gomez moreno maria elena']
no exact fullname match for Gomez Elena vs ['maria elena gomez hernandez', 'gomez hernandez maria elena']
no exact fullname match for Gomez Elena vs ['elena gomez villalba ballesteros', 'gomez villalba ballesteros elena']
no exact fullname match for Gomez Elena vs ['maria elena gomez miranda', 'gomez miranda maria elena']


 17%|█▋        | 295/1737 [01:20<05:11,  4.63it/s]

no exact fullname match for Etienne Manuel vs ['arnaud manuel etienne', 'etienne arnaud manuel']
no exact fullname match for Etienne Manuel vs ['marie andree manuel etienne', 'manuel etienne marie andree']


 17%|█▋        | 298/1737 [01:21<05:25,  4.42it/s]

no exact fullname match for Etienne Manuel vs ['etienne gougere', 'gougere etienne']
no exact fullname match for Suarez Guillaume vs ['guillaume asuarez', 'asuarez guillaume']


 17%|█▋        | 300/1737 [01:21<05:04,  4.72it/s]

no exact fullname match for Lecomte Catherine vs ['catherine lecomte lapp', 'lecomte lapp catherine']
no exact fullname match for Lecomte Catherine vs ['anne catherine lecomte', 'lecomte anne catherine']


 17%|█▋        | 302/1737 [01:22<07:42,  3.10it/s]

no exact fullname match for Lecomte Catherine vs ['catherine deslandes', 'deslandes catherine']
no exact fullname match for Lecomte Catherine vs ['catherine lecomte pradines', 'lecomte pradines catherine']


 18%|█▊        | 313/1737 [01:24<05:06,  4.65it/s]

no exact fullname match for Tarrade Anne vs ['anne couturier tarrade', 'couturier tarrade anne']


 18%|█▊        | 318/1737 [01:26<08:10,  2.90it/s]

no exact fullname match for Moreau Christophe vs ['jean christophe moreau', 'moreau jean christophe']
no exact fullname match for Moreau Christophe vs ['jean christophe moreau', 'moreau jean christophe']
no exact fullname match for Moreau Christophe vs ['louis mathurin moreau christophe', 'moreau christophe louis mathurin']


 18%|█▊        | 319/1737 [01:27<13:12,  1.79it/s]

no exact fullname match for Moreau Christophe vs ['jean christophe moreau', 'moreau jean christophe']


 19%|█▊        | 322/1737 [01:28<08:59,  2.62it/s]

skipping birth date 0001-01-01T00:00:00


 20%|█▉        | 345/1737 [01:31<03:35,  6.45it/s]

no exact fullname match for Chevalier Nicolas vs ['nicolas r  chevalier', 'chevalier nicolas r']


 20%|█▉        | 346/1737 [01:32<08:51,  2.61it/s]

no exact fullname match for Chevalier Nicolas vs ['nicolas chevalier roch', 'chevalier roch nicolas']


 20%|██        | 356/1737 [01:34<05:30,  4.17it/s]

no exact fullname match for Paris Christophe vs ['jean christophe paris', 'paris jean christophe']
no exact fullname match for Paris Christophe vs ['jean christophe paris', 'paris jean christophe']
no exact fullname match for Paris Christophe vs ['christophe de paris', 'christophe de paris']
no exact fullname match for Paris Christophe vs ['jean christophe coffin', 'coffin jean christophe']


 21%|██        | 357/1737 [01:36<11:46,  1.95it/s]

no exact fullname match for Paris Christophe vs ['christophe greffet', 'greffet christophe']
no exact fullname match for Paris Christophe vs ['christophe de beaumont du repaire', 'beaumont du repaire christophe de']


 21%|██        | 361/1737 [01:37<07:03,  3.25it/s]

no exact fullname match for Petit Fabienne vs ['fabienne arias petit', 'arias petit fabienne']
no exact fullname match for Petit Fabienne vs ['fabienne petit jean', 'petit jean fabienne']


 21%|██        | 369/1737 [01:39<06:10,  3.70it/s]

no exact fullname match for Bories Serge vs ['serge charles bories', 'bories serge charles']


 21%|██▏       | 372/1737 [01:40<06:37,  3.44it/s]

no exact fullname match for Chatel Amélie vs ['marie louise amelie chatel', 'chatel marie louise amelie']


 21%|██▏       | 373/1737 [01:40<06:16,  3.62it/s]

no exact fullname match for Merlin Christophe vs ['antoine merlin', 'merlin antoine']


 22%|██▏       | 374/1737 [01:41<09:18,  2.44it/s]

no exact fullname match for Merlin Christophe vs ['paul christophe elisabeth merlin', 'merlin paul christophe elisabeth']


 22%|██▏       | 380/1737 [01:42<04:34,  4.94it/s]

no exact fullname match for Reynaud Stéphane vs ['stephane denis reynaud', 'reynaud stephane denis']


 22%|██▏       | 384/1737 [01:43<06:08,  3.67it/s]

no exact fullname match for Lecureur Valérie vs ['valerie lecureur rolland', 'lecureur rolland valerie']


 23%|██▎       | 401/1737 [01:46<03:00,  7.42it/s]

no exact fullname match for Arnaud-Cormos Delia vs ['liliana delia arnaud cormos', 'arnaud cormos liliana delia']


 23%|██▎       | 407/1737 [01:47<04:31,  4.90it/s]

no exact fullname match for Bontempi Bruno vs ['bruno bontempi junior', 'bontempi junior bruno']


 24%|██▍       | 417/1737 [01:49<03:10,  6.95it/s]

no exact fullname match for Villanueva Cristina vs ['cristina ferreiro villanueva', 'ferreiro villanueva cristina']
no exact fullname match for Villanueva Cristina vs ['cristina lopez villanueva', 'lopez villanueva cristina']
no exact fullname match for Hicks David vs ['david hicks lillard', 'lillard david hicks']
no exact fullname match for Hicks David vs ['david l  hicks', 'hicks david l']
no exact fullname match for Hicks David vs ['s  david hicks', 'hicks s  david']
no exact fullname match for Hicks David vs ['david w  hicks', 'hicks david w']
no exact fullname match for Hicks David vs ['david m  hicks', 'hicks david m']


 24%|██▍       | 418/1737 [01:50<07:35,  2.89it/s]

no exact fullname match for Hicks David vs ['david trevor hicks', 'hicks david trevor']


 24%|██▍       | 419/1737 [01:51<09:12,  2.39it/s]

no exact fullname match for David Jean-Philippe vs ['jean philippe tricoit', 'tricoit jean philippe']


 25%|██▍       | 434/1737 [01:54<06:12,  3.50it/s]

no exact fullname match for Deniaud Aurélien vs ['aurelien patrick sylvain deniaud', 'deniaud aurelien patrick sylvain']


 25%|██▌       | 435/1737 [01:54<05:52,  3.70it/s]

no exact fullname match for Boyer Laurent vs ['lisette laurent boyer', 'laurent boyer lisette']


 25%|██▌       | 436/1737 [01:56<12:04,  1.80it/s]

skipping death date 1994-01-01T00:00:00


 26%|██▌       | 448/1737 [01:58<03:44,  5.75it/s]

no exact fullname match for Boizet-Bonhoure Brigitte vs ['brigitte boizet', 'boizet brigitte']


 26%|██▌       | 453/1737 [01:59<04:11,  5.10it/s]

no exact fullname match for Dumas Orianne vs ['orianne dumas milne edwards', 'dumas milne edwards orianne']


 27%|██▋       | 466/1737 [02:02<04:14,  5.00it/s]

no exact fullname match for Cuvillier Virginie vs ['virginie cuvillier hot', 'cuvillier hot virginie']


 27%|██▋       | 468/1737 [02:03<09:40,  2.19it/s]

no exact fullname match for Gautier Mathieu vs ['mathieu gauthier', 'gauthier mathieu']
no exact fullname match for Gautier Mathieu vs ['matthieu gaultier', 'gaultier matthieu']


 27%|██▋       | 472/1737 [02:04<05:56,  3.55it/s]

no exact fullname match for Thany Steeve vs ['steeve herve thany', 'thany steeve herve']


 28%|██▊       | 480/1737 [02:05<02:31,  8.28it/s]

no exact fullname match for De Seze René vs ['rene de seze', 'seze rene de']


 28%|██▊       | 493/1737 [02:06<02:20,  8.83it/s]

no exact fullname match for Jean Didier vs ['didier jean nenert', 'nenert didier jean']
no exact fullname match for Jean Didier vs ['jean didier blanchet', 'blanchet jean didier']
no exact fullname match for Jean Didier vs ['jean didier pinguet', 'pinguet jean didier']
no exact fullname match for Jean Didier vs ['jean didier bagot', 'bagot jean didier']
no exact fullname match for Jean Didier vs ['jean didier chastelain', 'chastelain jean didier']
no exact fullname match for Jean Didier vs ['jean didier boukongou', 'boukongou jean didier']


 28%|██▊       | 495/1737 [02:08<06:33,  3.16it/s]

no exact fullname match for Jean Didier vs ['jean sauveur didier', 'didier jean sauveur']


 30%|██▉       | 517/1737 [02:11<04:11,  4.84it/s]

no exact fullname match for Marchi Nicola vs ['nicola de marchi', 'de marchi nicola']


 31%|███       | 532/1737 [02:13<03:42,  5.42it/s]

no exact fullname match for Bellanger Anne vs ['anne wassila bellanger', 'bellanger anne wassila']
no exact fullname match for Bellanger Anne vs ['anne pauline bellanger', 'bellanger anne pauline']


 31%|███       | 533/1737 [02:14<05:34,  3.60it/s]

no exact fullname match for Bellanger Anne vs ['anne sophie bellanger dujardin', 'bellanger dujardin anne sophie']


 31%|███       | 538/1737 [02:15<03:33,  5.61it/s]

no exact fullname match for Clerget-Froidevaux Marie vs ['marie stephanie clerget froidevaux', 'clerget froidevaux marie stephanie']


 31%|███▏      | 547/1737 [02:16<02:39,  7.48it/s]

no exact fullname match for Radoi Loredana vs ['loredana radoi pervilhac', 'radoi pervilhac loredana']


 32%|███▏      | 551/1737 [02:16<02:21,  8.38it/s]

no exact fullname match for Merlin Christophe vs ['antoine merlin', 'merlin antoine']


 32%|███▏      | 552/1737 [02:17<05:36,  3.52it/s]

no exact fullname match for Merlin Christophe vs ['paul christophe elisabeth merlin', 'merlin paul christophe elisabeth']


 32%|███▏      | 553/1737 [02:18<09:20,  2.11it/s]

no exact fullname match for Bonnet Sarah vs ['sarah lesage bonnet', 'lesage bonnet sarah']
no exact fullname match for Bonnet Sarah vs ['sarah ducrot bonnet', 'ducrot bonnet sarah']
no exact fullname match for Bonnet Sarah vs ['sarah ariey bonnet', 'ariey bonnet sarah']


 32%|███▏      | 563/1737 [02:20<03:09,  6.19it/s]

no exact fullname match for Degli Esposti Davide Davide vs ['davide degli esposti', 'degli esposti davide']


 34%|███▍      | 593/1737 [02:23<03:38,  5.24it/s]

no exact fullname match for Galy Edith vs ['edith galy marie', 'galy marie edith']


 34%|███▍      | 597/1737 [02:25<05:35,  3.39it/s]

no exact fullname match for Juillerat Lucienne vs ['lucienne juillerat jeanneret gris', 'juillerat jeanneret gris lucienne']


 35%|███▍      | 601/1737 [02:26<03:46,  5.02it/s]

no exact fullname match for Gratacap-Cavallier Bénédicte vs ['benedicte gratacap', 'gratacap benedicte']


 35%|███▌      | 609/1737 [02:28<04:44,  3.96it/s]

no exact fullname match for SCHNEIDER Dominique vs ['dominique jung schneider', 'jung schneider dominique']


 35%|███▌      | 610/1737 [02:29<09:27,  1.99it/s]

no exact fullname match for HEINRICH Joachim vs ['heinrich joachim jaeck', 'jaeck heinrich joachim']
no exact fullname match for HEINRICH Joachim vs ['heinrich joachim robe', 'robe heinrich joachim']
no exact fullname match for HEINRICH Joachim vs ['joachim heinrich knoll', 'knoll joachim heinrich']
no exact fullname match for HEINRICH Joachim vs ['joachim heinrich von ledebur', 'ledebur joachim heinrich von']
no exact fullname match for HEINRICH Joachim vs ['joachim thies', 'thies joachim']
no exact fullname match for HEINRICH Joachim vs ['joachim brinck', 'brinck joachim']
skipping birth date 1860-01-30T00:00:00


 35%|███▌      | 611/1737 [02:31<13:28,  1.39it/s]

no exact fullname match for HEINRICH Joachim vs ['joachim heinrich schmidt', 'schmidt joachim heinrich']
no exact fullname match for HEINRICH Joachim vs ['joachim heinrich jaeck', 'jaeck joachim heinrich']


 35%|███▌      | 612/1737 [02:31<10:03,  1.86it/s]

no exact fullname match for PINEAU Charles vs ['charles henri pineau', 'pineau charles henri']
skipping birth date 1877-01-01T00:00:00


 36%|███▌      | 623/1737 [02:34<08:24,  2.21it/s]

skipping death date 1628-05-21T00:00:00


 36%|███▌      | 624/1737 [02:35<08:11,  2.26it/s]

no exact fullname match for Bretin Philippe vs ['jean philippe bretin', 'bretin jean philippe']
no exact fullname match for Bretin Philippe vs ['philippe marie bretin', 'bretin philippe marie']


 36%|███▌      | 627/1737 [02:35<06:00,  3.08it/s]

no exact fullname match for Bouvet Elisabeth vs ['elisabeth bouvet cador', 'bouvet cador elisabeth']
no exact fullname match for Guillot Jacques vs ['jacques michel guillot', 'guillot jacques michel']


 36%|███▌      | 628/1737 [02:36<07:38,  2.42it/s]

no exact fullname match for Guillot Jacques vs ['jacques guillot de blancheville', 'guillot de blancheville jacques']


 36%|███▋      | 632/1737 [02:37<05:23,  3.42it/s]

no exact fullname match for Pean Michel vs ['rene michel  pean', 'pean rene michel']
skipping birth date 1913-10-01T00:00:00


 36%|███▋      | 634/1737 [02:38<05:10,  3.55it/s]

no exact fullname match for Pean Michel vs ['jean michel pean', 'pean jean michel']


 37%|███▋      | 643/1737 [02:39<03:01,  6.04it/s]

no exact fullname match for Weber Christiane vs ['christiane weber klein', 'weber klein christiane']


 38%|███▊      | 663/1737 [02:44<03:16,  5.45it/s]

no exact fullname match for Dolez Patricia vs ['patricia i  dolez', 'dolez patricia i']


 39%|███▊      | 669/1737 [02:45<03:39,  4.87it/s]

no exact fullname match for Bernard Cécile vs ['cecile bernard calvet', 'bernard calvet cecile']


 39%|███▊      | 670/1737 [02:46<09:02,  1.97it/s]

no exact fullname match for Bernard Cécile vs ['cecile ollitraut bernard', 'ollitraut bernard cecile']


 39%|███▉      | 679/1737 [02:48<04:09,  4.25it/s]

no exact fullname match for Marquis Nathalie vs ['nathalie le marquis', 'le marquis nathalie']


 39%|███▉      | 682/1737 [02:49<03:45,  4.67it/s]

no exact fullname match for Andre Véronique vs ['veronique andre bochaton', 'andre bochaton veronique']
no exact fullname match for Andre Véronique vs ['veronique andre elisabeth', 'andre elisabeth veronique']
no exact fullname match for Andre Véronique vs ['veronique andre durupt', 'andre durupt veronique']


 39%|███▉      | 684/1737 [02:50<07:10,  2.45it/s]

no exact fullname match for Bousquet Philippe vs ['philippe jean bousquet', 'bousquet philippe jean']


 40%|███▉      | 688/1737 [02:52<05:28,  3.19it/s]

no exact fullname match for Durand Philippe vs ['claude philippe durand', 'durand claude philippe']


 40%|███▉      | 690/1737 [02:53<08:07,  2.15it/s]

no exact fullname match for Garrigou Alain vs ['alain garrigou torchy', 'garrigou torchy alain']


 40%|████      | 696/1737 [02:54<03:38,  4.75it/s]

no exact fullname match for Riviere Emmanuel vs ['marie emmanuel riviere', 'riviere marie emmanuel']


 40%|████      | 703/1737 [02:56<03:06,  5.53it/s]

skipping birth date 0001-01-01T00:00:00
no exact fullname match for Henry Emmanuel vs ['henry emmanuel delbousquet', 'emmanuel delbousquet henry']
no exact fullname match for Henry Emmanuel vs ['henry emmanuel weiss', 'weiss henry emmanuel']
no exact fullname match for Henry Emmanuel vs ['henry emmanuel thyebault', 'thyebault henry emmanuel']
no exact fullname match for Henry Emmanuel vs ['emmanuel ossian henry', 'henry emmanuel ossian']
no exact fullname match for Henry Emmanuel vs ['henry emmanuel marie aubin', 'aubin henry emmanuel marie']


 41%|████      | 704/1737 [02:57<07:13,  2.38it/s]

no exact fullname match for Henry Emmanuel vs ['daniel joseph henry emmanuel aubry', 'aubry daniel joseph henry emmanuel']


 41%|████      | 715/1737 [02:59<03:08,  5.42it/s]

skipping birth date 1910-05-21T00:00:00
no exact fullname match for Martin Michèle vs ['michele guignard martin', 'guignard martin michele']


 42%|████▏     | 725/1737 [03:02<03:26,  4.91it/s]

no exact fullname match for De Barbeyrac Bertille vs ['bertille de barbeyrac', 'barbeyrac bertille de']


 42%|████▏     | 729/1737 [03:03<03:25,  4.92it/s]

no exact fullname match for Roques Christine vs ['daniele roques georget', 'roques georget daniele']


 42%|████▏     | 736/1737 [03:03<02:37,  6.34it/s]

no exact fullname match for Forget-Leray Joëlle vs ['joelle leray forget', 'leray forget joelle']
no exact fullname match for Lacour Stéphanie vs ['stephanie p  lacour', 'lacour stephanie p']


 43%|████▎     | 740/1737 [03:05<03:42,  4.47it/s]

no exact fullname match for Bloch Isabelle vs ['isabelle baladier bloch', 'baladier bloch isabelle']
no exact fullname match for Bloch Isabelle vs ['isabelle sebban', 'sebban isabelle']
no exact fullname match for Bloch Isabelle vs ['chantal cohen bacri', 'cohen bacri chantal']


 43%|████▎     | 747/1737 [03:06<04:02,  4.08it/s]

no exact fullname match for Krebs Tobias vs ['johann tobias krebs', 'krebs johann tobias']


 43%|████▎     | 749/1737 [03:07<04:28,  3.68it/s]

no exact fullname match for Garrigou Alain vs ['alain garrigou torchy', 'garrigou torchy alain']


 43%|████▎     | 754/1737 [03:08<03:41,  4.43it/s]

no exact fullname match for Bouchez Agnès vs ['marie agnes bouchez', 'bouchez marie agnes']


 44%|████▎     | 758/1737 [03:09<02:34,  6.35it/s]

no exact fullname match for De Sèze René vs ['rene de seze', 'seze rene de']


 44%|████▍     | 762/1737 [03:09<02:50,  5.73it/s]

no exact fullname match for Pierre Fabrice vs ['pierre fabrice lopez', 'lopez pierre fabrice']
no exact fullname match for Pierre Fabrice vs ['fabrice pierre koebelin', 'koebelin fabrice pierre']
no exact fullname match for Pierre Fabrice vs ['fabrice woittequand', 'woittequand fabrice']
no exact fullname match for Pierre Fabrice vs ['fabrice detrez', 'detrez fabrice']
no exact fullname match for Pierre Fabrice vs ['fabrice pinard saint pierre', 'pinard saint pierre fabrice']


 44%|████▍     | 763/1737 [03:10<06:43,  2.41it/s]

no exact fullname match for Pierre Fabrice vs ['fabrice pierre sabatier', 'sabatier fabrice pierre']


 45%|████▍     | 773/1737 [03:12<03:02,  5.28it/s]

no exact fullname match for Leveque Philippe vs ['jean philippe leveque', 'leveque jean philippe']


 45%|████▍     | 775/1737 [03:14<05:38,  2.84it/s]

no exact fullname match for Leveque Philippe vs ['philippe berenger leveque', 'berenger leveque philippe']


 45%|████▍     | 777/1737 [03:14<04:20,  3.68it/s]

no exact fullname match for Bourchis Déborah vs ['deborah bourc his', 'bourc his deborah']
no exact fullname match for Laborie Anne vs ['anne laure laborie', 'laborie anne laure']


 45%|████▌     | 782/1737 [03:15<04:44,  3.35it/s]

no exact fullname match for Jay Thérese vs ['therese jay nowaczyk', 'jay nowaczyk therese']
no exact fullname match for Jay Thérese vs ['marie therese jay', 'jay marie therese']


 45%|████▌     | 785/1737 [03:16<02:44,  5.78it/s]

no exact fullname match for De Sèze René vs ['rene de seze', 'seze rene de']
no exact fullname match for De Sèze René vs ['rene de seze', 'seze rene de']


 45%|████▌     | 787/1737 [03:16<02:58,  5.32it/s]

no exact fullname match for David Jean Philippe vs ['jean philippe tricoit', 'tricoit jean philippe']
no exact fullname match for David Jean Philippe vs ['jean cheri david philippe caubet', 'caubet jean cheri david philippe']


 45%|████▌     | 790/1737 [03:17<04:26,  3.56it/s]

no exact fullname match for Villena Isabelle vs ['isabelle bodart', 'bodart isabelle']
no exact fullname match for Villena Isabelle vs ['isabelle coste villena', 'coste villena isabelle']
no exact fullname match for Comte Gilles vs ['gilles francois graimberg de belleau', 'graimberg de belleau gilles francois']
no exact fullname match for Comte Gilles vs ['louis gilles de maupeou d ableiges', 'maupeou d ableiges louis gilles de']


 46%|████▌     | 791/1737 [03:18<05:37,  2.81it/s]

no exact fullname match for Comte Gilles vs ['gilles charles porcher de lissonay', 'porcher de lissonay gilles charles']


 46%|████▌     | 793/1737 [03:18<04:04,  3.86it/s]

no exact fullname match for Boutet Elisa vs ['elisa boutet robinet', 'boutet robinet elisa']


 46%|████▌     | 796/1737 [03:19<04:20,  3.61it/s]

no exact fullname match for Leger Damien vs ['clement leger', 'leger clement']


 46%|████▋     | 805/1737 [03:21<03:10,  4.89it/s]

Error in getting notice 204604125


 47%|████▋     | 821/1737 [03:22<01:30, 10.12it/s]

Error in getting notice 199264295


 49%|████▉     | 848/1737 [03:29<05:46,  2.57it/s]

no exact fullname match for Dumont Laurent vs ['aristide dumont', 'dumont aristide']
no exact fullname match for Dumont Laurent vs ['laurent angliviel de la beaumelle', 'la beaumelle laurent angliviel de']


 49%|████▉     | 852/1737 [03:29<02:50,  5.18it/s]

no exact fullname match for Sanz Guenhaël vs ['guenhael sanz', 'sanz  guenhael']


 49%|████▉     | 854/1737 [03:29<02:31,  5.83it/s]

no exact fullname match for Moret Leila vs ['leila moret majoube', 'moret majoube leila']


 50%|████▉     | 866/1737 [03:32<03:21,  4.32it/s]

no exact fullname match for Chanel Olivier vs ['olivier de crouy chanel', 'crouy chanel olivier de']


 51%|█████     | 879/1737 [03:33<01:43,  8.33it/s]

no exact fullname match for Walter Scott vs ['walter scott peterson', 'peterson walter scott']
no exact fullname match for Walter Scott vs ['walter giorgio scott', 'scott walter giorgio']
no exact fullname match for Walter Scott vs ['walter g  scott', 'scott walter g']
no exact fullname match for Walter Scott vs ['walter scott polland', 'polland walter scott']
no exact fullname match for Walter Scott vs ['walter n  scott', 'scott walter n']


 51%|█████     | 880/1737 [03:34<03:56,  3.63it/s]

no exact fullname match for Walter Scott vs ['walter scott huxford', 'huxford walter scott']
no exact fullname match for Walter Scott vs ['walter scott dalgleish', 'dalgleish walter scott']


 51%|█████     | 888/1737 [03:36<02:31,  5.60it/s]

no exact fullname match for Petit Fabienne vs ['fabienne arias petit', 'arias petit fabienne']
no exact fullname match for Petit Fabienne vs ['fabienne petit jean', 'petit jean fabienne']


 51%|█████▏    | 894/1737 [03:37<02:30,  5.58it/s]

no exact fullname match for Léké Lokombe André vs ['andre leke', 'leke andre']


 52%|█████▏    | 905/1737 [03:39<02:27,  5.64it/s]

no exact fullname match for Ciocca Dominique vs ['dominique sage ciocca', 'sage ciocca dominique']


 52%|█████▏    | 906/1737 [03:39<02:51,  4.85it/s]

skipping death date 1561-09-05T00:00:00


 53%|█████▎    | 917/1737 [03:41<02:34,  5.30it/s]

no exact fullname match for Boulard Yves vs ['pierre yves boulard', 'boulard pierre yves']


 53%|█████▎    | 922/1737 [03:42<03:07,  4.34it/s]

no exact fullname match for Haddad Sami vs ['fuad sami haddad', 'haddad fuad sami']


 53%|█████▎    | 927/1737 [03:43<02:58,  4.53it/s]

no exact fullname match for Le Moual Nicole vs ['nicole lincot le moual', 'lincot le moual nicole']


 54%|█████▎    | 932/1737 [03:44<01:51,  7.25it/s]

no exact fullname match for Lévêque Philippe vs ['jean philippe leveque', 'leveque jean philippe']


 54%|█████▍    | 935/1737 [03:45<03:24,  3.92it/s]

no exact fullname match for Lévêque Philippe vs ['philippe berenger leveque', 'berenger leveque philippe']
no exact fullname match for Maître Anne vs ['anne le maitre', 'le maitre anne']
no exact fullname match for Maître Anne vs ['anne le maitre', 'le maitre anne']
no exact fullname match for Maître Anne vs ['marie anne prioux maitre', 'prioux maitre marie anne']
no exact fullname match for Maître Anne vs ['anne marie peau', 'peau anne marie']
no exact fullname match for Maître Anne vs ['anne laure bourdel', 'bourdel anne laure']
no exact fullname match for Maître Anne vs ['anne waugh', 'waugh anne']
no exact fullname match for Maître Anne vs ['anne claire prince', 'prince anne claire']


 54%|█████▍    | 938/1737 [03:47<04:20,  3.07it/s]

no exact fullname match for Maître Anne vs ['anne sophie ledonne', 'ledonne anne sophie']


 54%|█████▍    | 942/1737 [03:47<02:18,  5.74it/s]

no exact fullname match for Sunyer Jordi vs ['jordi sunyer monfort', 'sunyer monfort jordi']


 55%|█████▍    | 948/1737 [03:48<02:41,  4.90it/s]

no exact fullname match for Chamel Véronique vs ['veronique mossuz', 'mossuz veronique']


 55%|█████▍    | 951/1737 [03:49<02:28,  5.31it/s]

no exact fullname match for Dumas Orianne vs ['orianne dumas milne edwards', 'dumas milne edwards orianne']


 55%|█████▍    | 952/1737 [03:49<02:34,  5.08it/s]

no exact fullname match for Santos Miguel vs ['miguel angel santos', 'santos miguel angel']
no exact fullname match for Santos Miguel vs ['miguel dias santos', 'santos miguel dias']
no exact fullname match for Santos Miguel vs ['miguel santos neves', 'neves miguel santos']
no exact fullname match for Santos Miguel vs ['miguel de los santos', 'santos miguel de los']
no exact fullname match for Santos Miguel vs ['miguel vidal santos', 'vidal santos miguel']
no exact fullname match for Santos Miguel vs ['amandio miguel dos santos', 'santos amandio miguel dos']
no exact fullname match for Santos Miguel vs ['miguel calvo santos', 'calvo santos miguel']


 55%|█████▌    | 957/1737 [03:51<04:24,  2.95it/s]

no exact fullname match for Hubert Gilles vs ['gilles denoix', 'denoix gilles']
no exact fullname match for Hubert Gilles vs ['albert clement gilles hubert de scoville', 'scoville albert clement gilles hubert de']


 56%|█████▌    | 968/1737 [03:54<02:44,  4.69it/s]

no exact fullname match for Maesano Isabella Annesi vs ['isabella annesi maesano', 'annesi maesano isabella']
no exact fullname match for Leveque Philippe vs ['jean philippe leveque', 'leveque jean philippe']


 56%|█████▌    | 971/1737 [03:55<03:48,  3.35it/s]

no exact fullname match for Leveque Philippe vs ['philippe berenger leveque', 'berenger leveque philippe']


 57%|█████▋    | 990/1737 [03:58<01:29,  8.37it/s]

Error in getting notice 236950320


 57%|█████▋    | 991/1737 [03:59<03:01,  4.12it/s]

no exact fullname match for Humbert Bernard vs ['bernard gaspard', 'gaspard bernard']
no exact fullname match for Humbert Bernard vs ['colette humbert', 'humbert colette']


 57%|█████▋    | 992/1737 [03:59<02:53,  4.30it/s]

: 

: 

In [None]:
# Display the column labels of the partners table.
df_partenaires.columns

In [None]:
# Map ORCID identifiers to IdRef, only when the partners table exposes the
# ORCID column configured for this source.
if sources[source]['id_ORCID'] in list(df_partenaires.columns):
    # Same file as the one loaded into `cached_data_orcid` at the top of the
    # notebook; writing elsewhere means the cache is never reused on the next run.
    chemin_cache_orcid = f"./DATA/{source}/caches/cached_{source.lower()}_data_orcid.pkl"

    # Start again from the partners table saved after the person-matching step.
    df_partenaires = pd.read_json(f"./DATA/{source}/df_partenaires_id_personne.json")
    df_partenaires['idref_ORCID'] = df_partenaires.progress_apply(
        lambda row: orcid_to_idref(row, cached_data_orcid, sources[source]['id_ORCID'], Authorization_ORCID),
        axis=1,
    )

    # Persist the cache first, then export the enriched table in both formats.
    write_cache(cached_data_orcid, chemin_cache_orcid)
    df_partenaires.to_excel(f"./DATA/{source}/df_partenaires_id_personne_ORCID.xlsx")
    df_partenaires.to_json(f"./DATA/{source}/df_partenaires_id_personne_ORCID.json")

# ENVOI DES PROJETS SUR SCANR

In [30]:
# Reload the most enriched partners file available for this source: the number
# of preferred person identifiers selects which export is read.
nb_identifiants_personne = len(sources[source]['identifiants_preferes_personne'])

if nb_identifiants_personne == 2:
    chemin_partenaires = f"./DATA/{source}/df_partenaires_id_personne_ORCID.json"
elif nb_identifiants_personne == 1:
    chemin_partenaires = f"./DATA/{source}/df_partenaires_id_personne.json"
else:
    chemin_partenaires = f"./DATA/{source}/df_partenaires_id_structures.json"

df_partenaires = pd.read_json(chemin_partenaires)

In [65]:
# Where `id_structure` holds a list, keep only its first element.
est_une_liste = df_partenaires.id_structure.apply(lambda valeur: isinstance(valeur, list))
premiers_ids = df_partenaires.loc[est_une_liste, 'id_structure'].apply(lambda ids: ids[0])
df_partenaires.loc[est_une_liste, 'id_structure'] = premiers_ids

In [None]:
# Build the `persons` column only when the source configuration declares both
# the last-name and first-name fields; otherwise fill it with NaN.
config_source = sources[source]
champs_configures = list(config_source.keys())
a_nom_et_prenom = all(champ in champs_configures for champ in ('nom', 'prenom'))

if not a_nom_et_prenom:
    df_partenaires['persons'] = np.nan
else:
    # Preferred identifier first, since `persons` is computed from the same row afterwards.
    df_partenaires['id_person'] = df_partenaires.apply(
        lambda ligne: identifiant_prefere(ligne, config_source['identifiants_preferes_personne']),
        axis=1,
    )
    df_partenaires['persons'] = df_partenaires.progress_apply(
        lambda ligne: persons(ligne, config_source['prenom'], config_source['nom']),
        axis=1,
    )

In [70]:
def _personnes_renseignees(personnes):
    """Return the non-null entries of a `persons` group as a plain list."""
    # Explicit comparison kept from the original cell: the type returned by
    # persons() is not visible here.
    return [p for p in personnes.tolist() if pd.isna(p) == False]


# Collapse the partners table to one row per project, gathering its persons.
if source != 'SIRANO':
    # NOTE(review): `dropna=False` used to be passed to agg(), where it is not a
    # grouping option; if rows without a project code must be kept it belongs in
    # groupby() -- confirm the intent before adding it.
    df_partenaires = (
        df_partenaires
        .groupby([sources[source]['code_projet']])
        .agg({'persons': _personnes_renseignees})
        .reset_index()
    )
else:
    # SIRANO has no separate projects file: the project table is derived from
    # the partners table, keyed by every project-level column.
    cles_projet = [sources[source][champ] for champ in ('code_projet', 'annee', 'acronyme', 'titre', 'budget')]
    # reset_index() turns the grouping keys back into ordinary columns, like the
    # other branch, so later cells can read them and `reset_index()` /
    # `del df_projets['index']` behaves the same for every source.
    df_projets = (
        df_partenaires
        .groupby(cles_projet, dropna=False)
        .agg({'persons': _personnes_renseignees})
        .reset_index()
    )

In [25]:
# Fetch the projects from the funder's site.
# For SIRANO nothing is downloaded: `df_projets` is the table built from the
# partners in the previous cell.
if source=='ANR':
    page_projets_10 = requests.get(sources[source]['url_projets']).json()
    colonnes_projets_10 = page_projets_10['columns']
    donnees_projets_10 = page_projets_10['data']
    df_projets=pd.DataFrame(data=donnees_projets_10,columns=colonnes_projets_10)
elif source=='IRESP':
    df_projets1=pd.read_csv(sources[source]['url_projets1'] ,sep=";", encoding='UTF-8')
    df_projets2=pd.read_csv(sources[source]['url_projets2'] ,sep=";", encoding='UTF-8')
    df_projets=pd.concat([df_projets1,df_projets2])
    # Fall back on the generic title when the French title is missing.
    titre_fr_manquant=pd.isna(df_projets['Titre_du_projet_FR'])
    df_projets.loc[titre_fr_manquant,'Titre_du_projet_FR']=df_projets.loc[titre_fr_manquant,'Titre_du_projet']
elif source!='SIRANO':
    df_projets=pd.read_csv(sources[source]['url_projets'] ,sep=";", encoding='ISO-8859-1')

# Renumber the rows. When the index is a plain one, reset_index() adds an
# 'index' column that is discarded; when it holds grouping keys (SIRANO), they
# come back as columns and there is no 'index' column to delete, hence
# errors='ignore' instead of an unconditional `del`.
df_projets=df_projets.reset_index().drop(columns='index', errors='ignore')

In [26]:
# Display the column labels of the projects table.
df_projets.columns

Index(['code convention initiale', 'code convention homogénéisé', 'Programme',
       'Année', 'Acronyme', 'Titre FR', 'Titre EN', 'Date de début',
       'Subvention ', 'Résumé FR', 'Résumé EN'],
      dtype='object')

In [None]:
def _budget_en_float(valeur):
    """Convert a raw budget cell to a float.

    Numeric cells are converted directly. Text cells have spaces (including
    non-breaking ones) and the euro sign removed, and the decimal comma turned
    into a dot, before conversion. Raises ValueError when the remaining text is
    not a number.
    """
    if isinstance(valeur, (int, float, np.integer, np.floating)):
        return float(valeur)
    texte = str(valeur)
    for symbole in (' ', '\u00a0', '\u202f', '€'):
        texte = texte.replace(symbole, '')
    # No blind removal of '.0': it turned "100.00" into 1000 and 1234.05 into 123405.
    return float(texte.replace(',', '.'))


if source!='SIRANO':
    # Attach the persons gathered per project to the downloaded project table.
    df_projets=pd.merge(df_projets,df_partenaires,on=sources[source]['code_projet'], how='left')
else :
    col_code=sources[source]['code_projet']
    col_annee=sources[source]['annee']
    col_acronyme=sources[source]['acronyme']
    # The project keys may still sit in the index produced by the groupby.
    if col_code not in df_projets.columns:
        df_projets=df_projets.reset_index()
    # Build the id from the project's own row. Taking it from `df_partenaires`
    # aligned partner rows onto project rows by position and mixed up the ids.
    df_projets['id']=df_projets.apply(lambda row: f"{row[col_code]}-{row[col_annee]}-{row[col_acronyme]}" , axis=1)
    # The raw code column is superseded by the composite id.
    df_projets=df_projets.drop(columns=[col_code])
    # From here on, the project code of this source is the composite id
    # (.loc so the configuration table itself is updated, not a temporary copy).
    sources.loc['code_projet', source]='id'

df_projets['type']=source
df_projets['name']=df_projets.progress_apply(lambda row: projects(row,sources[source]['titre_fr'],sources[source]['titre_en']) ,axis=1)
df_projets['description']=df_projets.progress_apply(lambda row: projects(row,sources[source]['resume_fr'],sources[source]['resume_en']) ,axis=1)

col_budget=sources[source]['budget']
df_projets[col_budget]=df_projets[col_budget].apply(_budget_en_float)

# Rename to the target schema and keep only the fields that are sent.
df_projets=df_projets.rename(columns={sources[source]['annee']:'year',sources[source]['acronyme']:'acronym',
                                      sources[source]['budget']:'budget_financed',sources[source]['code_projet']:'id'})
df_projets=df_projets[['id','type','name','description','acronym','year','budget_financed','persons']]

In [None]:
# Display the projects table for a visual check.
df_projets

In [None]:
# Preview the payload built from the first project: fields whose text form is
# one of the placeholder values below are left out.
valeurs_vides = ('nan', 'NaN', 'None', 'x', '[]')
dict_row = df_projets.iloc[0, :].to_dict()
dict_row2 = {}
for cle, valeur in dict_row.items():
    if str(valeur) not in valeurs_vides:
        dict_row2[cle] = valeur
dict_row2

In [None]:
# Upload: POST every project to the API and keep the error responses in `err`.
err = []
for _, row in df_projets.iterrows():
    # Leave out the fields whose text form is a placeholder marker.
    payload = {
        key: value
        for key, value in row.to_dict().items()
        if str(value) not in ('nan', 'NaN', 'None', 'x', '[]')
    }
    try:
        response = requests.post('http://185.161.45.213/projects/projects', json=payload, headers={"Authorization": Authorization})
        res = response.json()
        if res.get('status') != 'ERR':
            continue
        err.append(res)
        # Responses with error code 422 are only collected; any other code
        # prints the errors gathered so far.
        if res.get('error').get('code') != 422:
            print(err)
            pp.pprint(err)
    except Exception as e:
        pp.pprint(e)

In [None]:
# Distinct 'id' issues reported in the collected error responses.
(
    pd.Series([erreur.get('issues').get('id') for erreur in err])
    .drop_duplicates()
    .tolist()
)

Pour mettre à jour : n'envoyer que les projets qui ne sont pas encore présents dans la base

In [None]:
# Compare the local projects with the ids already stored by the API and keep
# only the projects that still have to be sent.
# NOTE(review): the query is hard-coded to type "ANR" whatever `source` is — confirm.
base_url = 'http://185.161.45.213/projects/projects?where={"type":"ANR"}&projection={"id":1}&max_results=500'
auth_headers = {"Authorization": Authorization}

# The href of the "last" link ends with the number of the last page.
nbr_page = int(requests.get(base_url + '&page=1', headers=auth_headers).json()['hrefs']['last']['href'].split('page=')[1])

list_ids = []
for i in range(1, nbr_page + 1):
    print("page", i)
    page = requests.get(base_url + f"&page={i}", headers=auth_headers).json()
    list_ids.extend(item['id'] for item in page['data'])

# Sets give constant-time membership tests; the original rebuilt
# list(df_projets['id']) for every element, which was quadratic.
ids_en_base = set(list_ids)
ids_locaux = set(df_projets['id'])

projets_a_ajouter = [x for x in df_projets['id'] if x not in ids_en_base]

projets_a_retirer = [x for x in list_ids if x not in ids_locaux]

# isin always yields a boolean mask, even when the frame is empty.
df_projets = df_projets[df_projets['id'].isin(projets_a_ajouter)]

In [None]:
# Preview the payload for the first remaining project, without the fields
# whose text form is a placeholder marker.
dict_row = df_projets.iloc[0].to_dict()
dict_row2 = {
    key: value
    for key, value in dict_row.items()
    if str(value) not in ('nan', 'NaN', 'None', 'x', '[]')
}
dict_row2

In [81]:
# POST every remaining project to the API and keep the error responses in `err`.
err = []
for _, row in df_projets.iterrows():
    # Leave out the fields whose text form is a placeholder marker.
    payload = {
        key: value
        for key, value in row.to_dict().items()
        if str(value) not in ('nan', 'NaN', 'None', 'x', '[]')
    }
    try:
        response = requests.post('http://185.161.45.213/projects/projects', json=payload, headers={"Authorization": Authorization})
        res = response.json()
        if res.get('status') != 'ERR':
            continue
        err.append(res)
        # Responses with error code 422 are only collected; any other code
        # prints the errors gathered so far.
        if res.get('error').get('code') != 422:
            print(err)
            pp.pprint(err)
    except Exception as e:
        pp.pprint(e)

In [None]:
# Number of local projects that were missing from the API.
len(projets_a_ajouter)

Modifications

In [None]:
# Fetch the first page of ANR projects (id and year only) and read the number
# of the last page from the href of its "last" link.
first_page = requests.get(
    'http://185.161.45.213/projects/projects?where={"type":"ANR"}&projection={"id":1,"year":1}&max_results=500'+f"&page={1}",
    headers={"Authorization": Authorization},
).json()
last_href = first_page['hrefs']['last']['href']
nbr_page = int(last_href.split('page=')[1])
nbr_page

In [None]:
# Walk through every result page and collect the 'year' of each stored project.
# Despite its name, `list_ids` holds years here, not ids.
list_ids = []
for i in range(1, nbr_page + 1):
    print("page", i)
    page = requests.get(
        'http://185.161.45.213/projects/projects?where={%22type%22:%22ANR%22}&projection={%22id%22:1,%22year%22:1}&max_results=500'+f"&page={i}",
        headers={"Authorization": Authorization},
    ).json()
    list_ids.extend(item['year'] for item in page['data'])

In [None]:
# Largest value collected in list_ids.
max(list_ids)

In [None]:
# Keep only the projects whose id appears in list_ids.
# NOTE(review): the cell that fills list_ids just above stores 'year' values,
# so this id-based filter probably matches nothing — confirm what was intended.
mask = df_projets['id'].apply(lambda project_id: project_id in list_ids)
df_projets = df_projets[mask]

In [None]:
# In principle every project that is not closed should be refreshed with its
# titles, abstracts and persons; maybe the projects of the last 3 years could
# be updated automatically?

err = []
# Fix: iterate over the project ids. The original looped over
# df_projets.iterrows(), so each item was an (index, row) tuple whose repr
# ended up in the URL; it also shadowed the builtin `id`.
for project_id in df_projets['id']:
    url = f"http://185.161.45.213/projects/projects/{project_id}"
    project = requests.get(url, headers={'Authorization': Authorization}).json()
    # The etag of the stored record is sent back in the If-Match header.
    head = {"Authorization": Authorization, "If-Match": project['etag'], "Content-Type": "application/json"}

    # NOTE(review): the patched value is hard-coded to year 2023 — confirm.
    r = requests.patch(url, json={"year": 2023}, headers=head)
    res = r.json()
    if res.get('status') == 'ERR':
        err.append(res)
        # Responses with error code 422 are only collected; any other code
        # prints the errors gathered so far.
        if res.get('error').get('code') != 422:
            print(err)
            pp.pprint(err)


# ENVOI DES PARTENAIRES SUR SCANR

In [111]:
# Load the partners table saved for the current source.
df_partenaires=pd.read_json(f"./DATA/{source}/df_partenaires_id_structures.json")

In [None]:
# Show the column names of the partners table.
df_partenaires.columns

In [113]:
### WARNING: check that the SIRANO projects belong to French structures
if source == 'IRESP':
    pays_col = sources[source]['pays']
    ville_col = sources[source]['ville']
    # The country is the text after the first '(' in the city value (closing
    # brackets removed); values without '(' default to 'France'.
    df_partenaires[pays_col] = df_partenaires.loc[:, ville_col].apply(
        lambda x: 'France' if x.find('(') < 0 else x.split('(')[1].replace(')', ''))
    # The city keeps only the text before the first '('.
    df_partenaires.loc[:, ville_col] = df_partenaires.loc[:, ville_col].apply(
        lambda x: x if x.find('(') < 0 else x.split('(')[0])

# `address` is a project helper called with the row, the country and city
# column names and the source.
df_partenaires['address'] = df_partenaires.apply(
    lambda row: address(row, sources[source]['pays'], sources[source]['ville'], source),
    axis=1,
)
# When several structure ids are stored as a list, keep the first one.
df_partenaires.loc[:, 'id_structure'] = df_partenaires.loc[:, 'id_structure'].apply(
    lambda x: x if not isinstance(x, list) else x[0])

In [None]:
# Build the participation records in the shape expected by the API.
code_col = sources[source]['code_projet']
struct_col = sources[source]['nom_structure']
# Column whose name is the structure-name column suffixed with '2'.
# Fix: the ANSES/SIRANO branch wrote {nom_structure}+'2', i.e. a set literal
# added to a str, which raises TypeError; it now matches the REG_IDF branch.
struct2_col = str(struct_col) + '2'

if source in ['ANSES', 'SIRANO']:
    nom_col = sources[source]['nom']
    prenom_col = sources[source]['prenom']
    df_partenaires['id'] = df_partenaires.apply(
        lambda row: f"{row[code_col]}-{row[struct2_col]}-{row[nom_col]}-{row[prenom_col]}", axis=1)
if source == 'REG_IDF':
    df_partenaires['id'] = df_partenaires.apply(
        lambda row: f"{row[code_col]}-{row[struct2_col]}-{row['entite_role']}", axis=1)

df_partenaires['address'] = df_partenaires.apply(
    lambda row: address(row, sources[source]['pays'], sources[source]['ville'], source), axis=1)
df_partenaires = df_partenaires.rename(columns={struct_col: 'name', code_col: 'project_id',
                                                'id_structure': 'participant_id',
                                                'Projet.Partenaire.Code_Decision_ANR': 'id'})
df_partenaires['project_type'] = source
# Keep a single participant id as text: first list element, or the first
# ';'-separated value, with '.0' removed.
df_partenaires['participant_id'] = df_partenaires['participant_id'].apply(
    lambda x: str(x[0]).replace('.0', '') if isinstance(x, list) else str(x).split(';')[0].replace('.0', ''))
df_partenaires['name'] = df_partenaires['name'].astype(str)
df_partenaires = df_partenaires[['id', 'project_id', 'project_type', 'participant_id', 'name', 'address']]
df_partenaires

In [None]:
# Rows whose 'id' repeats the 'id' of an earlier row.
df_partenaires[df_partenaires.duplicated(subset=['id'])]

In [None]:
# Preview the payload for the second participation, without the fields whose
# text form is a placeholder marker.
dict_row = df_partenaires.iloc[1].to_dict()
dict_row2 = {
    key: value
    for key, value in dict_row.items()
    if str(value) not in ('nan', 'NaN', 'None', 'x')
}
dict_row2

In [None]:
# Number of participation rows.
len(df_partenaires)

In [None]:
# POST every participation to the API and keep the error responses in `err`.
err = []
for i, row in df_partenaires.iterrows():
    # Leave out the fields whose text form is a placeholder marker.
    payload = {
        key: value
        for key, value in row.to_dict().items()
        if str(value) not in ('nan', 'NaN', 'None', 'x')
    }
    try:
        response = requests.post('http://185.161.45.213/projects/participations', json=payload, headers={"Authorization": Authorization})
        res = response.json()
        if res.get('status') != 'ERR':
            continue
        # Print the index label of the rejected row.
        print(i)
        err.append(res)
        # Responses with error code 422 are only collected; any other code
        # prints the errors gathered so far.
        if res.get('error').get('code') != 422:
            print(err)
            pp.pprint(err)
    except Exception as e:
        pp.pprint(e)

Mise à jour

In [None]:
# Compare the local participations with the ids already stored by the API and
# keep only the participations that still have to be sent.
# NOTE(review): the query is hard-coded to project_type "ANR" whatever `source` is — confirm.
base_url = 'http://185.161.45.213/projects/participations?where={"project_type":"ANR"}&projection={"id":1}&max_results=500'
auth_headers = {"Authorization": Authorization}

# The href of the "last" link ends with the number of the last page.
nbr_page = int(requests.get(base_url + '&page=1', headers=auth_headers).json()['hrefs']['last']['href'].split('page=')[1])

list_ids = []
for i in range(1, nbr_page + 1):
    print("page", i)
    page = requests.get(base_url + f"&page={i}", headers=auth_headers).json()
    list_ids.extend(item['id'] for item in page['data'])

# Sets give constant-time membership tests; the original rebuilt a
# deduplicated list for every element, which was quadratic.
ids_en_base = set(list_ids)
ids_locaux = set(df_partenaires['id'])

projets_a_ajouter = [x for x in df_partenaires['id'].drop_duplicates() if x not in ids_en_base]

projets_a_retirer = [x for x in list_ids if x not in ids_locaux]

# isin always yields a boolean mask, even when the frame is empty.
df_partenaires = df_partenaires[df_partenaires['id'].isin(projets_a_ajouter)]


In [None]:
# Number of local participation ids that were missing from the API.
len(projets_a_ajouter)

In [108]:
# POST every remaining participation to the API and keep the error responses in `err`.
err = []
for i, row in df_partenaires.iterrows():
    # Leave out the fields whose text form is a placeholder marker.
    payload = {
        key: value
        for key, value in row.to_dict().items()
        if str(value) not in ('nan', 'NaN', 'None', 'x')
    }
    try:
        response = requests.post('http://185.161.45.213/projects/participations', json=payload, headers={"Authorization": Authorization})
        res = response.json()
        if res.get('status') != 'ERR':
            continue
        # Print the index label of the rejected row.
        print(i)
        err.append(res)
        # Responses with error code 422 are only collected; any other code
        # prints the errors gathered so far.
        if res.get('error').get('code') != 422:
            print(err)
            pp.pprint(err)
    except Exception as e:
        pp.pprint(e)

Modifications

In [None]:
# List the stored ANR participations that have no participant_id yet and keep
# only the local rows matching them.
base_url = 'http://185.161.45.213/projects/participations?where={"project_type":"ANR","participant_id":{"$exists":false}}&projection={"id":1}&max_results=500'
auth_headers = {"Authorization": Authorization}

# The href of the "last" link ends with the number of the last page.
nbr_page = int(requests.get(base_url + '&page=1', headers=auth_headers).json()['hrefs']['last']['href'].split('page=')[1])

list_ids = []
for i in range(1, nbr_page + 1):
    print("page", i)
    page = requests.get(base_url + f"&page={i}", headers=auth_headers).json()
    list_ids.extend(item['id'] for item in page['data'])

# isin replaces a per-row scan of the whole list and always yields a boolean
# mask, even when the frame is empty.
df_partenaires = df_partenaires[df_partenaires['id'].isin(list_ids)]

In [None]:
# Number of ids collected from the API.
len(list_ids)

In [127]:
# Discard the rows whose participant_id is the placeholder 'x' or the text 'None'.
participant = df_partenaires['participant_id']
df_partenaires = df_partenaires[~((participant == 'x') | (participant == 'None'))]

In [None]:
# Display the remaining participations.
df_partenaires

In [None]:
# PATCH the participant_id of every remaining participation and keep the error
# responses in `err`.
err = []
for i, row in df_partenaires.iterrows():
    print(i)
    participation_id = row['id']
    url = f"http://185.161.45.213/projects/participations/{participation_id}"
    stored = requests.get(url, headers={'Authorization': Authorization}).json()
    # The etag of the stored record is sent back in the If-Match header.
    head = {"Authorization": Authorization, "If-Match": stored['etag'], "Content-Type": "application/json"}

    res = requests.patch(url, json={"participant_id": row['participant_id']}, headers=head).json()
    if res.get('status') != 'ERR':
        continue
    err.append(res)
    # Responses with error code 422 are only collected; any other code prints
    # the errors gathered so far.
    if res.get('error').get('code') != 422:
        print(err)
        pp.pprint(err)

In [None]:
# Distinct 'id' issues from the collected errors, with their first 25 characters cut off.
(
    pd.Series([erreur.get('issues').get('id')[25:] for erreur in err])
    .drop_duplicates()
    .tolist()
)

Faire un nouveau fichier avec toutes les structures manquantes

In [11]:
# Load the workbook of partners that could not be identified.
struct_manq=pd.read_excel('scanr_partenaires_non_identifies.xlsx')

  warn(msg)


In [19]:
# Names ('Nom') of the rows flagged New == 'IRESP', one row per distinct name.
nom1=struct_manq[struct_manq.New=='IRESP'].drop_duplicates(subset='Nom').Nom

In [15]:
# Load the current workbook of unidentified IRESP partners.
iresp_actuel=pd.read_excel('./missing_ids_structures/partenaires_non_identifies_IRESP.xlsx')

In [20]:
# Team names ('Nom_equipe') from the current IRESP workbook.
nom2=iresp_actuel['Nom_equipe']

In [21]:
# Both name lists put end to end in a single Series.
pd.Series(list(nom1)+list(nom2))

0      Association SPP-IR (Soins Primaires Pluriprofe...
1                                Campus LyonTech la DOUA
2            Centre d’AddictoVigilance de Paris (CEIP-A)
3        CIUS - Centre d'innovation et d'Usages en Sante
4                     Ecole santé social Sud-Est (ESSSE)
                             ...                        
486    AP-HP, Hôpital Saint Louis, Office de Recherch...
487         Inserm ERI-20 Nutrition, hormones et cancers
488                                          Inserm U909
489                                             GIS ELFE
490    LABORATOIRE D'ÉCONOMIE ET DE GESTION DES ORGAN...
Length: 491, dtype: object

In [22]:
# Same combined Series with repeated names removed; the displayed length (491)
# equals that of the previous cell's output.
pd.Series(list(nom1)+list(nom2)).drop_duplicates()

0      Association SPP-IR (Soins Primaires Pluriprofe...
1                                Campus LyonTech la DOUA
2            Centre d’AddictoVigilance de Paris (CEIP-A)
3        CIUS - Centre d'innovation et d'Usages en Sante
4                     Ecole santé social Sud-Est (ESSSE)
                             ...                        
486    AP-HP, Hôpital Saint Louis, Office de Recherch...
487         Inserm ERI-20 Nutrition, hormones et cancers
488                                          Inserm U909
489                                             GIS ELFE
490    LABORATOIRE D'ÉCONOMIE ET DE GESTION DES ORGAN...
Length: 491, dtype: object