### Notebook to convert json to csv
The aim of this notebook is to convert JSON files into CSV: the candidate JSON files are read, the required values are extracted, and the result is saved to CSV files for import into the Oracle chatbot platform.

In [310]:
# imports and read files
import pandas as pd
# Load both candidate JSON files into DataFrames in a single pass (CE first, then GC)
CE_candidats_src, GC_candidats_src = map(
    pd.read_json,
    (
        '../CE/datas/2351/CE_infosCandidat.json',
        '../GC/datas/2353/GC_infosCandidat.json',
    ),
)

In [311]:
# Sanity check: display the first three rows of the CE source frame
CE_candidats_src.iloc[:3]

Unnamed: 0,fonction,id,liste,nom,photoUrl,sexe
0,Conseiller-ère d'Etat,20001,"{'id': 203, 'noDepot': 3}",WENGER Saliha (Salika),WENGER_Saliha.jpg,1
1,Conseiller-ère d'Etat,20002,"{'id': 203, 'noDepot': 3}",HALLER Jocelyne,HALLER_Jocelyne.jpg,1
2,Conseiller-ère d'Etat,20003,"{'id': 201, 'noDepot': 1}",EMERY-TORRACINTA Anne,EMERY-TORRACINTA_Anne.jpg,1


In [312]:
# Sanity check: display the first three rows of the GC source frame
GC_candidats_src.iloc[:3]

Unnamed: 0,fonction,id,liste,nom,photoUrl,sexe
0,Député-e,10001,"{'id': 101, 'noDepot': 1}",PAGANI Rémy,PAGANI_Remy.jpg,0
1,Député-e,10002,"{'id': 101, 'noDepot': 1}",WENGER Saliha (Salika),WENGER_Saliha.jpg,1
2,Député-e,10003,"{'id': 101, 'noDepot': 1}",ZAUGG Christian,ZAUGG_Christian.jpg,0


In [313]:
# Build the working frames by discarding the columns that are not exported.
# Target entities format: entity,value,synonyms
_unused_columns = ['id', 'liste', 'photoUrl', 'sexe']
df_CE = CE_candidats_src.drop(columns=_unused_columns)
df_GC = GC_candidats_src.drop(columns=_unused_columns)

In [314]:
# Seed the 'synonyms' column of each frame with a copy of its 'nom' column
for _frame in (df_CE, df_GC):
    _frame['synonyms'] = _frame['nom']

In [315]:
# Rename the columns to the entity/value headers; index=str also converts the row labels to strings
_column_names = {"fonction": "entity", "nom": "value"}
df_CE = df_CE.rename(index=str, columns=_column_names)
df_GC = df_GC.rename(index=str, columns=_column_names)

In [316]:
# Normalise the synonyms column: all to lowercase, replace space by colon,
# remove parentheses, then remove 'dit:'
def _clean_synonyms(synonyms):
    """Return a normalised copy of a string Series of synonyms.

    Steps, applied in this order:
      1. lowercase everything;
      2. replace each space with ':';
      3. remove '(' and ')';
      4. remove every literal occurrence of 'dit:'.

    Every replacement is a literal (non-regex) substitution. ``regex=False``
    is passed explicitly because '(' and ')' are regex metacharacters and the
    default of the ``regex`` argument of ``Series.str.replace`` is not the
    same in every pandas version.

    Parameters
    ----------
    synonyms : pandas.Series
        Series of strings to normalise.

    Returns
    -------
    pandas.Series
        A new Series holding the cleaned strings.
    """
    cleaned = synonyms.str.lower()
    # Order matters: a raw 'dit ' only becomes 'dit:' once spaces have been
    # replaced by colons, so the 'dit:' removal has to come last.
    # NOTE(review): the match is literal, so 'dit:' is also stripped from the
    # end of a longer token (e.g. 'bandit:' -> 'ban') - confirm against the data.
    for old, new in ((' ', ':'), ('(', ''), (')', ''), ('dit:', '')):
        cleaned = cleaned.str.replace(old, new, regex=False)
    return cleaned


## FOR CE
df_CE['synonyms'] = _clean_synonyms(df_CE['synonyms'])
## FOR GC
df_GC['synonyms'] = _clean_synonyms(df_GC['synonyms'])

In [317]:
# Inspect the first five rows of the transformed CE frame
df_CE.iloc[:5]

Unnamed: 0,entity,value,synonyms
0,Conseiller-ère d'Etat,WENGER Saliha (Salika),wenger:saliha:salika
1,Conseiller-ère d'Etat,HALLER Jocelyne,haller:jocelyne
2,Conseiller-ère d'Etat,EMERY-TORRACINTA Anne,emery-torracinta:anne
3,Conseiller-ère d'Etat,APOTHÉLOZ Thierry,apothéloz:thierry
4,Conseiller-ère d'Etat,SALERNO Sandrine,salerno:sandrine


In [318]:
# Inspect the first five rows of the transformed GC frame
df_GC.iloc[:5]

Unnamed: 0,entity,value,synonyms
0,Député-e,PAGANI Rémy,pagani:rémy
1,Député-e,WENGER Saliha (Salika),wenger:saliha:salika
2,Député-e,ZAUGG Christian,zaugg:christian
3,Député-e,HALLER Jocelyne,haller:jocelyne
4,Député-e,EMERY-TORRACINTA Anne,emery-torracinta:anne


In [319]:
# Write each frame to its own CSV file, leaving out the index column
for _frame, _target in (
    (df_CE, './candidates-CE-Entities.csv'),
    (df_GC, './candidates-GC-Entities.csv'),
):
    _frame.to_csv(_target, index=False)

In [320]:
# Stack the CE rows on top of the GC rows so both can be exported together
df_ALL = pd.concat([df_CE, df_GC], axis=0)
# Write the combined frame to a single CSV file, leaving out the index column
df_ALL.to_csv(path_or_buf='./candidates-ALL-Entities.csv', index=False)