## Liste de locaux et communes
Données tirées du [site de l'État de Genève](https://www.ge.ch/votations/20180304/federal/) (votation fédérale du 4 mars 2018).

In [20]:
# import libraries
import pandas as pd
# Load the semicolon-separated list of locals into the raw frame.
df_raw = pd.read_csv(
    './DATA_format_JSON/listeLocaux.csv',
    sep=';',
)
# Show the first rows as a sanity check of the parse.
df_raw.head()

Unnamed: 0,commune_no,local_no,local-commune_nom,electorat,cartes-de-vote,bulletins,nuls,q1_blancs,q1_blancs_pc,q1_valables,...,q1_non_pc,q1_participation,q2_blancs,q2_blancs_pc,q2_valables,q2_oui,q2_oui_pc,q2_non,q2_non_pc,q2_participation
0,1,1,Aire-la-Ville,718,491,491,0,25,5.09%,466,...,14.59%,68.38%,6,1.22%,485,106,21.86%,379,78.14%,68.38%
1,2,1,Anières,1'313,869,869,0,20,2.30%,849,...,7.77%,66.18%,8,0.92%,861,178,20.67%,683,79.33%,66.18%
2,3,1,Avully,1'100,628,626,0,47,7.51%,579,...,15.72%,57.09%,8,1.28%,618,129,20.87%,489,79.13%,57.09%
3,4,1,Avusy,1'000,632,632,0,19,3.01%,613,...,9.46%,63.20%,1,0.16%,631,119,18.86%,512,81.14%,63.20%
4,5,1,Bardonnex,1'371,896,896,0,40,4.46%,856,...,14.49%,65.35%,7,0.78%,889,201,22.61%,688,77.39%,65.35%


In [21]:
# Derive the working frame from the raw one: discard every electorate /
# ballot / per-question column so that only the commune number, local number
# and name columns remain, then renumber the rows from 0.
df_edt = df_raw.drop(
    columns=[
        'electorat', 'cartes-de-vote', 'bulletins', 'nuls',
        'q1_blancs', 'q1_blancs_pc', 'q1_valables',
        'q1_oui', 'q1_oui_pc', 'q1_non', 'q1_non_pc', 'q1_participation',
        'q2_blancs', 'q2_blancs_pc', 'q2_valables',
        'q2_oui', 'q2_oui_pc', 'q2_non', 'q2_non_pc', 'q2_participation',
    ]
).reset_index(drop=True)
df_edt.head()

Unnamed: 0,commune_no,local_no,local-commune_nom
0,1,1,Aire-la-Ville
1,2,1,Anières
2,3,1,Avully
3,4,1,Avusy
4,5,1,Bardonnex


In [22]:
# Create zero-padded string versions of the commune and local numbers
# (at least two characters each) so they can be concatenated into one code.
df_edt['commune_no_padded'] = df_edt['commune_no'].astype(str).str.zfill(2)
df_edt['local_no_padded'] = df_edt['local_no'].astype(str).str.zfill(2)
# In local_no_padded_zeroed, local '01' becomes '00'.
# Exact-value replacement is used instead of a regex: a regex would also
# rewrite any longer value that merely contains '01' (e.g. '101' -> '100').
df_edt['local_no_padded_zeroed'] = df_edt['local_no_padded'].replace('01', '00')

In [23]:
# Preview the first rows, which now include the three padded helper columns.
df_edt.head()

Unnamed: 0,commune_no,local_no,local-commune_nom,commune_no_padded,local_no_padded,local_no_padded_zeroed
0,1,1,Aire-la-Ville,1,1,0
1,2,1,Anières,2,1,0
2,3,1,Avully,3,1,0
3,4,1,Avusy,4,1,0
4,5,1,Bardonnex,5,1,0


In [24]:
# First pass at the combined code: padded commune number followed by the
# zeroed local number, giving the ##00 form for every first local.
df_edt['commLocal'] = df_edt['commune_no_padded'].str.cat(
    df_edt['local_no_padded_zeroed']
)

In [25]:
# Preview the first rows with the new commLocal column.
df_edt.head()

Unnamed: 0,commune_no,local_no,local-commune_nom,commune_no_padded,local_no_padded,local_no_padded_zeroed,commLocal
0,1,1,Aire-la-Ville,1,1,0,100
1,2,1,Anières,2,1,0,200
2,3,1,Avully,3,1,0,300
3,4,1,Avusy,4,1,0,400
4,5,1,Bardonnex,5,1,0,500


In [26]:
# Fix bad numbers: for these four codes the first local must be ##01, not ##00.
comm_local_fixes = {
    '1200': '1201',  # chene bougeries centre
    '2100': '2101',  # cite rive
    '2800': '2801',  # grand lancy
    '4300': '4301',  # vernier village
}
# The result is assigned back to the column. Calling
# df_edt['commLocal'].replace(..., inplace=True) mutates a temporary Series:
# it warns on recent pandas and silently does nothing under Copy-on-Write.
# A dict replace matches whole values, so only these exact codes are changed.
df_edt['commLocal'] = df_edt['commLocal'].replace(comm_local_fixes)

In [27]:
# Preview the first rows after the code corrections.
df_edt.head()

Unnamed: 0,commune_no,local_no,local-commune_nom,commune_no_padded,local_no_padded,local_no_padded_zeroed,commLocal
0,1,1,Aire-la-Ville,1,1,0,100
1,2,1,Anières,2,1,0,200
2,3,1,Avully,3,1,0,300
3,4,1,Avusy,4,1,0,400
4,5,1,Bardonnex,5,1,0,500


In [28]:
# Only the name and the combined code are needed from here on: discard the
# numeric source columns and the intermediate padded helper columns.
df_edt = df_edt.drop(
    columns=[
        'commune_no', 'local_no',
        'commune_no_padded', 'local_no_padded', 'local_no_padded_zeroed',
    ]
)
df_edt.head()

Unnamed: 0,local-commune_nom,commLocal
0,Aire-la-Ville,100
1,Anières,200
2,Avully,300
3,Avusy,400
4,Bardonnex,500


In [29]:
# Extra rows carrying the ##00 code for the four communes whose first local
# was renumbered to ##01 in the correction step.
df2 = pd.DataFrame(
    {
        'local-commune_nom': ['Chêne-Bougeries', 'Genève', 'Lancy', 'Vernier'],
        'commLocal': [1200, 2100, 2800, 4300],
    }
)
df2

Unnamed: 0,local-commune_nom,commLocal
0,Chêne-Bougeries,1200
1,Genève,2100
2,Lancy,2800
3,Vernier,4300


In [30]:
# Append the rows of df2 to df_edt and renumber the index from 0.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat with ignore_index=True is the supported equivalent.
df_edt = pd.concat([df_edt, df2], ignore_index=True)

In [31]:
# Preview the first rows; the rows from df2 were added at the end, so they do not appear here.
df_edt.head()

Unnamed: 0,local-commune_nom,commLocal
0,Aire-la-Ville,100
1,Anières,200
2,Avully,300
3,Avusy,400
4,Bardonnex,500


In [32]:
# Add the three output columns:
#   value    - copy of the name column (normalised in a later cell)
#   entity   - constant entity label
#   synonyms - "<name>:<code>", with the code cast to str
df_edt = df_edt.assign(
    value=df_edt['local-commune_nom'],
    entity='LocalDeVote',
    synonyms=df_edt['local-commune_nom'].str.cat(
        df_edt['commLocal'].astype(str), sep=":"
    ),
)

In [33]:
# Preview the first rows with the new value, entity and synonyms columns.
df_edt.head()

Unnamed: 0,local-commune_nom,commLocal,value,entity,synonyms
0,Aire-la-Ville,100,Aire-la-Ville,LocalDeVote,Aire-la-Ville:0100
1,Anières,200,Anières,LocalDeVote,Anières:0200
2,Avully,300,Avully,LocalDeVote,Avully:0300
3,Avusy,400,Avusy,LocalDeVote,Avusy:0400
4,Bardonnex,500,Bardonnex,LocalDeVote,Bardonnex:0500


In [34]:
# The name and code are now embedded in value / synonyms, so the two source
# columns can be removed.
df_edt = df_edt.drop(columns=['local-commune_nom', 'commLocal'])

In [35]:
# Preview the first rows: only value, entity and synonyms remain.
df_edt.head()

Unnamed: 0,value,entity,synonyms
0,Aire-la-Ville,LocalDeVote,Aire-la-Ville:0100
1,Anières,LocalDeVote,Anières:0200
2,Avully,LocalDeVote,Avully:0300
3,Avusy,LocalDeVote,Avusy:0400
4,Bardonnex,LocalDeVote,Bardonnex:0500


In [36]:
# Normalise `value` into a lower-case, accent-free key with hyphens turned
# into spaces.
#
# The result is assigned back to the column: chained
# df_edt['value'].replace(..., inplace=True) calls act on a temporary Series,
# which warns on recent pandas and does nothing under Copy-on-Write.
#
# Accents are stripped generically rather than letter by letter. NFD
# normalisation splits each accented letter into its base letter plus
# combining marks (U+0300-U+036F), which are then removed. This covers every
# accent the previous hand-written list handled, plus ones it missed
# (e.g. ç, ë, û). Characters with no decomposition are left unchanged.
df_edt['value'] = (
    df_edt['value']
    .str.lower()
    .str.normalize('NFD')
    .str.replace(r'[\u0300-\u036f]', '', regex=True)
    # remove spec chars from value: hyphens become single spaces
    .str.replace('-', ' ', regex=False)
)
df_edt.head()

Unnamed: 0,value,entity,synonyms
0,aire la ville,LocalDeVote,Aire-la-Ville:0100
1,anieres,LocalDeVote,Anières:0200
2,avully,LocalDeVote,Avully:0300
3,avusy,LocalDeVote,Avusy:0400
4,bardonnex,LocalDeVote,Bardonnex:0500


In [37]:
# Extend each synonyms entry with the normalised lower-case value,
# separated by a colon.
df_edt['synonyms'] = df_edt['synonyms'].str.cat(df_edt['value'], sep=":")

In [38]:
# Preview the first rows with the extended synonyms column.
df_edt.head()

Unnamed: 0,value,entity,synonyms
0,aire la ville,LocalDeVote,Aire-la-Ville:0100:aire la ville
1,anieres,LocalDeVote,Anières:0200:anieres
2,avully,LocalDeVote,Avully:0300:avully
3,avusy,LocalDeVote,Avusy:0400:avusy
4,bardonnex,LocalDeVote,Bardonnex:0500:bardonnex


In [39]:
# Write the entity table to CSV, leaving out the DataFrame index column.
df_edt.to_csv(
    './electionBot_sBox/GBarthelet_chatbot/entities/localDeVote_match_communeIdJS_v2.csv',
    index=False,
)