In [9]:
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/stats-def-csv/stats_def.csv
/kaggle/input/circonscriptions-legislatives-030522/circonscriptions_legislatives_030522.shp
/kaggle/input/circonscriptions-legislatives-030522/circonscriptions_legislatives_030522.shx
/kaggle/input/circonscriptions-legislatives-030522/circonscriptions_legislatives_030522.prj
/kaggle/input/circonscriptions-legislatives-030522/circonscriptions_legislatives_030522.dbf
/kaggle/input/circonscriptions-legislatives-030522/circonscriptions_legislatives_030522.cpg
/kaggle/input/contours-france-entiere-latest-v2/contours-france-entiere-latest-v2.geojson
/kaggle/input/indic-stat-circo/indic-stat-circonscriptions-legislatives-2022.xlsx
/kaggle/input/d/jcabouat/resultats-legislatives/resultats-provisoires-par-bureau-de-votevmn.csv
/kaggle/input/d/jcabouat/resultats-legislatives/resultats-provisoires-par-circonscription.csv
/kaggle/input/d/jcabouat/resultats-legislatives/resultats-provisoires-par-departement.csv


In [10]:
!pip install -q streamlit
!pip install geopandas
!pip install folium



In [100]:
!rm -rf /kaggle/working/*

In [101]:
import os
import pandas as pd
import numpy as np

import geopandas as gpd
import folium
import tqdm

In [102]:
from multiprocessing import cpu_count
n_cores = cpu_count()
print(f'Number of Logical CPU cores: {n_cores}')

Number of Logical CPU cores: 4


In [103]:
from tqdm import tqdm

In [104]:
def new_folder(new_folder_path):
    # Check if the folder exists, if not, create it
    if not os.path.exists(new_folder_path):
        os.makedirs(new_folder_path)
        print(f'Created new folder at {new_folder_path}')
    else:
        print(f'Folder {new_folder_path} already exists')

In [105]:
def save_group_to_csv(df,column_name, output_directory,data_type):   
    groups = df.groupby(column_name)
    for name,group in tqdm(groups):
        filename = f"{output_directory}/{data_type}_{name}.csv"
        group.to_csv(filename, index=False)
        #print(f"Exported {filename}")

**0. Départements**

In [106]:
# Define the path for the new folder
dpt_folder_path = '/kaggle/working/dpt/'
new_folder(dpt_folder_path)

data_dpt_folder_path = '/kaggle/working/dpt/data/'
new_folder(data_dpt_folder_path)

Created new folder at /kaggle/working/dpt/
Created new folder at /kaggle/working/dpt/data/


In [107]:
df_dpt_resultats = pd.read_csv('/kaggle/input/d/jcabouat/resultats-legislatives/resultats-provisoires-par-departement.csv', sep=';')
df_dpt_resultats.rename(columns={'Code département': 'id_dep'}, inplace=True)
df_dpt_resultats.rename(columns={'Libellé département': 'libDepartement'}, inplace=True)
df_dpt_resultats['id_dep'] = df_dpt_resultats['id_dep'].astype(str)
df_dpt_resultats

Unnamed: 0,id_dep,libDepartement,Inscrits,Votants,% Votants,Abstentions,% Abstentions,Exprimés,% Exprimés/inscrits,% Exprimés/votants,...,% Voix/inscrits 13,% Voix/exprimés 13,Nuance candidat 14,Voix 14,% Voix/inscrits 14,% Voix/exprimés 14,Nuance candidat 15,Voix 15,% Voix/inscrits 15,% Voix/exprimés 15
0,01,Ain,446866,310781,"69,55%",136085,"30,45%",303352,"67,88%","97,61%",...,,,,,,,,,,
1,02,Aisne,372655,238505,"64,00%",134150,"36,00%",231265,"62,06%","96,96%",...,,,,,,,,,,
2,03,Allier,248492,170917,"68,78%",77575,"31,22%",164573,"66,23%","96,29%",...,,,,,,,,,,
3,04,Alpes-de-Haute-Provence,128144,89839,"70,11%",38305,"29,89%",86811,"67,74%","96,63%",...,,,,,,,,,,
4,05,Hautes-Alpes,114564,82232,"71,78%",32332,"28,22%",79958,"69,79%","97,23%",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,986,Wallis et Futuna,9031,6964,"77,11%",2067,"22,89%",6877,"76,15%","98,75%",...,,,,,,,,,,
103,987,Polynésie française,212049,91386,"43,10%",120663,"56,90%",89951,"42,42%","98,43%",...,,,,,,,,,,
104,988,Nouvelle-Calédonie,222114,133305,"60,02%",88809,"39,98%",130788,"58,88%","98,11%",...,,,,,,,,,,
105,ZX,Saint-Martin/Saint-Barthélemy,25227,7232,"28,67%",17995,"71,33%",6919,"27,43%","95,67%",...,,,,,,,,,,


In [109]:
df_dpt_resultats = df_dpt_resultats.melt(id_vars=['id_dep', 'libDepartement', 'Inscrits', 'Votants', '% Votants', 'Abstentions', '% Abstentions', 'Exprimés', '% Exprimés/inscrits', '% Exprimés/votants', 'Blancs', '% Blancs/inscrits', '% Blancs/votants', 'Nuls', '% Nuls/inscrits', '% Nuls/votants'])
df_dpt_resultats.rename(columns={'variable': 'indicateur', 'value': 'valeur'}, inplace=True)
df_dpt_resultats_fin = df_dpt_resultats[~df_dpt_resultats['id_dep'].isin(['986', '987', '988', 'ZX', 'ZZ'])]
df_dpt_resultats_fin

Unnamed: 0,id_dep,libDepartement,Inscrits,Votants,% Votants,Abstentions,% Abstentions,Exprimés,% Exprimés/inscrits,% Exprimés/votants,Blancs,% Blancs/inscrits,% Blancs/votants,Nuls,% Nuls/inscrits,% Nuls/votants,indicateur,valeur
0,01,Ain,446866,310781,"69,55%",136085,"30,45%",303352,"67,88%","97,61%",5326,"1,19%","1,71%",2103,"0,47%","0,68%",Nuance candidat 1,EXG
1,02,Aisne,372655,238505,"64,00%",134150,"36,00%",231265,"62,06%","96,96%",4878,"1,31%","2,05%",2362,"0,63%","0,99%",Nuance candidat 1,EXG
2,03,Allier,248492,170917,"68,78%",77575,"31,22%",164573,"66,23%","96,29%",3605,"1,45%","2,11%",2739,"1,10%","1,60%",Nuance candidat 1,EXG
3,04,Alpes-de-Haute-Provence,128144,89839,"70,11%",38305,"29,89%",86811,"67,74%","96,63%",2170,"1,69%","2,42%",858,"0,67%","0,96%",Nuance candidat 1,EXG
4,05,Hautes-Alpes,114564,82232,"71,78%",32332,"28,22%",79958,"69,79%","97,23%",1516,"1,32%","1,84%",758,"0,66%","0,92%",Nuance candidat 1,EXG
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6410,972,Martinique,304683,95062,"31,20%",209621,"68,80%",90186,"29,60%","94,87%",2732,"0,90%","2,87%",2144,"0,70%","2,26%",% Voix/exprimés 15,
6411,973,Guyane,108900,35462,"32,56%",73438,"67,44%",34337,"31,53%","96,83%",610,"0,56%","1,72%",515,"0,47%","1,45%",% Voix/exprimés 15,
6412,974,La Réunion,697145,317485,"45,54%",379660,"54,46%",298834,"42,87%","94,13%",9320,"1,34%","2,94%",9331,"1,34%","2,94%",% Voix/exprimés 15,
6413,975,Saint-Pierre-et-Miquelon,5069,2807,"55,38%",2262,"44,62%",2748,"54,21%","97,90%",37,"0,73%","1,32%",22,"0,43%","0,78%",% Voix/exprimés 15,


In [111]:
save_group_to_csv(df_dpt_resultats_fin,'id_dep','/kaggle/working/dpt/data','resultats')

100%|██████████| 102/102 [00:00<00:00, 815.68it/s]


**1. Circonscriptions législatives**

In [112]:
# Define the path for the new folder
circo_folder_path = '/kaggle/working/circo/'
new_folder(circo_folder_path)

map_circo_folder_path = '/kaggle/working/circo/map/'
new_folder(map_circo_folder_path)

data_circo_folder_path = '/kaggle/working/circo/data/'
new_folder(data_circo_folder_path)

Created new folder at /kaggle/working/circo/
Created new folder at /kaggle/working/circo/map/
Created new folder at /kaggle/working/circo/data/


a. Cartes

In [113]:
# Set filepath
fp_circo = "/kaggle/input/circonscriptions-legislatives-p20/circonscriptions-legislatives-p20.geojson"

# Read file using gpd.read_file()
df_circo = gpd.read_file(fp_circo, driver='GeoJSON', chunksize=10000)
df_circo = df_circo.to_crs(epsg=4326)
#print(df_bv.crs)
df_circo.head()

Unnamed: 0,codeDepartement,nomDepartement,codeCirconscription,nomCirconscription,geometry
0,1,Ain,104,4ème circonscription,"POLYGON ((4.92750 45.98000, 4.92450 45.96820, ..."
1,1,Ain,105,5ème circonscription,"POLYGON ((5.59060 45.76000, 5.59020 45.76180, ..."
2,1,Ain,103,3ème circonscription,"POLYGON ((5.58640 45.66710, 5.58680 45.66510, ..."
3,1,Ain,102,2ème circonscription,"POLYGON ((4.74920 46.00360, 4.74850 46.00200, ..."
4,1,Ain,101,1ère circonscription,"POLYGON ((5.51130 46.26450, 5.51050 46.26510, ..."


In [114]:
df_circo['centroid'] = df_circo.geometry.centroid
df_circo.rename(columns={'codeDepartement': 'id_dep',
                         'nomDepartement': 'libDep', 
                         'codeCirconscription': 'id_circo',
                         'nomCirconscription': 'libCirco'}, inplace=True)
df_circo['id_dep'] = df_circo['id_dep'].astype(str)

df_circo['libCirco'] = df_circo['libDep'] + " - " + df_circo['libCirco']

df_circo.head()


  df_circo['centroid'] = df_circo.geometry.centroid


Unnamed: 0,id_dep,libDep,id_circo,libCirco,geometry,centroid
0,1,Ain,104,Ain - 4ème circonscription,"POLYGON ((4.92750 45.98000, 4.92450 45.96820, ...",POINT (4.99592 46.12814)
1,1,Ain,105,Ain - 5ème circonscription,"POLYGON ((5.59060 45.76000, 5.59020 45.76180, ...",POINT (5.57105 46.01701)
2,1,Ain,103,Ain - 3ème circonscription,"POLYGON ((5.58640 45.66710, 5.58680 45.66510, ...",POINT (5.85020 46.07872)
3,1,Ain,102,Ain - 2ème circonscription,"POLYGON ((4.74920 46.00360, 4.74850 46.00200, ...",POINT (5.09768 45.89049)
4,1,Ain,101,Ain - 1ère circonscription,"POLYGON ((5.51130 46.26450, 5.51050 46.26510, ...",POINT (5.21240 46.29780)


In [115]:
df_circo['id_dep'] = np.where(df_circo['id_dep'] == 'ZA', '971',
                           np.where(df_circo['id_dep'] == 'ZB', '972',
                           np.where(df_circo['id_dep'] == 'ZC', '973',
                           np.where(df_circo['id_dep'] == 'ZD', '974',
                           np.where(df_circo['id_dep'] == 'ZS', '975',
                           np.where(df_circo['id_dep'] == 'ZM', '976', df_circo['id_dep']))))))

In [116]:
# Centroid column
df_circo['lat'] = df_circo["centroid"].y
df_circo['lon'] = df_circo["centroid"].x

df_circo['id_circo_len'] = df_circo['id_circo'].str.len()
df_circo['id_circo'] = np.where(df_circo['id_circo_len'] == 5, 
                                df_circo['id_circo'], 
                                df_circo['id_circo'].astype(str).str[:2] + '0' + df_circo['id_circo'].astype(str).str[2:])



In [118]:
df_circo_fin = df_circo[~df_circo['id_dep'].isin(['986', '987', '988', 'ZX', 'ZZ'])]
df_circo_fin

Unnamed: 0,id_dep,libDep,id_circo,libCirco,geometry,centroid,lat,lon,id_circo_len
0,01,Ain,01004,Ain - 4ème circonscription,"POLYGON ((4.92750 45.98000, 4.92450 45.96820, ...",POINT (4.99592 46.12814),46.128139,4.995922,4
1,01,Ain,01005,Ain - 5ème circonscription,"POLYGON ((5.59060 45.76000, 5.59020 45.76180, ...",POINT (5.57105 46.01701),46.017014,5.571051,4
2,01,Ain,01003,Ain - 3ème circonscription,"POLYGON ((5.58640 45.66710, 5.58680 45.66510, ...",POINT (5.85020 46.07872),46.078718,5.850199,4
3,01,Ain,01002,Ain - 2ème circonscription,"POLYGON ((4.74920 46.00360, 4.74850 46.00200, ...",POINT (5.09768 45.89049),45.890492,5.097675,4
4,01,Ain,01001,Ain - 1ère circonscription,"POLYGON ((5.51130 46.26450, 5.51050 46.26510, ...",POINT (5.21240 46.29780),46.297804,5.212401,4
...,...,...,...,...,...,...,...,...,...
554,974,La Réunion,ZD006,La Réunion - 6ème circonscription,"POLYGON ((55.68280 -20.94410, 55.68200 -20.943...",POINT (55.55293 -20.94451),-20.944507,55.552929,4
555,974,La Réunion,ZD001,La Réunion - 1ère circonscription,"POLYGON ((55.46110 -21.01500, 55.46290 -21.014...",POINT (55.43791 -20.93211),-20.932112,55.437910,4
556,975,Saint-Pierre-et-Miquelon,ZS001,Saint-Pierre-et-Miquelon - Saint-Pierre-et-Miq...,"MULTIPOLYGON (((-56.37190 46.79370, -56.37010 ...",POINT (-56.30839 46.93481),46.934814,-56.308387,4
557,976,Mayotte,ZM001,Mayotte - 1ère circonscription,"MULTIPOLYGON (((45.05060 -12.72300, 45.05200 -...",POINT (45.15332 -12.74074),-12.740740,45.153316,4


In [120]:
#from IPython.display import display

groups = df_circo_fin.groupby('id_dep')
for name,group in tqdm(groups):
    m = folium.Map(location=[48.858885,2.34694], zoom_start=6, tiles="CartoDB positron")
    for _, r in group.iterrows():
        # Without simplifying the representation of each borough,
        # the map might not be displayed
        sim_geo = gpd.GeoSeries(r["geometry"]).simplify(tolerance=0.001)
        geo_j = sim_geo.to_json()
        geo_j = folium.GeoJson(data=geo_j, style_function=lambda x: {"fillColor": "blue"})
        folium.Popup(r["libCirco"]+ " - " + r["id_circo"]).add_to(geo_j)
        geo_j.add_to(m)
    #display(m)
    m.save('/kaggle/working/circo/map/map_' +  r['id_dep'] + '.html')

100%|██████████| 102/102 [00:07<00:00, 13.65it/s]


b. data - https://www.insee.fr/fr/statistiques/6436476?sommaire=6436478

In [121]:
df_circo_stats = pd.read_excel('/kaggle/input/indic-stat-circo/indic-stat-circonscriptions-legislatives-2022.xlsx',
                               sheet_name='indicateurs_circonscriptions',
                               skiprows=7)
df_circo_stats.rename(columns={'circo': 'id_circo'}, inplace=True)
df_circo_stats['id_dep']= df_circo_stats['id_circo'].str[:2]

dom = ['971', '972', '973', '974', '975', '976','978','986','987','988']
df_circo_stats['id_dep'] = np.where(df_circo_stats['id_circo'].str[:3].isin(dom), 
                                df_circo_stats['id_circo'].astype(str).str[:3], 
                                df_circo_stats['id_circo'].astype(str).str[:2])

array(['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10',
       '11', '12', '13', '14', '15', '16', '17', '18', '19', '21', '22',
       '23', '24', '25', '26', '27', '28', '29', '2A', '2B', '30', '31',
       '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42',
       '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53',
       '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64',
       '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75',
       '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86',
       '87', '88', '89', '90', '91', '92', '93', '94', '95', '971', '972',
       '973', '974', '975', '976', '978', '986', '987', '988'],
      dtype=object)

In [122]:
df_circo_stats_fin = df_circo_stats[~df_circo_stats['id_dep'].isin(['00','978','986', '987', '988', 'ZX', 'ZZ'])]
df_circo_stats_fin

Unnamed: 0,id_circo,Nom de la circonscription,Inscrit_22,pop_légal_19,pop_légal_13,tvar_pop,pop_pole_aav,pop_cour_aav,pop_horsaav,pop_urb,...,PPSOC,PIMPOT,acc_ecole,acc_college,acc_lycee,acc_medecin,acc_dentiste,acc_pharmacie,part_eloig,id_dep
1,01001,Ain - 1re circonscription,85723,122750,115683,1.0,28.5,65.3,6.1,28.5,...,5.6,-15.6,96.7,41.1,23.8,65.4,54.2,61.5,14.7,01
2,01002,Ain - 2e circonscription,99383,137975,129760,1.0,6.1,92.3,1.6,69.9,...,4.1,-17.5,100,47,22.1,71.5,62.9,69.3,8.2,01
3,01003,Ain - 3e circonscription,81500,146110,131843,1.7,8.4,87,4.5,78.1,...,2.3,-16.3,98,60.1,27.9,76.1,69.6,75.3,24.8,01
4,01004,Ain - 4e circonscription,94359,128896,126379,0.3,23.4,63,13.6,20.5,...,4.6,-16.1,96.6,37.2,26.6,61,52.2,57.3,5.6,01
5,01005,Ain - 5e circonscription,77144,116701,115832,0.1,40.7,39.6,19.8,45.8,...,6.2,-15.1,93.3,49.7,41.6,68.9,58.3,66.9,22.4,01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
555,97406,La Réunion - 6e circonscription,81747,109774,105365,0.7,27,73,0,100,...,16.9,-12.7,100,100,100,100,100,100,0,974
556,97407,La Réunion - 7e circonscription,117900,139172,137130,0.2,100,0,0,100,...,15.1,-14.4,100,100,89.8,100,100,100,5.1,974
557,97501,Saint-Pierre-et-Miquelon - Circonscription uni...,5045,5974,6057,-0.2,nd,nd,nd,nd,...,nd,nd,nd,nd,nd,nd,nd,nd,nd,975
558,97601,Mayotte - 1re circonscription,42446,137126,112760,4.0,35.6,64.4,0,86,...,nd,nd,100,96.7,68.7,83.2,71.8,96.7,37,976


In [124]:
save_group_to_csv(df_circo_stats_fin,'id_dep','/kaggle/working/circo/data','stats')

100%|██████████| 102/102 [00:00<00:00, 606.97it/s]


In [125]:
df_circo_resultats = pd.read_csv('/kaggle/input/d/jcabouat/resultats-legislatives/resultats-provisoires-par-circonscription.csv', sep=';')
df_circo_resultats

Unnamed: 0,Code département,Libellé département,Code circonscription législative,Libellé circonscription législative,Inscrits,Votants,% Votants,Abstentions,% Abstentions,Exprimés,...,Elu 18,Numéro de panneau 19,Nuance candidat 19,Nom candidat 19,Prénom candidat 19,Sexe candidat 19,Voix 19,% Voix/inscrits 19,% Voix/exprimés 19,Elu 19
0,01,Ain,0101,1ère circonscription,86843,61830,"71,20%",25013,"28,80%",60495,...,,,,,,,,,,
1,01,Ain,0102,2ème circonscription,101874,73437,"72,09%",28437,"27,91%",71918,...,,,,,,,,,,
2,01,Ain,0103,3ème circonscription,84130,54961,"65,33%",29169,"34,67%",53720,...,,,,,,,,,,
3,01,Ain,0104,4ème circonscription,96119,67580,"70,31%",28539,"29,69%",65687,...,,,,,,,,,,
4,01,Ain,0105,5ème circonscription,77900,52973,"68,00%",24927,"32,00%",51532,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
572,ZZ,Français établis hors de France,ZZ07,7ème circonscription,130824,58559,"44,76%",72265,"55,24%",58050,...,,,,,,,,,,
573,ZZ,Français établis hors de France,ZZ08,8ème circonscription,148957,33070,"22,20%",115887,"77,80%",32482,...,,,,,,,,,,
574,ZZ,Français établis hors de France,ZZ09,9ème circonscription,130387,36603,"28,07%",93784,"71,93%",35880,...,,19.0,DIV,MAACHOU,Rania Tessa,FEMININ,61.0,"0,05%","0,17%",
575,ZZ,Français établis hors de France,ZZ10,10ème circonscription,113855,36502,"32,06%",77353,"67,94%",35824,...,,,,,,,,,,


In [128]:
df_circo_resultats.rename(columns={'Code département': 'id_dep',
                                   'Code circonscription législative': 'id_circo',
                                   'Libellé département': 'libDep',
                                   'Libellé circonscription législative': 'libCirco'
                                  }, inplace=True)
df_circo_resultats['id_dep'] = df_circo_resultats['id_dep'].astype(str)
df_circo_resultats['id_circo'] = df_circo_resultats['id_circo'].astype(str)

df_circo_resultats['id_circo'] = np.where(df_circo_resultats['id_dep'].str.len() == 3,
                             df_circo_resultats['id_dep'] + df_circo_resultats['id_circo'].astype(str).str[2:],
                             df_circo_resultats['id_circo'].astype(str).str[:2] + '0' + df_circo_resultats['id_circo'].astype(str).str[2:])

df_circo_resultats = df_circo_resultats.melt(id_vars=['id_dep', 'libDep','id_circo','libCirco', 'Inscrits', 'Votants', '% Votants', 'Abstentions', '% Abstentions', 'Exprimés', '% Exprimés/inscrits', '% Exprimés/votants', 'Blancs', '% Blancs/inscrits', '% Blancs/votants', 'Nuls', '% Nuls/inscrits', '% Nuls/votants'])
df_circo_resultats.rename(columns={'variable': 'indicateur', 'value': 'valeur'}, inplace=True)

df_circo_resultats_fin = df_circo_resultats[~df_circo_resultats['id_dep'].isin(['00','986', '987', '988', 'ZX', 'ZZ'])]
df_circo_resultats_fin

Unnamed: 0,id_dep,libDep,id_circo,libCirco,Inscrits,Votants,% Votants,Abstentions,% Abstentions,Exprimés,% Exprimés/inscrits,% Exprimés/votants,Blancs,% Blancs/inscrits,% Blancs/votants,Nuls,% Nuls/inscrits,% Nuls/votants,indicateur,valeur
0,01,Ain,01001,1ère circonscription,86843,61830,"71,20%",25013,"28,80%",60495,"69,66%","97,84%",929,"1,07%","1,50%",406,"0,47%","0,66%",Numéro de panneau 1,1
1,01,Ain,01002,2ème circonscription,101874,73437,"72,09%",28437,"27,91%",71918,"70,60%","97,93%",1198,"1,18%","1,63%",321,"0,32%","0,44%",Numéro de panneau 1,1
2,01,Ain,01003,3ème circonscription,84130,54961,"65,33%",29169,"34,67%",53720,"63,85%","97,74%",860,"1,02%","1,56%",381,"0,45%","0,69%",Numéro de panneau 1,1
3,01,Ain,01004,4ème circonscription,96119,67580,"70,31%",28539,"29,69%",65687,"68,34%","97,20%",1361,"1,42%","2,01%",532,"0,55%","0,79%",Numéro de panneau 1,1
4,01,Ain,01005,5ème circonscription,77900,52973,"68,00%",24927,"32,00%",51532,"66,15%","97,28%",978,"1,26%","1,85%",463,"0,59%","0,87%",Numéro de panneau 1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98644,974,La Réunion,974406,6ème circonscription,84755,38120,"44,98%",46635,"55,02%",36352,"42,89%","95,36%",854,"1,01%","2,24%",914,"1,08%","2,40%",Elu 19,
98645,974,La Réunion,974407,7ème circonscription,121146,53859,"44,46%",67287,"55,54%",50261,"41,49%","93,32%",1806,"1,49%","3,35%",1792,"1,48%","3,33%",Elu 19,
98646,975,Saint-Pierre-et-Miquelon,975501,Saint-Pierre-et-Miquelon,5069,2807,"55,38%",2262,"44,62%",2748,"54,21%","97,90%",37,"0,73%","1,32%",22,"0,43%","0,78%",Elu 19,
98647,976,Mayotte,976601,1ère circonscription,45660,18122,"39,69%",27538,"60,31%",17162,"37,59%","94,70%",417,"0,91%","2,30%",543,"1,19%","3,00%",Elu 19,


In [129]:
save_group_to_csv(df_circo_resultats_fin,'id_dep','/kaggle/working/circo/data','resultats')

100%|██████████| 102/102 [00:01<00:00, 94.17it/s]


**2. Bureaux de votes**

In [130]:
# Define the path for the new folder
bv_folder_path = '/kaggle/working/bv/'
new_folder(bv_folder_path)

map_bv_folder_path = '/kaggle/working/bv/map/'
new_folder(map_bv_folder_path)

map_bv_folder_path = '/kaggle/working/bv/data/'
new_folder(map_bv_folder_path)

Created new folder at /kaggle/working/bv/
Created new folder at /kaggle/working/bv/map/
Created new folder at /kaggle/working/bv/data/


a. Map

In [131]:
# Set filepath
fp_bv = "/kaggle/input/contours-france-entiere-latest-v2/contours-france-entiere-latest-v2.geojson"

# Read file using gpd.read_file()
df_bv = gpd.read_file(fp_bv, driver='GeoJSON', chunksize=10000)
df_bv = df_bv.to_crs(epsg=4326)
#print(df_bv.crs)
df_bv.head()

Unnamed: 0,codeDepartement,nomDepartement,codeCirconscription,nomCirconscription,codeCommune,nomCommune,numeroBureauVote,codeBureauVote,id_bv,geometry
0,1,Ain,104,4ème circonscription,1001,L'Abergement-Clémenciat,1,01001_0001,01001_1,"POLYGON ((4.95812 46.15316, 4.95771 46.15288, ..."
1,1,Ain,105,5ème circonscription,1002,L'Abergement-de-Varey,1,01002_0001,01002_1,"POLYGON ((5.43001 45.98299, 5.42980 45.98312, ..."
2,1,Ain,105,5ème circonscription,1004,Ambérieu-en-Bugey,1,01004_0001,01004_1,"POLYGON ((5.33312 45.95904, 5.33358 45.95987, ..."
3,1,Ain,105,5ème circonscription,1004,Ambérieu-en-Bugey,2,01004_0002,01004_2,"MULTIPOLYGON (((5.35287 45.95468, 5.35225 45.9..."
4,1,Ain,105,5ème circonscription,1004,Ambérieu-en-Bugey,3,01004_0003,01004_3,"POLYGON ((5.36083 45.94544, 5.36069 45.94557, ..."


In [133]:
df_bv.rename(columns={'codeDepartement': 'id_dep',
                      'codeCirconscription': 'id_circo',
                      'nomDepartement': 'libDep',
                      'nomCirconscription': 'libCirco'
                      }, inplace=True)

df_bv['id_dep'] = df_bv['id_dep'].astype(str)
df_bv['id_circo'] = df_bv['id_circo'].astype(str)
df_bv['id_bv'] = df_bv['id_bv'].astype(str)

df_bv['id_dep'] = np.where(df_bv['id_dep'] == 'ZA', '971',
                           np.where(df_bv['id_dep'] == 'ZB', '972',
                           np.where(df_bv['id_dep'] == 'ZC', '973',
                           np.where(df_bv['id_dep'] == 'ZD', '974',
                           np.where(df_bv['id_dep'] == 'ZS', '975',
                           np.where(df_bv['id_dep'] == 'ZM', '976', df_bv['id_dep']))))))


df_bv['id_circo'] = np.where(df_bv['id_dep'].str.len() == 3,
                             df_bv['id_dep'] + df_bv['id_circo'].astype(str).str[2:],
                             df_bv['id_circo'].astype(str).str[:2] + '0' + df_bv['id_circo'].astype(str).str[2:])

df_bv['libBv'] = df_bv[['nomCommune', 'numeroBureauVote']].agg(' - '.join, axis=1)

df_bv_fin = df_bv[~df_bv['id_dep'].isin(['00','986', '987', '988', 'ZX', 'ZZ'])]
df_bv_fin

Unnamed: 0,id_dep,libDep,id_circo,libCirco,codeCommune,nomCommune,numeroBureauVote,codeBureauVote,id_bv,geometry,libBv
0,01,Ain,01004,4ème circonscription,01001,L'Abergement-Clémenciat,0001,01001_0001,01001_1,"POLYGON ((4.95812 46.15316, 4.95771 46.15288, ...",L'Abergement-Clémenciat - 0001
1,01,Ain,01005,5ème circonscription,01002,L'Abergement-de-Varey,0001,01002_0001,01002_1,"POLYGON ((5.43001 45.98299, 5.42980 45.98312, ...",L'Abergement-de-Varey - 0001
2,01,Ain,01005,5ème circonscription,01004,Ambérieu-en-Bugey,0001,01004_0001,01004_1,"POLYGON ((5.33312 45.95904, 5.33358 45.95987, ...",Ambérieu-en-Bugey - 0001
3,01,Ain,01005,5ème circonscription,01004,Ambérieu-en-Bugey,0002,01004_0002,01004_2,"MULTIPOLYGON (((5.35287 45.95468, 5.35225 45.9...",Ambérieu-en-Bugey - 0002
4,01,Ain,01005,5ème circonscription,01004,Ambérieu-en-Bugey,0003,01004_0003,01004_3,"POLYGON ((5.36083 45.94544, 5.36069 45.94557, ...",Ambérieu-en-Bugey - 0003
...,...,...,...,...,...,...,...,...,...,...,...
68801,976,Mayotte,97602,2ème circonscription,97617,Tsingoni,0023,97617_0023,97617_23,"MULTIPOLYGON (((45.13320 -12.79175, 45.13341 -...",Tsingoni - 0023
68802,976,Mayotte,97602,2ème circonscription,97617,Tsingoni,0049,97617_0049,97617_49,"MULTIPOLYGON (((45.13359 -12.79192, 45.13344 -...",Tsingoni - 0049
68803,976,Mayotte,97602,2ème circonscription,97617,Tsingoni,0073,97617_0073,97617_73,"MULTIPOLYGON (((45.09781 -12.79254, 45.09773 -...",Tsingoni - 0073
68804,976,Mayotte,97602,2ème circonscription,97617,Tsingoni,0074,97617_0074,97617_74,"MULTIPOLYGON (((45.13136 -12.79085, 45.13146 -...",Tsingoni - 0074


In [135]:
export_test = df_bv_fin[['id_dep','libDep','id_circo','libCirco','codeCommune','nomCommune','numeroBureauVote','codeBureauVote','id_bv','libBv']]
export_test.to_csv('/kaggle/working/dataset_dpt_circo_bv_test.csv', index=False)

In [136]:
df_bv_fin['centroid'] = df_bv_fin.geometry.centroid
df_bv_fin['lat'] = df_bv_fin["centroid"].y
df_bv_fin['lon'] = df_bv_fin["centroid"].x
df_bv_fin


  df_bv_fin['centroid'] = df_bv_fin.geometry.centroid


Unnamed: 0,id_dep,libDep,id_circo,libCirco,codeCommune,nomCommune,numeroBureauVote,codeBureauVote,id_bv,geometry,libBv,centroid,lat,lon
0,01,Ain,01004,4ème circonscription,01001,L'Abergement-Clémenciat,0001,01001_0001,01001_1,"POLYGON ((4.95812 46.15316, 4.95771 46.15288, ...",L'Abergement-Clémenciat - 0001,POINT (4.92585 46.15373),46.153725,4.925846
1,01,Ain,01005,5ème circonscription,01002,L'Abergement-de-Varey,0001,01002_0001,01002_1,"POLYGON ((5.43001 45.98299, 5.42980 45.98312, ...",L'Abergement-de-Varey - 0001,POINT (5.42809 46.00961),46.009608,5.428087
2,01,Ain,01005,5ème circonscription,01004,Ambérieu-en-Bugey,0001,01004_0001,01004_1,"POLYGON ((5.33312 45.95904, 5.33358 45.95987, ...",Ambérieu-en-Bugey - 0001,POINT (5.34232 45.97219),45.972189,5.342320
3,01,Ain,01005,5ème circonscription,01004,Ambérieu-en-Bugey,0002,01004_0002,01004_2,"MULTIPOLYGON (((5.35287 45.95468, 5.35225 45.9...",Ambérieu-en-Bugey - 0002,POINT (5.35523 45.95659),45.956591,5.355228
4,01,Ain,01005,5ème circonscription,01004,Ambérieu-en-Bugey,0003,01004_0003,01004_3,"POLYGON ((5.36083 45.94544, 5.36069 45.94557, ...",Ambérieu-en-Bugey - 0003,POINT (5.38341 45.94771),45.947706,5.383414
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68801,976,Mayotte,97602,2ème circonscription,97617,Tsingoni,0023,97617_0023,97617_23,"MULTIPOLYGON (((45.13320 -12.79175, 45.13341 -...",Tsingoni - 0023,POINT (45.13623 -12.78729),-12.787287,45.136228
68802,976,Mayotte,97602,2ème circonscription,97617,Tsingoni,0049,97617_0049,97617_49,"MULTIPOLYGON (((45.13359 -12.79192, 45.13344 -...",Tsingoni - 0049,POINT (45.14078 -12.79935),-12.799354,45.140784
68803,976,Mayotte,97602,2ème circonscription,97617,Tsingoni,0073,97617_0073,97617_73,"MULTIPOLYGON (((45.09781 -12.79254, 45.09773 -...",Tsingoni - 0073,POINT (45.10330 -12.78955),-12.789554,45.103305
68804,976,Mayotte,97602,2ème circonscription,97617,Tsingoni,0074,97617_0074,97617_74,"MULTIPOLYGON (((45.13136 -12.79085, 45.13146 -...",Tsingoni - 0074,POINT (45.13315 -12.79692),-12.796919,45.133152


In [149]:
groups = df_bv_fin.groupby('id_circo')
for name,group in tqdm(groups):
    m = folium.Map(location=[48.858885,2.34694], zoom_start=6, tiles="CartoDB positron")
    for _, r in group.iterrows():
        # Without simplifying the representation of each borough,
        # the map might not be displayed
        sim_geo = gpd.GeoSeries(r["geometry"]).simplify(tolerance=0.001)
        geo_j = sim_geo.to_json()
        geo_j = folium.GeoJson(data=geo_j, style_function=lambda x: {"fillColor": "red"})
        folium.Popup(r["libBv"]).add_to(geo_j)
        geo_j.add_to(m)
    m.save('/kaggle/working/bv/map/map_' +  r['id_circo'] + '.html')

100%|██████████| 559/559 [06:58<00:00,  1.34it/s]


b. Résultats Europénnes

In [138]:
chunksize = 10 ** 3  # 1,000 rows   
df_list = []

for chunk in pd.read_csv('/kaggle/input/d/jcabouat/resultats-legislatives/resultats-provisoires-par-bureau-de-votevmn.csv',
                         sep=';', 
                         chunksize=chunksize,
                         dtype={'Code département': object, 'Code commune': object, 'Code BV': object}):
    df_list.append(chunk)

df_bv_resultats = pd.concat(df_list, ignore_index=True)

df_bv_resultats.rename(columns={'Code département': 'id_dep',
                      'Libellé département': 'libDep',
                      'Code commune': 'id_com',
                      'Libellé commune': 'libCom',
                      'Code BV': 'codeBV'
                      }, inplace=True)

dpt_list = df_bv_resultats['id_dep'].unique()
dpt_list.sort()
dpt_list

array(['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11',
       '12', '13', '14', '15', '16', '17', '18', '19', '21', '22', '23',
       '24', '25', '26', '27', '28', '29', '2A', '2B', '30', '31', '32',
       '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43',
       '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54',
       '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65',
       '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76',
       '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87',
       '88', '89', '90', '91', '92', '93', '94', '95', '971', '972',
       '973', '974', '975', '976', '986', '987', '988', 'ZX', 'ZZ'],
      dtype=object)

In [139]:
df_bv_resultats['id_dep'] = df_bv_resultats['id_dep'].astype(str)
df_bv_resultats['libCom'] = df_bv_resultats['libCom'].astype(str)
df_bv_resultats['codeBV'] = df_bv_resultats['codeBV'].astype(str)

df_bv_resultats['libBv'] = df_bv_resultats[['libCom', 'codeBV']].agg(' - '.join, axis=1)

df_bv_resultats['codeBureauVote'] = df_bv_resultats['id_com'].str.cat(df_bv_resultats['codeBV'], sep = '_')

df_bv_resultats_fin = df_bv_resultats[~df_bv_resultats['id_dep'].isin(['00','986', '987', '988', 'ZX', 'ZZ'])]
df_bv_resultats_fin

  df_bv_resultats['libBv'] = df_bv_resultats[['libCom', 'codeBV']].agg(' - '.join, axis=1)
  df_bv_resultats['codeBureauVote'] = df_bv_resultats['id_com'].str.cat(df_bv_resultats['codeBV'], sep = '_')


Unnamed: 0,id_dep,libDep,id_com,libCom,codeBV,Inscrits,Votants,% Votants,Abstentions,% Abstentions,...,Nuance candidat 19,Nom candidat 19,Prénom candidat 19,Sexe candidat 19,Voix 19,% Voix/inscrits 19,% Voix/exprimés 19,Elu 19,libBv,codeBureauVote
0,01,Ain,01001,L'Abergement-Clémenciat,0001,662,492,"74,32%",170,"25,68%",...,,,,,,,,,L'Abergement-Clémenciat - 0001,01001_0001
1,01,Ain,01002,L'Abergement-de-Varey,0001,228,178,"78,07%",50,"21,93%",...,,,,,,,,,L'Abergement-de-Varey - 0001,01002_0001
2,01,Ain,01004,Ambérieu-en-Bugey,0001,1093,752,"68,80%",341,"31,20%",...,,,,,,,,,Ambérieu-en-Bugey - 0001,01004_0001
3,01,Ain,01004,Ambérieu-en-Bugey,0002,1077,734,"68,15%",343,"31,85%",...,,,,,,,,,Ambérieu-en-Bugey - 0002,01004_0002
4,01,Ain,01004,Ambérieu-en-Bugey,0003,1124,830,"73,84%",294,"26,16%",...,,,,,,,,,Ambérieu-en-Bugey - 0003,01004_0003
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69285,976,Mayotte,97617,Tsingoni,0074,521,189,"36,28%",332,"63,72%",...,,,,,,,,,Tsingoni - 0074,97617_0074
69286,976,Mayotte,97617,Tsingoni,0099,319,122,"38,24%",197,"61,76%",...,,,,,,,,,Tsingoni - 0099,97617_0099
69287,976,Mayotte,97617,Tsingoni,0160,523,150,"28,68%",373,"71,32%",...,,,,,,,,,Tsingoni - 0160,97617_0160
69288,976,Mayotte,97617,Tsingoni,0161,663,307,"46,30%",356,"53,70%",...,,,,,,,,,Tsingoni - 0161,97617_0161


In [140]:
df_bv_resultats_fin = pd.merge(df_bv_resultats_fin, df_bv[['codeBureauVote','id_circo']], on='codeBureauVote', how='left')
df_bv_resultats_fin = df_bv_resultats_fin.drop_duplicates()
df_bv_resultats_fin

Unnamed: 0,id_dep,libDep,id_com,libCom,codeBV,Inscrits,Votants,% Votants,Abstentions,% Abstentions,...,Nom candidat 19,Prénom candidat 19,Sexe candidat 19,Voix 19,% Voix/inscrits 19,% Voix/exprimés 19,Elu 19,libBv,codeBureauVote,id_circo
0,01,Ain,01001,L'Abergement-Clémenciat,0001,662,492,"74,32%",170,"25,68%",...,,,,,,,,L'Abergement-Clémenciat - 0001,01001_0001,01004
1,01,Ain,01002,L'Abergement-de-Varey,0001,228,178,"78,07%",50,"21,93%",...,,,,,,,,L'Abergement-de-Varey - 0001,01002_0001,01005
2,01,Ain,01004,Ambérieu-en-Bugey,0001,1093,752,"68,80%",341,"31,20%",...,,,,,,,,Ambérieu-en-Bugey - 0001,01004_0001,01005
3,01,Ain,01004,Ambérieu-en-Bugey,0002,1077,734,"68,15%",343,"31,85%",...,,,,,,,,Ambérieu-en-Bugey - 0002,01004_0002,01005
4,01,Ain,01004,Ambérieu-en-Bugey,0003,1124,830,"73,84%",294,"26,16%",...,,,,,,,,Ambérieu-en-Bugey - 0003,01004_0003,01005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69480,976,Mayotte,97617,Tsingoni,0074,521,189,"36,28%",332,"63,72%",...,,,,,,,,Tsingoni - 0074,97617_0074,97602
69481,976,Mayotte,97617,Tsingoni,0099,319,122,"38,24%",197,"61,76%",...,,,,,,,,Tsingoni - 0099,97617_0099,97602
69482,976,Mayotte,97617,Tsingoni,0160,523,150,"28,68%",373,"71,32%",...,,,,,,,,Tsingoni - 0160,97617_0160,97602
69483,976,Mayotte,97617,Tsingoni,0161,663,307,"46,30%",356,"53,70%",...,,,,,,,,Tsingoni - 0161,97617_0161,97602


In [141]:
df_bv_resultats_fin.rename(columns={'codeBureauVote': 'id_bv'}, inplace=True)
df_bv_resultats_fin = df_bv_resultats_fin.melt(id_vars=['id_dep','libDep','id_circo','id_com','libCom','codeBV','id_bv','libBv', 'Inscrits', 'Votants', '% Votants', 'Abstentions', '% Abstentions', 'Exprimés', '% Exprimés/inscrits', '% Exprimés/votants', 'Blancs', '% Blancs/inscrits', '% Blancs/votants', 'Nuls', '% Nuls/inscrits', '% Nuls/votants'])
df_bv_resultats_fin.rename(columns={'variable': 'indicateur', 'value': 'valeur'}, inplace=True)
df_bv_resultats_fin

Unnamed: 0,id_dep,libDep,id_circo,id_com,libCom,codeBV,id_bv,libBv,Inscrits,Votants,...,% Exprimés/inscrits,% Exprimés/votants,Blancs,% Blancs/inscrits,% Blancs/votants,Nuls,% Nuls/inscrits,% Nuls/votants,indicateur,valeur
0,01,Ain,01004,01001,L'Abergement-Clémenciat,0001,01001_0001,L'Abergement-Clémenciat - 0001,662,492,...,"71,90%","96,75%",9,"1,36%","1,83%",7,"1,06%","1,42%",Numéro de panneau 1,1
1,01,Ain,01005,01002,L'Abergement-de-Varey,0001,01002_0001,L'Abergement-de-Varey - 0001,228,178,...,"75,00%","96,07%",6,"2,63%","3,37%",1,"0,44%","0,56%",Numéro de panneau 1,1
2,01,Ain,01005,01004,Ambérieu-en-Bugey,0001,01004_0001,Ambérieu-en-Bugey - 0001,1093,752,...,"66,33%","96,41%",20,"1,83%","2,66%",7,"0,64%","0,93%",Numéro de panneau 1,1
3,01,Ain,01005,01004,Ambérieu-en-Bugey,0002,01004_0002,Ambérieu-en-Bugey - 0002,1077,734,...,"66,20%","97,14%",14,"1,30%","1,91%",7,"0,65%","0,95%",Numéro de panneau 1,1
4,01,Ain,01005,01004,Ambérieu-en-Bugey,0003,01004_0003,Ambérieu-en-Bugey - 0003,1124,830,...,"72,60%","98,31%",10,"0,89%","1,20%",4,"0,36%","0,48%",Numéro de panneau 1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11848585,976,Mayotte,97602,97617,Tsingoni,0074,97617_0074,Tsingoni - 0074,521,189,...,"35,32%","97,35%",3,"0,58%","1,59%",2,"0,38%","1,06%",Elu 19,
11848586,976,Mayotte,97602,97617,Tsingoni,0099,97617_0099,Tsingoni - 0099,319,122,...,"36,99%","96,72%",2,"0,63%","1,64%",2,"0,63%","1,64%",Elu 19,
11848587,976,Mayotte,97602,97617,Tsingoni,0160,97617_0160,Tsingoni - 0160,523,150,...,"26,00%","90,67%",6,"1,15%","4,00%",8,"1,53%","5,33%",Elu 19,
11848588,976,Mayotte,97602,97617,Tsingoni,0161,97617_0161,Tsingoni - 0161,663,307,...,"44,95%","97,07%",6,"0,90%","1,95%",3,"0,45%","0,98%",Elu 19,


In [142]:
save_group_to_csv(df_bv_resultats_fin,'id_circo','/kaggle/working/bv/data','resultats')

100%|██████████| 559/559 [02:25<00:00,  3.85it/s]


In [143]:
import zipfile
import os
from IPython.display import FileLink

def zip_dir(directory = os.curdir, file_name = 'stats_legis_full.zip'):
    """
    zip all the files in a directory
    
    Parameters
    _____
    directory: str
        directory needs to be zipped, defualt is current working directory
        
    file_name: str
        the name of the zipped file (including .zip), default is 'directory.zip'
        
    Returns
    _____
    Creates a hyperlink, which can be used to download the zip file)
    """
    os.chdir(directory)
    zip_ref = zipfile.ZipFile(file_name, mode='w')
    for folder, _, files in os.walk(directory):
        for file in files:
            if file_name in file:
                pass
            else:
                zip_ref.write(os.path.join(folder, file))

    return FileLink(file_name)

In [150]:
zip_dir()

In [145]:
%%writefile app.py
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import streamlit as st
st.set_page_config(layout="wide")

# Load the HTML file
def read_html_file(filename):
    with open(filename, 'r') as f:
        return f.read()


# Stats definitions
df_stats_def = pd.read_csv('/kaggle/input/stats-def-csv/stats_def.csv',sep=';')
df_stats_def_demo = df_stats_def[(df_stats_def['Catégorie'] == 'Démographie') | (df_stats_def['Variable'] == 'Nom de la circonscription')]
df_stats_def_acti = df_stats_def[(df_stats_def['Catégorie'] == 'Activité') | (df_stats_def['Variable'] == 'Nom de la circonscription')]
df_stats_def_qualif = df_stats_def[(df_stats_def['Catégorie'] == 'Qualification') | (df_stats_def['Variable'] == 'Nom de la circonscription')]
df_stats_def_secteur = df_stats_def[(df_stats_def['Catégorie'] == 'Secteur') | (df_stats_def['Variable'] == 'Nom de la circonscription')]
df_stats_def_log = df_stats_def[(df_stats_def['Catégorie'] == 'Logements') | (df_stats_def['Variable'] == 'Nom de la circonscription')]
df_stats_def_fam = df_stats_def[(df_stats_def['Catégorie'] == 'Famille') | (df_stats_def['Variable'] == 'Nom de la circonscription')]
df_stats_def_mob = df_stats_def[(df_stats_def['Catégorie'] == 'Mobilité') | (df_stats_def['Variable'] == 'Nom de la circonscription')]
df_stats_def_rev = df_stats_def[(df_stats_def['Catégorie'] == 'Revenus') | (df_stats_def['Variable'] == 'Nom de la circonscription')]
df_stats_def_niv_vie = df_stats_def[(df_stats_def['Catégorie'] == 'Niveau de vie') | (df_stats_def['Variable'] == 'Nom de la circonscription')]
df_stats_def_autres = df_stats_def[(df_stats_def['Catégorie'] == 'Autres') | (df_stats_def['Variable'] == 'Nom de la circonscription')]

    
# 1.Liste des départements
file_path = '/kaggle/working/dataset_dpt_circo_bv_test.csv'
df = pd.read_csv(file_path,low_memory=False)
df.rename(columns={'codeDepartement': 'id_dep','nomDepartement': 'dep_name'}, inplace=True)
df['id_dep'] = df['id_dep'].astype(str)
df['id_circo'] = df['id_circo'].astype(str)
df['id_bv'] = df['id_bv'].astype(str)

df_dpt = df.drop(columns=['id_circo','libCirco','codeCommune','nomCommune','numeroBureauVote','codeBureauVote','id_bv','libBv']).drop_duplicates()
df_dpt['id_dep'] = df_dpt['id_dep'].astype(str)
df_dpt.sort_values(by='id_dep',inplace=True)
df_dpt['id_dep'] = df_dpt['id_dep'].astype(str)
df_dpt['dep_lib'] = df_dpt['id_dep'].str.cat(df_dpt['libDep'], sep = ' - ')

dpt = df_dpt['dep_lib'].drop_duplicates().sort_values()
dpt_selected = st.sidebar.selectbox('Sélection du département:', dpt)
# ID dpt
dpt_id_selected = dpt_selected.split(" - ")[0]

#dpt_id_selected = '14'

file_path_dpt_resultats = '/kaggle/working/dpt/data/resultats_' + dpt_id_selected + '.csv'
dpt_resultats = pd.read_csv(file_path_dpt_resultats,low_memory=False)

# a. Résultats - Stats descriptives - DPT
dpt_resultats_overview = dpt_resultats[['libDepartement', 'Inscrits', 'Votants', '% Votants', 'Abstentions', '% Abstentions', 'Exprimés', '% Exprimés/inscrits', '% Exprimés/votants', 'Blancs', '% Blancs/inscrits', '% Blancs/votants', 'Nuls', '% Nuls/inscrits', '% Nuls/votants']]
dpt_resultats_overview = dpt_resultats_overview.drop_duplicates()

data_container = st.container()
with data_container:
    st.write("Département - Elections législatives:")
    st.dataframe(dpt_resultats_overview,hide_index=True)

# b. Résultats (top10) - DPT
dpt_resultats_details = dpt_resultats[['indicateur','valeur']]
dpt_resultats_details['id_candidat'] = dpt_resultats_details['indicateur'].str[-2:]
dpt_resultats_details['id_candidat'] = dpt_resultats_details['id_candidat'].str.strip()

dpt_resultats_details['indicateur'] = dpt_resultats_details['indicateur'].apply(lambda x: ''.join([i for i in x if not i.isdigit()]))
dpt_resultats_details['indicateur']= dpt_resultats_details['indicateur'].apply(lambda x: x[:-1] if isinstance(x, str) else x)

dpt_resultats_details = dpt_resultats_details.pivot(index ='id_candidat', columns='indicateur', values='valeur')
dpt_resultats_details = pd.DataFrame(dpt_resultats_details.to_records())
dpt_resultats_details["Voix"] = dpt_resultats_details["Voix"].fillna(0).astype(float).round().astype(int)
dpt_resultats_details = dpt_resultats_details.nlargest(10, 'Voix')
dpt_resultats_details = dpt_resultats_details[['Nuance candidat', 'Voix','% Voix/exprimés','% Voix/inscrits']]
dpt_resultats_details = dpt_resultats_details.dropna(axis=0, subset=['Nuance candidat'])

data_container2 = st.container()
with data_container2:
    st.write("Département - Résultats législatives (top10):")
    st.dataframe(dpt_resultats_details,hide_index=True)

    
# 2.Circonscriptions du département sélectionné

# a.Carte
#Read the HTML content from the file
html_content = read_html_file('/kaggle/working/circo/map/map_' + dpt_id_selected + '.html')
# Display the HTML content in Streamlit
map_container1 = st.container()

with map_container1:
    st.write("Circonscriptions:")
    st.components.v1.html(html_content,height=500)

# b.Stats des circonscriptions du département sélectionné
file_path_circo_stats = '/kaggle/working/circo/data/stats_' + dpt_id_selected + '.csv'
df_stats_circo_selected = pd.read_csv(file_path_circo_stats,low_memory=False)

df_stats_circo_demo = df_stats_circo_selected[['Nom de la circonscription','Inscrit_22','pop_légal_19','pop_légal_13','tvar_pop','pop_pole_aav','pop_cour_aav','pop_horsaav','pop_urb','pop_rur_periu','pop_rur_non_periu','age_moyen','dec90','dec75','dec50','dec25','dec10']]
df_stats_circo_acti = df_stats_circo_selected[['Nom de la circonscription','actemp','actcho','inactret','inactetu','inactm14','inactaut','actemp_hom','actcho_hom','inactret_hom','inactetu_hom','inactm14_hom','inactaut_hom','actemp_fem','actcho_fem','inactret_fem','inactetu_fem','inactm14_fem','inactaut_fem']]
df_stats_circo_qualif = df_stats_circo_selected[['Nom de la circonscription','actdip_PEU','actdip_CAP','actdip_BAC','actdip_BAC2','actdip_BAC3','actdip_BAC5','actdip_BAC3P']]
df_stats_circo_secteur = df_stats_circo_selected[['Nom de la circonscription','act_agr','act_art','act_cad','act_int','act_emp','act_ouv','act_cho']]
df_stats_circo_log = df_stats_circo_selected[['Nom de la circonscription','log_res','log_sec','log_vac','proprio','locatai','gratuit','maison','ach90','mfuel']]
df_stats_circo_fam = df_stats_circo_selected[['Nom de la circonscription','men_seul','men_coupae','men_coupse','men_monop','men_sfam','men_seul_com','men_coupse_com','men_coupae_com','men_monop_com','men_complexe_com']]
df_stats_circo_mob = df_stats_circo_selected[['Nom de la circonscription','iranr_com','iranr_dep','iranr_fra','iranr_etr','mobresid','ilt_com','ilt_dep','ilt_fra','ilt_etr','mobtrav','modtrans_aucun','modtrans_pied','modtrans_velo','modtrans_moto','modtrans_voit','modtrans_commun']]
df_stats_circo_niv_vie = df_stats_circo_selected[['Nom de la circonscription','tx_pauvrete60_diff','nivvie_median_diff','part_pauvres_diff','part_modestes_diff','part_medians_diff','part_plutot_aises_diff','part_aises_diff','D1_diff','D9_diff','rpt_D9_D1_diff','tx_pauvrete60_diff_trageRF1','tx_pauvrete60_diff_trageRF2','tx_pauvrete60_diff_trageRF3','tx_pauvrete60_diff_trageRF4','tx_pauvrete60_diff_trageRF5','tx_pauvrete60_diff_trageRF6']]
df_stats_circo_rev = df_stats_circo_selected[['Nom de la circonscription','PACT','PPEN','PPAT','PPSOC','PIMPOT']]
df_stats_circo_autres = df_stats_circo_selected[['Nom de la circonscription','acc_ecole','acc_college','acc_lycee','acc_medecin','acc_dentiste','acc_pharmacie','part_eloig']]

with st.expander("Statistiques descriptives:"):
    tab1, tab2, tab3, tab4, tab5, tab6, tab7, tab8, tab9, tab10 = st.tabs(["Démographie", "Activité/inactivité:", "Qualification","Secteurs","Logement","Famille","Mobilité","Niveau de vie","Revenus","Autres" ])
    
    with tab1:
        st.dataframe(df_stats_circo_demo,hide_index=True)
        st.dataframe(df_stats_def_demo,hide_index=True)
    with tab2:  
        st.dataframe(df_stats_circo_acti,hide_index=True)
        st.dataframe(df_stats_def_acti,hide_index=True)
    with tab3: 
        st.dataframe(df_stats_circo_qualif,hide_index=True)
        st.dataframe(df_stats_def_qualif,hide_index=True)
    with tab4:
        st.dataframe(df_stats_circo_secteur,hide_index=True)
        st.dataframe(df_stats_def_secteur,hide_index=True)
    with tab5:  
        st.dataframe(df_stats_circo_log,hide_index=True)
        st.dataframe(df_stats_def_log,hide_index=True)
    with tab6:
        st.dataframe(df_stats_circo_fam,hide_index=True)
        st.dataframe(df_stats_def_fam,hide_index=True)
    with tab7: 
        st.dataframe(df_stats_circo_mob,hide_index=True)
        st.dataframe(df_stats_def_mob,hide_index=True)
    with tab8: 
        st.dataframe(df_stats_circo_niv_vie,hide_index=True)
        st.dataframe(df_stats_def_niv_vie,hide_index=True)
    with tab9:  
        st.dataframe(df_stats_circo_rev,hide_index=True)
        st.dataframe(df_stats_def_rev,hide_index=True)
    with tab10:
        st.dataframe(df_stats_circo_autres,hide_index=True)
        st.dataframe(df_stats_def_autres,hide_index=True)

# c.Résultats des circonscriptions du département sélectionné
file_path_circo_resultats = '/kaggle/working/circo/data/resultats_' + dpt_id_selected + '.csv'
df_resultats_circo_selected = pd.read_csv(file_path_circo_resultats,low_memory=False)

    # i. Stats descriptives
circo_resultats_overview = df_resultats_circo_selected[['libCirco', 'Inscrits', 'Votants', '% Votants', 'Abstentions', '% Abstentions', 'Exprimés', '% Exprimés/inscrits', '% Exprimés/votants', 'Blancs', '% Blancs/inscrits', '% Blancs/votants', 'Nuls', '% Nuls/inscrits', '% Nuls/votants']]
circo_resultats_overview = circo_resultats_overview.drop_duplicates()
data_container3 = st.container()
with data_container3:
    st.write("Circonscriptions - Elections législatives:")
    st.dataframe(circo_resultats_overview,hide_index=True)

    # ii. Résultats (top10)
circo_resultats_details = df_resultats_circo_selected[['id_circo','libCirco','indicateur','valeur']]
circo_resultats_details['id_candidat'] = circo_resultats_details['indicateur'].str[-2:]
circo_resultats_details['id_candidat'] = circo_resultats_details['id_candidat'].str.strip()

circo_resultats_details['indicateur'] = circo_resultats_details['indicateur'].apply(lambda x: ''.join([i for i in x if not i.isdigit()]))
circo_resultats_details['indicateur']= circo_resultats_details['indicateur'].apply(lambda x: x[:-1] if isinstance(x, str) else x)

#circo_resultats_details

with st.expander("Circonscriptions - Résultats législatives (top10):"):
    groups = circo_resultats_details.groupby('id_circo')
    for name,group in groups:
        tmp_details_circo = group.pivot(index = ['id_circo','id_candidat'], columns='indicateur', values='valeur')
        tmp_details_circo = pd.DataFrame(tmp_details_circo.to_records())
        tmp_details_circo["Voix"] = tmp_details_circo["Voix"].fillna(0).astype(float).round().astype(int)
        tmp_details_circo = tmp_details_circo.nlargest(10, 'Voix')
        tmp_details_circo = tmp_details_circo[['id_candidat','Nuance candidat', 'Voix','% Voix/exprimés','% Voix/inscrits']]
        tmp_details_circo = tmp_details_circo.dropna(axis=0, subset=['Nuance candidat'])
        st.write(name)
        st.dataframe(tmp_details_circo,hide_index=True)   

# d.Liste des circonscriptions du département sélectionné
df_circo = df[df['id_dep'] == dpt_id_selected].drop(columns=['codeCommune','nomCommune','numeroBureauVote','codeBureauVote','id_bv','libBv']).drop_duplicates()
df_circo['id_circo'] = df_circo['id_circo'].astype(str)
df_circo.sort_values(by='id_circo',inplace=True)
df_circo['circo_lib'] = df_circo['id_circo'].str.cat(df_circo['libCirco'], sep = ' - ')

circo =  df_circo['circo_lib'].drop_duplicates().sort_values()
circo_selected = st.sidebar.selectbox('Sélection de la circonscription:', circo)
# ID circo
circo_id_selected = str(circo_selected).split(" - ")[0]

# 3.Bureaux de votes de la circonscription sélectionnée

# a.Carte
# Read the HTML content from the file
html_content2 = read_html_file('/kaggle/working/bv/map/map_' + circo_id_selected + '.html')
# Display the HTML content in Streamlit
map_container2 = st.container()
with map_container2:
    st.write("Bureaux de vote:")
    st.components.v1.html(html_content2,height=500)

# b.Résultats des circonscriptions du département sélectionné
file_path_bv_resultats = '/kaggle/working/bv/data/resultats_' + circo_id_selected + '.csv'
df_resultats_bv_selected = pd.read_csv(file_path_bv_resultats,low_memory=False)

    # i. Stats descriptives
bv_resultats_overview = df_resultats_bv_selected[['id_bv','libBv', 'Inscrits', 'Votants', '% Votants', 'Abstentions', '% Abstentions', 'Exprimés', '% Exprimés/inscrits', '% Exprimés/votants', 'Blancs', '% Blancs/inscrits', '% Blancs/votants', 'Nuls', '% Nuls/inscrits', '% Nuls/votants']]
bv_resultats_overview = bv_resultats_overview.drop_duplicates()
data_container3 = st.container()
with data_container3:
    st.write("Bureaux de votes - Elections législatives:")
    st.dataframe(bv_resultats_overview,hide_index=True)

    # ii. Résultats (top10)
bv_resultats_details = df_resultats_bv_selected[['id_bv','libBv','indicateur','valeur']]
bv_resultats_details['id_candidat'] = bv_resultats_details['indicateur'].str[-2:]
bv_resultats_details['id_candidat'] = bv_resultats_details['id_candidat'].str.strip()

bv_resultats_details['indicateur'] = bv_resultats_details['indicateur'].apply(lambda x: ''.join([i for i in x if not i.isdigit()]))
bv_resultats_details['indicateur']= bv_resultats_details['indicateur'].apply(lambda x: x[:-1] if isinstance(x, str) else x)



bv =  bv_resultats_details['id_bv'].drop_duplicates().sort_values()
bv_selected = st.sidebar.selectbox('Sélection du bureau de vote:', bv)
# ID BV
bv_id_selected = bv_selected

df_bv = bv_resultats_details[bv_resultats_details['id_bv'] == bv_id_selected]

tmp_details_bv = df_bv.pivot(index = ['id_bv','id_candidat'], columns='indicateur', values='valeur')
tmp_details_bv = pd.DataFrame(tmp_details_bv.to_records())
tmp_details_bv["Voix"] = tmp_details_bv["Voix"].fillna(0).astype(float).round().astype(int)
tmp_details_bv = tmp_details_bv.nlargest(10, 'Voix')
tmp_details_bv = tmp_details_bv[['id_bv','id_candidat','Nuance candidat', 'Voix','% Voix/exprimés','% Voix/inscrits']]
tmp_details_bv = tmp_details_bv.dropna(axis=0, subset=['Nuance candidat'])

data_container4 = st.container()
with data_container4:
    st.write("Bureau de vote sélectionné - Résultats législatives (top10):")
    st.dataframe(tmp_details_bv,hide_index=True)

Writing app.py


In [151]:
!npm install localtunnel

[K[?25hm#########[0m[100;90m.........[0m] / idealTree: [32;40mtiming[0m [35midealTree[0m Completed in 186ms[0m[K
up to date, audited 23 packages in 716ms

3 packages are looking for funding
  run `npm fund` for details

2 [33m[1mmoderate[22m[39m severity vulnerabilities

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.


In [152]:
import urllib
print("Password/Enpoint IP for localtunnel is:",urllib.request.urlopen('https://ipv4.icanhazip.com').read().decode('utf8').strip("\n"))

Password/Enpoint IP for localtunnel is: 34.91.0.191


In [153]:
!streamlit run app.py &>./logs.txt & npx localtunnel --port 8501

your url is: https://thin-lights-clap.loca.lt
^C
