In [3]:
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/contours-france-entiere-latest-v2/contours-france-entiere-latest-v2.geojson
/kaggle/input/circonscriptions-legislatives-030522/circonscriptions_legislatives_030522.shp
/kaggle/input/circonscriptions-legislatives-030522/circonscriptions_legislatives_030522.shx
/kaggle/input/circonscriptions-legislatives-030522/circonscriptions_legislatives_030522.prj
/kaggle/input/circonscriptions-legislatives-030522/circonscriptions_legislatives_030522.dbf
/kaggle/input/circonscriptions-legislatives-030522/circonscriptions_legislatives_030522.cpg
/kaggle/input/resultats-definitifs-par-bureau-de-vote/resultats-definitifs-par-bureau-de-vote.xlsx
/kaggle/input/indic-stat-circo/indic-stat-circonscriptions-legislatives-2022.xlsx


In [4]:
!pip install -q streamlit
!pip install geopandas
!pip install folium



**1. Circonscriptions législatives**

In [5]:
!rm -rf /kaggle/working/*

In [6]:
import os

def new_folder(new_folder_path):
    # Check if the folder exists, if not, create it
    if not os.path.exists(new_folder_path):
        os.makedirs(new_folder_path)
        print(f'Created new folder at {new_folder_path}')
    else:
        print(f'Folder {new_folder_path} already exists')
    

# Define the path for the new folder
circo_folder_path = '/kaggle/working/circo/'
new_folder(circo_folder_path)

data_circo_folder_path = '/kaggle/working/circo/data/'
new_folder(data_circo_folder_path)

map_circo_folder_path = '/kaggle/working/circo/map/'
new_folder(map_circo_folder_path)

Created new folder at /kaggle/working/circo/
Created new folder at /kaggle/working/circo/data/
Created new folder at /kaggle/working/circo/map/


In [7]:
# Define the path for the new folder
bv_folder_path = '/kaggle/working/bv/'
new_folder(bv_folder_path)

map_bv_folder_path = '/kaggle/working/bv/map/'
new_folder(map_bv_folder_path)

Created new folder at /kaggle/working/bv/
Created new folder at /kaggle/working/bv/map/


In [8]:
map_bv_folder_path = '/kaggle/working/bv/data/'
new_folder(map_bv_folder_path)

Created new folder at /kaggle/working/bv/data/


a. Cartes

In [9]:
import numpy as np
import geopandas as gpd
import folium
import pandas as pd

# Set filepath
fp_circo = "/kaggle/input/circonscriptions-legislatives-030522/circonscriptions_legislatives_030522.shp"

# Read file using gpd.read_file()
df_circo = gpd.read_file(fp_circo)
df_circo = df_circo.to_crs(epsg=4326)
df_circo['centroid'] = df_circo.geometry.centroid
print(df_circo.crs)
df_circo.head()


  df_circo['centroid'] = df_circo.geometry.centroid


EPSG:4326


Unnamed: 0,id_circo,dep,libelle,geometry,centroid
0,97302,973,Guyane - 2e circonscription,"MULTIPOLYGON (((-54.60236 2.33356, -54.60242 2...",POINT (-53.61144 4.09182)
1,97301,973,Guyane - 1re circonscription,"MULTIPOLYGON (((-51.93691 4.46648, -51.93667 4...",POINT (-52.55558 3.61228)
2,97201,972,Martinique - 1re circonscription,"MULTIPOLYGON (((-61.00495 14.57791, -61.00492 ...",POINT (-60.96320 14.66706)
3,97202,972,Martinique - 2e circonscription,"MULTIPOLYGON (((-61.12889 14.63162, -61.12890 ...",POINT (-61.10999 14.76083)
4,97203,972,Martinique - 3e circonscription,"POLYGON ((-61.03945 14.64265, -61.03940 14.642...",POINT (-61.06897 14.64060)


In [10]:
# Centroid column
df_circo['lat'] = df_circo["centroid"].y
df_circo['lon'] = df_circo["centroid"].x

df_circo['id_circo_len'] = df_circo['id_circo'].str.len()
df_circo['id_circo'] = np.where(df_circo['id_circo_len'] == 5, df_circo['id_circo'], df_circo['id_circo'].astype(str).str[:2] + '0' + df_circo['id_circo'].astype(str).str[2:])

df_circo.head()

Unnamed: 0,id_circo,dep,libelle,geometry,centroid,lat,lon,id_circo_len
0,97302,973,Guyane - 2e circonscription,"MULTIPOLYGON (((-54.60236 2.33356, -54.60242 2...",POINT (-53.61144 4.09182),4.091818,-53.611439,5
1,97301,973,Guyane - 1re circonscription,"MULTIPOLYGON (((-51.93691 4.46648, -51.93667 4...",POINT (-52.55558 3.61228),3.612285,-52.555584,5
2,97201,972,Martinique - 1re circonscription,"MULTIPOLYGON (((-61.00495 14.57791, -61.00492 ...",POINT (-60.96320 14.66706),14.667059,-60.963205,5
3,97202,972,Martinique - 2e circonscription,"MULTIPOLYGON (((-61.12889 14.63162, -61.12890 ...",POINT (-61.10999 14.76083),14.760831,-61.109993,5
4,97203,972,Martinique - 3e circonscription,"POLYGON ((-61.03945 14.64265, -61.03940 14.642...",POINT (-61.06897 14.64060),14.640601,-61.068971,5


In [11]:
df_circo_test = df_circo[df_circo['dep'].isin(['14','43','78'])]
df_circo_test

Unnamed: 0,id_circo,dep,libelle,geometry,centroid,lat,lon,id_circo_len
60,14002,14,Calvados - 2e circonscription,"MULTIPOLYGON (((-0.09779 49.16358, -0.09779 49...",POINT (-0.23039 49.16719),49.167194,-0.230388,4
136,14001,14,Calvados - 1re circonscription,"MULTIPOLYGON (((-0.50179 49.16021, -0.50179 49...",POINT (-0.50910 49.18438),49.184385,-0.509097,4
158,14003,14,Calvados - 3e circonscription,"MULTIPOLYGON (((-0.41112 48.91971, -0.41112 48...",POINT (-0.02012 49.01783),49.01783,-0.020119,4
176,14006,14,Calvados - 6e circonscription,"MULTIPOLYGON (((-0.94426 48.96699, -0.91508 48...",POINT (-0.67385 48.95874),48.958738,-0.673849,4
270,78001,78,Yvelines - 1re circonscription,"MULTIPOLYGON (((2.00891 48.76789, 2.00891 48.7...",POINT (2.08090 48.78946),48.789456,2.080897,4
272,78002,78,Yvelines - 2e circonscription,"MULTIPOLYGON (((1.92481 48.66980, 1.92481 48.6...",POINT (2.05369 48.72890),48.728905,2.053692,4
407,14005,14,Calvados - 5e circonscription,"MULTIPOLYGON (((-0.92435 49.22818, -0.92435 49...",POINT (-0.77003 49.25623),49.256234,-0.770033,4
408,14004,14,Calvados - 4e circonscription,"MULTIPOLYGON (((0.30604 49.11147, 0.29713 49.1...",POINT (0.10671 49.26170),49.261701,0.106712,4
456,78004,78,Yvelines - 4e circonscription,"MULTIPOLYGON (((2.12436 48.87438, 2.12436 48.8...",POINT (2.13774 48.88680),48.886797,2.137738,4
470,78003,78,Yvelines - 3e circonscription,"MULTIPOLYGON (((2.13101 48.81487, 2.11428 48.8...",POINT (2.03799 48.84967),48.849673,2.037992,4


In [12]:
for _, r in df_circo_test.iterrows():
    m = folium.Map(location=[r["lat"],r["lon"]], zoom_start=8, tiles="CartoDB positron")
    # Without simplifying the representation of each borough,
    # the map might not be displayed
    sim_geo = gpd.GeoSeries(r["geometry"]).simplify(tolerance=0.001)
    geo_j = sim_geo.to_json()
    geo_j = folium.GeoJson(data=geo_j, style_function=lambda x: {"fillColor": "blue"})
    folium.Popup(r["libelle"]+ " " + r["id_circo"]).add_to(geo_j)
    geo_j.add_to(m)
    m.save('/kaggle/working/circo/map/map_' +  r["id_circo"] + '.html')

b. Données - https://www.insee.fr/fr/statistiques/6436476?sommaire=6436478

In [13]:
df_circo_stats = pd.read_excel('/kaggle/input/indic-stat-circo/indic-stat-circonscriptions-legislatives-2022.xlsx',
                               sheet_name='indicateurs_circonscriptions',
                               skiprows=7)
df_circo_stats.rename(columns={'circo': 'id_circo'}, inplace=True)

In [14]:
df_circo_stats_test = df_circo_stats[df_circo_stats['id_circo'].str[:2].isin(['14','43','78'])]
df_circo_stats_test.head()

Unnamed: 0,id_circo,Nom de la circonscription,Inscrit_22,pop_légal_19,pop_légal_13,tvar_pop,pop_pole_aav,pop_cour_aav,pop_horsaav,pop_urb,...,PPAT,PPSOC,PIMPOT,acc_ecole,acc_college,acc_lycee,acc_medecin,acc_dentiste,acc_pharmacie,part_eloig
60,14001,Calvados - 1re circonscription,71988,112766,111543,0.2,71.9,28.1,0.0,86.5,...,10.1,6.4,-18.1,98.5,72.8,66.8,89.2,80.6,89.2,2.3
61,14002,Calvados - 2e circonscription,68045,108854,105416,0.5,85.0,15.0,0.0,90.7,...,6.6,8.3,-14.8,97.4,82.1,70.8,95.7,85.4,94.7,1.1
62,14003,Calvados - 3e circonscription,79595,106852,109296,-0.4,18.8,64.9,16.3,27.9,...,7.6,6.7,-13.8,79.4,53.7,35.4,61.4,54.9,60.6,30.5
63,14004,Calvados - 4e circonscription,104420,125987,124737,0.2,32.6,64.1,3.4,56.5,...,12.4,4.6,-17.3,84.8,38.6,17.0,72.2,51.5,66.3,12.1
64,14005,Calvados - 5e circonscription,92374,112916,112476,0.1,14.7,77.6,7.7,44.2,...,9.9,4.5,-16.5,76.4,30.5,19.2,59.2,35.4,56.3,19.3


In [15]:
def save_row_to_csv(row, output_directory='/kaggle/working/circo/data/'):    
    # Define the CSV file path
    filename = f"{output_directory}/data_{row.id_circo}.csv"
    # Save the row to a CSV file
    row.to_csv(filename)

In [16]:
for index, row in df_circo_stats_test.iterrows():
    save_row_to_csv(row)

**2. Bureaux de votes**

a. Cartes

In [17]:
# Set filepath
fp_bv = "/kaggle/input/contours-france-entiere-latest-v2/contours-france-entiere-latest-v2.geojson"

# Read file using gpd.read_file()
df_bv = gpd.read_file(fp_bv)
df_bv = df_bv.to_crs(epsg=4326)

df_bv['codeDepartement'] = np.where(df_bv['codeDepartement'] == 'ZA', '971',
                           np.where(df_bv['codeDepartement'] == 'ZB', '972',
                           np.where(df_bv['codeDepartement'] == 'ZC', '973',
                           np.where(df_bv['codeDepartement'] == 'ZD', '974',
                           np.where(df_bv['codeDepartement'] == 'ZS', '975',
                           np.where(df_bv['codeDepartement'] == 'ZM', '976', df_bv['codeDepartement']))))))


df_bv['id_circo'] = np.where(df_bv['codeDepartement'].str.len() == 3,
                             df_bv['codeDepartement'] + df_bv['codeCirconscription'].astype(str).str[2:],
                             df_bv['codeCirconscription'].astype(str).str[:2] + '0' + df_bv['codeCirconscription'].astype(str).str[2:])



In [18]:
df_bv_test = df_bv[df_bv['codeDepartement'].isin(['14','43','78'])]
df_bv_test.head()

Unnamed: 0,codeDepartement,nomDepartement,codeCirconscription,nomCirconscription,codeCommune,nomCommune,numeroBureauVote,codeBureauVote,id_bv,geometry,id_circo
7776,14,Calvados,1404,4ème circonscription,14001,Ablon,1,14001_0001,14001_1,"POLYGON ((0.29683 49.42021, 0.29679 49.41277, ...",14004
7777,14,Calvados,1405,5ème circonscription,14003,Agy,1,14003_0001,14003_1,"POLYGON ((-0.79124 49.22918, -0.79201 49.22974...",14005
7778,14,Calvados,1406,6ème circonscription,14005,Valambray,1,14005_0001,14005_1,"POLYGON ((-0.16746 49.07795, -0.16808 49.07779...",14006
7779,14,Calvados,1406,6ème circonscription,14005,Valambray,2,14005_0002,14005_2,"POLYGON ((-0.20891 49.08056, -0.20896 49.08074...",14006
7780,14,Calvados,1406,6ème circonscription,14005,Valambray,3,14005_0003,14005_3,"MULTIPOLYGON (((-0.22639 49.07862, -0.22682 49...",14006


In [19]:
df_bv_test['centroid'] = df_bv_test.geometry.centroid
df_bv_test['lat'] = df_bv_test["centroid"].y
df_bv_test['lon'] = df_bv_test["centroid"].x

df_bv_test['bv_lib'] = df_bv_test['nomCommune'].str.cat(df_bv_test['numeroBureauVote'], sep = ' - ')

df_bv_test.head()


  df_bv_test['centroid'] = df_bv_test.geometry.centroid
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice fro

Unnamed: 0,codeDepartement,nomDepartement,codeCirconscription,nomCirconscription,codeCommune,nomCommune,numeroBureauVote,codeBureauVote,id_bv,geometry,id_circo,centroid,lat,lon,bv_lib
7776,14,Calvados,1404,4ème circonscription,14001,Ablon,1,14001_0001,14001_1,"POLYGON ((0.29683 49.42021, 0.29679 49.41277, ...",14004,POINT (0.28710 49.39418),49.394176,0.287097,Ablon - 0001
7777,14,Calvados,1405,5ème circonscription,14003,Agy,1,14003_0001,14003_1,"POLYGON ((-0.79124 49.22918, -0.79201 49.22974...",14005,POINT (-0.77897 49.23756),49.237564,-0.778968,Agy - 0001
7778,14,Calvados,1406,6ème circonscription,14005,Valambray,1,14005_0001,14005_1,"POLYGON ((-0.16746 49.07795, -0.16808 49.07779...",14006,POINT (-0.14437 49.09592),49.095919,-0.144369,Valambray - 0001
7779,14,Calvados,1406,6ème circonscription,14005,Valambray,2,14005_0002,14005_2,"POLYGON ((-0.20891 49.08056, -0.20896 49.08074...",14006,POINT (-0.19051 49.08891),49.088911,-0.190506,Valambray - 0002
7780,14,Calvados,1406,6ème circonscription,14005,Valambray,3,14005_0003,14005_3,"MULTIPOLYGON (((-0.22639 49.07862, -0.22682 49...",14006,POINT (-0.24356 49.08671),49.086706,-0.243563,Valambray - 0003


In [20]:
for _, r in df_bv_test.iterrows():
    m = folium.Map(location=[r["lat"],r["lon"]], zoom_start=10, tiles="CartoDB positron")
    # Without simplifying the representation of each borough,
    # the map might not be displayed
    sim_geo = gpd.GeoSeries(r["geometry"]).simplify(tolerance=0.001)
    geo_j = sim_geo.to_json()
    geo_j = folium.GeoJson(data=geo_j, style_function=lambda x: {"fillColor": "red"})
    folium.Popup(r["bv_lib"]).add_to(geo_j)
    geo_j.add_to(m)
    m.save('/kaggle/working/bv/map/map_' +  r["codeBureauVote"] + '.html')

In [43]:
export_test = df_bv_test[['codeDepartement','nomDepartement','codeCirconscription','nomCirconscription','id_circo','codeCommune','nomCommune','numeroBureauVote','codeBureauVote','id_bv']]
export_test.to_csv('/kaggle/working/dataset_dpt_circo_bv_test.csv', index=False)

b. Données

In [22]:
df_bv_resultats = pd.read_excel('/kaggle/input/resultats-definitifs-par-bureau-de-vote/resultats-definitifs-par-bureau-de-vote.xlsx')
df_bv_resultats.rename(columns={'Code département': 'codeDepartement'}, inplace=True)


In [31]:
df_bv_resultats['codeDepartement'] = df_bv_resultats['codeDepartement'].astype(str)
df_bv_resultats.rename(columns={'Code commune': 'codeCommune'}, inplace=True)
df_bv_resultats.rename(columns={'Code BV': 'codeBV'}, inplace=True)
df_bv_resultats['codeBureauVote'] = df_bv_resultats['codeCommune'].str.cat(df_bv_resultats['codeBV'], sep = '_')

In [32]:
df_bv_resultats_test = df_bv_resultats[df_bv_resultats['codeDepartement'].isin(['14','43','78'])]

In [33]:
def save_row_to_csv(row, output_directory='/kaggle/working/bv/data/'):    
    # Define the CSV file path
    filename = f"{output_directory}/data_{row.codeBureauVote}.csv"
    # Save the row to a CSV file
    row.to_csv(filename)

In [34]:
for index, row in df_bv_resultats_test.iterrows():
    save_row_to_csv(row)

In [35]:
export_test = df_bv_resultats_test
export_test.to_csv('/kaggle/working/output_bv_resultat_test.csv', index=False)

In [36]:
import zipfile
import os
from IPython.display import FileLink

def zip_dir(directory = os.curdir, file_name = 'directory_test_240622.zip'):
    """
    zip all the files in a directory
    
    Parameters
    _____
    directory: str
        directory needs to be zipped, default is current working directory
        
    file_name: str
        the name of the zipped file (including .zip), default is 'directory.zip'
        
    Returns
    _____
    Creates a hyperlink, which can be used to download the zip file)
    """
    os.chdir(directory)
    zip_ref = zipfile.ZipFile(file_name, mode='w')
    for folder, _, files in os.walk(directory):
        for file in files:
            if file_name in file:
                pass
            else:
                zip_ref.write(os.path.join(folder, file))

    return FileLink(file_name)

In [44]:
zip_dir()

In [38]:
%%writefile app.py
import streamlit as st
import pandas as pd


# 1.Liste des départements
file_path = '/kaggle/working/dataset_dpt_circo_bv_test.csv'
df = pd.read_csv(file_path,low_memory=False)
df.rename(columns={'codeDepartement': 'id_dep','nomDepartement': 'dep_name'}, inplace=True)
df['id_dep'] = df['id_dep'].astype(str)
df['id_circo'] = df['id_circo'].astype(str)
df['id_bv'] = df['id_bv'].astype(str)

df_dpt = df.drop(columns=['id_circo','nomCirconscription','codeCommune','nomCommune','numeroBureauVote','codeBureauVote','id_bv']).drop_duplicates()
df_dpt['id_dep'] = df_dpt['id_dep'].astype(str)
df_dpt.sort_values(by='id_dep',inplace=True)
df_dpt['id_dep'] = df_dpt['id_dep'].astype(str)
df_dpt['dep_lib'] = df_dpt['id_dep'].str.cat(df_dpt['dep_name'], sep = ' - ')

dpt = df_dpt['dep_lib'].drop_duplicates().sort_values()
dpt_selected = st.sidebar.selectbox('Sélection du département:', dpt)
# ID dpt
dpt_id_selected = dpt_selected.split(" - ")[0]


# 2.Liste des circonscriptions du département sélectionné
df_circo = df[df['id_dep'] == dpt_id_selected].drop(columns=['codeCommune','nomCommune','numeroBureauVote','codeBureauVote','id_bv']).drop_duplicates()
df_circo['id_circo'] = df_circo['id_circo'].astype(str)
df_circo.sort_values(by='id_circo',inplace=True)
df_circo['circo_lib'] = df_circo['id_circo'].str.cat(df_circo['nomCirconscription'], sep = ' - ')

circo =  df_circo['circo_lib'].drop_duplicates().sort_values()
circo_selected = st.sidebar.selectbox('Sélection de la circonscription:', circo)
# ID circo
circo_id_selected = str(circo_selected).split(" - ")[0]


# 3.Liste des bureaux de vote de la circonscription sélectionnée
df_bv = df[df['id_circo'] == circo_id_selected].drop_duplicates()
df_bv['codeBureauVote'] = df_bv['codeBureauVote'].astype(str)
df_bv.sort_values(by='codeBureauVote',inplace=True)
df_bv['bv_lib'] = df_bv['codeBureauVote'].str.cat(df_bv['nomCommune'], sep = ' - ')

bv =  df_bv['bv_lib'].drop_duplicates().sort_values()
bv_selected = st.sidebar.selectbox('Sélection du bureau de vote:', bv)
# ID bv
bv_id_selected = str(bv_selected).split(" - ")[0]


# Load the HTML file
def read_html_file(filename):
    with open(filename, 'r') as f:
        return f.read()


# Read the HTML content from the file
html_content = read_html_file('/kaggle/working/circo/map/map_' + circo_id_selected + '.html')
# Display the HTML content in Streamlit
st.components.v1.html(html_content, width=700, height=500)

st.write("")

df_circo_selected = pd.read_csv('/kaggle/working/circo/data/data_' + circo_id_selected + '.csv')
df_circo_selected = df_circo_selected.set_index(df_circo_selected.columns[0]).T
st.dataframe(df_circo_selected)


# Read the HTML content from the file
html_content = read_html_file('/kaggle/working/bv/map/map_' + bv_id_selected + '.html')
# Display the HTML content in Streamlit
st.components.v1.html(html_content, width=700, height=500)


df_bv_selected = pd.read_csv('/kaggle/working/bv/data/data_' + bv_id_selected + '.csv')
df_bv_selected = df_bv_selected.set_index(df_bv_selected.columns[0]).T
st.dataframe(df_bv_selected)


Writing app.py


In [46]:
!npm install localtunnel

[K[?25hm#########[0m[100;90m.........[0m] - idealTree: [32;40mtiming[0m [35midealTree[0m Completed in 100ms[0m[K
up to date, audited 23 packages in 725ms

3 packages are looking for funding
  run `npm fund` for details

2 [33m[1mmoderate[22m[39m severity vulnerabilities

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.


In [47]:
import urllib
print("Password/Enpoint IP for localtunnel is:",urllib.request.urlopen('https://ipv4.icanhazip.com').read().decode('utf8').strip("\n"))

Password/Enpoint IP for localtunnel is: 34.141.228.49


In [48]:
!streamlit run app.py &>./logs.txt & npx localtunnel --port 8501

your url is: https://social-steaks-drum.loca.lt
^C
