# Creation DataSet pour les communes

## Bibliotheque utilisé

In [1]:
import pandas as pd
import os
import webbrowser
from ydata_profiling import ProfileReport

## Chargement des données

In [2]:
#creation du dataset
df_pourcent_chomage = pd.read_excel('../AutreDataDepartement/tauxChomage.xlsx').dropna()
df_pourcent_chomage = df_pourcent_chomage.iloc[:, [1, 2]]
df_pourcent_chomage.columns = ['Departement', 'tauxChomage']

df_niveauEtude = pd.read_excel('../AutreDataDepartement/NiveauEtudeJeune2017.xlsx', sheet_name='Figure 1a').dropna()
df_niveauEtude = df_niveauEtude.iloc[:, [1, 2]]
df_niveauEtude.columns = ['Departement', 'niveauEtude']

df_densitePop = pd.read_excel('../AutreDataDepartement/NbPopDepartement2023.xls', sheet_name='2023').dropna()
df_densitePop = df_densitePop.iloc[:, [1, 7]]
df_densitePop.columns = ['Departement', 'nbPersonne']

df_magasinBIO = pd.read_excel('../AutreDataDepartement/MagasinBio2016.xls', sheet_name='Figure 2').dropna()
df_magasinBIO.columns = ['Departement', 'Proximité']

df_PopImmigre = pd.read_excel('../AutreDataDepartement/PopImmigré.xlsx').dropna()
df_PopImmigre = df_PopImmigre.iloc[:, [1, 2]]
df_PopImmigre.columns = ['Departement', 'Immigré']

# Charger les données des résultats présidentiels
df_resultatPresidentielle = pd.read_excel('../resultats-par-niveau-subcom-t2-france-entiere.xlsx').dropna()
df_resultatPresidentielle = df_resultatPresidentielle.iloc[:, [1, 25, 32]]
df_resultatPresidentielle.columns = [ 'Departement', 'Macron', 'Lepen']
df_resultatPresidentielle = df_resultatPresidentielle.groupby('Departement')[['Macron', 'Lepen']].mean().reset_index()

df_concat = pd.merge(df_densitePop, df_resultatPresidentielle, on='Departement')
df_concat = pd.merge(df_concat, df_niveauEtude, on='Departement')
df_concat = pd.merge(df_concat, df_pourcent_chomage, on='Departement')
df_concat = pd.merge(df_concat, df_magasinBIO, on='Departement')
df_concat = pd.merge(df_concat, df_PopImmigre, on='Departement')

#normalisation
df_concat['Immigré'] = df_concat['Immigré'] / df_concat['nbPersonne'] * 100
print(df_concat)

                Departement nbPersonne     Macron      Lepen niveauEtude  \
0                       Ain     671937  50.982366  49.017659        41.4   
1                     Aisne     522791  38.089161  61.910914        29.7   
2                    Allier     332443  48.201987  51.798013        33.4   
3   Alpes-de-Haute-Provence     166654  47.890556  52.109697        37.2   
4              Hautes-Alpes     139942  53.414383  46.585802        41.7   
..                      ...        ...        ...        ...         ...   
89    Territoire de Belfort     136891  45.499208  54.500792        43.3   
90                  Essonne    1316053  58.871340  41.128711        47.5   
91           Hauts-de-Seine    1642002  80.148611  19.851389        71.1   
92             Val-de-Marne    1426748  71.616596  28.383404        56.1   
93               Val-d'Oise    1274374  58.109402  41.890707        45.2   

    tauxChomage Proximité    Immigré  
0           5.5      1.66  11.803488  
1        

In [3]:
#Affichage des informations pertinantes

#print(df_concat.head())
print(df_concat.info())
#print(df_concat.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94 entries, 0 to 93
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Departement  94 non-null     object 
 1   nbPersonne   94 non-null     object 
 2   Macron       94 non-null     float64
 3   Lepen        94 non-null     float64
 4   niveauEtude  94 non-null     object 
 5   tauxChomage  94 non-null     float64
 6   Proximité    94 non-null     object 
 7   Immigré      94 non-null     object 
dtypes: float64(3), object(5)
memory usage: 6.0+ KB
None


In [4]:
#sauvegarde dataset
df_concat.to_excel('Departement.xlsx', index=False)

## Creation de l'analyse des données

In [5]:
#Analyse des données
html_file_path = "rapport_correlations.html"
if os.path.exists(html_file_path):
    webbrowser.open(html_file_path)
else:
    rapport = ProfileReport(df_concat)
    rapport.to_file(html_file_path)
    webbrowser.open(html_file_path)


Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]