## Exploration CodeGeo <-> Coordonnées GPS

### Sources
GeoCodes Insee <-> Lat/Lon
- insee-coordonnees-communes-departement-region.csv
- https://www.data.gouv.fr/fr/datasets/base-officielle-des-codes-postaux/

Population
- insee-estimation-population-2024.csv
- https://www.data.gouv.fr/fr/datasets/estimations-de-population/

Ecoles
- fr-en-ecoles-effectifs-nb_classes
- https://data.education.gouv.fr/explore/dataset/fr-en-ecoles-effectifs-nb_classes/information/



In [109]:
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np
import geopandas as gpd
import folium as flm
import mapclassify as mpc
from shapely import wkt

## Fetching Geo location data for communes

### All communes with centroids

In [110]:
df = pd.read_csv('../csv/insee-coordonnees-communes-departement-region.csv')
df = df.rename(columns={"code_commune_insee":"codgeo", "code_postal":"codpost","nom_de_la_commune":"nom"})
df = pd.concat([df, df._geopoint.str.split(',', expand=True).rename(columns={0:'latitude', 1:'longitude'})], axis=1)
df = df[['codgeo', 'codpost', "nom", 'longitude', 'latitude']]
df.codpost = df.codpost.astype(str)
df.loc[:,'codpost'] = np.where(df.codpost.str.len() == 4, '0' + df.codpost, df.codpost)

#df = df.drop_duplicates(subset=['codgeo'], keep='first')

In [111]:
df.head()

Unnamed: 0,codgeo,codpost,nom,longitude,latitude
0,1001,1400,L ABERGEMENT CLEMENCIAT,4.9306005,46.1517018
1,1002,1640,L ABERGEMENT DE VAREY,5.4246442,46.007131
2,1004,1500,AMBERIEU EN BUGEY,5.37056825,45.9574707
3,1005,1330,AMBERIEUX EN DOMBES,4.9118718,45.99922935
4,1006,1300,AMBLEON,5.5927847,45.74831435


In [112]:
df.describe()

Unnamed: 0,codgeo,codpost,nom,longitude,latitude
count,39192,39192,39192,39139.0,39139.0
unique,35067,6328,32758,35022.0,35020.0
top,14654,55300,ST PIERRE EN AUGE,0.0252265,48.9873325
freq,22,46,22,22.0,22.0


In [113]:
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude))
#gdf.explore()

Let' use arbitrary min/max values to select continental France only 

In [114]:
# used GMaps to identify these coordinates
min_lat = 42
max_lat = 52
min_lon = -5
max_lon = 8

#df_hex = df[df.latitude.between(min_lat, max_lat) & df.longitude.between(min_lon, max_lon)]
gdf = gdf.cx[min_lon:max_lon, min_lat:max_lat]

### Let's add the Communes polygons

In [115]:
communes_polygones = gpd.read_file('../csv/communes-version-simplifiee.geojson').rename(columns={'code':'codgeo','geometry':'polygon'})
communes_polygones = communes_polygones[['codgeo', 'polygon']]

In [116]:
gdf = gdf.join(communes_polygones.set_index('codgeo'), on='codgeo', how='left')
gdf.head()

AttributeError: 'NoneType' object has no attribute 'copy'

Unnamed: 0,codgeo,codpost,nom,longitude,latitude,geometry,polygon
0,1001,1400,L ABERGEMENT CLEMENCIAT,4.9306005,46.1517018,POINT (4.9306 46.1517),"POLYGON ((4.90457 46.16096, 4.91332 46.1829, 4..."
1,1002,1640,L ABERGEMENT DE VAREY,5.4246442,46.007131,POINT (5.42464 46.00713),"POLYGON ((5.42476 46.03131, 5.44129 46.0254, 5..."
2,1004,1500,AMBERIEU EN BUGEY,5.37056825,45.9574707,POINT (5.37057 45.95747),"POLYGON ((5.38619 45.93093, 5.35724 45.94863, ..."
3,1005,1330,AMBERIEUX EN DOMBES,4.9118718,45.99922935,POINT (4.91187 45.99923),"POLYGON ((4.94287 45.97914, 4.92773 45.98003, ..."
4,1006,1300,AMBLEON,5.5927847,45.74831435,POINT (5.59278 45.74831),"POLYGON ((5.57082 45.75338, 5.58429 45.76259, ..."


Let' s draw a map to get a sense of what we have

In [117]:
gdf.set_geometry('polygon', inplace=True)
#gdf.explore()

In [118]:
gdf.head()

AttributeError: 'NoneType' object has no attribute 'copy'

Unnamed: 0,codgeo,codpost,nom,longitude,latitude,geometry,polygon
0,1001,1400,L ABERGEMENT CLEMENCIAT,4.9306005,46.1517018,POINT (4.9306 46.1517),"POLYGON ((4.90457 46.16096, 4.91332 46.1829, 4..."
1,1002,1640,L ABERGEMENT DE VAREY,5.4246442,46.007131,POINT (5.42464 46.00713),"POLYGON ((5.42476 46.03131, 5.44129 46.0254, 5..."
2,1004,1500,AMBERIEU EN BUGEY,5.37056825,45.9574707,POINT (5.37057 45.95747),"POLYGON ((5.38619 45.93093, 5.35724 45.94863, ..."
3,1005,1330,AMBERIEUX EN DOMBES,4.9118718,45.99922935,POINT (4.91187 45.99923),"POLYGON ((4.94287 45.97914, 4.92773 45.98003, ..."
4,1006,1300,AMBLEON,5.5927847,45.74831435,POINT (5.59278 45.74831),"POLYGON ((5.57082 45.75338, 5.58429 45.76259, ..."


## Let's fetch population data

In [119]:
pop = pd.read_csv('../csv/insee-estimation-population-2024.csv')

In [120]:
pop.head()

Unnamed: 0,objectid,reg,dep,cv,codgeo,libgeo,p13_pop,p14_pop,p15_pop,p16_pop,p17_pop,p18_pop,p19_pop,p20_pop,p21_pop
0,115658,52,85,8502,85062,Châteauneuf,968.0,993.0,1013.0,1027.0,1056,1085.0,1114.0,1118.0,1134.0
1,115659,26,58,5808,58300,Urzy,1839.0,1835.0,1828.0,1802.0,1775,1749.0,1746.0,1747.0,1742.0
2,115660,43,70,7012,70137,Chassey-lès-Montbozon,218.0,217.0,216.0,215.0,217,215.0,215.0,220.0,225.0
3,115661,21,51,5123,51649,Vitry-le-François,13174.0,13144.0,12805.0,12552.0,12133,11743.0,11376.0,11458.0,11454.0
4,115662,11,78,7811,78638,Vaux-sur-Seine,4749.0,4715.0,4788.0,4857.0,4927,4929.0,5010.0,5020.0,5083.0


In [121]:
pop = pop[["codgeo", "libgeo", "p21_pop", 'p20_pop']]
pop['population'] = np.where(pop.p21_pop.isna(), pop.p20_pop, pop.p21_pop)

Let's join the Commune with Coordinates and the Estimation Population datasets

In [122]:
gdf_pop = gdf.join(pop.set_index('codgeo'), on='codgeo', how='left')

In [123]:
gdf_pop.head()

AttributeError: 'NoneType' object has no attribute 'copy'

Unnamed: 0,codgeo,codpost,nom,longitude,latitude,geometry,polygon,libgeo,p21_pop,p20_pop,population
0,1001,1400,L ABERGEMENT CLEMENCIAT,4.9306005,46.1517018,POINT (4.9306 46.1517),"POLYGON ((4.90457 46.16096, 4.91332 46.1829, 4...",L' Abergement-Clémenciat,832.0,806.0,832.0
1,1002,1640,L ABERGEMENT DE VAREY,5.4246442,46.007131,POINT (5.42464 46.00713),"POLYGON ((5.42476 46.03131, 5.44129 46.0254, 5...",L' Abergement-de-Varey,267.0,262.0,267.0
2,1004,1500,AMBERIEU EN BUGEY,5.37056825,45.9574707,POINT (5.37057 45.95747),"POLYGON ((5.38619 45.93093, 5.35724 45.94863, ...",Ambérieu-en-Bugey,14854.0,14288.0,14854.0
3,1005,1330,AMBERIEUX EN DOMBES,4.9118718,45.99922935,POINT (4.91187 45.99923),"POLYGON ((4.94287 45.97914, 4.92773 45.98003, ...",Ambérieux-en-Dombes,1897.0,1782.0,1897.0
4,1006,1300,AMBLEON,5.5927847,45.74831435,POINT (5.59278 45.74831),"POLYGON ((5.57082 45.75338, 5.58429 45.76259, ...",Ambléon,113.0,113.0,113.0


Let's check the locations with missing population data

In [124]:
loc_without_pop = gdf_pop[gdf_pop.p21_pop.isna() & gdf_pop.p20_pop.isna()]

In [125]:
loc_without_pop

AttributeError: 'NoneType' object has no attribute 'copy'

Unnamed: 0,codgeo,codpost,nom,longitude,latitude,geometry,polygon,libgeo,p21_pop,p20_pop,population
5506,14666,14940,SANNERVILLE,-0.2261987999999999,49.18609615,POINT (-0.2262 49.1861),,,,,


So we have 928 values with missing pop data (2020 or 2021)

Let's get a map of France colored with 2021 estimated population

In [126]:
#gdf_pop.explore(column = 'p21_pop', cmap='viridis_r')

## Let's fetch schools datapoints

In [127]:
ecoles = pd.read_parquet('../csv/fr-en-ecoles-effectifs-nb_classes.parquet')

In [128]:
ecoles.head()

Unnamed: 0,rentree_scolaire,region_academique,academie,departement,commune,numero_ecole,denomination_principale,patronyme,secteur,rep,...,nombre_eleves_preelementaire_hors_ulis,nombre_eleves_elementaire_hors_ulis,nombre_eleves_ulis,nombre_eleves_cp_hors_ulis,nombre_eleves_ce1_hors_ulis,nombre_eleves_ce2_hors_ulis,nombre_eleves_cm1_hors_ulis,nombre_eleves_cm2_hors_ulis,tri,code_postal
0,2022,AUVERGNE-ET-RHONE-ALPES,CLERMONT-FERRAND,ALLIER,ABREST,0030701W,ECOLE PRIMAIRE PUBLIQUE,,PUBLIC,0,...,56,108,0,16,22,18,31,21,78-AUVERGNE-ET-RHONE-ALPES-CLERMONT-FERRAND-AL...,3200
1,2022,AUVERGNE-ET-RHONE-ALPES,CLERMONT-FERRAND,ALLIER,AINAY-LE-CHATEAU,0030705A,ECOLE PRIMAIRE,,PUBLIC,0,...,30,69,0,13,10,14,18,14,78-AUVERGNE-ET-RHONE-ALPES-CLERMONT-FERRAND-AL...,3360
2,2022,AUVERGNE-ET-RHONE-ALPES,CLERMONT-FERRAND,ALLIER,ARCHIGNAT,0030709E,ECOLE DE NIVEAU ELEMENTAIRE,DU PARC,PUBLIC,0,...,34,11,0,0,11,0,0,0,78-AUVERGNE-ET-RHONE-ALPES-CLERMONT-FERRAND-AL...,3380
3,2022,AUVERGNE-ET-RHONE-ALPES,CLERMONT-FERRAND,ALLIER,ARFEUILLES,0030711G,ECOLE PRIMAIRE PUBLIQUE,,PUBLIC,0,...,27,26,0,7,5,6,3,5,78-AUVERGNE-ET-RHONE-ALPES-CLERMONT-FERRAND-AL...,3120
4,2022,AUVERGNE-ET-RHONE-ALPES,CLERMONT-FERRAND,ALLIER,ARPHEUILLES-SAINT-PRIEST,0030715L,ECOLE ELEMENTAIRE PUBLIQUE,,PUBLIC,0,...,0,25,0,0,0,0,9,16,78-AUVERGNE-ET-RHONE-ALPES-CLERMONT-FERRAND-AL...,3420


## Let's gather details about adjacent communes

In [129]:
communes_adj = pd.read_csv('../csv/communes_adjacentes_2022.csv')

In [130]:
communes_adj.head()

Unnamed: 0,insee,nom,nb_voisins,insee_voisins,noms_voisins,cap_voisins,type
0,1001,L'Abergement-Clémenciat,6,01412|01093|01028|01146|01351|01188,Sulignat|Châtillon-sur-Chalaronne|Baneins|Domp...,48|144|203|234|260|322,
1,1002,L'Abergement-de-Varey,6,01056|01277|01384|01007|01363|01199,Boyeux-Saint-Jérôme|Nivollet-Montgriffon|Saint...,43|115|172|265|302|344,
2,1004,Ambérieu-en-Bugey,7,01384|01421|01041|01345|01089|01007|01149,Saint-Rambert-en-Bugey|Torcieu|Bettant|Saint-D...,102|151|190|250|284|347|360,
3,1005,Ambérieux-en-Dombes,7,01382|01207|01261|01362|01318|01398|01446,Sainte-Olive|Lapeyrouse|Monthieux|Saint-Jean-d...,43|102|145|193|230|272|310,
4,1006,Ambléon,6,01358|01110|01117|01216|01233|01190,Saint-Germain-les-Paroisses|Colomieu|Conzieu|L...,37|120|164|263|308|341,


In [131]:
communes_adj['liste_voisins'] = communes_adj.insee_voisins.str.split('|')
communes_adj = communes_adj[['insee','nb_voisins','liste_voisins']].rename({'insee':'codgeo'}, axis=1)

In [132]:
communes_adj.head()

Unnamed: 0,codgeo,nb_voisins,liste_voisins
0,1001,6,"[01412, 01093, 01028, 01146, 01351, 01188]"
1,1002,6,"[01056, 01277, 01384, 01007, 01363, 01199]"
2,1004,7,"[01384, 01421, 01041, 01345, 01089, 01007, 01149]"
3,1005,7,"[01382, 01207, 01261, 01362, 01318, 01398, 01446]"
4,1006,6,"[01358, 01110, 01117, 01216, 01233, 01190]"


In [133]:
gdf_pop = gdf_pop.join(communes_adj.set_index('codgeo'), on='codgeo', how='left')
gdf_pop.head()

AttributeError: 'NoneType' object has no attribute 'copy'

Unnamed: 0,codgeo,codpost,nom,longitude,latitude,geometry,polygon,libgeo,p21_pop,p20_pop,population,nb_voisins,liste_voisins
0,1001,1400,L ABERGEMENT CLEMENCIAT,4.9306005,46.1517018,POINT (4.9306 46.1517),"POLYGON ((4.90457 46.16096, 4.91332 46.1829, 4...",L' Abergement-Clémenciat,832.0,806.0,832.0,6.0,"[01412, 01093, 01028, 01146, 01351, 01188]"
1,1002,1640,L ABERGEMENT DE VAREY,5.4246442,46.007131,POINT (5.42464 46.00713),"POLYGON ((5.42476 46.03131, 5.44129 46.0254, 5...",L' Abergement-de-Varey,267.0,262.0,267.0,6.0,"[01056, 01277, 01384, 01007, 01363, 01199]"
2,1004,1500,AMBERIEU EN BUGEY,5.37056825,45.9574707,POINT (5.37057 45.95747),"POLYGON ((5.38619 45.93093, 5.35724 45.94863, ...",Ambérieu-en-Bugey,14854.0,14288.0,14854.0,7.0,"[01384, 01421, 01041, 01345, 01089, 01007, 01149]"
3,1005,1330,AMBERIEUX EN DOMBES,4.9118718,45.99922935,POINT (4.91187 45.99923),"POLYGON ((4.94287 45.97914, 4.92773 45.98003, ...",Ambérieux-en-Dombes,1897.0,1782.0,1897.0,7.0,"[01382, 01207, 01261, 01362, 01318, 01398, 01446]"
4,1006,1300,AMBLEON,5.5927847,45.74831435,POINT (5.59278 45.74831),"POLYGON ((5.57082 45.75338, 5.58429 45.76259, ...",Ambléon,113.0,113.0,113.0,6.0,"[01358, 01110, 01117, 01216, 01233, 01190]"


## Let's add INSEES Aires d'Attraction as well

In [134]:
gdf_aa=gpd.GeoDataFrame(pd.read_csv('../csv/insee-aires-d-attractions-2017.csv'))
gdf_aa['codgeo'] = np.where(gdf_aa.iloc[:,0].str.len() == 4, '0' + gdf_aa.iloc[:,0], gdf_aa.iloc[:,0])
gdf_aa['codgeo_aa'] = gdf_aa["Code géographique de l'aire d'attraction des villes 2020"]
gdf_aa['name'] = gdf_aa["Libellé géographique de la commune"]
gdf_aa['aa_name'] = gdf_aa["Libellé géographique de l'aire d'attraction des villes 2020"]
gdf_aa['aa_cat'] = gdf_aa["Catégorie de la commune dans le zonage en aires d'attraction des villes 2020"]
gdf_aa = gdf_aa[['codgeo', 'name', 'codgeo_aa', 'aa_name', 'aa_cat']]

In [135]:
gdf_pop = gdf_pop.join(gdf_aa.set_index('codgeo'), on='codgeo', how='left')
gdf_pop.head()

AttributeError: 'NoneType' object has no attribute 'copy'

Unnamed: 0,codgeo,codpost,nom,longitude,latitude,geometry,polygon,libgeo,p21_pop,p20_pop,population,nb_voisins,liste_voisins,name,codgeo_aa,aa_name,aa_cat
0,1001,1400,L ABERGEMENT CLEMENCIAT,4.9306005,46.1517018,POINT (4.9306 46.1517),"POLYGON ((4.90457 46.16096, 4.91332 46.1829, 4...",L' Abergement-Clémenciat,832.0,806.0,832.0,6.0,"[01412, 01093, 01028, 01146, 01351, 01188]",L'Abergement-Clémenciat,524,Châtillon-sur-Chalaronne,20.0
1,1002,1640,L ABERGEMENT DE VAREY,5.4246442,46.007131,POINT (5.42464 46.00713),"POLYGON ((5.42476 46.03131, 5.44129 46.0254, 5...",L' Abergement-de-Varey,267.0,262.0,267.0,6.0,"[01056, 01277, 01384, 01007, 01363, 01199]",L'Abergement-de-Varey,0,Commune hors attraction des villes,30.0
2,1004,1500,AMBERIEU EN BUGEY,5.37056825,45.9574707,POINT (5.37057 45.95747),"POLYGON ((5.38619 45.93093, 5.35724 45.94863, ...",Ambérieu-en-Bugey,14854.0,14288.0,14854.0,7.0,"[01384, 01421, 01041, 01345, 01089, 01007, 01149]",Ambérieu-en-Bugey,243,Ambérieu-en-Bugey,11.0
3,1005,1330,AMBERIEUX EN DOMBES,4.9118718,45.99922935,POINT (4.91187 45.99923),"POLYGON ((4.94287 45.97914, 4.92773 45.98003, ...",Ambérieux-en-Dombes,1897.0,1782.0,1897.0,7.0,"[01382, 01207, 01261, 01362, 01318, 01398, 01446]",Ambérieux-en-Dombes,2,Lyon,20.0
4,1006,1300,AMBLEON,5.5927847,45.74831435,POINT (5.59278 45.74831),"POLYGON ((5.57082 45.75338, 5.58429 45.76259, ...",Ambléon,113.0,113.0,113.0,6.0,"[01358, 01110, 01117, 01216, 01233, 01190]",Ambléon,286,Belley,20.0


## Let's save all of this as a new CSV

In [136]:
gdf_pop.set_geometry('polygon', inplace=True)
gdf_pop.polygon = gdf_pop.polygon.to_wkt()
gdf_pop.to_csv('../csv/communes-avec-coords-polygons-population-voisins-aa.csv', index=False,  mode='w')

  self[name] = value
