In [1]:
import re
import numpy as np
import pandas as pd
import geopandas as gpd
from src.download_file import download_file
from src.data_lycees import get_data_lycees
from src.data_lycees import merge_data_lycees
from src.data_biblio import get_data_biblio
from src.calcul_distance import calcul_distance
from src.calcul_biblio_rayons import calcul_biblio_rayons
#from plotnine import *
#import folium

pd.set_option('display.width', 120)

In [2]:
lycees_resultats, lycees_ips, annuaire_education = get_data_lycees()
lycees_data = merge_data_lycees(lycees_resultats, lycees_ips, annuaire_education)
lycees_idf = lycees_data[lycees_data['code_region'] == "11"]


In [3]:
(
    lycees_data
    .assign(
        # test = lycees_data['taux_men_gnle'] > -200
    )
    .groupby(["code_region", "libelle_region"]).size()
)

code_region  libelle_region            
11           Ile-de-France                 422
24           Centre-Val de Loire            60
27           Bourgogne-Franche-Comté        82
28           Normandie                      94
32           Hauts-de-France               164
44           Grand Est                     167
52           Pays de la Loire              116
53           Bretagne                       99
75           Nouvelle-Aquitaine            163
76           Occitanie                     165
84           Auvergne-Rhône-Alpes          238
93           Provence-Alpes-Côte d'Azur    141
dtype: int64

In [None]:
p = (
    ggplot(
        lycees_data,
        aes(
            x = "ips_voie_gt",
            y = "taux_men_gnle",
            color = "va_men_gnle"
    )
  ) + 
  geom_point() + 
  geom_smooth(method = 'lm') +
  scale_color_gradient2() +
    facet_wrap("libelle_region") +
    theme_bw()
)

# p.save(height=8, width=12, format='svg')
p.save("plot_region.png", height=8, width=12, format='png')

In [None]:
lycees_idf.info()

In [None]:
(
    lycees_idf
    .assign(
        # test = lycees_data['taux_men_gnle'] > -200
    )
    .groupby(["code_departement", "libelle_departement"]).size()
)

In [None]:
center = [lycees_idf['latitude'].mean(), lycees_idf["longitude"].mean()]
m = folium.Map(location=center, zoom_start=12)

tooltip = folium.GeoJsonTooltip(
    fields=['libelle_etablissement', 'presents_gnle', 'taux_reu_gnle', 'taux_men_gnle', 'ips_voie_gt'],
    aliases=[
        'Nom :', 'Nombre de candidats présents :', 'Taux de réussite :', 'Taux de mention',
        'Indice de position sociale :'
    ],
    localize=True
)

style_function = lambda x: {
    'fillColor': 'red',
    'color': 'blue',
    'weight': .5,
    'fillOpacity': 0.0
}

folium.GeoJson(
    lycees_idf,
    style_function=style_function,
    tooltip=tooltip
).add_to(m)
m

In [4]:
biblio = get_data_biblio()
biblio = biblio.dropna(subset=['ADRES_LATITUDE'])
biblio = biblio[biblio['ADRES_PAYS'].isin(["FR", 'fr'])]
lycees_data.columns

Index(['uai', 'annee', 'presents_gnle', 'taux_reu_gnle', 'va_reu_gnle', 'taux_men_gnle', 'va_men_gnle', 'ips_voie_gt',
       'ecart_type_voie_gt', 'libelle_etablissement', 'code_commune', 'code_departement', 'code_academie',
       'code_region', 'nom_commune', 'libelle_departement', 'libelle_academie', 'libelle_region', 'type_etablissement',
       'statut_public_prive', 'type_contrat_prive', 'restauration', 'hebergement', 'ulis', 'apprentissage', 'segpa',
       'appartenance_education_prioritaire', 'greta', 'pial', 'voie_generale', 'voie_technologique',
       'voie_professionnelle', 'section_arts', 'section_cinema', 'section_theatre', 'section_sport',
       'section_internationale', 'section_europeenne', 'lycee_agricole', 'lycee_militaire', 'lycee_des_metiers',
       'post_bac', 'position', 'latitude', 'longitude'],
      dtype='object')

In [10]:
lycees_data.columns

Index(['uai', 'annee', 'presents_gnle', 'taux_reu_gnle', 'va_reu_gnle', 'taux_men_gnle', 'va_men_gnle', 'ips_voie_gt',
       'ecart_type_voie_gt', 'libelle_etablissement', 'code_commune', 'code_departement', 'code_academie',
       'code_region', 'nom_commune', 'libelle_departement', 'libelle_academie', 'libelle_region', 'type_etablissement',
       'statut_public_prive', 'type_contrat_prive', 'restauration', 'hebergement', 'ulis', 'apprentissage', 'segpa',
       'appartenance_education_prioritaire', 'greta', 'pial', 'voie_generale', 'voie_technologique',
       'voie_professionnelle', 'section_arts', 'section_cinema', 'section_theatre', 'section_sport',
       'section_internationale', 'section_europeenne', 'lycee_agricole', 'lycee_militaire', 'lycee_des_metiers',
       'post_bac', 'position', 'latitude', 'longitude'],
      dtype='object')

In [8]:
lyc_distance = calcul_distance(lycees_data, biblio)
lyc_rayons = calcul_biblio_rayons(lycees_data, biblio)

# Fusion finale 
data_finale = lyc_distance.merge(
    lyc_rayons,
    on="uai",
    suffixes=("", "_rayon")
)

In [9]:
data_finale.head()

Unnamed: 0,uai,annee,presents_gnle,taux_reu_gnle,va_reu_gnle,taux_men_gnle,va_men_gnle,ips_voie_gt,ecart_type_voie_gt,libelle_etablissement,...,lycee_militaire_rayon,lycee_des_metiers_rayon,post_bac_rayon,position_rayon,latitude_rayon,longitude_rayon,nb_biblio_500,nb_biblio_1000,nb_biblio_2000,nb_biblio_5000
0,0010006B,2024,104.0,98.0,1.0,63.0,-5.0,99.2,32.0,Lycée polyvalent Saint-Exupéry,...,0,1,1,POINT (917623.9 6560849.305),46.112484,5.818292,0.0,1.0,2.0,2.0
1,0010010F,2024,145.0,92.0,-5.0,62.0,-7.0,110.2,33.0,Lycée polyvalent du Bugey,...,0,0,0,POINT (909024.7 6521619.905),45.762141,5.68972,1.0,1.0,1.0,1.0
2,0010013J,2024,330.0,99.0,0.0,81.0,-3.0,123.7,35.1,Lycée Lalande,...,0,0,1,POINT (871577.8 6569320.405),46.201967,5.225431,4.0,6.0,10.0,11.0
3,0010014K,2024,234.0,94.0,-4.0,59.0,-13.0,108.6,32.9,Lycée Edgar Quinet,...,0,0,1,POINT (871668.3 6568818.605),46.197427,5.22642,1.0,7.0,8.0,11.0
4,0010016M,2024,233.0,96.0,-2.0,59.0,-12.0,110.6,33.7,Lycée polyvalent Joseph-Marie Carriat,...,0,0,1,POINT (871495.2 6568856.805),46.197815,5.22419,2.0,6.0,8.0,11.0


In [8]:
biblio.geometry.head()


0     POINT (844367.27 6519624.236)
1    POINT (953675.138 6699996.481)
2    POINT (778817.499 6797146.375)
3    POINT (652851.006 6860356.163)
4    POINT (969128.655 6811226.321)
Name: geometry, dtype: geometry

In [None]:
biblio.info()

In [None]:
biblio.head()

In [None]:
(
    biblio
    .assign(
        test = biblio['ADRES_PAYS']
    )
    .groupby(["test"]).size()
)

In [None]:
pd.crosstab(
    biblio['TYPEFAMABES_d'],
    biblio['CONDITIONACCES_d']
)

In [None]:
lycees_75 = lycees_data[lycees_data['code_departement'] == "075"]
biblio_75 = biblio[biblio['ADRES_CODEPOSTAL'].str[0:2] == "75"]

center = [biblio_75.ADRES_LATITUDE.mean(), biblio_75.ADRES_LONGITUDE.mean()]
m = folium.Map(
    location=center, 
    zoom_start=12
    )

tooltip = folium.GeoJsonTooltip(
    fields=['NOMETABLISSEMENT', 'TYPEETABABES_d', 'CONDITIONACCES_d'],
    aliases=['Nom :', 'TYPEETABABES : ', 'CONDITIONACCES_d :'],
    localize=True
)
folium.GeoJson(
    biblio_75,
    tooltip=tooltip, 
    marker=folium.Marker(icon=folium.Icon(color="green")),
).add_to(m)

tooltip = folium.GeoJsonTooltip(
    fields=['libelle_etablissement', 'presents_gnle', 'taux_reu_gnle', 'taux_men_gnle', 'ips_voie_gt'],
    aliases=[
        'Nom :', 'Nombre de candidats présents :', 'Taux de réussite :', 'Taux de mention',
        'Indice de position sociale :'
    ],
    localize=True
)
folium.GeoJson(
    lycees_75,
    tooltip=tooltip, 
    marker=folium.Marker(icon=folium.Icon(color="blue")),
).add_to(m)

m