## Intro

### Scoring Logic Description

1. We start by importing the odis dataframe from a CSV that includes all the relevant datapoint to score and display data
2. We compute the scores for each criteria specific to the commune (independant from subject)
3. We compute the scores for each criteria specific to the subject (dependand from both subject and commune) 
4. We identify all commune<->neighbour pairs (binômes) for each commune within search radius
5. We compute category scores (emploi, logement, education etc...) as an average of the all the scores for a given category
6. For each commune we compare the commune and neighbour category scores and weighted the highest one with category weights defined by subject and then keep the best weighted score for each commune
8. We display result in on a map

In [11]:
# THIS SHOULD BE THE END OF JUPYTER NOTEBOOK EXPORT
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from scipy import stats
import folium as flm #required for gdf.explore()
import shapely as shp
from shapely.wkt import loads
from shapely.geometry import Polygon
from sklearn import preprocessing

### 1. Fetching key indicators from ODIS source file

In [12]:
def init_loading_datasets(odis_file, scores_cat_file, metiers_file, formations_file, ecoles_file, maternites_file, sante_file):
    odis = gpd.GeoDataFrame(gpd.read_parquet(odis_file))
    odis.set_geometry(odis.polygon, inplace=True)
    odis = odis[~odis.polygon.isna()]

    # Index of all scores and their explanations
    scores_cat = pd.read_csv(scores_cat_file)

    #Later we need the code FAP <-> FAP Name used to classify jobs
    codfap_index = pd.read_csv(metiers_file, delimiter=';')

    # Later we need the code formation <-> Formation Name used to classify trainings
    # source: https://www.data.gouv.fr/fr/datasets/liste-publique-des-organismes-de-formation-l-6351-7-1-du-code-du-travail/
    codformations_index = pd.read_csv(formations_file).set_index('codformation')

    # Etablissements scolaires
    annuaire_ecoles = pd.read_parquet(ecoles_file)
    annuaire_ecoles.geometry = annuaire_ecoles.geometry.apply(shp.from_wkb)

    #Annuaire Maternités
    # Source: https://www.data.gouv.fr/fr/datasets/liste-des-maternites-de-france-depuis-2000/
    annuaire_maternites = pd.read_csv(maternites_file, delimiter=';')
    annuaire_maternites.drop_duplicates(subset=['FI_ET'], keep='last', inplace=True)
    annuaire_maternites.head()

    # Annuaire etablissements santé
    # Source: https://www.data.gouv.fr/fr/datasets/reexposition-des-donnees-finess/
    annuaire_sante = pd.read_parquet(sante_file)
    annuaire_sante = annuaire_sante[annuaire_sante.LibelleSph == 'Etablissement public de santé']
    annuaire_sante['geometry'] = gpd.points_from_xy(annuaire_sante.coordxet, annuaire_sante.coordyet, crs='epsg:2154')
    annuaire_sante = pd.merge(annuaire_sante, annuaire_maternites[['FI_ET']], left_on='nofinesset', right_on='FI_ET', how='left', indicator="maternite")
    annuaire_sante.drop(columns=['FI_ET'], inplace=True)
    annuaire_sante.maternite = np.where(annuaire_sante.maternite == 'both', True, False)
    annuaire_sante['codgeo'] = annuaire_sante.Departement + annuaire_sante.Commune

    return odis, scores_cat, codfap_index, codformations_index, annuaire_ecoles, annuaire_sante

### 2. Distance filter + Gathering nearby Communes Scores

In [13]:
# Filtering dataframe based on subject distance preference (to save on compute time later on)
def filter_loc_by_distance(df, distance):
    return df[df.dist_current_loc < distance * 1000]

# Put None as a score in the monome case
def monome_cleanup(df):
    mask = ~df['binome']
    for col in df.columns:
        if col.endswith('_binome'):
            df.loc[mask, col] = None
    return df

In [14]:
def adding_score_voisins(df_search, scores_cat):
    #df_search is the dataframe pre-filtered by location
    #df_source is the dataframe with all the communes
    binome_columns = ['codgeo','libgeo','polygon','epci_code','epci_nom'] + scores_cat[scores_cat.incl_binome]['score'].to_list()+scores_cat[scores_cat.incl_binome]['metric'].to_list()
    binome_columns = list(set(binome_columns) & set(df_search.columns))
    df_binomes = df_search[binome_columns].copy()

    # Adds itself to list of voisins = monome case
    # Note: this code triggers the SettingWithCopyWarning but I don't know how to fix it...
    df_search.codgeo_voisins = df_search.apply(lambda x: np.append(x.codgeo_voisins, x.codgeo), axis=1)

    # Explodes the dataframe to have a row for each voisins + itself
    df_search['codgeo_voisins_copy'] = df_search['codgeo_voisins']
    df_search_exploded = df_search.explode('codgeo_voisins_copy')
    
    # For each commune (codgeo) in search area (df_search) we add all its voisin's scores
    odis_search_exploded = pd.merge(df_search_exploded, df_binomes.add_suffix('_binome'), left_on='codgeo_voisins_copy', right_on='codgeo_binome', how='left')
    
    # Adds a column to identify binomes vs monomes + cleanup
    odis_search_exploded['binome'] = odis_search_exploded.apply(lambda x: False if x.codgeo == x.codgeo_binome else True, axis=1)
    odis_search_exploded.drop(columns={'codgeo_voisins_copy'}, inplace=True)

    #We remove all values for the monome case to avoid accounting for them in the category score calculation
    odis_search_exploded = monome_cleanup(odis_search_exploded)

    return odis_search_exploded

### 3. Criterias Scoring

In [15]:
#Computing distance from current commune 
#Using a crs that allows to compute distance in meters for metropolitan France

def distance_calc(df, ref_point):
    return int(df.distance(ref_point))

def add_distance_to_current_loc(df, current_codgeo):
    projected_crs = "EPSG:2154"
    zone_recherche = gpd.GeoDataFrame(df[df.codgeo == current_codgeo]['polygon'])
    zone_recherche.set_geometry('polygon', inplace=True)
    zone_recherche.to_crs(projected_crs, inplace=True)

    df.to_crs(projected_crs, inplace=True)
    df['dist_current_loc'] = df['polygon'].apply(distance_calc, ref_point=zone_recherche.iloc[0].polygon)
    return df

In [16]:
#Adding score specific to subject looking for a job identified as en besoin
def codes_match(df, codes_list):
    #returns a list of codfaps that matches
    if df is None:
        return []
    return list(set(df.tolist()).intersection(set(codes_list)))

def fap_names_lookup(df):
    return list(codfap_index[codfap_index['Code FAP 341'].isin(df)]['Intitulé FAP 341'])

In [17]:
def compute_criteria_scores(df, prefs): 
    df = df.copy()
    
    # Using QuantileTransfer to normalize all scores between 0 and 1 for the region
    t = preprocessing.QuantileTransformer(output_distribution="uniform")

    #met_ration est le ratio d'offres non-pourvues pour 1000 habitants
    df['met_ratio']= 1000 * df.met/df.pop_be
    df['met_scaled'] = t.fit_transform(df[['met_ratio']].fillna(0))
    #met_tension_ratio est le ratio d'offres population de la zone (pour 1000 habitants)
    df['met_tension_ratio'] = 1000 * df.met_tension/df.pop_be
    df['met_tension_scaled'] = t.fit_transform(df[['met_tension_ratio']].fillna(0))
    #svc_ratio est le ratio de services d'inclusion de la commune (pour 1000 habitants)
    df['svc_incl_ratio'] = 1000 * df.svc_incl_count/df.pop_be
    df['svc_incl_scaled'] = t.fit_transform(df[['svc_incl_ratio']].fillna(0))
    #log_vac_ratio est le ratio de logements vacants de la commune % total logements
    df['log_vac_ratio'] = df.log_vac/df.log_total
    df['log_vac_scaled'] = t.fit_transform(df[['log_vac_ratio']].fillna(0))
    #pol est le score selon la couleur politique (extreme droite = 0, gauche = 1)
    df['pol_scaled'] = df[['pol_num']].astype('float')
    
    if prefs['hebergement'] == "Chez l'habitant":
        #log_5p+_ratio est le ratio de residences principales de 5 pièces ou plus % total residences principales
        df['log_5p_ratio'] = df['rp_5+pieces']/df.log_rp
        df['log_5p_scaled'] = t.fit_transform(df[['log_5p_ratio']].fillna(0))

    if len(prefs['classe_enfants']) > 0: 
        # Risque de fermeture école: ratio de classe à risque de fermeture % nombre d'écoles
        df['risque_fermeture_ratio'] = df.risque_fermeture/df.ecoles_ct
        df['classes_ferm_scaled'] = t.fit_transform(df[['risque_fermeture_ratio']].fillna(0))


    # Subject Specific criterias

    # We compute the distance from the current location 
    df['reloc_dist_scaled'] = (1-df['dist_current_loc']/(prefs['loc_distance_km']*1000))
    df['reloc_epci_scaled'] = np.where(df['epci_code'] == df[df.codgeo == prefs['commune_actuelle']]['epci_code'].iloc[0],1,0)

    #For each adult we look for jobs categories that match what is needed
    i=1
    for adult in range(0,prefs['nb_adultes']):
        if len(prefs['codes_metiers'][adult]) > 0:
            df['met_match_codes_adult'+str(i)] = df.be_codfap_top.apply(codes_match, codes_list=prefs['codes_metiers'][adult])
            df['met_match_adult'+str(i)] = df['met_match_codes_adult'+str(i)].apply(len)
            df['met_match_adult'+str(i)+'_scaled'] = t.fit_transform(df[['met_match_adult'+str(i)]].fillna(0))
            i+=1

    j=1
    for adult in range(0,prefs['nb_adultes']):
        if len(prefs['codes_formations'][adult]) > 0:
            df['form_match_codes_adult'+str(j)] = df.codes_formations.apply(codes_match, codes_list=prefs['codes_formations'][adult])
            df['form_match_adult'+str(j)] = df['form_match_codes_adult'+str(j)].apply(len)
            df['form_match_adult'+str(j)+'_scaled'] = t.fit_transform(df[['form_match_adult'+str(j)]].fillna(0))
        j+=1

    
    return df

### 4. Category Scoring

In [18]:
def compute_cat_scores(df, scores_cat, penalty):
    df = df.copy()
    df_binome = pd.DataFrame()
    columns_in_use = set(df.columns) & set(scores_cat.score)
    columns_in_use_binome = set(df.columns) & set([score+'_binome' for score in scores_cat.score])
    for cat in set(scores_cat.cat):
        cat_scores_indices = [score for score in scores_cat[scores_cat['cat'] == cat]['score'] if score in columns_in_use]
        cat_scores_indices_binome = [score+'_binome' for score in scores_cat[scores_cat['cat'] == cat]['score'] if score+'_binome' in columns_in_use_binome]

        # Efficiently select all relevant rows at once
        cat_scores_df = df[cat_scores_indices]
        for col in cat_scores_indices_binome:
            mask = df[col].notna()
            df_binome[col] = pd.to_numeric(df[col], errors='coerce')
            df_binome.loc[mask, col] = df.loc[mask, col] * (1-penalty) 
            cat_scores_df = pd.concat([cat_scores_df, df_binome[col]], axis=1)
        df[cat + '_cat_score'] = cat_scores_df.astype(float).mean(axis=1)

    return df

### 5. Final Binome Score Weighted

In [19]:
def compute_binome_score_old(df, binome_penalty, prefs):
    scores_col = [col for col in df.columns if col.endswith('_cat_score')]
    max_scores = pd.DataFrame()
    
    for col in scores_col:
        cat_weight = prefs[col.split('_')[0]]
        max_scores[col] = cat_weight * np.where(
            df[col] >= (1-binome_penalty)*df[col+'_binome'],
            df[col],
            (1-binome_penalty)*df[col+'_binome']
            )
    
    return max_scores.mean(axis=1).round(1)


In [20]:
def compute_binome_score(df, scores_cat, prefs):
    scores_cat_col = [col for col in df.columns if col.endswith('_cat_score')]
    weighted_scores = pd.DataFrame()
    for col in scores_cat_col:
        cat_weight =  prefs['poids_'+col.split('_')[0]]
        weighted_scores[col] = cat_weight * df[col]
    
    return weighted_scores.astype(float).mean(axis=1)

In [21]:
def best_score_compute(df):
    #Keeping the best (top #1) monome or binome result for each commune
    best = df.sort_values('weighted_score', ascending=False).groupby('codgeo').head(1)
    return best

In [22]:
#Main function that aggregates most of the above in one sequence
def compute_odis_score(df, scores_cat, prefs):
    df = add_distance_to_current_loc(df, current_codgeo=prefs['commune_actuelle'])

    # We filter by distance to reduce the compute cost on a smaller odis_search dataframe
    odis_search = filter_loc_by_distance(df, distance=prefs['loc_distance_km'])

    # We compute the subject specific scores
    odis_scored = compute_criteria_scores(odis_search, prefs=prefs)

    # We add the criteria scores for all neighbor communes forming monomes and binomes
    odis_exploded = adding_score_voisins(odis_scored, scores_cat)

    # We compute the category scores for both the target and the binome
    odis_exploded = compute_cat_scores(odis_exploded, scores_cat=scores_cat, penalty=prefs['binome_penalty'])

    # We computing the final weighted score for all commune<->voisin combinations
    odis_exploded['weighted_score'] = compute_binome_score(odis_exploded, scores_cat=scores_cat, prefs=prefs)

    # We keep best monome or binome for each commune 
    odis_search_best = best_score_compute(odis_exploded)

    return odis_search_best


### 6. Generating Narrative

Here we want to generate a 'human readable' explanation about why scored high a given location.
Things to show:
- Target commune name and EPCI
- Weighted Score
- If Binome, show the binome and EPCI if different from target
- Show top 3 criterias target (weighted ?) 
- Show top 3 criterias for binome (weighted ?)

In [23]:
# THIS SHOULD BE THE END OF JUPYTER NOTEBOOK EXPORT

## Export to Python file for streamlit

In [14]:
%save -f -r ../streamlit/odis_stream2_scoring.py 1-13
# This saves the cells 0 to 22 (and their execution history unfortunately) to a python file that I can use in Streamlit
# Make sure to restart before running this cell
# Don't forget to restart streamlit after this

The following commands were written to file `../streamlit/odis_stream2_scoring.py`:
# THIS SHOULD BE THE END OF JUPYTER NOTEBOOK EXPORT
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from scipy import stats
import folium as flm #required for gdf.explore()
import shapely as shp
from shapely.wkt import loads
from shapely.geometry import Polygon
from sklearn import preprocessing
def init_loading_datasets(odis_file, scores_cat_file, metiers_file, formations_file, ecoles_file, maternites_file, sante_file):
    odis = gpd.GeoDataFrame(gpd.read_parquet(odis_file))
    odis.set_geometry(odis.polygon, inplace=True)
    odis = odis[~odis.polygon.isna()]

    # Index of all scores and their explanations
    scores_cat = pd.read_csv(scores_cat_file)

    #Later we need the code FAP <-> FAP Name used to classify jobs
    codfap_index = pd.read_csv(metiers_file, delimiter=';')

    # Later we need the code formation <-> Formation Name used to classify 

In [None]:
# Restart and run all the cells above this one

## Notebook explorations

In [None]:
# Init
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 100)

ODIS_FILE = '../csv/odis_june_2025_jacques.parquet'
SCORES_CAT_FILE = '../csv/odis_scores_cat.csv'
METIERS_FILE = '../csv/dares_nomenclature_fap2021.csv'
FORMATIONS_FILE = '../csv/index_formations.csv'
ECOLES_FILE = '../csv/annuaire_ecoles_france_mini.parquet'
MATERNITE_FILE = '../csv/annuaire_maternites_DREES.csv'
SANTE_FILE = '../csv/annuaire_sante_finess.parquet'

odis, scores_cat, codfap_index, codformations_index, annuaire_ecoles, annuaire_sante = init_loading_datasets(
    odis_file=ODIS_FILE,
    scores_cat_file=SCORES_CAT_FILE,
    metiers_file=METIERS_FILE,
    formations_file=FORMATIONS_FILE,
    ecoles_file=ECOLES_FILE,
    maternites_file=MATERNITE_FILE,
    sante_file=SANTE_FILE
    )


In [25]:
# Subject preferences weighted score computation
prefs = {
    'poids_emploi':2,
    'poids_logement':1,
    'poids_education':1,
    'poids_soutien':1,
    'poids_mobilité':0,
    'commune_actuelle':'33281',
    'hebergement':"Chez l'habitant",
    'logement':'Location',
    'loc_distance_km':100,
    'nb_adultes':1,
    'nb_enfants':1,
    'codes_metiers':[['S1X40','J0X33','A1X41'], ['T4X60','T2A60']],
    'codes_formations':[['423'], ['315','100']],
    'classe_enfants':['Maternelle', 'Collège'],
    'besoin_sante': None,
    'binome_penalty':0.1
}

In [26]:
# Step by Step Execution
from time import time

def performance_tracker(t, text, timer_mode):
    if timer_mode:
        print(str(round(time()-t,2))+'|'+text)
        return time()
t = time()
timer_mode = True

df = odis
score_cat = scores_cat
prefs = prefs
#
df = add_distance_to_current_loc(df, current_codgeo=prefs['commune_actuelle'])
t = performance_tracker(t, 'Add Distance End', timer_mode)

# We filter by distance to reduce the compute cost on a smaller odis_search dataframe
odis_search = filter_loc_by_distance(df, distance=prefs['loc_distance_km'])
t = performance_tracker(t, 'Filter Loc by Distance', timer_mode)

# We compute the subject specific scores
odis_scored = compute_criteria_scores(odis_search, prefs=prefs)
t = performance_tracker(t, 'Compute Subject Score End', timer_mode)

# We add the criteria scores for all neighbor communes forming monomes and binomes
odis_exploded = adding_score_voisins(odis_scored, scores_cat)
t = performance_tracker(t, 'Adding Score Voisin', timer_mode)

# We compute the category scores for both the target and the binome
odis_exploded = compute_cat_scores(odis_exploded, scores_cat=scores_cat, penalty=prefs['binome_penalty'])
t = performance_tracker(t, 'Compute Cat Score End', timer_mode)

# We computing the final weighted score for all commune<->voisin combinations
odis_exploded['weighted_score'] = compute_binome_score(odis_exploded, scores_cat=scores_cat, prefs=prefs)
t = performance_tracker(t, 'Compute Weighted Score End', timer_mode)

# We keep best monome or binome for each commune 
odis_search_best = best_score_compute(odis_exploded)
t = performance_tracker(t, 'Compute Best Score End', timer_mode)


1.04|Add Distance End
0.0|Filter Loc by Distance
0.12|Compute Subject Score End
0.44|Adding Score Voisin
0.12|Compute Cat Score End
0.01|Compute Weighted Score End
0.05|Compute Best Score End


In [27]:
odis_search_best.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 1328 entries, 166 to 5632
Data columns (total 90 columns):
 #   Column                           Non-Null Count  Dtype   
---  ------                           --------------  -----   
 0   codgeo                           1328 non-null   object  
 1   libgeo                           1328 non-null   object  
 2   typecom                          1328 non-null   object  
 3   reg_code                         1328 non-null   object  
 4   reg_nom                          1328 non-null   object  
 5   dep_code                         1328 non-null   object  
 6   dep_nom                          1328 non-null   object  
 7   epci_code                        1328 non-null   object  
 8   epci_nom                         1328 non-null   object  
 9   niveau_equipements_services      1328 non-null   float64 
 10  academie_code                    1328 non-null   object  
 11  code_postal                      1328 non-null   object  
 12  c

In [91]:
liste_classe_enfants = ['Maternelle', 'Collège']



def filter_ecoles_by_distance(_current_geo, annuaire_ecoles, prefs):
    # we go twice the distance around the selected geo 
    MAX_DISTANCE_M = prefs['loc_distance_km']*1000
    annuaire_ecoles = gpd.GeoDataFrame(annuaire_ecoles, geometry='geometry', crs='EPSG:4326')
    annuaire_ecoles.to_crs("EPSG:2154", inplace=True)
    current_geo = gpd.GeoDataFrame(_current_geo[['codgeo','polygon']], geometry='polygon')
    current_geo.to_crs("EPSG:2154", inplace=True)

    filtered_ecoles = gpd.sjoin_nearest(
        annuaire_ecoles,
        current_geo,
        how="left",
        max_distance=MAX_DISTANCE_M,
        distance_col="distance_to_current_geo" # Column to store computed distance
    )
    filtered_ecoles=filtered_ecoles[filtered_ecoles['distance_to_current_geo'].notna()]
    return filtered_ecoles


def filter_ecoles(_current_geo, annuaire_ecoles, prefs):
    # we consider all the etablissements soclaires in the target codgeos and the ones around (voisins)
    target_codgeos = set(
        odis_search_best.codgeo.tolist() 
        +[x for y in odis_search_best.codgeo_voisins.tolist() for x in y]
        )
    niveaux_enfants = set(liste_classe_enfants)
    if 'Maternelle' in niveaux_enfants:
        maternelle_df = annuaire_ecoles[annuaire_ecoles.ecole_maternelle > 0]
    else:
        maternelle_df = None
    if 'Primaire' in niveaux_enfants:
        primaire_df = annuaire_ecoles[annuaire_ecoles.ecole_elementaire > 0]
    else:
        primaire_df = None
    if 'Collège' in niveaux_enfants:
        college_df = annuaire_ecoles[annuaire_ecoles.type_etablissement == 'Collège']
    else:
        college_df = None
    if 'Lycée' in niveaux_enfants:
        lycee_df = annuaire_ecoles[annuaire_ecoles.type_etablissement == 'Lycée']
    else:
        lycee_df = None    
    annuaire_ecoles = pd.concat([maternelle_df, primaire_df, college_df, lycee_df])
    mask = annuaire_ecoles['code_commune'].isin(target_codgeos)
    filtered_ecoles=annuaire_ecoles[mask]
    return filtered_ecoles

def build_local_ecoles_layer(_current_geo, _annuaire_ecoles, prefs, m):
    t = time()
    category_colors = {
        'Ecole': 'blue',
        'Collège': 'red',
        'Lycée': 'green',
        'default': 'gray' # Fallback color for unexpected categories
        }
    fg_ecoles = flm.FeatureGroup(name="Établissements Scolaires")
    filtered_ecoles = _annuaire_ecoles[_annuaire_ecoles.type_etablissement.isin(['Ecole', 'Collège', 'Lycée'])]
    t = performance_tracker(t, 'filter_ecoles Start', timer_mode)
    filtered_ecoles = filter_ecoles(_current_geo, filtered_ecoles, prefs)
    t = performance_tracker(t, 'filter_ecoles End', timer_mode)
    # Now let's add these schools to the map in the fg_ecoles feature group
    filtered_ecoles = gpd.GeoDataFrame(filtered_ecoles, geometry='geometry', crs='EPSG:4326')
    filtered_ecoles.to_crs("EPSG:2154", inplace=True)
    print(filtered_ecoles.shape)
    t = performance_tracker(t, 'GeoJson Iteration Start', timer_mode)
    for row in filtered_ecoles.itertuples(index=False):
        if row.geometry is None:
            continue
        lat = row.geometry.y
        lon = row.geometry.x

        popup_content = (
            f"<b>{row.nom_etablissement}</b><br>"
            f"Category: {row.type_etablissement}<br>"
            f"Rating: {row.nombre_d_eleves}"
        )
        # Create and add the marker
        marker_colors= category_colors.get(row.type_etablissement, 'blue')
        ecole = flm.Marker(
            location=[lat, lon],
            popup=popup_content,
            tooltip=row.nom_etablissement, # Appears on hover
            # marker=flm.Circle(radius=250, fillColor=marker_colors, fill_opacity=1.0, opacity=0.0, weight=1),
            icon=flm.Icon(color=marker_colors, icon='info-sign')
        ).add_to(m)
    # for index, row in filtered_ecoles.iterrows():
    #     ecole = filtered_ecoles[filtered_ecoles.index==index]
    #     ecole = flm.GeoJson(
    #         data=ecole,
    #         tooltip=flm.GeoJsonTooltip(fields=["nom_etablissement", "type_etablissement", "statut_public_prive", "ecole_maternelle"]),
    #         marker=flm.Circle(radius=250, fillColor=category_colors[ecole['type_etablissement'].iloc[0]], fill_opacity=1.0, opacity=0.0, weight=1),
    #     )
        # fg_ecoles.add_child(ecole)

    t = performance_tracker(t, 'GeoJson Iteration End', timer_mode)
    # print(fg_ecoles)
    return fg_ecoles


In [92]:
current_geo = odis_search_best[odis_search_best.codgeo == prefs['commune_actuelle']]
center_lat, center_lon = 44.8378, -0.5798
# fg_ecoles = flm.FeatureGroup(name="Ecoles")

m = flm.Map(
    location=[center_lat, center_lon], 
    zoom_start=10,
    )

flm.LayerControl().add_to(m)
fg_ecoles = build_local_ecoles_layer(current_geo, annuaire_ecoles, prefs, m)
fg_ecoles.add_to(m)



0.03|filter_ecoles Start
1.22|filter_ecoles End
(1617, 13)
0.01|GeoJson Iteration Start
0.27|GeoJson Iteration End


<folium.map.FeatureGroup at 0x743dd0eac7f0>

In [93]:
m

In [94]:
annuaire_sante

Unnamed: 0,nofinesset,nofinessej,RaisonSociale,RaisonSocialeLongue,ComplementRaisonSociale,ComplementDistribution,NumeroVoie,TypeVoie,LibelleVoie,ComplementVoie,LieuDitBp,Commune,Departement,LibelleDepartement,LigneAcheminement,Telephone,Telecopie,Categorie,LibelleCategorie,CategorieAgregat,LibelleCategorieAgregat,Siret,CodeApe,CodeMft,LibelleMft,CodeSph,LibelleSph,DateOuverture,DateAutorisation,DateMajStructure,NumeroUAI,Emetteur,coordxet,coordyet,sourcecoordet,datemaj,geometry,maternite,codgeo
0,010000024,010780054,CH DE FLEYRIAT,CENTRE HOSPITALIER DE BOURG-EN-BRESSE FLEYRIAT,,,900,RTE,DE PARIS,,,451,01,AIN,01440 VIRIAT,0474454647,0474454114,355,Centre Hospitalier (C.H.),1102,Centres Hospitaliers,26010004500012,8610Z,03,ARS établissements Publics de santé dotation g...,1,Etablissement public de santé,1979-02-13,1979-02-13,2020-02-04,,,870199.87500,6571579.0,"1,ATLASANTE,81,BAN_14-05-2025,L93_METROPOLE",2025-05-07,POINT (870199.875 6571579),True,01451
1,010000032,010780062,CH BUGEY SUD,CENTRE HOSPITALIER BUGEY SUD,,,700,AV,DE NARVIK,,BP 139,034,01,AIN,01300 BELLEY,0479425959,0479425996,355,Centre Hospitalier (C.H.),1102,Centres Hospitaliers,26010003700068,8610Z,03,ARS établissements Publics de santé dotation g...,1,Etablissement public de santé,1901-01-01,1901-01-01,2021-07-07,,,908313.31250,6520061.0,"1,ATLASANTE,81,BAN_14-05-2025,L93_METROPOLE",2025-05-07,POINT (908313.312 6520061),True,01034
2,010000065,010780096,CH DE TREVOUX - MONTPENSIER,CENTRE HOSPITALIER DE TREVOUX - MONTPENSIER,,,14,R,DE L'HOPITAL,,BP 615,427,01,AIN,01606 TREVOUX CEDEX,0474105000,0474105019,355,Centre Hospitalier (C.H.),1102,Centres Hospitaliers,26010028400017,8610Z,03,ARS établissements Publics de santé dotation g...,1,Etablissement public de santé,1901-01-01,1901-01-01,2018-01-12,,,837272.31250,6539470.5,"2,ATLASANTE,100,BDADRESSE_v2.2_03-2024,L93_MET...",2025-05-07,POINT (837272.312 6539470.5),False,01427
3,010000081,010780112,CH DU PAYS DE GEX,CENTRE HOSPITALIER DU PAYS DE GEX,,,160,R,MARC PANISSOD,,BP 437,173,01,AIN,01174 GEX CEDEX,0450403838,0450403825,106,"Centre hospitalier, ex Hôpital local",1106,Hôpitaux Locaux,26010010200011,8610Z,03,ARS établissements Publics de santé dotation g...,1,Etablissement public de santé,1901-01-01,1901-01-01,2020-02-04,,,935201.87500,6584824.5,"1,ATLASANTE,100,BDADRESSE_v2.2_03-2024,L93_MET...",2025-05-07,POINT (935201.875 6584824.5),False,01173
4,010000099,010780120,CH DE MEXIMIEUX,CENTRE HOSPITALIER DE MEXIMIEUX,,,13,AV,DU DOCTEUR BOYER,,,244,01,AIN,01800 MEXIMIEUX,0474610177,0474347184,106,"Centre hospitalier, ex Hôpital local",1106,Hôpitaux Locaux,26010013600019,8610Z,03,ARS établissements Publics de santé dotation g...,1,Etablissement public de santé,1945-01-01,1945-01-01,2020-06-30,,,870118.18750,6536469.5,"1,ATLASANTE,85,BAN_14-05-2025,L93_METROPOLE",2025-05-07,POINT (870118.188 6536469.5),False,01244
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6024,980500755,980500003,INSTITUT DE FORMATION SOINS INFIRMIERS,INSITUT ETUDE ET FORMATION SOINS INFIRMIERS MA...,,,,R,DE L'HOPITAL,,,611,9F,MAYOTTE,97600 MAMOUDZOU,,,300,Ecoles Formant aux Professions Sanitaires,6101,Etablissements de Formation des Personnels San...,,,03,ARS établissements Publics de santé dotation g...,1,Etablissement public de santé,2001-09-01,2001-02-28,2013-06-17,,,524819.87500,8587195.0,"2,ATLASANTE,100,BDADRESSE_v2.2_03-2024,UTM_S38...",2025-05-07,POINT (524819.875 8587195),False,9F611
6025,980501829,980500003,CHM PLATEFORME LOGISTIQUE,,LONGONI,,,,,,LONGONI,610,9F,MAYOTTE,97600 KOUNGOU,0269617928,,355,Centre Hospitalier (C.H.),1102,Centres Hospitaliers,22985001100011,8610Z,03,ARS établissements Publics de santé dotation g...,1,Etablissement public de santé,2020-09-17,2020-09-16,2020-09-17,,,517542.68750,8592117.0,"3,ATLASANTE,100,BDADRESSE_v2.2_03-2024,UTM_S38...",2025-05-07,POINT (517542.688 8592117),False,9F610
6026,980501837,980500003,SITE MARTIAL HENRY DE PETITE-TERRE,SOINS DE SUITE ET DE READAPTATION/ CENTRE DE R...,,,,R,DES JARDINS,,PAMANDZI,615,9F,MAYOTTE,97615 PAMANDZI,,,355,Centre Hospitalier (C.H.),1102,Centres Hospitaliers,,,03,ARS établissements Publics de santé dotation g...,1,Etablissement public de santé,2021-01-01,2019-12-13,2021-09-30,,,530128.00000,8585537.0,"2,ATLASANTE,86,BDADRESSE_v2.2_03-2024,UTM_S38_...",2025-05-07,POINT (530128 8585537),False,9F615
6027,980600027,980600019,HOPITAL DE SIA,,,,,,,,BP 4 - MATA'UTU,613,9J,WALLIS ET FUTUNA,98600 UVEA,,,355,Centre Hospitalier (C.H.),1102,Centres Hospitaliers,13000323900014,,99,Indéterminé,1,Etablissement public de santé,2024-10-30,2024-10-30,2024-10-30,,,589277.37500,8531397.0,"4,ATLASANTE,.,MAPS 06-11-2024,UTM_S1_WALLIS_FU...",2025-05-07,POINT (589277.375 8531397),False,9J613


In [None]:
mylist = ','.join(odis[odis.dep_code == '78'].codes_formations)

In [None]:
formations.head()

In [None]:
hebergement_options = ["Chez l'habitant", 'Location', 'Foyer']
hebergement_index = hebergement_options.index("Chez l'habitant")
hebergement_index

In [None]:
codfap_index.head()

In [None]:
codfap_index[codfap_index['Intitulé FAP 341'].str.contains('menuis')]['Intitulé FAP 341']

In [9]:
fg_dict = {
    'test':1,
    'Top1':2,
    'Top3':3
}
[k for k in fg_dict.keys() if k.startswith('Top')]

['Top1', 'Top3']

In [None]:
annuaire_sante.value_counts('maternite')

In [None]:
annuaire_sante2[annuaire_sante2.maternite][['RaisonSociale','LibelleCategorieAgregat','geometry']].explore(column='LibelleCategorieAgregat', categorical=True)

In [None]:
scores_cat_prefs=scores_cat.copy()
for index, row in scores_cat.iterrows():
    if row.loc['incl_binome']:
        row_to_add = scores_cat.loc[[index]]
        row_to_add['score'] = row_to_add['score'] + '_binome'
        row_to_add['incl_binome'] = False
        scores_cat_prefs = pd.concat([scores_cat_prefs, row_to_add])
scores_cat_prefs['weight'] = scores_cat_prefs['cat'].apply(lambda x: prefs[x])
scores_cat_prefs
# row_to_add

In [None]:
odis_exploded['weighted_score'] = compute_binome_score(odis_exploded, prefs=prefs)
odis_exploded['weighted_score']

In [5]:
def test_func(**kwargs):
    print(kwargs['test'])

test_func(test='hello')

hello


In [None]:

penalty=0.1
df = odis_exploded.copy()
df_binome = pd.DataFrame()
columns_in_use = set(df.columns) & set(scores_cat.score)
columns_in_use_binome = set(df.columns) & set([score+'_binome' for score in scores_cat.score])
for cat in set(scores_cat.cat):
    cat_scores_indices = [score for score in scores_cat[scores_cat['cat'] == cat]['score'] if score in columns_in_use]
    cat_scores_indices_binome = [score+'_binome' for score in scores_cat[scores_cat['cat'] == cat]['score'] if score+'_binome' in columns_in_use_binome]

    # Efficiently select all relevant rows at once
    cat_scores_df = df[cat_scores_indices]
    for col in cat_scores_indices_binome:
        mask = df[col].notna()
        df_binome[col] = pd.to_numeric(df[col], errors='coerce')
        df_binome.loc[mask, col] = df.loc[mask, col] * (1-penalty) 
        cat_scores_df = pd.concat([cat_scores_df, df_binome[col]], axis=1)
    df[cat + '_cat_score'] = 100 * cat_scores_df.astype(float).mean(axis=1)


In [None]:
df.head(1).to_csv('../csv/test.csv')

In [None]:




# We provide the scores columns as a parameter to compute faster
#scores_col = [col for col in odis_exploded.columns if col.endswith('_cat_score')]

# We computing the final weighted score for all commune<->voisin combinations
# odis_exploded['weighted_score'] = compute_binome_score(
#     odis_exploded,
#     binome_penalty=prefs['binome_penalty'],
#     weights=prefs
#     )

# We keep best monome or binome for each commune 
odis_search_best = best_score_compute(odis_exploded)

In [None]:
# Showing results on an interactive map
cols_to_show = (
        ['codgeo','libgeo','weighted_score','binome','libgeo_binome','dist_current_loc','polygon']
        +[col for col in odis_search_best.columns if '_codes_' in col]
        +[col for col in odis_search_best.columns if '_cat_score' in col]
        )
#odis_search_best[cols_to_show].explore('weighted_score', popup=True)
#odis_search_best.plot('weighted_score')
#odis_search_best[cols_to_show].explore('weighted_score', tooltip=True)

In [None]:
scores_cat

## 9. Export to SuperSet

In [None]:
# def concatenate_strings(row):
#   return '{"type": "Feature","geometry":' + shp.to_geojson(row['polygon']) + '}'


# odis_search_best_export = gpd.GeoDataFrame(odis_search_best.copy())
# odis_search_best_export.set_geometry(odis_search_best_export.polygon, crs='EPSG:2154', inplace=True)
# odis_search_best_export.to_crs(epsg=4326, inplace=True)
# odis_search_best_export["polygon_as_json"] = odis_search_best_export.apply(concatenate_strings, axis=1)
# odis_search_best_export.drop(['polygon','polygon_binome'], axis=1, inplace=True)

# cols = ['met_match_codes','met_match_codes_binome','be_codfap_top','be_libfap_top','codgeo_voisins_binome','pitch']
# for col in cols:
#     odis_search_best_export[col] = odis_search_best_export[col].apply(lambda x: x.tolist() if type(x) == np.ndarray else x)

In [None]:
# from sqlalchemy import create_engine, text

# db_host = "localhost"  # Replace with the actual host (e.g., 'superset_db' if in the same Docker network, or 'localhost' if exposed)
# db_port = "5433"  # Replace with the actual port (usually 5432)
# db_user = "superset"  # Replace with the database user (often 'superset')
# db_password = "superset"  # Replace with the database password
# db_name = "examples"  # Replace with the database name (often 'superset')

# engine = create_engine(f'postgresql+psycopg2://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}')

In [None]:
# table_name = "odis_stream2_result"  # Choose a name for the table in PostgreSQL
# odis_search_best_export.to_sql(table_name, engine, if_exists='replace', schema='public', index=False)
# sql = text("GRANT SELECT ON odis_stream2_result TO examples")

# with engine.begin() as connection:
#     connection.execute(sql)

# print(f"DataFrame successfully written to table '{table_name}' in the Superset database.")

Note to myself:
Après avoir importé les données dans Postgres il faut donner les droits au user 'examples' sur la table
> docker exec -it superset_db psql -h superset_db -p 5432 -U superset -d examples

> GRANT SELECT ON odis_stream2_result TO examples;

> GRANT USAGE ON SCHEMA public TO examples;
