## Intro

### Scoring Logic Description

1. We start by importing the odis dataframe from a CSV that includes all the relevant datapoint to score and display data
2. We compute the scores for each criteria specific to the commune (independant from subject)
3. We compute the scores for each criteria specific to the subject (dependand from both subject and commune) 
4. We identify all commune<->neighbour pairs (binômes) for each commune within search radius
5. We compute category scores (emploi, logement, education etc...) as an average of the all the scores for a given category
6. For each commune we compare the commune and neighbour category scores and weighted the highest one with category weights defined by subject and then keep the best weighted score for each commune
8. We display result in on a map

In [1]:
# THIS SHOULD BE THE END OF JUPYTER NOTEBOOK EXPORT
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from scipy import stats
import folium as flm #required for gdf.explore()
import shapely as shp
from shapely.wkt import loads
from shapely.geometry import Polygon
from sklearn import preprocessing

### 1. Fetching key indicators from ODIS source file

In [2]:
def init_loading_datasets(odis_file, scores_cat_file, metiers_file, formations_file, ecoles_file, maternites_file, sante_file):
    odis = gpd.GeoDataFrame(gpd.read_parquet(odis_file))
    odis.set_geometry(odis.polygon, inplace=True)
    odis = odis[~odis.polygon.isna()]
    odis.set_index('codgeo', inplace=True)

    # Index of all scores and their explanations
    scores_cat = pd.read_csv(scores_cat_file)

    #Later we need the code FAP <-> FAP Name used to classify jobs
    codfap_index = pd.read_csv(metiers_file, delimiter=';')

    # Later we need the code formation <-> Formation Name used to classify trainings
    # source: https://www.data.gouv.fr/fr/datasets/liste-publique-des-organismes-de-formation-l-6351-7-1-du-code-du-travail/
    codformations_index = pd.read_csv(formations_file).set_index('codformation')

    # Etablissements scolaires
    annuaire_ecoles = pd.read_parquet(ecoles_file)
    annuaire_ecoles.geometry = annuaire_ecoles.geometry.apply(shp.from_wkb)

    #Annuaire Maternités
    # Source: https://www.data.gouv.fr/fr/datasets/liste-des-maternites-de-france-depuis-2000/
    annuaire_maternites = pd.read_csv(maternites_file, delimiter=';')
    annuaire_maternites.drop_duplicates(subset=['FI_ET'], keep='last', inplace=True)
    annuaire_maternites.head()

    # Annuaire etablissements santé
    # Source: https://www.data.gouv.fr/fr/datasets/reexposition-des-donnees-finess/
    annuaire_sante = pd.read_parquet(sante_file)
    annuaire_sante = annuaire_sante[annuaire_sante.LibelleSph == 'Etablissement public de santé']
    annuaire_sante['geometry'] = gpd.points_from_xy(annuaire_sante.coordxet, annuaire_sante.coordyet, crs='epsg:2154')
    annuaire_sante = pd.merge(annuaire_sante, annuaire_maternites[['FI_ET']], left_on='nofinesset', right_on='FI_ET', how='left', indicator="maternite")
    annuaire_sante.drop(columns=['FI_ET'], inplace=True)
    annuaire_sante.maternite = np.where(annuaire_sante.maternite == 'both', True, False)
    annuaire_sante['codgeo'] = annuaire_sante.Departement + annuaire_sante.Commune

    return odis, scores_cat, codfap_index, codformations_index, annuaire_ecoles, annuaire_sante

### 2. Distance filter + Gathering nearby Communes Scores

In [3]:
# Filtering dataframe based on subject distance preference (to save on compute time later on)
def filter_loc_by_distance(df, distance):
    return df[df.dist_current_loc < distance * 1000]

# Put None as a score in the monome case
def monome_cleanup(df):
    mask = ~df['binome']
    for col in df.columns:
        if col.endswith('_binome'):
            df.loc[mask, col] = None
    return df

In [4]:
def adding_score_voisins(df_search, scores_cat):
    #df_search is the dataframe pre-filtered by location

    binome_columns = ['codgeo','libgeo','polygon','epci_code','epci_nom'] + scores_cat[scores_cat.incl_binome]['score'].to_list()+scores_cat[scores_cat.incl_binome]['metric'].to_list()
    # We take the subset of possible score columns that actually exist in our dataframe
    binome_columns = list(set(binome_columns) & set(df_search.columns))
    df_binomes = df_search[binome_columns].copy()

    # Adds itself to list of voisins = monome case
    # Note: this code triggers the SettingWithCopyWarning but I don't know how to fix it...
    df_search.codgeo_voisins = df_search.apply(lambda x: np.append(x.codgeo_voisins, x.name), axis=1)

    # Explodes the dataframe to have a row for each voisins + itself
    df_search['codgeo_voisins_copy'] = df_search['codgeo_voisins']
    df_search_exploded = df_search.explode('codgeo_voisins_copy')
    df_search_exploded.rename(columns={'codgeo_voisins_copy':'codgeo_binome'}, inplace=True)
    
    # For each commune (codgeo) in search area (df_search) we add all its voisin's scores
    odis_search_exploded = pd.merge(
        df_search_exploded, 
        df_binomes.add_suffix('_binome'), 
        left_on='codgeo_binome', 
        right_index=True, 
        how='inner', 
        validate="many_to_one")
    
    # Adds a column to identify binomes vs monomes + cleanup
    odis_search_exploded['binome'] = np.where(odis_search_exploded.index == odis_search_exploded.codgeo_binome, True, False)

 
    #We remove all values for the monome case to avoid accounting for them in the category score calculation
    odis_search_exploded = monome_cleanup(odis_search_exploded)

    return odis_search_exploded

### 3. Criterias Scoring

In [5]:
#Computing distance from current commune 
#Using a crs that allows to compute distance in meters for metropolitan France

def distance_calc(df, ref_point):
    return int(df.distance(ref_point))

def add_distance_to_current_loc(df, current_codgeo):
    projected_crs = "EPSG:2154"
    # We first need to change CRS to a projected CRS
    df_projected = gpd.GeoDataFrame(df)
    df_projected = df_projected.to_crs(projected_crs)

    zone_recherche = df_projected[df_projected.index == current_codgeo].copy()
    zone_recherche['centroid'] = zone_recherche.centroid
    zone_recherche = gpd.GeoDataFrame(zone_recherche, geometry='centroid')
    zone_recherche.to_crs(projected_crs, inplace=True)
    
    df_projected = df_projected.sjoin_nearest(zone_recherche, distance_col="dist_current_loc")[['dist_current_loc']]
    df = pd.merge(df, df_projected, left_index=True, right_index=True, how='left')
    
    return df

In [6]:
#Adding score specific to subject looking for a job identified as en besoin
def codes_match(df, codes_list):
    #returns a list of codfaps that matches
    if df is None:
        return []
    return list(set(df.tolist()).intersection(set(codes_list)))

def fap_names_lookup(df):
    return list(codfap_index[codfap_index['Code FAP 341'].isin(df)]['Intitulé FAP 341'])

In [None]:
def compute_criteria_scores(df, prefs): 
    df = df.copy()
    
    # Using QuantileTransfer to normalize all scores between 0 and 1 for the region
    t = preprocessing.QuantileTransformer(output_distribution="uniform")

    #met_ration est le ratio d'offres non-pourvues pour 1000 habitants
    df['met_ratio']= 1000 * df.met/df.pop_be
    df['met_scaled'] = t.fit_transform(df[['met_ratio']].fillna(0))
    #met_tension_ratio est le ratio d'offres population de la zone (pour 1000 habitants)
    df['met_tension_ratio'] = 1000 * df.met_tension/df.pop_be
    df['met_tension_scaled'] = t.fit_transform(df[['met_tension_ratio']].fillna(0))
    #svc_ratio est le ratio de services d'inclusion de la commune (pour 1000 habitants)
    df['svc_incl_ratio'] = 1000 * df.svc_incl_count/df.pop_be
    df['svc_incl_scaled'] = t.fit_transform(df[['svc_incl_ratio']].fillna(0))
    #log_vac_ratio est le ratio de logements vacants de la commune % total logements
    df['log_vac_ratio'] = df.log_vac/df.log_total
    df['log_vac_scaled'] = t.fit_transform(df[['log_vac_ratio']].fillna(0))
    #pol est le score selon la couleur politique (extreme droite = 0, gauche = 1)
    df['pol_scaled'] = df[['pol_num']].astype('float')
    
    if prefs['hebergement'] == "Chez l'habitant":
        #log_5p+_ratio est le ratio de residences principales de 5 pièces ou plus % total residences principales
        df['log_5p_ratio'] = df['rp_5+pieces']/df.log_rp
        df['log_5p_scaled'] = t.fit_transform(df[['log_5p_ratio']].fillna(0))

    if len(prefs['classe_enfants']) > 0: 
        # Risque de fermeture école: ratio de classe à risque de fermeture % nombre d'écoles
        df['risque_fermeture_ratio'] = df.risque_fermeture/df.ecoles_ct
        df['classes_ferm_scaled'] = t.fit_transform(df[['risque_fermeture_ratio']].fillna(0))


    # Subject Specific criterias

    # We compute the distance from the current location 
    df['reloc_dist_scaled'] = (1-df['dist_current_loc']/(prefs['loc_distance_km']*1000))
    df['reloc_epci_scaled'] = np.where(df['epci_code'] == df.loc[prefs['commune_actuelle']]['epci_code'],1,0)

    #For each adult we look for jobs categories that match what is needed
    for adult in range(0,prefs['nb_adultes']):
        if len(prefs['codes_metiers'][adult]) > 0:
            df['met_match_codes_adult'+str(adult+1)] = df.be_codfap_top.apply(codes_match, codes_list=prefs['codes_metiers'][adult])
            df['met_match_adult'+str(adult+1)] = df['met_match_codes_adult'+str(adult+1)].apply(len)
            df['met_match_adult'+str(adult+1)+'_scaled'] = t.fit_transform(df[['met_match_adult'+str(adult+1)]].fillna(0))
    
    #For each adult we look for jobs categories that match what is needed
    for adult in range(0,prefs['nb_adultes']):
        if len(prefs['codes_formations'][adult]) > 0:
            df['form_match_codes_adult'+str(adult+1)] = df.codes_formations.apply(codes_match, codes_list=prefs['codes_formations'][adult])
            df['form_match_adult'+str(adult+1)] = df['form_match_codes_adult'+str(adult+1)].apply(len)
            df['form_match_adult'+str(adult+1)+'_scaled'] = t.fit_transform(df[['form_match_adult'+str(adult+1)]].fillna(0))
            print(adult)


    
    return df

### 4. Category Scoring

In [8]:
def compute_cat_scores(df, scores_cat, penalty):
    df = df.copy()
    df_binome = pd.DataFrame()
    columns_in_use = set(df.columns) & set(scores_cat.score)
    columns_in_use_binome = set(df.columns) & set([score+'_binome' for score in scores_cat.score])
    for cat in set(scores_cat.cat):
        cat_scores_indices = [score for score in scores_cat[scores_cat['cat'] == cat]['score'] if score in columns_in_use]
        cat_scores_indices_binome = [score+'_binome' for score in scores_cat[scores_cat['cat'] == cat]['score'] if score+'_binome' in columns_in_use_binome]

        # Efficiently select all relevant rows at once
        cat_scores_df = df[cat_scores_indices]
        for col in cat_scores_indices_binome:
            mask = df[col].notna()
            df_binome[col] = pd.to_numeric(df[col], errors='coerce')
            df_binome.loc[mask, col] = df.loc[mask, col] * (1-penalty) 
            cat_scores_df = pd.concat([cat_scores_df, df_binome[col]], axis=1)
        df[cat + '_cat_score'] = cat_scores_df.astype(float).mean(axis=1)

    return df

### 5. Final Binome Score Weighted

In [9]:
def compute_binome_score_old(df, binome_penalty, prefs):
    scores_col = [col for col in df.columns if col.endswith('_cat_score')]
    max_scores = pd.DataFrame()
    
    for col in scores_col:
        cat_weight = prefs[col.split('_')[0]]
        max_scores[col] = cat_weight * np.where(
            df[col] >= (1-binome_penalty)*df[col+'_binome'],
            df[col],
            (1-binome_penalty)*df[col+'_binome']
            )
    
    return max_scores.mean(axis=1).round(1)


In [10]:
def compute_binome_score(df, scores_cat, prefs):
    scores_cat_col = [col for col in df.columns if col.endswith('_cat_score')]
    weighted_scores = pd.DataFrame()
    for col in scores_cat_col:
        cat_weight =  prefs['poids_'+col.split('_')[0]]
        weighted_scores[col] = cat_weight * df[col]
    
    return weighted_scores.astype(float).mean(axis=1)

In [11]:
def best_score_compute(df):
    #Keeping the best (top #1) monome or binome result for each commune
    best = df.sort_values('weighted_score', ascending=False).groupby('codgeo').head(1)
    return best

In [12]:
#Main function that aggregates most of the above in one sequence
def compute_odis_score(df, scores_cat, prefs):
    df = add_distance_to_current_loc(df, current_codgeo=prefs['commune_actuelle'])

    # We filter by distance to reduce the compute cost on a smaller odis_search dataframe
    odis_search = filter_loc_by_distance(df, distance=prefs['loc_distance_km'])

    # We compute the subject specific scores
    odis_scored = compute_criteria_scores(odis_search, prefs=prefs)

    # We add the criteria scores for all neighbor communes forming monomes and binomes
    odis_exploded = adding_score_voisins(odis_scored, scores_cat)

    # We compute the category scores for both the target and the binome
    odis_exploded = compute_cat_scores(odis_exploded, scores_cat=scores_cat, penalty=prefs['binome_penalty'])

    # We computing the final weighted score for all commune<->voisin combinations
    odis_exploded['weighted_score'] = compute_binome_score(odis_exploded, scores_cat=scores_cat, prefs=prefs)

    # We keep best monome or binome for each commune 
    odis_search_best = best_score_compute(odis_exploded)

    return odis_search_best


### 6. Generating Narrative

Here we want to generate a 'human readable' explanation about why scored high a given location.
Things to show:
- Target commune name and EPCI
- Weighted Score
- If Binome, show the binome and EPCI if different from target
- Show top 3 criterias target (weighted ?) 
- Show top 3 criterias for binome (weighted ?)

In [13]:
# THIS SHOULD BE THE END OF JUPYTER NOTEBOOK EXPORT

## Export to Python file for streamlit

In [14]:
%save -f -r ../streamlit/odis_stream2_scoring.py 1-13
# This saves the cells 0 to 22 (and their execution history unfortunately) to a python file that I can use in Streamlit
# Make sure to restart before running this cell
# Don't forget to restart streamlit after this

The following commands were written to file `../streamlit/odis_stream2_scoring.py`:
# THIS SHOULD BE THE END OF JUPYTER NOTEBOOK EXPORT
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from scipy import stats
import folium as flm #required for gdf.explore()
import shapely as shp
from shapely.wkt import loads
from shapely.geometry import Polygon
from sklearn import preprocessing
def init_loading_datasets(odis_file, scores_cat_file, metiers_file, formations_file, ecoles_file, maternites_file, sante_file):
    odis = gpd.GeoDataFrame(gpd.read_parquet(odis_file))
    odis.set_geometry(odis.polygon, inplace=True)
    odis = odis[~odis.polygon.isna()]
    odis.set_index('codgeo', inplace=True)

    # Index of all scores and their explanations
    scores_cat = pd.read_csv(scores_cat_file)

    #Later we need the code FAP <-> FAP Name used to classify jobs
    codfap_index = pd.read_csv(metiers_file, delimiter=';')

    # Later we need the code for

In [69]:
# Restart and run all the cells above this one

## Notebook explorations

In [14]:
# Init
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 100)

ODIS_FILE = '../csv/odis_june_2025_jacques.parquet'
SCORES_CAT_FILE = '../csv/odis_scores_cat.csv'
METIERS_FILE = '../csv/dares_nomenclature_fap2021.csv'
FORMATIONS_FILE = '../csv/index_formations.csv'
ECOLES_FILE = '../csv/annuaire_ecoles_france_mini.parquet'
MATERNITE_FILE = '../csv/annuaire_maternites_DREES.csv'
SANTE_FILE = '../csv/annuaire_sante_finess.parquet'

odis, scores_cat, codfap_index, codformations_index, annuaire_ecoles, annuaire_sante = init_loading_datasets(
    odis_file=ODIS_FILE,
    scores_cat_file=SCORES_CAT_FILE,
    metiers_file=METIERS_FILE,
    formations_file=FORMATIONS_FILE,
    ecoles_file=ECOLES_FILE,
    maternites_file=MATERNITE_FILE,
    sante_file=SANTE_FILE
    )


In [15]:
# Subject preferences weighted score computation
prefs = {
    'poids_emploi':2,
    'poids_logement':1,
    'poids_education':1,
    'poids_soutien':1,
    'poids_mobilité':0,
    'commune_actuelle':'75056', # 75056=Paris, 33063=Bordeaux, 18033=Bourges
    'hebergement':"Chez l'habitant",
    'logement':'Location',
    'loc_distance_km':20,
    'nb_adultes':1,
    'nb_enfants':1,
    'codes_metiers':[['S1X40','J0X33','A1X41'], ['T4X60','T2A60']],
    'codes_formations':[['423'], ['315','100']],
    'classe_enfants':['Maternelle', 'Collège'],
    'besoin_sante': None,
    'binome_penalty':0.1
}

In [16]:
# Step by Step Execution
from time import time

def performance_tracker(t, text, timer_mode):
    if timer_mode:
        print(str(round(time()-t,2))+'|'+text)
        return time()
t = time()
timer_mode = True

df = odis
score_cat = scores_cat
prefs = prefs
#
df = add_distance_to_current_loc(df, current_codgeo=prefs['commune_actuelle'])
t = performance_tracker(t, 'Add Distance End', timer_mode)

# We filter by distance to reduce the compute cost on a smaller odis_search dataframe
odis_search = filter_loc_by_distance(df, distance=prefs['loc_distance_km'])
t = performance_tracker(t, 'Filter Loc by Distance', timer_mode)

# We compute the subject specific scores
odis_scored = compute_criteria_scores(odis_search, prefs=prefs)
t = performance_tracker(t, 'Compute Subject Score End', timer_mode)

# We add the criteria scores for all neighbor communes forming monomes and binomes
odis_exploded = adding_score_voisins(odis_scored, scores_cat)
t = performance_tracker(t, 'Adding Score Voisin', timer_mode)

# We compute the category scores for both the target and the binome
odis_exploded = compute_cat_scores(odis_exploded, scores_cat=scores_cat, penalty=prefs['binome_penalty'])
t = performance_tracker(t, 'Compute Cat Score End', timer_mode)

# We computing the final weighted score for all commune<->voisin combinations
odis_exploded['weighted_score'] = compute_binome_score(odis_exploded, scores_cat=scores_cat, prefs=prefs)
t = performance_tracker(t, 'Compute Weighted Score End', timer_mode)

# We keep best monome or binome for each commune 
odis_search_best = best_score_compute(odis_exploded)
t = performance_tracker(t, 'Compute Best Score End', timer_mode)


0.66|Add Distance End
0.0|Filter Loc by Distance
0.09|Compute Subject Score End
0.09|Adding Score Voisin




0.15|Compute Cat Score End
0.01|Compute Weighted Score End
0.01|Compute Best Score End


In [28]:
odis_search_best[odis_search_best.met_match_codes_adult1.str.len() > 0].head(20)

Unnamed: 0_level_0,libgeo,typecom,reg_code,reg_nom,dep_code,dep_nom,epci_code,epci_nom,niveau_equipements_services,academie_code,code_postal,codes_postaux,type_commune_unite_urbaine,population,codze,zone_emploi,latitude_mairie,longitude_mairie,polygon,codbe,libbe,met,met_tension,codfap,be_codfap_top,be_libfap_top,pop_be,codes_formations,noms_formations,svc_incl_count,log_total,log_rp,log_vac,rp_5+pieces,risque_fermeture,ecoles_ct,nuance_politique,famille_nuance,pol_num,codgeo_voisins,nb_voisins,dist_current_loc,met_ratio,met_scaled,met_tension_ratio,met_tension_scaled,svc_incl_ratio,svc_incl_scaled,log_vac_ratio,log_vac_scaled,pol_scaled,log_5p_ratio,log_5p_scaled,risque_fermeture_ratio,classes_ferm_scaled,reloc_dist_scaled,reloc_epci_scaled,met_match_codes_adult1,met_match_adult1,met_match_adult1_scaled,form_match_codes_adult1,form_match_adult1,form_match_adult1_scaled,codgeo_binome,svc_incl_scaled_binome,met_ratio_binome,libgeo_binome,form_match_adult1_scaled_binome,form_match_adult1_binome,log_5p_scaled_binome,log_5p_ratio_binome,met_match_adult1_binome,met_tension_ratio_binome,risque_fermeture_ratio_binome,met_tension_scaled_binome,log_vac_ratio_binome,epci_code_binome,svc_incl_ratio_binome,epci_nom_binome,log_vac_scaled_binome,met_match_adult1_scaled_binome,met_scaled_binome,polygon_binome,classes_ferm_scaled_binome,binome,soutien_cat_score,logement_cat_score,mobilité_cat_score,emploi_cat_score,education_cat_score,weighted_score
codgeo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1
93066,Saint-Denis,COM,11,Île-de-France,93,Seine-Saint-Denis,200054781,Métropole du Grand Paris,4.0,24,93210,"92103, 93456, 93364, 93458, 93522, 93526, 9363...",UNITE URBAINE,113942,75056,1109,48.936001,2.358,"POLYGON ((2.36585 48.90161, 2.35187 48.90153, ...",1147,T6 Plaine Commune,18668,194.0,162.0,"[U1X91, T3X61, L2X60, T1X60, S1X20, T4X60, J3X...","[Artistes (musique, danse, spectacles), Agents...",451934.0,,,189.0,50559.675184,46379.961585,3295.783982,3356.211796,397.0,149.0,LSOC,Gauche,1.0,"[93059, 93072, 93027, 93001, 75056, 93070, 930...",9.0,5054.182488,41.306916,0.89738,0.429266,0.893013,0.418203,0.991266,0.065186,0.637555,1.0,0.072363,0.026201,2.66443,0.454148,0.747291,1,[S1X40],1,0.781659,[],0,0.0,,,,,,,,,,,,,,,,,,,,,,False,0.995633,0.331878,0.873645,0.643013,0.454148,0.613537
94002,Alfortville,COM,11,Île-de-France,94,Val-de-Marne,200054781,Métropole du Grand Paris,3.0,24,94140,"94142, 94709, 94146, 94141, 94143, 94140",UNITE URBAINE,45046,75056,1109,48.805,2.42,"POLYGON ((2.42803 48.77603, 2.41479 48.77727, ...",1150,T11 Grand Paris Sud Est Avenir,8982,40.0,148.0,"[U1X91, T4X60, V5X81, T2B60, S1X20, T1X60, A1X...","[Artistes (musique, danse, spectacles), Agents...",323676.0,,,44.0,23406.643638,21260.288151,1909.285837,1771.060091,56.0,17.0,LUG,Gauche,1.0,"[94046, 94028, 94022, 94081, 94041, 94018, 94002]",6.0,6589.990978,27.749972,0.451965,0.12358,0.222707,0.135938,0.899563,0.08157,0.868996,1.0,0.083304,0.052402,3.294118,0.927948,0.6705,1,[A1X41],1,0.781659,[],0,0.0,,,,,,,,,,,,,,,,,,,,,,False,0.949782,0.460699,0.83525,0.364083,0.927948,0.613319
93059,Pierrefitte-sur-Seine,COM,11,Île-de-France,93,Seine-Saint-Denis,200054781,Métropole du Grand Paris,2.0,24,93380,"93381, 93382, 93380",UNITE URBAINE,32379,75056,1109,48.964001,2.36,"POLYGON ((2.37594 48.97202, 2.36567 48.95826, ...",1147,T6 Plaine Commune,18668,194.0,162.0,"[U1X91, T3X61, L2X60, T1X60, S1X20, T4X60, J3X...","[Artistes (musique, danse, spectacles), Agents...",451934.0,,,24.0,12284.845951,11494.6803,699.150807,1733.011115,48.0,17.0,LSOC,Gauche,1.0,"[95585, 93072, 93066, 93079, 95427, 93059]",5.0,10320.257553,41.306916,0.89738,0.429266,0.893013,0.053105,0.637555,0.056912,0.41048,1.0,0.150766,0.270742,2.823529,0.582969,0.483987,1,[S1X40],1,0.781659,[],0,0.0,,,,,,,,,,,,,,,,,,,,,,False,0.818777,0.340611,0.741994,0.643013,0.582969,0.605677
93072,Stains,COM,11,Île-de-France,93,Seine-Saint-Denis,200054781,Métropole du Grand Paris,3.0,24,93240,"93245, 93241, 93240",UNITE URBAINE,40359,75056,1109,48.955002,2.382,"POLYGON ((2.40738 48.95612, 2.40445 48.95132, ...",1147,T6 Plaine Commune,18668,194.0,162.0,"[U1X91, T3X61, L2X60, T1X60, S1X20, T4X60, J3X...","[Artistes (musique, danse, spectacles), Agents...",451934.0,,,58.0,14956.859598,13991.629841,918.567571,1847.632909,68.0,26.0,LCOM,Gauche,1.0,"[95268, 93030, 93027, 93066, 93059, 95585, 93072]",6.0,10153.190642,41.306916,0.89738,0.429266,0.893013,0.128337,0.868996,0.061414,0.537118,1.0,0.132053,0.20524,2.615385,0.40393,0.49234,1,[S1X40],1,0.781659,[],0,0.0,,,,,,,,,,,,,,,,,,,,,,False,0.934498,0.371179,0.74617,0.643013,0.40393,0.599127
77083,Champs-sur-Marne,COM,11,Île-de-France,77,Seine-et-Marne,200057958,CA Paris - Vallée de la Marne,3.0,24,77420,"77454, 77427, 77437, 77447, 77420, 77436, 7742...",UNITE URBAINE,25695,77108,1106,48.853001,2.603,"POLYGON ((2.57384 48.85338, 2.58733 48.86501, ...",1117,Marne la Vallée,11972,143.0,179.0,"[S1X20, T1X60, V0X60, T3X61, J0X33, T4X60, R2X...",[Aides de cuisine et employés polyvalents de l...,441801.0,,,11.0,11234.319311,10574.484211,491.402988,2222.273036,62.0,20.0,LDVG,Gauche,1.0,"[77108, 77337, 77169, 93051, 93033, 77083]",5.0,16953.741449,27.098173,0.349345,0.323675,0.855895,0.024898,0.467249,0.043741,0.165939,1.0,0.210154,0.436681,3.1,0.851528,0.152313,0,"[J0X33, A1X41]",2,1.0,[],0,0.0,,,,,,,,,,,,,,,,,,,,,,False,0.733624,0.30131,0.076156,0.55131,0.851528,0.597817
91589,Savigny-sur-Orge,COM,11,Île-de-France,91,Essonne,200054781,Métropole du Grand Paris,3.0,25,91600,"91601, 91602, 91600, 91605",UNITE URBAINE,37371,75056,1109,48.68,2.346,"POLYGON ((2.34195 48.66757, 2.33813 48.66737, ...",1129,T12 Grand-Orly Seine Bièvre,21174,139.0,296.0,"[J4X62, J0X33, S1X20, U1X91, V1X80, V0X60, J3X...",[Employés administratifs et commerciaux des tr...,722579.0,,,71.0,17192.100139,15608.808708,1340.070659,5001.670735,56.0,18.0,,,0.0,"[91479, 91027, 91326, 91687, 91434, 91667, 912...",9.0,17134.709086,29.30337,0.532751,0.192367,0.521834,0.098259,0.803493,0.077947,0.825328,0.0,0.320439,0.733624,3.111111,0.862445,0.143265,1,[J0X33],1,0.781659,[],0,0.0,,,,,,,,,,,,,,,,,,,,,,False,0.401747,0.779476,0.571632,0.459061,0.862445,0.592358
93070,Saint-Ouen-sur-Seine,COM,11,Île-de-France,93,Seine-Saint-Denis,200054781,Métropole du Grand Paris,3.0,24,93400,"93582, 93401, 93489, 93589, 93400, 93402, 9348...",UNITE URBAINE,53207,75056,1109,48.911999,2.334,"POLYGON ((2.31982 48.91594, 2.33284 48.92292, ...",1147,T6 Plaine Commune,18668,194.0,162.0,"[U1X91, T3X61, L2X60, T1X60, S1X20, T4X60, J3X...","[Artistes (musique, danse, spectacles), Agents...",451934.0,,,52.0,27630.793402,23977.579551,2839.097953,1486.253441,60.0,25.0,LSOC,Gauche,1.0,"[93066, 75056, 92024, 92004, 93039, 93070]",5.0,4970.684111,41.306916,0.89738,0.429266,0.893013,0.115061,0.842795,0.102751,0.978166,1.0,0.061985,0.0131,2.4,0.231441,0.751466,1,[S1X40],1,0.781659,[],0,0.0,,,,,,,,,,,,,,,,,,,,,,False,0.921397,0.495633,0.875733,0.643013,0.231441,0.5869
94003,Arcueil,COM,11,Île-de-France,94,Val-de-Marne,200054781,Métropole du Grand Paris,2.0,24,94110,"94748, 94112, 94110, 94117, 94743, 94115, 9474...",UNITE URBAINE,21557,75056,1109,48.806,2.337,"POLYGON ((2.34385 48.79766, 2.32394 48.80155, ...",1129,T12 Grand-Orly Seine Bièvre,21174,139.0,296.0,"[J4X62, J0X33, S1X20, U1X91, V1X80, V0X60, J3X...",[Employés administratifs et commerciaux des tr...,722579.0,,,19.0,11169.790952,10159.483542,730.927728,997.749019,35.0,11.0,LVEC,Gauche,1.0,"[94037, 94043, 94076, 94016, 92007, 92049, 94003]",6.0,4852.236782,29.30337,0.532751,0.192367,0.521834,0.026295,0.480349,0.065438,0.641921,1.0,0.098209,0.091703,3.181818,0.899563,0.757388,1,[J0X33],1,0.781659,[],0,0.0,,,,,,,,,,,,,,,,,,,,,,False,0.740175,0.366812,0.878694,0.459061,0.899563,0.584934
93027,La Courneuve,COM,11,Île-de-France,93,Seine-Saint-Denis,200054781,Métropole du Grand Paris,3.0,24,93120,"93123, 93127, 93121, 93122, 93126, 93120",UNITE URBAINE,47160,75056,1109,48.926998,2.39,"POLYGON ((2.42203 48.92963, 2.41349 48.91925, ...",1147,T6 Plaine Commune,18668,194.0,162.0,"[U1X91, T3X61, L2X60, T1X60, S1X20, T4X60, J3X...","[Artistes (musique, danse, spectacles), Agents...",451934.0,,,87.0,16989.416425,16098.281763,777.75741,1706.007019,67.0,25.0,LCOM,Gauche,1.0,"[93030, 93013, 93029, 93008, 93055, 93001, 930...",8.0,7831.802206,41.306916,0.89738,0.429266,0.893013,0.192506,0.951965,0.045779,0.200873,1.0,0.105974,0.113537,2.68,0.473799,0.60841,1,[S1X40],1,0.781659,[],0,0.0,,,,,,,,,,,,,,,,,,,,,,False,0.975983,0.157205,0.804205,0.643013,0.473799,0.578603
94076,Villejuif,COM,11,Île-de-France,94,Val-de-Marne,200054781,Métropole du Grand Paris,3.0,24,94800,"94805, 94813, 94804, 94802, 94814, 94815, 9480...",UNITE URBAINE,57753,75056,1109,48.792,2.363,"POLYGON ((2.34385 48.79766, 2.34778 48.8015, 2...",1129,T12 Grand-Orly Seine Bièvre,21174,139.0,296.0,"[J4X62, J0X33, S1X20, U1X91, V1X80, V0X60, J3X...",[Employés administratifs et commerciaux des tr...,722579.0,,,58.0,28348.421889,25590.077767,1779.25079,2950.929266,70.0,24.0,"LCOM,LDVG",Gauche,1.0,"[94041, 94081, 94021, 94038, 94016, 94003, 940...",7.0,5675.569129,29.30337,0.532751,0.192367,0.521834,0.080268,0.759825,0.062764,0.567686,1.0,0.115315,0.161572,2.916667,0.679039,0.716222,1,[J0X33],1,0.781659,[],0,0.0,,,,,,,,,,,,,,,,,,,,,,False,0.879913,0.364629,0.858111,0.459061,0.679039,0.568341


## 9. Export to SuperSet

In [None]:
# def concatenate_strings(row):
#   return '{"type": "Feature","geometry":' + shp.to_geojson(row['polygon']) + '}'


# odis_search_best_export = gpd.GeoDataFrame(odis_search_best.copy())
# odis_search_best_export.set_geometry(odis_search_best_export.polygon, crs='EPSG:2154', inplace=True)
# odis_search_best_export.to_crs(epsg=4326, inplace=True)
# odis_search_best_export["polygon_as_json"] = odis_search_best_export.apply(concatenate_strings, axis=1)
# odis_search_best_export.drop(['polygon','polygon_binome'], axis=1, inplace=True)

# cols = ['met_match_codes','met_match_codes_binome','be_codfap_top','be_libfap_top','codgeo_voisins_binome','pitch']
# for col in cols:
#     odis_search_best_export[col] = odis_search_best_export[col].apply(lambda x: x.tolist() if type(x) == np.ndarray else x)

In [None]:
# from sqlalchemy import create_engine, text

# db_host = "localhost"  # Replace with the actual host (e.g., 'superset_db' if in the same Docker network, or 'localhost' if exposed)
# db_port = "5433"  # Replace with the actual port (usually 5432)
# db_user = "superset"  # Replace with the database user (often 'superset')
# db_password = "superset"  # Replace with the database password
# db_name = "examples"  # Replace with the database name (often 'superset')

# engine = create_engine(f'postgresql+psycopg2://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}')

In [None]:
# table_name = "odis_stream2_result"  # Choose a name for the table in PostgreSQL
# odis_search_best_export.to_sql(table_name, engine, if_exists='replace', schema='public', index=False)
# sql = text("GRANT SELECT ON odis_stream2_result TO examples")

# with engine.begin() as connection:
#     connection.execute(sql)

# print(f"DataFrame successfully written to table '{table_name}' in the Superset database.")

Note to myself:
Après avoir importé les données dans Postgres il faut donner les droits au user 'examples' sur la table
> docker exec -it superset_db psql -h superset_db -p 5432 -U superset -d examples

> GRANT SELECT ON odis_stream2_result TO examples;

> GRANT USAGE ON SCHEMA public TO examples;
