# Library imports

In [3]:
from itertools import product

import altair as alt
import geopandas as gpd # Requires geopandas -- e.g.: conda install -c conda-forge geopandas
import ipywidgets as widgets
import numpy as np
import pandas as pd
from IPython.display import display, clear_output
from shapely import wkt
# Switch Altair to its 'json' data transformer so Altair/Vega-Lite can work with large data sets.
# The trailing semicolon keeps the notebook from echoing the call's return value.
alt.data_transformers.enable('json');

# Data loading and pre-processing

In [4]:
# Load the raw records, then clean and aggregate them in a single pipeline:
#   1. rename the French column headers (the year column becomes 'decade' once bucketed below),
#   2. discard the rare-names bucket, the listed department codes and rows with an unknown year,
#   3. bucket each year into its decade (e.g. 1987 -> 1980),
#   4. sum the counts per (gender, name, decade, department),
#   5. turn the gender codes '1' / '2' into readable labels.
names = (
    pd.read_csv("../data/dpt2020.csv", sep=";")
    .rename(columns={'annais': 'decade', 'nombre': 'count', 'sexe': 'gender'})
    .loc[lambda df: df['preusuel'] != '_PRENOMS_RARES']
    .loc[lambda df: ~df['dpt'].isin(['XX', '971', '972', '973', '974'])]
    .loc[lambda df: df['decade'] != 'XXXX']
    .assign(decade=lambda df: df['decade'].astype(int) // 10 * 10)
    .groupby(['gender', 'preusuel', 'decade', 'dpt'])['count']
    .sum()
    .reset_index()
    .assign(gender=lambda df: df['gender'].astype(str).replace({'1': 'Male', '2': 'Female'}))
)

# One table per gender
names_m = names.loc[names['gender'] == 'Male']
names_f = names.loc[names['gender'] == 'Female']

# NOTE: the original notebook carried a commented-out (never executed) step here that
# zero-filled every missing (preusuel, dpt, decade) combination for each gender using
# itertools.product followed by a left merge. It is not part of the pipeline.

In [5]:
# Show the aggregated table: one row per (gender, preusuel, decade, dpt) with its summed count
names

Unnamed: 0,gender,preusuel,decade,dpt,count
0,Male,AADIL,1980,84,3
1,Male,AADIL,1990,92,3
2,Male,AAHIL,2010,95,3
3,Male,AARON,1960,75,3
4,Male,AARON,1970,75,3
...,...,...,...,...,...
711244,Female,ÉVELYNE,1950,88,3
711245,Female,ÉVY,2010,42,3
711246,Female,ÉVY,2010,57,3
711247,Female,ÉVY,2010,69,4


In [6]:
# Stack the two per-gender tables, fix the column order and sort the rows
# (gender descending, then name, decade and department ascending)
names = (
    pd.concat([names_m, names_f])
    .loc[:, ['gender', 'preusuel', 'decade', 'dpt', 'count']]
    .sort_values(
        by=['gender', 'preusuel', 'decade', 'dpt'],
        ascending=[False, True, True, True],
    )
    .reset_index(drop=True)
)

# Rank every name inside its (gender, decade, department) group, highest count first;
# method='first' breaks ties by order of appearance so ranks are unique integers
names['rank'] = (
    names
    .groupby(['gender', 'decade', 'dpt'])['count']
    .rank(method='first', ascending=False)
    .astype(int)
)
names

Unnamed: 0,gender,preusuel,decade,dpt,count,rank
0,Male,AADIL,1980,84,3,324
1,Male,AADIL,1990,92,3,795
2,Male,AAHIL,2010,95,3,1042
3,Male,AARON,1960,75,3,821
4,Male,AARON,1970,75,3,857
...,...,...,...,...,...,...
711244,Female,ÉVELYNE,1950,88,3,241
711245,Female,ÉVY,2010,42,3,861
711246,Female,ÉVY,2010,57,3,928
711247,Female,ÉVY,2010,69,4,1383


In [7]:
# Read the simplified department outlines and give the name column an English header
# (an alternative file including overseas departments exists:
#  '../data/departements-avec-outre-mer.geojson' -- not used here)
depts_france = (
    gpd.read_file('../data/departements-version-simplifiee.geojson')
    .rename(columns={'nom': 'department'})
)

# Fold both Corsican codes (2A, 2B) into the single code '20' named 'Corse',
# then merge their shapes into one geometry per code
depts_france.loc[depts_france['code'].isin(['2A', '2B']), 'code'] = '20'
depts_france.loc[depts_france['code'] == '20', 'department'] = 'Corse'
depts_france = depts_france.dissolve(by='code', as_index=False)[['code', 'department', 'geometry']]
depts_france

Unnamed: 0,code,department,geometry
0,01,Ain,"POLYGON ((4.78021 46.17668, 4.79458 46.21832, ..."
1,02,Aisne,"POLYGON ((4.04797 49.40564, 4.03991 49.39740, ..."
2,03,Allier,"POLYGON ((3.03207 46.79491, 3.04907 46.75808, ..."
3,04,Alpes-de-Haute-Provence,"POLYGON ((5.67604 44.19143, 5.69209 44.18648, ..."
4,05,Hautes-Alpes,"POLYGON ((6.26057 45.12685, 6.29922 45.10855, ..."
...,...,...,...
90,91,Essonne,"POLYGON ((2.22656 48.77610, 2.23298 48.76620, ..."
91,92,Hauts-de-Seine,"POLYGON ((2.29097 48.95097, 2.32697 48.94536, ..."
92,93,Seine-Saint-Denis,"POLYGON ((2.55306 49.00982, 2.58031 48.99159, ..."
93,94,Val-de-Marne,"POLYGON ((2.33190 48.81701, 2.36395 48.81632, ..."


In [8]:
# Attach the department name and geometry to every names row; the right join keeps
# all rows of `names` even when a department code has no outline.
# Columns left over from a previous run of this cell are dropped first, so running the
# cell twice does not produce suffixed duplicates (code_x, geometry_y, ...).
names = depts_france.merge(
    names.drop(columns=['code', 'department', 'geometry'], errors='ignore'),
    how='right',
    left_on='code',
    right_on='dpt',
)
names

Unnamed: 0,code,department,geometry,gender,preusuel,decade,dpt,count,rank
0,84,Vaucluse,"MULTIPOLYGON (((4.65347 44.30210, 4.65062 44.3...",Male,AADIL,1980,84,3,324
1,92,Hauts-de-Seine,"POLYGON ((2.29097 48.95097, 2.32697 48.94536, ...",Male,AADIL,1990,92,3,795
2,95,Val-d'Oise,"POLYGON ((2.59052 49.07965, 2.57203 49.06149, ...",Male,AAHIL,2010,95,3,1042
3,75,Paris,"POLYGON ((2.41634 48.84924, 2.46226 48.84254, ...",Male,AARON,1960,75,3,821
4,75,Paris,"POLYGON ((2.41634 48.84924, 2.46226 48.84254, ...",Male,AARON,1970,75,3,857
...,...,...,...,...,...,...,...,...,...
711244,88,Vosges,"POLYGON ((5.47006 48.42093, 5.51099 48.41822, ...",Female,ÉVELYNE,1950,88,3,241
711245,42,Loire,"POLYGON ((3.89953 46.27591, 3.90940 46.25773, ...",Female,ÉVY,2010,42,3,861
711246,57,Moselle,"POLYGON ((5.89340 49.49691, 5.93994 49.50097, ...",Female,ÉVY,2010,57,3,928
711247,69,Rhône,"POLYGON ((4.38808 46.21979, 4.39205 46.26302, ...",Female,ÉVY,2010,69,4,1383


In [9]:
# Blank map: one row per distinct (department, dpt, geometry) with a zero count,
# so every department can still be drawn when there is no data for it
blank_map = (
    names[['department', 'dpt', 'geometry']]
    .drop_duplicates()
    .reset_index(drop=True)
    .assign(count=0)
)
blank_map

Unnamed: 0,department,dpt,geometry,count
0,Vaucluse,84,"MULTIPOLYGON (((4.65347 44.30210, 4.65062 44.3...",0
1,Hauts-de-Seine,92,"POLYGON ((2.29097 48.95097, 2.32697 48.94536, ...",0
2,Val-d'Oise,95,"POLYGON ((2.59052 49.07965, 2.57203 49.06149, ...",0
3,Paris,75,"POLYGON ((2.41634 48.84924, 2.46226 48.84254, ...",0
4,Finistère,29,"MULTIPOLYGON (((-3.65428 48.61697, -3.64110 48...",0
...,...,...,...,...
90,Gers,32,"POLYGON ((0.07605 43.98314, 0.14096 43.99468, ...",0
91,Haute-Loire,43,"POLYGON ((3.89741 45.35708, 3.91694 45.33940, ...",0
92,Lozère,48,"POLYGON ((3.36134 44.97141, 3.38637 44.95274, ...",0
93,Creuse,23,"POLYGON ((2.16779 46.42407, 2.19757 46.42830, ...",0


# Construction progressive de la visualisation

In [10]:
# Interactive map of France: the department under the mouse pointer is highlighted.
# (pandas, altair, geopandas and shapely.wkt are imported in the first cell of the notebook.)

# Parse each geometry from its WKT text so the column holds shapely objects.
# NOTE(review): the geometries originate from gpd.read_file via the merge above, so they are
# presumably already shapely objects and this round-trip may be redundant -- confirm before removing.
blank_map['geometry'] = blank_map['geometry'].apply(lambda x: wkt.loads(str(x)))

# Build a GeoDataFrame from the blank map
gdf = gpd.GeoDataFrame(blank_map, geometry='geometry')

# Simplify the outlines (tolerance 0.01) to keep rendering fast
gdf['geometry'] = gdf['geometry'].simplify(0.01)

# Make sure the geometries are expressed in longitude/latitude (EPSG:4326)
gdf = gdf.to_crs(epsg=4326)

# Hover selection keyed on the department name; empty=False means that nothing matches
# while no department is hovered (the boolean replaces the legacy 'none' string)
selection = alt.selection_point(fields=['department'], on='mouseover', empty=False, clear='mouseout')

# Base layer: every department drawn in blue
base_chart = alt.Chart(gdf).mark_geoshape(
    stroke='black',
    strokeWidth=1
).encode(
    color=alt.value('steelblue'),
    tooltip=['department:N', 'count:Q']
).properties(
    width=800,
    height=600,
    title='Carte de la France avec les données de noms'
).project(
    'mercator'
)

# Highlight layer: only the selected (hovered) department is kept by the filter.
# The selection parameter is attached here with add_params, which replaces the
# deprecated add_selection.
highlight_chart = alt.Chart(gdf).mark_geoshape(
    stroke='black',
    strokeWidth=1
).encode(
    color=alt.condition(selection, alt.value('steelblue'), alt.value('#f0f0f0')),
    tooltip=['department:N', 'count:Q']
).add_params(
    selection
).transform_filter(
    selection
)

# De-emphasis layer: every department that is NOT selected, drawn in a very light grey.
# NOTE(review): with nothing hovered this filter keeps all departments, which presumably
# paints over the blue base layer -- confirm this is the intended initial look.
deemphasize_chart = alt.Chart(gdf).mark_geoshape(
    stroke='black',
    strokeWidth=1
).encode(
    color=alt.value('#f0f0f0'),
    opacity=alt.condition(selection, alt.value(0.2), alt.value(1)),
    tooltip=['department:N', 'count:Q']
).transform_filter(
    ~selection
)

# Stack the three layers (base at the bottom, highlight on top)
final_chart = alt.layer(base_chart, deemphasize_chart, highlight_chart).properties(
    width=800,
    height=600,
    title='Carte de la France interactive'
)

# Display the map
final_chart



In [12]:
# Helper: top-n first names for one gender and decade, summed over all departments
def top_names_by_count(data, gender, decade, n=10):
    """Return the `n` most frequent first names for `gender` during `decade`.

    `data` is expected to hold one row per (gender, preusuel, decade, dpt), so the
    counts are summed over departments before ranking; ranking the raw rows would
    return the same name once per department.

    Returns a DataFrame with the columns 'preusuel' and 'count', sorted by
    descending count (at most `n` rows, possibly empty).
    """
    mask = (data['gender'] == gender) & (data['decade'] == decade)
    totals = data.loc[mask].groupby('preusuel', as_index=False)['count'].sum()
    return totals.nlargest(n, 'count')


# Build the horizontal bar chart of the most frequent first names
def create_bar_chart(gender, decade, color):
    """Return an Altair bar chart of the 10 most frequent names for `gender` in `decade`.

    Parameters:
        gender: 'Male' or 'Female', matched against the 'gender' column of `names`.
        decade: first year of the decade (e.g. 1950), matched against the 'decade' column.
        color:  fill colour of the bars.
    """
    # Aggregate across departments first so each name appears exactly once
    top_names = top_names_by_count(names, gender, decade, n=10)
    bar_chart = alt.Chart(top_names).mark_bar(color=color).encode(
        y=alt.Y('preusuel:N', sort='-x', title='Noms'),
        x=alt.X('count:Q', title='Nombre'),
        tooltip=['preusuel', 'count']
    ).properties(
        width=300,
        height=250,
        title=f'Top prénoms {gender.lower()}s pendant les années {decade}s'
    )
    return bar_chart

# Redraw both bar charts whenever the slider value changes
def update_bar_charts(change):
    """Render the male and female top-names charts for the decade shown on the slider.

    `change` is the widget change notification; it is not inspected (the current
    slider value is read directly), so the function can also be called with None.
    Any error raised while building the charts is printed in the output area.
    """
    with output:
        clear_output(wait=True)
        try:
            # The slider label looks like '1950-1959'; keep the starting year
            start_year = decade_slider.value.partition('-')[0]
            decade = int(start_year)
            charts = [
                create_bar_chart(label, decade, shade)
                for label, shade in (('Male', 'lightblue'), ('Female', 'pink'))
            ]
            # Side by side, each chart keeping its own y axis
            display(alt.hconcat(*charts).resolve_scale(y='independent'))
        except Exception as e:
            print(f"Une erreur est survenue: {e}")

# Widget setup
# Slider with one label per decade, from '1900-1909' through '2010-2019'
decade_slider = widgets.SelectionSlider(
    options=[f'{start}-{start + 9}' for start in range(1900, 2020, 10)],
    value='1900-1909',
    description='Décennie:',
    continuous_update=False,
)
# Play control stepping year by year from 1900 to 2010, one step every 100 ms
play = widgets.Play(
    value=1900,
    min=1900,
    max=2010,
    step=1,
    interval=100,
    description="Press play",
)
# Output area that receives the rendered charts
output = widgets.Output()

# Link the play control to the decade slider
def on_play_change(change):
    """Move the slider to the decade starting at the play widget's new value.

    Values that do not correspond to one of the slider labels are ignored.
    """
    start = change.new
    label = f"{start}-{start+9}"
    if label not in decade_slider.options:
        return
    decade_slider.value = label
# Lay out the controls above the chart output area and show them
widgets_display = widgets.VBox([widgets.HBox([decade_slider, play]), output])
display(widgets_display)

# Register each observer exactly once (the play handler used to be registered twice)
play.observe(on_play_change, names='value')
decade_slider.observe(update_bar_charts, names='value')

# Draw the charts for the initial slider position
update_bar_charts(None)


VBox(children=(HBox(children=(SelectionSlider(continuous_update=False, description='Décennie:', options=('1900…

In [23]:
# NOTE(review): this cell repeats the previous bar-chart cell; the definitions below
# shadow the earlier ones. Consider keeping a single copy.

# Helper: top-n first names for one gender and decade, summed over all departments
def top_names_by_count(data, gender, decade, n=10):
    """Return the `n` most frequent first names for `gender` during `decade`.

    `data` is expected to hold one row per (gender, preusuel, decade, dpt), so the
    counts are summed over departments before ranking; ranking the raw rows would
    return the same name once per department.

    Returns a DataFrame with the columns 'preusuel' and 'count', sorted by
    descending count (at most `n` rows, possibly empty).
    """
    mask = (data['gender'] == gender) & (data['decade'] == decade)
    totals = data.loc[mask].groupby('preusuel', as_index=False)['count'].sum()
    return totals.nlargest(n, 'count')


# Build the horizontal bar chart of the most frequent first names
def create_bar_chart(gender, decade, color):
    """Return an Altair bar chart of the 10 most frequent names for `gender` in `decade`.

    Parameters:
        gender: 'Male' or 'Female', matched against the 'gender' column of `names`.
        decade: first year of the decade (e.g. 1950), matched against the 'decade' column.
        color:  fill colour of the bars.
    """
    # Aggregate across departments first so each name appears exactly once
    top_names = top_names_by_count(names, gender, decade, n=10)
    # Horizontal bars, longest first
    bar_chart = alt.Chart(top_names).mark_bar(color=color).encode(
        y=alt.Y('preusuel:N', sort='-x', title='Noms'),
        x=alt.X('count:Q', title='Nombre'),
        tooltip=['preusuel', 'count']
    ).properties(
        width=300,
        height=250,
        title=f'Top prénoms {gender.lower()}s pendant les années {decade}s'
    )
    return bar_chart

# Redraw both bar charts whenever the slider value changes
def update_bar_charts(change):
    """Render the male and female top-names charts for the decade shown on the slider.

    `change` is the widget change notification; it is not inspected (the current
    slider value is read directly), so the function can also be called with None.
    Any error raised while building the charts is printed in the output area.
    """
    with output:
        clear_output(wait=True)
        try:
            # The slider label looks like '1950-1959'; keep the starting year
            start_year = decade_slider.value.partition('-')[0]
            decade = int(start_year)
            # One chart per gender, male first
            charts = [
                create_bar_chart(label, decade, shade)
                for label, shade in (('Male', 'lightblue'), ('Female', 'pink'))
            ]
            # Side by side, each chart keeping its own y axis
            display(alt.hconcat(*charts).resolve_scale(y='independent'))
        except Exception as e:
            print(f"Une erreur est survenue: {e}")

# Widget setup
# Slider with one label per decade, from '1900-1909' through '2010-2019'
decade_slider = widgets.SelectionSlider(
    options=[f'{start}-{start + 9}' for start in range(1900, 2020, 10)],
    value='1900-1909',
    description='Décennie:',
    continuous_update=False,
)
# Play control stepping year by year from 1900 to 2010, one step every 100 ms
play = widgets.Play(
    value=1900,
    min=1900,
    max=2010,
    step=1,
    interval=100,
    description="Press play",
)
# Output area that receives the rendered charts
output = widgets.Output()

# Link the play control to the decade slider
def on_play_change(change):
    """Move the slider to the decade starting at the play widget's new value.

    Values that do not correspond to one of the slider labels are ignored.
    """
    start = change.new
    label = f"{start}-{start+9}"
    if label not in decade_slider.options:
        return
    decade_slider.value = label
# Lay out the controls above the chart output area and show them
widgets_display = widgets.VBox([widgets.HBox([decade_slider, play]), output])
display(widgets_display)

# Register each observer exactly once (the play handler used to be registered twice)
play.observe(on_play_change, names='value')
decade_slider.observe(update_bar_charts, names='value')

# Draw the charts for the initial slider position
update_bar_charts(None)

VBox(children=(HBox(children=(SelectionSlider(continuous_update=False, description='Décennie:', options=('1900…

# Make visualization