# THEME 1 : NAMES OVER TIME

In [1]:
import altair as alt
import pandas as pd
import numpy as np
from itertools import product
import ipywidgets as widgets
from IPython.display import display, clear_output
import geopandas as gpd  # Requires geopandas -- e.g.: conda install -c conda-forge geopandas

# Switch Altair to its "json" data transformer before any chart is built.
alt.data_transformers.enable("json")  # Let Altair/Vega-Lite work with large data sets

# No-op final statement: the cell then does not end on the `enable(...)`
# expression, so (presumably on purpose) its return value is not echoed.
pass

In [2]:
# Read the raw records and keep only rows whose birth year and department are
# real values, i.e. neither the "XXXX" year placeholder nor the "XX" department
# placeholder.
names = pd.read_csv("../data/dpt2020.csv", sep=";").loc[
    lambda records: (records.annais != "XXXX") & (records.dpt != "XX")
]

In [3]:
# Peek at five random rows of the filtered records as a sanity check.
names.sample(5)

Unnamed: 0,sexe,preusuel,annais,dpt,nombre
1054646,1,LUCIEN,2000,69,8
211256,1,BASILE,2014,7,3
3295092,2,NATACHA,1958,75,5
1708168,1,YOUSSEF,1965,75,18
2558587,2,HALIMA,1974,42,4


In [4]:
# Total births per (first name, birth year); the per-row breakdown by the other
# columns (sexe, dpt) is summed away.
group_names = names.groupby(["preusuel", "annais"], as_index=False)["nombre"].sum()

In [5]:
# First and last birth year present in the aggregated data. The column holds
# strings, so these bounds are strings too and are cast with int() where needed.
min_year, max_year = group_names["annais"].min(), group_names["annais"].max()

(min_year, max_year)

('1900', '2020')

In [6]:
# NOTE: this rebinds `names` from the raw DataFrame to an array of the distinct
# first names. Earlier cells that treat `names` as a DataFrame cannot be re-run
# after this one without reloading the data.
names = group_names["preusuel"].unique()

# Build {name: [(year, births), ...]} in a single groupby pass rather than
# re-filtering the whole frame once per name. `sort=False` yields the groups in
# order of first appearance, i.e. the same key order as `names` above.
births_per_year = {}
for name, births in group_names.groupby("preusuel", sort=False):
    births = births[["annais", "nombre"]].sort_values("annais")

    # Chronological (year, births) pairs as plain Python ints.
    births_per_year[name] = [
        (int(year), int(birth))
        for year, birth in zip(births["annais"], births["nombre"])
    ]

In [7]:
# Every calendar year from the first to the last observed one, inclusive.
scale = list(range(int(min_year), int(max_year) + 1))

In [8]:
# Pad every name's series with (year, 0) for each year of `scale` it lacks, so
# all series end up with one entry per year, in chronological order.
# The lists are extended in place; re-running this cell adds nothing new
# because years already present are skipped.
for name, data in births_per_year.items():
    # Set rather than list: constant-time membership test for each year of `scale`.
    years = {y for y, _ in data}
    for year in scale:
        if year not in years:
            data.append((year, 0))
    data.sort(key=lambda x: x[0])

# One column per name; each cell holds a (year, births) tuple.
df_data = pd.DataFrame(births_per_year)

In [9]:
import pandas as pd
import altair as alt

# Collect, for every name, its list of (year, births) tuples from the wide frame.
data = {
    "names": df_data.columns.tolist(),
    "data": [df_data[name].tolist() for name in df_data.columns],
}

df = pd.DataFrame(data)

# Expand the tuples into one row per (name, year).
# Both columns are walked in lockstep; looking the row up again with
# `df[df["names"] == name]` for every name would scan the whole frame each time.
expanded_rows = []
for name, yearly_counts in zip(df["names"], df["data"]):
    for year, births in yearly_counts:
        expanded_rows.append({"name": name, "year": year, "births": births})

# Long-format table with columns: name, year, births.
expanded_df = pd.DataFrame(expanded_rows)

In [10]:
import altair as alt
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output


# Name plotted when the widget first renders, and the rows currently on display.
# `view_df` is module-level state that the widget callbacks below rebind.
initial_name = "ANDRÉ"
view_df = expanded_df.loc[expanded_df["name"].eq(initial_name)]

In [11]:
# Inclusive list of every year covered by the long-format table.
years = [*range(expanded_df["year"].min(), expanded_df["year"].max() + 1)]

In [14]:
# Peek at five random (name, year, births) rows of the long-format table.
expanded_df.sample(5)

Unnamed: 0,name,year,births
1174840,MAXEN,1951,0
270024,CARLOS-MANUEL,1973,0
1166569,MATILIN,1908,0
1804759,YSABEL,1944,0
1577773,SHAYNE,1954,0


In [21]:
# Search field offering every known first name as a suggestion.
search_box = widgets.Combobox(
    description="Search:",
    placeholder="Type a name",
    options=expanded_df["name"].unique().tolist(),
    ensure_option=True,
    disabled=False,
)

# Year bounds of the data, computed once and shared by both sliders.
first_year, last_year = expanded_df["year"].min(), expanded_df["year"].max()
year_slider_options = dict(
    min=first_year, max=last_year, step=1, continuous_update=True
)

# `slider_up` is read by the callbacks as the lower bound of the displayed
# period; it starts at the first year.
slider_up = widgets.IntSlider(
    value=first_year, description="Scale Year up", **year_slider_options
)

# `slider_down` is read by the callbacks as the upper bound; it starts at the
# last year.
slider_down = widgets.IntSlider(
    value=last_year, description="Scale Year down", **year_slider_options
)

# Action buttons: plot the searched name, reset everything, plot a random sample.
submit_button, clear_button, all_names_button = (
    widgets.Button(description=label)
    for label in ("Submit", "Clear", "Add random Names")
)

# Output area into which every callback renders its chart or message.
output = widgets.Output()


def on_all_names(change):
    """Click handler of the "Add random Names" button.

    Replaces the global ``view_df`` with the rows of up to 500 randomly drawn
    distinct names (the previous selection is discarded, not extended) and
    renders the chart for the period currently set on the two sliders.

    Parameters
    ----------
    change
        Argument passed by the button's click event; unused.
    """
    global view_df
    unique_names = expanded_df["name"].drop_duplicates()
    # Clamp the sample size: drawing more names than exist (without
    # replacement) raises ValueError.
    sampled_names = unique_names.sample(min(500, len(unique_names)))
    view_df = expanded_df[expanded_df["name"].isin(sampled_names)]
    min_year = slider_up.value
    max_year = slider_down.value
    with output:
        clear_output(wait=True)
        # `None` matches no name, so the chart shows exactly the sampled rows.
        chart = update_chart(None, min_year, max_year)
        display(chart)


def get_filtered_years(min_year, max_year, step=5):
    """Return every ``step``-th year from ``min_year`` up to ``max_year`` inclusive.

    The sequence is anchored at ``min_year``; the result is empty when
    ``min_year`` is greater than ``max_year``.
    """
    return [*range(min_year, max_year + 1, step)]


def group_years(year, period=5):
    """Round ``year`` down to the start of its ``period``-year bucket."""
    offset_in_period = year % period
    return year - offset_in_period


def update_chart(selected_name, min_year, max_year):
    """Add a name to the plotted selection and build the births-over-time chart.

    Parameters
    ----------
    selected_name : str or None
        Name whose rows are added to the global ``view_df``. ``None``, or a name
        with no rows in ``expanded_df``, adds nothing: the current selection is
        simply re-rendered.
    min_year, max_year : int
        Inclusive bounds of the displayed period. They may be passed in either
        order; inverted bounds are swapped.

    Returns
    -------
    Altair chart
        Invisible hover points layered with one line per name; the line under
        the pointer is drawn thicker.

    Side effects
    ------------
    Rebinds the module-level ``view_df`` to the extended selection.
    """
    global view_df
    new_selection = expanded_df[expanded_df["name"] == selected_name]

    # Concatenate only the non-empty frames. Recent pandas versions warn about
    # empty entries in `concat`, and the all-object empty frame left by "Clear"
    # should not influence the column dtypes of the result.
    non_empty_frames = [
        frame for frame in (view_df, new_selection) if not frame.empty
    ]
    if non_empty_frames:
        view_df = (
            pd.concat(non_empty_frames).drop_duplicates().reset_index(drop=True)
        )

    # The two sliders move independently, so the bounds can arrive inverted.
    # Swap them instead of producing an empty axis and a zero-height chart.
    if min_year > max_year:
        min_year, max_year = max_year, min_year

    # Years actually plotted: every 5th year starting at `min_year`.
    filtered_years = get_filtered_years(min_year, max_year)

    # `filtered_years` already lies within [min_year, max_year], so membership
    # alone restricts the rows to the requested period.
    filtered_df = view_df[view_df["year"].isin(filtered_years)]

    # Highlight selection for hover interaction
    highlight = alt.selection_point(on="pointerover", fields=["name"], nearest=True)

    # Chart height grows with the number of plotted years (20 px per year).
    chart_height = 20 * len(filtered_years)

    # Shared encodings for both layers.
    base = (
        alt.Chart(filtered_df)
        .encode(
            x=alt.X(
                "year:O",
                title="Year",
                axis=alt.Axis(labelAngle=0, labelFontSize=8),
                scale=alt.Scale(domain=filtered_years),
            ),
            y="births:Q",
            color="name:N",
            tooltip=["name", "year", "births"],
        )
        .properties(
            width=800,
            height=chart_height,
            title="Births by Year (Every 5 Years)",
        )
    )

    # Fully transparent points: they only carry the hover selection and tooltips.
    points = (
        base.mark_circle(size=100).encode(opacity=alt.value(0)).add_params(highlight)
    )

    # Lines are thin by default and thicker for the highlighted name.
    lines = base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(1), alt.value(3))
    )

    # Layer the points over the lines.
    chart = points + lines

    return chart


def on_submit(change):
    """Click handler of the "Submit" button.

    Normalizes the search-box text (trimmed, upper-cased) and, when that name
    exists in ``expanded_df``, adds it to the chart for the period currently
    set on the sliders. Otherwise prints a "No data found" message. Everything
    is rendered into the shared ``output`` area, replacing what was there.

    Parameters
    ----------
    change
        Argument passed by the button's click event; unused.
    """
    with output:
        clear_output(wait=True)
        search_query = search_box.value.strip().upper()
        lower_bound, upper_bound = slider_up.value, slider_down.value

        # Guard clause: unknown name -> message only, no chart.
        name_is_known = (expanded_df["name"] == search_query).any()
        if not name_is_known:
            print(f"No data found for {search_query}")
            return

        display(update_chart(search_query, lower_bound, upper_bound))


def on_clear(change):
    """Click handler of the "Clear" button.

    Resets both sliders to the full year range, empties the global ``view_df``
    selection and replaces the displayed chart with a confirmation message.

    Parameters
    ----------
    change
        Argument passed by the button's click event; unused.
    """
    global view_df
    # Reset the sliders FIRST. Changing a slider's value notifies
    # `on_slider_change`, which calls `update_chart` and adds the search-box
    # name to `view_df`. If the selection were emptied before this point, that
    # callback would silently repopulate it.
    slider_up.value = expanded_df["year"].min()  # Back to the minimum year
    slider_down.value = expanded_df["year"].max()  # Back to the maximum year

    # Empty slice of the source table: no rows, same columns and dtypes.
    view_df = expanded_df.iloc[0:0]

    with output:
        clear_output(wait=True)
        print("Graph cleared")


def on_slider_change(change):
    """Observer of both year sliders: re-render the chart for the new period.

    If the search box holds a name, it is (re-)added to the selection. If the
    search box is empty but names are already plotted (the initial name or a
    random sample), those are re-rendered for the new period instead of being
    replaced by a prompt. The prompt is shown only when there is nothing to plot.

    Parameters
    ----------
    change
        Change notification passed by the slider; unused, the current values
        are read from the sliders directly.
    """
    with output:
        clear_output(wait=True)
        min_year = slider_up.value
        max_year = slider_down.value
        search_query = search_box.value.strip().upper()
        if not search_query and view_df.empty:
            display("Please select a name.")
        else:
            # An empty query is passed as None, which matches no name, so the
            # existing selection is redrawn unchanged.
            chart = update_chart(search_query or None, min_year, max_year)
            display(chart)


# Wire every button to its click handler.
for button, handler in (
    (submit_button, on_submit),
    (clear_button, on_clear),
    (all_names_button, on_all_names),
):
    button.on_click(handler)

# Both sliders share one observer, notified when their value changes.
for year_slider in (slider_up, slider_down):
    year_slider.observe(on_slider_change, names="value")

# Layout: search box and buttons on one row, the two sliders stacked below,
# then the output area that receives the charts and messages.
controls_row = widgets.HBox(
    [search_box, submit_button, clear_button, all_names_button]
)
display(widgets.VBox([controls_row, slider_up, slider_down]))
display(output)

# Render the initial name over the sliders' full year range.
with output:
    initial_chart = update_chart(initial_name, slider_up.min, slider_down.max)
    display(initial_chart)

VBox(children=(HBox(children=(Combobox(value='', description='Search:', ensure_option=True, options=('AADIL', …

Output()