In [2]:
import altair as alt
import pandas as pd
import numpy as np
from itertools import product
import ipywidgets as widgets
from IPython.display import display, clear_output
import geopandas as gpd  # Requires geopandas -- e.g.: conda install -c conda-forge geopandas

# Let Altair/Vega-Lite work with large data sets.
# The trailing semicolon keeps the notebook from echoing the call's return value.
alt.data_transformers.enable("json");

In [3]:
raw_names = pd.read_csv("dpt2020.csv", sep=";")

# Keep only rows whose annais is not the "XXXX" marker and whose dpt is not
# the "XX" marker.
has_known_year = raw_names.annais != "XXXX"
has_known_dpt = raw_names.dpt != "XX"
names = raw_names[has_known_year & has_known_dpt]

In [4]:
# Preview five random rows; the fixed seed makes the preview identical on every
# Restart & Run All instead of changing with each execution.
names.sample(5, random_state=0)

Unnamed: 0,sexe,preusuel,annais,dpt,nombre
1181697,1,MAXIMILIEN,2003,2,3
3345770,2,OCÉANE,1990,42,19
137965,1,ANTOINE,1911,13,116
1928232,2,ANNETTE,1949,16,12
3503648,2,SALOMÉ,2012,87,10


In [5]:
# Sum `nombre` for every (preusuel, annais) pair, then turn the grouping keys
# back into ordinary columns.
births_by_name_year = names.groupby(["preusuel", "annais"])["nombre"].sum()
group_names = births_by_name_year.reset_index()

In [6]:
# First and last value of annais present in the aggregated data.
annais_values = group_names["annais"]
min_year, max_year = annais_values.min(), annais_values.max()

(min_year, max_year)

('1900', '2020')

In [7]:
# Build {first name: [(year, births), ...]} with each list in ascending year order.
#
# One groupby pass replaces re-filtering the whole frame once per name, and the
# loop no longer rebinds `names`, so that variable keeps referring to the
# DataFrame loaded earlier and the earlier cells stay re-runnable.
# sort=False keeps the names in order of first appearance, the same order
# `group_names["preusuel"].unique()` produced.
births_per_year = {}
for name, births in group_names.groupby("preusuel", sort=False):
    births = births[["annais", "nombre"]].sort_values("annais")

    # Store plain Python ints so later comparisons against range() values work.
    births_per_year[name] = [
        (int(year), int(birth))
        for year, birth in zip(births["annais"], births["nombre"])
    ]

In [8]:
# Every year from min_year to max_year inclusive, as integers.
scale = list(range(int(min_year), int(max_year) + 1))

In [9]:
# Pad every name's series so it has one (year, births) entry for each year in
# `scale`; years with no recorded births get a count of 0.
for data in births_per_year.values():
    # A set gives constant-time membership tests instead of scanning a list
    # once per year in `scale`.
    recorded_years = {year for year, _ in data}
    data.extend((year, 0) for year in scale if year not in recorded_years)
    data.sort(key=lambda entry: entry[0])

# One column per name; each cell holds a (year, births) tuple.
df_data = pd.DataFrame(births_per_year)

In [10]:
import pandas as pd
import altair as alt

# Reshape df_data (one column per name, cells are (year, births) tuples) into a
# two-column frame: the name and its full list of tuples.
data = {
    "names": df_data.columns.tolist(),
    "data": [df_data[name].tolist() for name in df_data.columns.tolist()],
}

df = pd.DataFrame(data)

# Expand the tuples into one row per (name, year).
# Walking the two columns in parallel avoids re-filtering `df` for every name,
# which made the original loop quadratic in the number of names.
expanded_rows = [
    {"name": name, "year": year, "births": births}
    for name, series in zip(df["names"], df["data"])
    for year, births in series
]

expanded_df = pd.DataFrame(expanded_rows)

In [11]:
import altair as alt
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output


# Name plotted when the notebook first renders; view_df holds the rows of every
# name currently shown in the chart.
initial_name = "ANDRÉ"
is_initial_name = expanded_df["name"] == initial_name
view_df = expanded_df.loc[is_initial_name]

In [12]:
# Every year between the smallest and largest value in expanded_df, inclusive.
year_values = expanded_df["year"]
years = list(range(year_values.min(), year_values.max() + 1))

In [13]:
# Echo the year list built in the previous cell.
years

[1900,
 1901,
 1902,
 1903,
 1904,
 1905,
 1906,
 1907,
 1908,
 1909,
 1910,
 1911,
 1912,
 1913,
 1914,
 1915,
 1916,
 1917,
 1918,
 1919,
 1920,
 1921,
 1922,
 1923,
 1924,
 1925,
 1926,
 1927,
 1928,
 1929,
 1930,
 1931,
 1932,
 1933,
 1934,
 1935,
 1936,
 1937,
 1938,
 1939,
 1940,
 1941,
 1942,
 1943,
 1944,
 1945,
 1946,
 1947,
 1948,
 1949,
 1950,
 1951,
 1952,
 1953,
 1954,
 1955,
 1956,
 1957,
 1958,
 1959,
 1960,
 1961,
 1962,
 1963,
 1964,
 1965,
 1966,
 1967,
 1968,
 1969,
 1970,
 1971,
 1972,
 1973,
 1974,
 1975,
 1976,
 1977,
 1978,
 1979,
 1980,
 1981,
 1982,
 1983,
 1984,
 1985,
 1986,
 1987,
 1988,
 1989,
 1990,
 1991,
 1992,
 1993,
 1994,
 1995,
 1996,
 1997,
 1998,
 1999,
 2000,
 2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016,
 2017,
 2018,
 2019,
 2020]

In [15]:
# Values shared by the controls below.
name_options = list(expanded_df["name"].unique())
earliest_year = expanded_df["year"].min()
latest_year = expanded_df["year"].max()

# Search box with suggestions (ipywidgets.Combobox) listing every known name.
search_box = widgets.Combobox(
    placeholder="Type a name",
    options=name_options,
    description="Search:",
    ensure_option=True,
    disabled=False,
)

# Slider choosing the first year shown on the chart; it starts at the earliest year.
slider = widgets.IntSlider(
    value=earliest_year,
    min=earliest_year,
    max=latest_year,
    step=1,
    description="Scale Year",
    continuous_update=True,
)

# Buttons that add the searched name to the chart / empty the chart.
submit_button = widgets.Button(description="Submit")
clear_button = widgets.Button(description="Clear")

# Output area in which charts and messages are rendered.
output = widgets.Output()


def update_chart(selected_name, min_year):
    """Add a name to the plotted selection and build the births-by-year chart.

    Parameters
    ----------
    selected_name
        Value matched against the ``name`` column of ``expanded_df``; the
        matching rows are merged into the global ``view_df``.
    min_year
        First year to show. Rows with an earlier year are left out, and the
        Y-axis domain runs from this year to the last year in ``expanded_df``.

    Returns
    -------
    The layered Altair chart: invisible hover points plus one line per name.

    Side effects
    ------------
    Rebinds the global ``view_df`` to the accumulated, de-duplicated selection.
    """
    global view_df

    # Merge the requested name into what is already plotted.
    picked_rows = expanded_df[expanded_df["name"] == selected_name]
    merged = pd.concat([view_df, picked_rows])
    view_df = merged.drop_duplicates().reset_index(drop=True)

    # Only years from min_year onwards are drawn.
    visible_rows = view_df[view_df["year"] >= min_year]

    # Y-axis domain, and a chart height that grows with the number of years shown.
    last_year = expanded_df["year"].max()
    year_domain = list(range(min_year, last_year + 1))
    chart_height = len(year_domain) * 8  # Adjust the multiplier as needed

    # Hover selection keyed on the name, used to thicken the hovered line.
    hover = alt.selection_point(on="pointerover", fields=["name"], nearest=True)

    year_axis = alt.Y(
        "year:O",
        title="Year",
        axis=alt.Axis(labelAngle=0, labelFontSize=8),
        scale=alt.Scale(domain=year_domain),
    )
    base = alt.Chart(visible_rows).encode(
        y=year_axis,
        x="births:Q",
        color="name:N",
        tooltip=["name", "year", "births"],
    )
    base = base.properties(width=400, height=chart_height, title="Births by Year")

    # Fully transparent circles: they carry the hover selection and the tooltips.
    hover_points = base.mark_circle(size=100).encode(opacity=alt.value(0))
    hover_points = hover_points.add_params(hover)

    # Lines are drawn with size 3 for the hovered name and size 1 otherwise.
    line_size = alt.condition(~hover, alt.value(1), alt.value(3))
    lines = base.mark_line().encode(size=line_size)

    return hover_points + lines


def on_submit(change):
    """Handle a click on the Submit button.

    Reads the search box (stripped and upper-cased). When the name exists in
    ``expanded_df`` the chart is rebuilt with that name added, starting at the
    slider's current year; otherwise a "No data found" message is printed.
    ``change`` is the argument supplied by ``on_click`` and is not used.
    """
    with output:
        clear_output(wait=True)
        search_query = search_box.value.strip().upper()
        known_names = expanded_df["name"].unique()

        if search_query not in known_names:
            print(f"No data found for {search_query}")
            return

        display(update_chart(search_query, slider.value))


def on_clear(change):
    """Handle a click on the Clear button.

    Resets the slider to the earliest year, empties the global ``view_df`` so
    no name is plotted any more, and prints "Graph cleared" in the output area.
    ``change`` is the argument supplied by ``on_click`` and is not used.
    """
    global view_df

    # Reset the slider BEFORE emptying view_df. Assigning slider.value notifies
    # on_slider_change (registered with slider.observe), which calls update_chart
    # and can merge the name still present in the search box back into view_df.
    # Emptying view_df afterwards guarantees the selection really ends up empty.
    slider.value = expanded_df["year"].min()  # Reset the slider to the minimum year
    view_df = pd.DataFrame(columns=expanded_df.columns)

    with output:
        clear_output(wait=True)
        print("Graph cleared")


def on_slider_change(change):
    """Redraw the chart when the year slider moves.

    ``change`` is the notification passed by ``slider.observe``; the slider's
    current value is read directly from the widget.

    If the search box holds a name, that name is added to the plotted selection
    as before. If the box is empty but names are already plotted (for example
    the initial chart), they are redrawn from the new start year instead of
    being replaced by a prompt. The prompt is shown only when there is nothing
    to plot.
    """
    with output:
        clear_output(wait=True)
        min_year = slider.value
        search_query = search_box.value.strip().upper()

        if not search_query and view_df.empty:
            # print() rather than display(): display() of a str shows its quoted
            # repr, and the other handlers report their messages with print().
            print("Please select a name.")
        else:
            # An empty query matches no rows in expanded_df, so update_chart
            # simply re-renders the names already held in view_df.
            chart = update_chart(search_query, min_year)
            display(chart)


# Connect each control to its handler.
submit_button.on_click(on_submit)
clear_button.on_click(on_clear)
slider.observe(on_slider_change, names="value")  # Fires whenever the slider value changes

# Layout: search box and buttons on one row, the slider underneath, then the output area.
button_row = widgets.HBox([search_box, submit_button, clear_button])
controls = widgets.VBox([button_row, slider])
display(controls)
display(output)

# Render the initial chart starting from the slider's minimum year.
with output:
    initial_chart = update_chart(initial_name, slider.min)
    display(initial_chart)

VBox(children=(HBox(children=(Combobox(value='', description='Search:', ensure_option=True, options=('AADIL', …

Output()