In [None]:
import pandas as pd
import plotly.express as px

# Read the two input tables. The locations file is decoded as ISO-8859-1;
# the data file uses the pandas default encoding.
df_data = pd.read_csv("DaycareData.csv")
df_locations = pd.read_csv("DaycareLocations.csv", encoding='ISO-8859-1')

# Build the working table in one pipeline:
#   1. inner-join the two tables on 'daycare_id' (rows without a match on
#      both sides are dropped),
#   2. cast 'year' to int so later comparisons and max() operate on integers,
#   3. keep only the rows whose 'inactive' flag equals False.
df_merged = (
    df_data.merge(df_locations, on="daycare_id", how="inner")
    .astype({"year": int})
    .loc[lambda frame: frame["inactive"].eq(False)]
)

# --- 1. LINE CHART: Average Enrollment Over Time by Country ---
# NOTE(review): this section was originally labelled "DROPDOWN", but no
# dropdown control is configured below; confirm whether one was intended.

# Mean enrollment for every (country_code, year) pair, returned as a flat
# table in which the grouping keys remain ordinary columns.
enrollment_trend = df_merged.groupby(
    ["country_code", "year"], as_index=False
)["enrollment"].mean()

# One line per country code, with year on the x-axis and the averaged
# enrollment on the y-axis.
fig_trend = px.line(
    enrollment_trend,
    x="year",
    y="enrollment",
    color="country_code",
    title="Average Enrollment Over Time by Country",
    labels=dict(
        enrollment="Avg Enrollment",
        year="Year",
        country_code="Country",
    ),
)

# Show a marker at each data point in addition to the connecting line,
# then render the figure.
fig_trend.update_traces(mode="lines+markers").show()

# --- 2. BAR: Enrollment and Staffing in Most Recent Year ---

# The most recent year present in the merged table, and the rows for it.
latest_year = df_merged["year"].max()
df_latest = df_merged.loc[df_merged["year"].eq(latest_year)]

# Grouped bars: for each daycare name, one bar for 'enrollment' and one for
# 'staff'. Passing a list as y makes plotly express label the series with
# the generic 'variable' / 'value' names, which are relabelled here.
fig_bar = px.bar(
    df_latest,
    x="daycare_name",
    y=["enrollment", "staff"],
    barmode="group",
    title=f"Enrollment and Staffing in {latest_year}",
    labels=dict(value="Count", daycare_name="Daycare", variable="Metric"),
)

# Set the x-axis tick label angle to 45, then render the figure.
fig_bar.update_layout(xaxis={"tickangle": 45}).show()
