In [None]:
# Wizualizacja Danych, Lab 4 - Jan Banot
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode

import ipywidgets as widgets
from ipywidgets import interactive, VBox

init_notebook_mode(connected=True)

In [None]:
# Data source: https://docs.owid.io/projects/etl/api/covid/#download-data
# The location can be overridden with the COVID_DATA_PATH environment variable;
# the original local path stays as the fallback so existing setups keep working.
DATA_PATH = os.environ.get(
    "COVID_DATA_PATH",
    "/Users/janbanot/Dev/uni/msc-cs-code/sem3/WD/data/covid-compact.csv",
)

try:
    df = pd.read_csv(DATA_PATH, parse_dates=["date"])
except FileNotFoundError:
    print(f"BŁĄD: Plik {DATA_PATH} nie został znaleziony.")
    # Stop here: carrying on would either fail with a NameError on `df` a few
    # lines below or silently reuse a stale `df` left over in the kernel.
    raise

print("Dane wczytane pomyślnie.")
print(f"Wymiary danych: {df.shape}")

# Basic cleaning: replace NaN with 0 in the key metric columns.
# Note that after this step "not reported" and "zero" are indistinguishable
# in these columns, which matters for the cumulative ones.
cols_to_fill_zero = [
    "new_cases",
    "new_deaths",
    "new_vaccinations",
    "people_vaccinated",
    "people_fully_vaccinated",
    "total_boosters",
    "hosp_patients",
    "icu_patients",
]
# Only touch the columns that actually exist in the DataFrame.
for col in cols_to_fill_zero:
    if col in df.columns:
        df[col] = df[col].fillna(0)

# Sorted list of unique countries for the widgets. Rows without a continent
# are excluded, and missing country names are dropped so the sort never has
# to compare NaN with strings.
unique_countries = df.loc[df["continent"].notna(), "country"].dropna().unique()
unique_countries.sort()

In [None]:
def prepare_waterfall_data(dataframe, selected_country, metric):
    """
    Build the weekly series behind the waterfall chart.

    Rows for ``selected_country`` are summed per week (pandas ``"W"`` bins)
    for ``metric``; a ``diff`` column holds the change versus the previous
    week. The first week has no predecessor, so its ``diff`` is the weekly
    total itself.

    Returns a DataFrame with the columns ``date``, ``metric`` and ``diff``.
    The input frame is not modified.
    """
    is_selected = dataframe["country"] == selected_country
    by_date = dataframe.loc[is_selected].set_index("date")

    weekly = by_date[[metric]].resample("W").sum()

    week_over_week = weekly[metric].diff()
    # The leading NaN (no previous week) is replaced by that week's total.
    weekly["diff"] = week_over_week.fillna(weekly[metric])

    return weekly.reset_index()

In [None]:
def prepare_pie_data(dataframe, selected_country):
    """
    Compute the vaccination-status breakdown for one country.

    The breakdown is taken at the most recent date on which
    ``people_vaccinated`` was reported as a positive number. The other
    cumulative columns are not necessarily reported on that same day, so their
    last known value is carried forward. Zero (and NaN) in a cumulative column
    is treated as "not reported": these totals cannot drop back to zero, and
    upstream cleaning may already have replaced NaN with 0.

    Parameters:
        dataframe: frame with the columns ``country``, ``date``,
            ``population``, ``people_vaccinated``,
            ``people_fully_vaccinated`` and ``total_boosters``.
        selected_country: value matched against the ``country`` column.

    Returns:
        A tuple ``(series, latest_date)``. ``series`` is a ``pd.Series`` with
        four labelled slices (boosted, fully vaccinated without booster, first
        dose only, unvaccinated); ``latest_date`` is the date of the row used.
        When the country is unknown or has no vaccination data, the result is
        ``(empty float64 Series, None)``.
    """
    cumulative_cols = [
        "people_vaccinated",
        "people_fully_vaccinated",
        "total_boosters",
    ]
    vax_cols = ["date", "population", *cumulative_cols]

    country_df = dataframe.loc[dataframe["country"] == selected_country, vax_cols]
    if country_df.empty:
        return pd.Series(dtype="float64"), None

    # Forward-filling is only meaningful in chronological order.
    country_df = country_df.sort_values("date", kind="stable")

    # Rows on which a first-dose total was actually reported.
    reported = country_df["people_vaccinated"] > 0
    if not reported.any():
        return pd.Series(dtype="float64"), None

    filled = country_df.copy()
    counts = filled[cumulative_cols]
    # Turn "0 / NaN = not reported" into NaN, then carry the last value forward.
    filled[cumulative_cols] = counts.mask(counts <= 0).ffill()
    filled["population"] = filled["population"].ffill()

    latest_data = filled.loc[reported].iloc[-1]
    latest_date = latest_data["date"]

    def _count(value):
        # A value that was never reported contributes nothing to the chart.
        return 0.0 if pd.isna(value) else float(value)

    population = _count(latest_data["population"])
    total_one_dose = _count(latest_data["people_vaccinated"])
    # Clamp each group to the one that contains it, so the slices never
    # overlap even if a later-stage figure exceeds an earlier-stage one.
    total_fully = min(_count(latest_data["people_fully_vaccinated"]), total_one_dose)
    boosted = min(_count(latest_data["total_boosters"]), total_fully)

    fully_no_booster = total_fully - boosted
    one_dose_only = total_one_dose - total_fully
    unvaccinated = max(0.0, population - total_one_dose)

    labels = [
        "Z dawką przypominającą",
        "W pełni zaszczepieni (bez przypominającej)",
        "Tylko pierwsza dawka",
        "Niezaszczepieni",
    ]
    values = [boosted, fully_no_booster, one_dose_only, unvaccinated]

    return pd.Series(values, index=labels, name="Status Szczepień"), latest_date

In [None]:
# Visualization 1: new cases and deaths over time
line_countries_widget = widgets.SelectMultiple(
    options=unique_countries,
    value=["Poland", "Germany", "United States"],
    rows=6,
    description="Kraje:",
)

# The figure starts with a single empty placeholder trace; the update
# callback replaces it with the real per-country traces.
line_chart_fig = go.FigureWidget(
    data=[go.Scatter(name="Placeholder", x=[], y=[])],
    layout=go.Layout(
        title="Nowe przypadki i zgony w czasie",
        hovermode="x unified",
        legend_title_text="Metryka",
    ),
)


def update_line_chart(change):
    """
    Redraw the cases/deaths line chart for the selected countries.

    ``change`` is a widget change event (or any mapping) whose ``"new"`` entry
    holds the selected country names. For every country two filled line traces
    are added: smoothed new cases on the primary y-axis and smoothed new
    deaths on a secondary y-axis drawn on the right. An empty selection just
    clears the chart.

    All modifications happen inside one ``batch_update`` so the widget is
    refreshed once instead of after every added trace, matching the other
    chart callbacks in this notebook.
    """
    selected_countries = change["new"]

    # (source column, legend suffix, line colour, y-axis reference)
    # `None` as the axis reference leaves the trace on the default y-axis.
    series_specs = (
        ("new_cases_smoothed", "Przypadki", "blue", None),
        ("new_deaths_smoothed", "Zgony", "red", "y2"),
    )

    with line_chart_fig.batch_update():
        line_chart_fig.data = []

        if not selected_countries:
            return

        for country in selected_countries:
            country_df = df[df["country"] == country]
            dates = country_df["date"].dt.strftime("%Y-%m-%d")

            for column, label, color, axis in series_specs:
                line_chart_fig.add_trace(
                    go.Scatter(
                        x=dates,
                        y=country_df[column],
                        name=f"{country} ({label})",
                        line=dict(color=color),
                        yaxis=axis,
                        fill="tozeroy",
                        opacity=0.1,
                    )
                )

        line_chart_fig.layout.update(
            xaxis=dict(title="Data", type="date"),
            yaxis=dict(title="Nowe Przypadki (wygładzone)"),
            yaxis2=dict(
                title="Nowe Zgony (wygładzone)",
                overlaying="y",
                side="right",
                showgrid=False,
            ),
            # Extra right margin and a shifted legend keep the legend clear
            # of the secondary axis.
            margin=dict(r=150),
            legend=dict(
                x=1.20,
                xanchor="left",
            ),
        )


# Redraw whenever the country selection changes.
line_countries_widget.observe(update_line_chart, names="value")

print("### Wizualizacja 1: Nowe przypadki i zgony w czasie")
display(VBox(children=[line_countries_widget, line_chart_fig]))

# Initial render: call the callback with the widget's current selection.
update_line_chart(dict(new=line_countries_widget.value))

In [None]:
print("### Wizualizacja 2: Globalna mapa przypadków (per milion)")

# Build the map frame in one chain so the new column is added to a fresh
# DataFrame rather than to a filtered view of `df` (which triggers pandas'
# SettingWithCopyWarning). Rows are ordered by date so the animation frames
# appear chronologically.
map_df = (
    df[df["code"].notna()]
    .sort_values(by="date")
    .assign(date_str=lambda frame: frame["date"].dt.strftime("%Y-%m-%d"))
)

# Use one colour range for every animation frame; otherwise the scale is
# derived from the data initially shown and later frames are not comparable.
map_color_max = map_df["total_cases_per_million"].max()

map_fig = px.choropleth(
    map_df,
    locations="code",
    color="total_cases_per_million",
    hover_name="country",
    animation_frame="date_str",
    color_continuous_scale=px.colors.sequential.Reds,
    range_color=None if pd.isna(map_color_max) else (0, map_color_max),
    projection="natural earth",
    title="Całkowita liczba przypadków na milion mieszkańców",
)

map_fig.update_layout(coloraxis_colorbar=dict(title="Przypadki na milion"))
map_fig.show()

In [None]:
# Visualization 3: vaccination status
pie_country_widget = widgets.Dropdown(
    options=unique_countries,
    value="Poland",
    description="Kraj:",
)

# Start with one empty pie trace; the update callback fills it in place.
pie_chart_fig = go.FigureWidget(
    data=[go.Pie(labels=[], values=[])],
    layout=go.Layout(title="Status Szczepień"),
)


def update_pie_chart(change):
    """
    Refresh the vaccination-status pie for the country in ``change["new"]``.

    The single pie trace is updated in place. When ``prepare_pie_data`` yields
    no data, the trace is emptied and the title reports the missing data.
    """
    country = change["new"]

    shares, as_of = prepare_pie_data(df, country)
    has_data = as_of is not None and not shares.empty

    with pie_chart_fig.batch_update():
        pie_trace = pie_chart_fig.data[0]

        if not has_data:
            pie_trace.labels = []
            pie_trace.values = []
            pie_chart_fig.layout.title = f"Brak danych o szczepieniach dla: {country}"
            return

        pie_trace.labels = shares.index
        pie_trace.values = shares.values
        date_str = as_of.strftime("%Y-%m-%d")
        pie_chart_fig.layout.title = f"Status Szczepień w: {country}<br><sub>Dane z: {date_str}</sub>"


# Redraw whenever another country is picked.
pie_country_widget.observe(update_pie_chart, names="value")

print("\n### Wizualizacja 3: Status Szczepień")
display(VBox(children=[pie_country_widget, pie_chart_fig]))

# Initial render: call the callback with the widget's current value.
update_pie_chart(dict(new=pie_country_widget.value))

In [None]:
# Visualization 4: daily number of vaccinations
bar_countries_widget = widgets.SelectMultiple(
    options=unique_countries,
    value=["Poland", "Germany"],
    description="Kraje:",
    rows=4,
)

# Stacked bar figure seeded with an empty placeholder trace; the update
# callback swaps in one bar trace per selected country.
bar_chart_fig = go.FigureWidget(
    data=[go.Bar(name="Placeholder", x=[], y=[])],
    layout=go.Layout(
        title="Dzienna liczba szczepień",
        barmode="stack",
    ),
)


def update_bar_chart(change):
    """
    Rebuild the vaccination bar chart for the countries in ``change["new"]``.

    One bar trace is added per selected country, using only the rows where
    ``new_vaccinations`` is positive; countries without such rows get no
    trace. With an empty selection the chart is cleared and the layout is left
    untouched. Otherwise the axes are labelled and a pair of buttons for
    switching the y-axis between linear and logarithmic scale is installed.
    """
    chosen = change["new"]

    # Buttons that relayout the y-axis type on click.
    scale_buttons = [
        dict(
            args=[{"yaxis.type": "linear"}],
            label="Skala Liniowa",
            method="relayout",
        ),
        dict(
            args=[{"yaxis.type": "log"}],
            label="Skala Logarytmiczna",
            method="relayout",
        ),
    ]
    scale_menu = dict(
        type="buttons",
        direction="left",
        buttons=scale_buttons,
        pad={"r": 10, "t": 10},
        showactive=True,
        x=0.1,
        y=1.15,
        xanchor="left",
        yanchor="top",
    )

    with bar_chart_fig.batch_update():
        bar_chart_fig.data = []

        if not chosen:
            return

        for country in chosen:
            is_country = df["country"] == country
            has_vaccinations = df["new_vaccinations"] > 0
            vaccination_days = df[is_country & has_vaccinations]

            if vaccination_days.empty:
                continue

            bar_chart_fig.add_trace(
                go.Bar(
                    x=vaccination_days["date"].tolist(),
                    y=vaccination_days["new_vaccinations"].tolist(),
                    name=country,
                )
            )

        x_axis = bar_chart_fig.layout.xaxis
        x_axis.type = "date"
        x_axis.title = "Data"
        bar_chart_fig.layout.yaxis.title = "Liczba szczepień"

        bar_chart_fig.layout.updatemenus = [scale_menu]


# Redraw whenever the country selection changes.
bar_countries_widget.observe(update_bar_chart, names="value")

print("\n### Wizualizacja 4: Dzienna liczba szczepień")
display(VBox(children=[bar_countries_widget, bar_chart_fig]))

# Initial render: call the callback with the widget's current selection.
update_bar_chart(dict(new=bar_countries_widget.value))

In [None]:
# Visualization 5: waterfall chart (weekly changes)
waterfall_country_widget = widgets.Dropdown(
    options=unique_countries,
    value="Poland",
    description="Kraj:",
)
# Each option is a (label shown to the user, column name in `df`) pair.
waterfall_metric_widget = widgets.RadioButtons(
    options=[
        ("Nowe przypadki", "new_cases"),
        ("Nowe zgony", "new_deaths"),
    ],
    value="new_cases",
    description="Metryka:",
)

# Seeded with an empty waterfall trace that the update callback replaces.
waterfall_fig = go.FigureWidget(
    data=[go.Waterfall()],
    layout=go.Layout(title="Tygodniowa zmiana liczby przypadków/zgonów"),
)


def update_waterfall(country, metric):
    """
    Redraw the waterfall chart of week-over-week changes.

    ``country`` selects the rows of ``df`` and ``metric`` the column that
    ``prepare_waterfall_data`` aggregates per week. The first bar is drawn as
    an absolute value and every following bar as a change relative to the
    running total.
    """
    weekly = prepare_waterfall_data(df, country, metric)

    # x = week, y = 'diff'
    week_count = len(weekly)
    if week_count:
        measures = ["absolute"] + ["relative"] * (week_count - 1)
    else:
        measures = []

    week_labels = weekly["date"].dt.strftime("%Y-%m-%d")
    bar_texts = [format(delta, "+.2f") for delta in weekly["diff"]]

    trace = go.Waterfall(
        name=f"Zmiana {metric}",
        orientation="v",
        measure=measures,
        x=week_labels,
        y=weekly["diff"],
        text=bar_texts,
        textposition="outside",
        connector={"line": {"color": "rgb(63, 63, 63)"}},
    )

    with waterfall_fig.batch_update():
        waterfall_fig.data = []  # drop the previous trace
        waterfall_fig.add_trace(trace)

        layout = waterfall_fig.layout
        layout.title = f"Tygodniowa zmiana ({metric}) w: {country}"
        layout.xaxis.title = "Tydzień"
        layout.yaxis.title = "Wartość zmiany"


print("\n### Wizualizacja 5: Wykres Wodospadowy (Tygodniowe Zmiany)")
# Bind update_waterfall to the two controls. The returned `interactive`
# container is never displayed below, only the controls and the figure are.
# NOTE(review): presumably this relies on `interactive` attaching observers to
# the passed widgets so that changes re-run update_waterfall — confirm against
# the ipywidgets documentation. The other charts in this notebook use an
# explicit `.observe(...)` call instead.
interactive_waterfall = interactive(
    update_waterfall, country=waterfall_country_widget, metric=waterfall_metric_widget
)

# Stack the country dropdown above the metric radio buttons.
controls = VBox([waterfall_country_widget, waterfall_metric_widget])

# The FigureWidget itself serves as the output area under the controls.
output = waterfall_fig
display(VBox([controls, output]))

# Initial render with the widgets' current values.
update_waterfall(waterfall_country_widget.value, waterfall_metric_widget.value)

In [None]:
# Visualization 6: scatter plot with a trend line
scatter_countries_widget = widgets.SelectMultiple(
    options=unique_countries,
    value=["Poland", "Italy", "Brazil"],
    rows=4,
    description="Kraje:",
)

# Seeded with an empty markers-only placeholder trace; the update callback
# replaces it with the traces of a Plotly Express figure.
scatter_fig_widget = go.FigureWidget(
    data=[go.Scatter(mode="markers")],
    layout=go.Layout(title="Zależność między nowymi przypadkami a nowymi zgonami"),
)


def update_scatter_chart(change):
    """
    Redraw the cases-vs-deaths scatter plot for the selected countries.

    ``change`` is a widget change event (or any mapping) whose ``"new"`` entry
    holds the selected country names. The plot uses the smoothed series and
    only rows where both values are positive, because both axes are
    logarithmic. A Plotly Express figure with one OLS trend line per country
    is built and its traces and layout are copied into the displayed
    FigureWidget.

    The trend line is fitted on the log-transformed values so that it matches
    the log-log axes; a fit in linear space would be drawn as a curve here and
    could dip to non-positive values that a log axis cannot show.

    The chart is cleared when nothing is selected or no row passes the filter.
    """
    selected_countries = change["new"]
    if not selected_countries:
        scatter_fig_widget.data = []
        return

    # Smoothed data reduces noise; values <= 0 cannot be placed on log axes.
    filtered_df = df[
        (df["country"].isin(selected_countries))
        & (df["new_cases_smoothed"] > 0)
        & (df["new_deaths_smoothed"] > 0)
    ]

    if filtered_df.empty:
        # Nothing to plot or to fit a trend line to.
        scatter_fig_widget.data = []
        return

    scatter_fig = px.scatter(
        filtered_df,
        x="new_cases_smoothed",
        y="new_deaths_smoothed",
        color="country",
        trendline="ols",
        trendline_options=dict(log_x=True, log_y=True),
        title="Zależność między nowymi przypadkami a nowymi zgonami",
    )

    with scatter_fig_widget.batch_update():
        scatter_fig_widget.data = []

        for trace in scatter_fig.data:
            scatter_fig_widget.add_trace(trace)

        scatter_fig_widget.layout.update(scatter_fig.layout)

        # Applied after copying the Express layout so these settings win.
        scatter_fig_widget.layout.xaxis.type = "log"
        scatter_fig_widget.layout.yaxis.type = "log"
        scatter_fig_widget.layout.xaxis.title = "Nowe przypadki (wygładzone, skala log)"
        scatter_fig_widget.layout.yaxis.title = "Nowe zgony (wygładzone, skala log)"

# Redraw whenever the country selection changes.
scatter_countries_widget.observe(update_scatter_chart, names="value")

print("\n### Wizualizacja 6: Wykres Rozrzutu z Linią Trendu")
display(VBox(children=[scatter_countries_widget, scatter_fig_widget]))

# Initial render: call the callback with the widget's current selection.
update_scatter_chart(dict(new=scatter_countries_widget.value))