In [None]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

# Display settings: never truncate columns or rows when a DataFrame is rendered.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Rendering settings for Matplotlib: path chunk size used by the Agg backend
# and the resolution of every figure.
mpl.rcParams.update({
    'agg.path.chunksize': 10000,
    "figure.dpi": 200,
})

# Default figure size (width, height) in inches for the single-column plots:
# Y is the height and X the width-to-height ratio, so figures are X*Y by Y.
X, Y = 4, 2
fig_size = (X * Y, Y)

# Make Matplotlib's automatic date tick formatting show time
mpl.rcParams.update({
    'date.autoformatter.hour': '%H:%M:%S',
    'date.autoformatter.minute': '%H:%M:%S',
    'date.autoformatter.second': '%H:%M:%S',
    'date.autoformatter.microsecond': '%H:%M:%S.%f',  # optional
})

In [None]:
# Load the parquet file, order the rows chronologically and index them by
# '@timestamp'. drop=False keeps the timestamp available as a regular column
# too, which the date-window filter further down reads.
df = (
    pd.read_parquet('data/dwd-wetterdaten-Kiel-2023-06-01T12-2025-07-01T12-normalized.parquet.gzip')
    .sort_values(by=['@timestamp'])
    .set_index('@timestamp', drop=False)
)
df.info(verbose=True, show_counts=True)

In [None]:
#df = df[['station_name', 'daily_mean_of_temperature_previous_day.value', 'minimum_of_temperature_for_previous_day.value', 'maximum_of_temperature_for_previous_day.value', 'maximum_wind_speed_last_hour.value', 'precipitation_amount_last_hour.value', 'relative_humidity.value', 'temperature_at_5_cm_above_ground.value']]

In [None]:
#df.query('station_name == "Kiel-Holtenau"').info(verbose=True, show_counts=True)
# Keep only the rows reported for the "Kiel-Holtenau" station.
df = df.query('station_name == "Kiel-Holtenau"')#.describe(include='all')

In [None]:
#df.query('station_name == "Leuchtturm Kiel"').info(verbose=True, show_counts=True)
#df.query('station_name == "Leuchtturm Kiel"').describe(include='all')

In [None]:
# 2024-11-07
# 2025-05-06

#start_date = '2024-11-07 00:00:00'
#end_date = '2024-11-08 00:00:00'

start_date = '2025-05-06 00:00:00'
end_date = '2025-05-07 00:00:00'

# Window is open on the left and closed on the right: a row stamped exactly at
# start_date is excluded, a row stamped exactly at end_date is kept.
mask = df['@timestamp'].between(start_date, end_date, inclusive='right')
df = df[mask]
df.info(verbose=True, show_counts=True)

In [None]:
# Preview the first rows of the frame as it stands after the filters above.
df.head()

In [None]:
# Line plot of the column against the frame's index. The title names the
# series so the figure can be told apart from the other single-column plots,
# and the trailing ';' keeps the Axes repr out of the cell output.
series = df['maximum_wind_speed_as_10_minutes_mean_during_last_hour.value']
series.plot(figsize=fig_size, title=series.name);

In [None]:
# Line plot of the column against the frame's index. The title names the
# series so the figure can be told apart from the other single-column plots,
# and the trailing ';' keeps the Axes repr out of the cell output.
series = df['daily_mean_of_temperature_previous_day.value']
series.plot(figsize=fig_size, title=series.name);

In [None]:
# Line plot of the column against the frame's index. The title names the
# series so the figure can be told apart from the other single-column plots,
# and the trailing ';' keeps the Axes repr out of the cell output.
series = df['minimum_of_temperature_for_previous_day.value']
series.plot(figsize=fig_size, title=series.name);

In [None]:
# Line plot of the column against the frame's index. The title names the
# series so the figure can be told apart from the other single-column plots,
# and the trailing ';' keeps the Axes repr out of the cell output.
series = df['maximum_of_temperature_for_previous_day.value']
series.plot(figsize=fig_size, title=series.name);

In [None]:
# Line plot of the column against the frame's index. The title names the
# series so the figure can be told apart from the other single-column plots,
# and the trailing ';' keeps the Axes repr out of the cell output.
series = df['maximum_wind_speed_last_hour.value']
series.plot(figsize=fig_size, title=series.name);

In [None]:
# Line plot of the column against the frame's index. The title names the
# series so the figure can be told apart from the other single-column plots,
# and the trailing ';' keeps the Axes repr out of the cell output.
series = df['precipitation_amount_last_hour.value']
series.plot(figsize=fig_size, title=series.name);

In [None]:
# Line plot of the column against the frame's index. The title names the
# series so the figure can be told apart from the other single-column plots,
# and the trailing ';' keeps the Axes repr out of the cell output.
series = df['relative_humidity.value']
series.plot(figsize=fig_size, title=series.name);

In [None]:
# Line plot of the column against the frame's index. The title names the
# series so the figure can be told apart from the other single-column plots,
# and the trailing ';' keeps the Axes repr out of the cell output.
series = df['temperature_at_5_cm_above_ground.value']
series.plot(figsize=fig_size, title=series.name);

In [None]:
import re
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt


# --- 1) Ensure datetime index ---
# A non-datetime index is parsed with errors="coerce", so labels that cannot
# be parsed become NaT; in either case the frame ends up sorted by its index.
df = (
    df
    if isinstance(df.index, pd.DatetimeIndex)
    else df.set_index(pd.to_datetime(df.index, errors="coerce"))
).sort_index()


# --- 2) Choose which numeric columns to plot ---
# Option A: plot *all* numeric columns
num_cols = df.select_dtypes("number").columns

# Option B (often nicer for your schema): plot only the ".value" numeric columns
# num_cols = [c for c in df.select_dtypes(include="number").columns if c.endswith(".value")]


def unit_for_value_col(frame: pd.DataFrame, value_col: str) -> str | None:
    """
    Try to find the matching unit column for a metric "<metric>.value" -> "<metric>.unit".
    Returns unit string (e.g., "Â°C") or None if not found/empty.
    """
    unit_col = value_col.replace(".value", ".unit") if value_col.endswith(".value") else None
    if unit_col and unit_col in frame.columns:
        u = frame[unit_col].dropna()
        if len(u) > 0:
            # unit column is object dtype -> string is fine
            return str(u.iloc[0]).strip() or None
    return None


# --- 3) Plot each numeric column, only if it has non-zero length after dropping NaNs ---
for col in num_cols:
    s = df[col].dropna()
    if s.empty:  # nothing left to draw once the NaNs are removed
        continue

    # Append the unit to the y-axis label when a matching unit column provides one.
    unit = unit_for_value_col(df, col)
    ylabel = f"{col} [{unit}]" if unit else col

    # Reuse the notebook-wide figure size rather than a second hard-coded tuple.
    fig, ax = plt.subplots(figsize=fig_size)
    s.plot(ax=ax)

    ax.set_title(col)
    ax.set_xlabel("Time")
    ax.set_ylabel(ylabel)
    ax.grid(True, alpha=0.3)
    fig.autofmt_xdate()
    fig.tight_layout()  # lay out this figure explicitly, not the implicit current one
    plt.show()
    # One figure is created per column; release it so they do not accumulate.
    plt.close(fig)
