In [35]:
# Imports

%pip install seaborn
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

Note: you may need to restart the kernel to use updated packages.


In [36]:
df_wetter_dwd = pd.read_csv('additional-data/Klimadaten_DWD_Kiel_Holtenau.csv')

In [37]:
# robust konvertieren: erst str, dann format='%Y%m%d'
df_wetter_dwd['MESS_DATUM'] = pd.to_datetime(
    df_wetter_dwd['MESS_DATUM'].astype(str),
    format='%Y%m%d',
    errors='coerce'
)
# optional: auf Tagesbeginn normalisieren (entfernt Uhrzeiten)
df_wetter_dwd['MESS_DATUM'] = df_wetter_dwd['MESS_DATUM'].dt.normalize()

# Kontrolle
print("NaT nach konvertierung:", df_wetter_dwd['MESS_DATUM'].isna().sum())
print("min/max after:", df_wetter_dwd['MESS_DATUM'].min(), df_wetter_dwd['MESS_DATUM'].max())

NaT nach konvertierung: 0
min/max after: 1974-01-01 00:00:00 2024-12-31 00:00:00


In [45]:
# Inspect
print(df_wetter_dwd.head(8))
print("dtype:", df_wetter_dwd['MESS_DATUM'].dtype)
print("NaT count:", df_wetter_dwd['MESS_DATUM'].isna().sum())
# Länge der Einträge (hilft zu sehen ob z.B. 20180801 als int kommt)
print(df_wetter_dwd['MESS_DATUM'].astype(str).str.len().value_counts().head())
# Falls schon datetime: min/max
try:
    print("min/max:", df_wetter_dwd['MESS_DATUM'].min(), df_wetter_dwd['MESS_DATUM'].max())
except Exception as e:
    print("min/max check failed:", e)


print(df_wetter_dwd.columns)


       STATIONS_ID MESS_DATUM  QN_3     FX     FM  QN_4   RSK  RSKF   SDK  \
16144         2564 2018-08-01    10    7.7    3.0     9   0.0     0   9.8   
16145         2564 2018-08-02    10    7.2    2.3     9   0.0     0  13.8   
16146         2564 2018-08-03    10    7.3    2.4     9   0.0     0  14.1   
16147         2564 2018-08-04    10   10.3    3.5     9   0.0     0   7.9   
16148         2564 2018-08-05    10   11.3    4.5     9   0.0     6   4.4   
16149         2564 2018-08-06    10 -999.0 -999.0     9   0.0     0  13.0   
16150         2564 2018-08-07    10    7.3    2.2     9   0.0     0  14.1   
16151         2564 2018-08-08    10   13.7    3.4     9   0.0     0   6.6   

       SHK_TAG     NM    VPM      PM   TMK    UPM   TXK   TNK   TGK  eor  
16144     -999    2.4   16.5  1018.0  20.8   68.0  25.6  16.0  14.5  eor  
16145     -999    0.6   15.6  1020.0  21.8   63.0  27.9  14.1  12.2  eor  
16146     -999    0.4   16.8  1018.3  22.8   63.0  30.5  14.9  13.2  eor  
16147 

In [47]:
df_wetter_dwd.columns = df_wetter_dwd.columns.str.strip()

df_wetter_dwd = df_wetter_dwd[(df_wetter_dwd['MESS_DATUM'] >= '2018-08-01') & (df_wetter_dwd['MESS_DATUM'] <= '2019-07-30')]

df_wetter_dwd = df_wetter_dwd[['MESS_DATUM','TMK','SDK','RSK']]

print(df_wetter_dwd.head())


      MESS_DATUM   TMK   SDK  RSK
16144 2018-08-01  20.8   9.8  0.0
16145 2018-08-02  21.8  13.8  0.0
16146 2018-08-03  22.8  14.1  0.0
16147 2018-08-04  21.6   7.9  0.0
16148 2018-08-05  18.5   4.4  0.0


In [48]:
# Diagnose & Vorbereitung: Vereinheitliche df_wetter_dwd und berechne Tagesmittel (Temperatur_DWD)
import os

# Falls df_wetter_dwd nicht definiert ist, versuche die Datei zu laden
if 'df_wetter_dwd' not in globals():
    filepath = 'additional-data/Klimadaten_DWD_Kiel_Holtenau.csv'
    if os.path.exists(filepath):
        print(f'Loading {filepath} into df_wetter_dwd')
        df_wetter_dwd = pd.read_csv(filepath)
    else:
        raise RuntimeError(f"df_wetter_dwd is not defined and file '{filepath}' not found")

# Zeige Spalten und Datentypen zur Diagnose
print('Columns in df_wetter_dwd:', list(df_wetter_dwd.columns))
print(df_wetter_dwd.dtypes)
display(df_wetter_dwd.head(5))

# Flexible Erkennung von Datums- und Temperaturspalte
date_candidates = ['MESS_DATUM', 'MESS_DATUM_UTC', 'Datum', 'date']
temp_candidates = ['TMK', 'Temperatur', 'Temperatur_DWD', 't']

date_col = next((c for c in date_candidates if c in df_wetter_dwd.columns), None)
temp_col = next((c for c in temp_candidates if c in df_wetter_dwd.columns), None)
if date_col is None:
    raise KeyError(f'No date column found. Available: {list(df_wetter_dwd.columns)}')
if temp_col is None:
    raise KeyError(f'No temperature column found. Available: {list(df_wetter_dwd.columns)}')

print(f'Using date_col={date_col}, temp_col={temp_col}')

# Wähle nur die relevanten Spalten und benenne für Konsistenz um
df_wetter_dwd = df_wetter_dwd[[date_col, temp_col]].copy()
df_wetter_dwd.rename(columns={date_col: 'Datum', temp_col: 'Temperatur_DWD'}, inplace=True)

# Datum robust konvertieren (MESS_DATUM oft YYYYMMDD als Zahl)
df_wetter_dwd['Datum'] = pd.to_datetime(df_wetter_dwd['Datum'].astype(str), format='%Y%m%d', errors='coerce')
mask_na_dates = df_wetter_dwd['Datum'].isna()
if mask_na_dates.any():
    # Fallback: generische Konvertierung für die verbleibenden
    df_wetter_dwd.loc[mask_na_dates, 'Datum'] = pd.to_datetime(df_wetter_dwd.loc[mask_na_dates, 'Datum'], errors='coerce')

# Normalisiere auf Tagesbeginn für sicheres Mapping
df_wetter_dwd['Datum'] = df_wetter_dwd['Datum'].dt.normalize()

print('NaT in Datum:', df_wetter_dwd['Datum'].isna().sum())
print('NaN in Temperatur_DWD:', df_wetter_dwd['Temperatur_DWD'].isna().sum())
display(df_wetter_dwd.head(10))

# Tagesmittel bilden (falls mehrere Messwerte pro Tag existieren)
temp_by_date = df_wetter_dwd.groupby('Datum')['Temperatur_DWD'].mean()
print('temp_by_date sample:')
print(temp_by_date.head(8))
print('df_wetter_dwd prepared.')

Columns in df_wetter_dwd: ['MESS_DATUM', 'TMK', 'SDK', 'RSK']
MESS_DATUM    datetime64[ns]
TMK                  float64
SDK                  float64
RSK                  float64
dtype: object


Unnamed: 0,MESS_DATUM,TMK,SDK,RSK
16144,2018-08-01,20.8,9.8,0.0
16145,2018-08-02,21.8,13.8,0.0
16146,2018-08-03,22.8,14.1,0.0
16147,2018-08-04,21.6,7.9,0.0
16148,2018-08-05,18.5,4.4,0.0


Using date_col=MESS_DATUM, temp_col=TMK
NaT in Datum: 364
NaN in Temperatur_DWD: 0


Unnamed: 0,Datum,Temperatur_DWD
16144,NaT,20.8
16145,NaT,21.8
16146,NaT,22.8
16147,NaT,21.6
16148,NaT,18.5
16149,NaT,20.0
16150,NaT,26.0
16151,NaT,24.6
16152,NaT,21.0
16153,NaT,17.2


temp_by_date sample:
Series([], Name: Temperatur_DWD, dtype: float64)
df_wetter_dwd prepared.
