In [582]:
from tqdm.notebook import tqdm
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import os
import json
import re
import numpy as np
import ast

In [583]:
# Dresden:  Station ID = 1048, Stations-Kennziffer 10488 (Klotzsche)
# Freiburg: Station ID = 1443, Stations-Kennziffer 10803 (unconfirmed - ?)

# 10-minute data: https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/

In [584]:
# Columns used below:
# TT_10  - temperature
# RF_10  - relative humidity
# RWS_10 - sum of the precipitation height over the previous 10 minutes
# FF_10  - mean wind speed during the previous 10 minutes



# Dresden

In [585]:
# Read the semicolon-separated Dresden temperature file into a DataFrame.
df_air_DD = pd.read_csv("../data/weather/Dresden/Dresden temperature.txt", sep=";")

In [None]:
# Preview the first rows of the raw temperature frame.
df_air_DD.head()

In [None]:
# Summary statistics of the numeric columns.
# NOTE(review): a minimum of -999 here would point at the missing-value
# sentinel that a later cell searches for - confirm against the data.
df_air_DD.describe()

In [588]:
# Read the semicolon-separated Dresden precipitation file into a DataFrame.
df_precip_DD = pd.read_csv("../data/weather/Dresden/Dresden precipitation.txt", sep=";")

In [None]:
# Preview the first rows of the raw precipitation frame.
df_precip_DD.head()

In [None]:
# Summary statistics of the numeric precipitation columns.
df_precip_DD.describe()

In [591]:
# Read the semicolon-separated Dresden wind file into a DataFrame.
df_wind_DD = pd.read_csv("../data/weather/Dresden/Dresden wind.txt", sep=";")

In [None]:
# Preview the first rows of the raw wind frame.
df_wind_DD.head()

In [None]:
# Normalise the three raw Dresden frames in place:
#   * strip whitespace padding from the column names,
#   * parse MESS_DATUM (YYYYmmddHHMM) into datetimes,
#   * drop the QN / eor bookkeeping columns when present,
#   * turn the -999 missing-value sentinel into NaN.
# The row count of each frame is printed as a quick sanity check.
for df in [df_air_DD, df_precip_DD, df_wind_DD]:
    df.columns = [col.strip() for col in df.columns]
    df["MESS_DATUM"] = pd.to_datetime(df["MESS_DATUM"], format="%Y%m%d%H%M")
    df.drop(columns=["QN", "eor"], inplace=True, errors="ignore")
    # Without this, the later 30-minute mean() would average the -999
    # sentinel in as if it were a real measurement; NaN is skipped by mean().
    df.replace(-999, np.nan, inplace=True)
    print(len(df))

In [None]:
# Display the Dresden temperature frame to inspect the result of the cleaning cell above.
df_air_DD

In [595]:
# Join temperature and precipitation rows that share timestamp and station id
# (merge defaults to an inner join, so unmatched rows are dropped).
df_tmp = df_air_DD.merge(df_precip_DD, on=["MESS_DATUM", "STATIONS_ID"])

In [596]:
# Add the wind columns to the temperature/precipitation join, again matching
# on timestamp and station id.
df_DD = df_tmp.merge(df_wind_DD, on=["MESS_DATUM", "STATIONS_ID"])

In [None]:
len(df_DD)

In [598]:
assert len(df_DD.STATIONS_ID.unique().tolist())==1

In [None]:
df_DD["MESS_DATUM"].min()

In [None]:
df_DD["MESS_DATUM"].max()

In [None]:
pd.to_datetime("2024-01-01")

In [None]:
# Boolean row mask for the two Dresden observation windows; each window
# includes its start date and excludes its end date.
DD_window_a = (df_DD["MESS_DATUM"] >= "2024-01-01") & (df_DD["MESS_DATUM"] < "2024-04-01")
DD_window_b = (df_DD["MESS_DATUM"] >= "2024-09-01") & (df_DD["MESS_DATUM"] < "2024-11-01")
DD_dt_flt = DD_window_a | DD_window_b
# Show how many rows the mask keeps.
len(df_DD[DD_dt_flt])

In [None]:
# Row count before filtering, for comparison with the masked count above.
len(df_DD)

In [604]:
# Keep only the rows selected by the date mask.
# Note: this rebinds df_DD, so the unfiltered frame is no longer reachable by name.
df_DD = df_DD[DD_dt_flt]

In [None]:
# Display the filtered frame.
df_DD

In [606]:
# Keep the timestamp plus the four measurement columns; this also drops STATIONS_ID.
df_DD = df_DD[["MESS_DATUM", "TT_10", "RF_10", "RWS_10", "FF_10"]]

In [607]:
# Disabled: optional column tagging every row with the city name.
#df_DD['city']="Dresden"

In [None]:
# Preview the reduced Dresden frame.
df_DD.head()

# Freiburg

In [609]:
# Read the semicolon-separated "recent" Freiburg temperature file.
df_air_recent_FB = pd.read_csv("../data/weather/Freiburg/Freiburg recent temperature.txt", sep=";")

In [None]:
# Preview the first rows of the recent temperature data.
df_air_recent_FB.head()

In [611]:
# Read the semicolon-separated "historical" Freiburg air temperature file.
df_air_hist_FB = pd.read_csv("../data/weather/Freiburg/Freiburg historical air temperature.txt", sep=";")

In [612]:
# Stack recent rows on top of historical rows (original row indices are kept).
df_air_FB = pd.concat([df_air_recent_FB, df_air_hist_FB])

In [613]:
# Remove rows that are identical in every column.
# NOTE(review): this compares all columns, including QN/eor, which are only
# dropped later in the notebook; rows for the same timestamp that differ only
# in those columns would not be removed here.
df_air_FB = df_air_FB.drop_duplicates()

In [None]:
# Preview the combined, de-duplicated temperature frame.
df_air_FB.head()

In [615]:
# Read the semicolon-separated "recent" Freiburg precipitation file.
df_precip_FB_recent = pd.read_csv("../data/weather/Freiburg/Freiburg recent precipitation.txt", sep=";")

In [616]:
# Read the semicolon-separated "historical" Freiburg precipitation file.
df_precip_FB_hist = pd.read_csv("../data/weather/Freiburg/Freiburg historical precipitation.txt", sep=";")

In [617]:
# Stack recent rows on top of historical rows (original row indices are kept).
df_precip_FB = pd.concat([df_precip_FB_recent, df_precip_FB_hist])

In [None]:
# Row count before de-duplication.
len(df_precip_FB)

In [619]:
# Remove rows that are identical in every column.
# NOTE(review): QN/eor are still present at this point, so rows that differ
# only in those columns are kept.
df_precip_FB = df_precip_FB.drop_duplicates()

In [620]:
# Read the semicolon-separated "recent" Freiburg wind file.
df_wind_FB_recent = pd.read_csv("../data/weather/Freiburg/Freiburg recent wind.txt", sep=";")

In [621]:
# Read the semicolon-separated "historical" Freiburg wind file.
df_wind_FB_hist = pd.read_csv("../data/weather/Freiburg/Freiburg historical wind.txt", sep=";")

In [622]:
# Stack recent rows on top of historical rows (original row indices are kept).
df_wind_FB = pd.concat([df_wind_FB_recent, df_wind_FB_hist])

In [623]:
# Remove rows that are identical in every column.
# NOTE(review): QN/eor are still present at this point, so rows that differ
# only in those columns are kept.
df_wind_FB = df_wind_FB.drop_duplicates()

In [None]:
# Normalise the three combined Freiburg frames in place:
#   * strip whitespace padding from the column names,
#   * parse MESS_DATUM (YYYYmmddHHMM) into datetimes,
#   * drop the QN / eor bookkeeping columns when present,
#   * turn the -999 missing-value sentinel into NaN,
#   * keep a single row per (station, timestamp).
# The row count of each frame is printed as a quick sanity check.
for df in [df_air_FB, df_precip_FB, df_wind_FB]:
    df.columns = [col.strip() for col in df.columns]
    df["MESS_DATUM"] = pd.to_datetime(df["MESS_DATUM"], format="%Y%m%d%H%M")
    df.drop(columns=["QN", "eor"], inplace=True, errors="ignore")
    # Without this, the later 30-minute mean() would average the -999
    # sentinel in as if it were a real measurement; NaN is skipped by mean().
    df.replace(-999, np.nan, inplace=True)
    # The earlier whole-row drop_duplicates() ran while QN/eor were still
    # present, so overlapping recent/historical rows could survive and would
    # multiply in the merges below. keep="last" retains the row from the file
    # concatenated second (the historical one).
    # NOTE(review): confirm that the historical file is the preferred source.
    df.drop_duplicates(subset=["STATIONS_ID", "MESS_DATUM"], keep="last", inplace=True)
    print(len(df))

In [625]:
# Join temperature and precipitation rows that share timestamp and station id
# (merge defaults to an inner join, so unmatched rows are dropped).
df_tmp = df_air_FB.merge(df_precip_FB, on=["MESS_DATUM", "STATIONS_ID"])

In [626]:
# Add the wind columns to the temperature/precipitation join, again matching
# on timestamp and station id.
df_FB = df_tmp.merge(df_wind_FB, on=["MESS_DATUM", "STATIONS_ID"])

In [None]:
df_FB.head()

In [628]:
assert len(df_FB.STATIONS_ID.unique().tolist())==1

In [None]:
df_FB["MESS_DATUM"].min()

In [None]:
df_FB["MESS_DATUM"].max()

In [None]:
# Boolean row mask for the two Freiburg observation windows; each window
# includes its start date and excludes its end date.
FB_window_a = (df_FB["MESS_DATUM"] >= "2023-06-01") & (df_FB["MESS_DATUM"] < "2023-08-01")
FB_window_b = (df_FB["MESS_DATUM"] >= "2024-09-01") & (df_FB["MESS_DATUM"] < "2024-11-01")
FB_dt_flt = FB_window_a | FB_window_b
# Show how many rows the mask keeps.
len(df_FB[FB_dt_flt])

In [None]:
# Row count before filtering, for comparison with the masked count above.
len(df_FB)

In [633]:
# Keep only the rows selected by the date mask.
# Note: this rebinds df_FB, so the unfiltered frame is no longer reachable by name.
df_FB = df_FB[FB_dt_flt]

In [None]:
# Display the filtered frame.
df_FB

In [635]:
# Keep the timestamp plus the four measurement columns; this also drops STATIONS_ID.
df_FB = df_FB[["MESS_DATUM", "TT_10", "RF_10", "RWS_10", "FF_10"]]

In [636]:
# df_FB['city']="Freiburg"

# Unite: give both cities the same column names and resample them

In [None]:
# Preview the reduced Freiburg frame ...
df_FB.head()

In [None]:
# ... and the reduced Dresden frame, to compare their columns before renaming.
df_DD.head()

In [639]:
# Mapping from the raw column codes to the readable names used from here on.
col_renaming = {
    "MESS_DATUM": "Date",
    "TT_10": "Temperature",
    "RF_10": "Humidity",
    "RWS_10": "Precipitation",
    "FF_10": "Wind",
}

In [None]:
# Rename the raw column codes to readable names.
# Rebinding instead of inplace=True: df_DD was produced by slicing another
# frame, and an in-place rename on such a slice can raise SettingWithCopyWarning.
df_DD = df_DD.rename(columns=col_renaming)

In [None]:
# Rename the raw column codes to readable names.
# Rebinding instead of inplace=True: df_FB was produced by slicing another
# frame, and an in-place rename on such a slice can raise SettingWithCopyWarning.
df_FB = df_FB.rename(columns=col_renaming)

In [642]:
# Use the timestamp column as the index; the resampling further down needs a datetime index.
df_DD = df_DD.set_index("Date")

In [643]:
# Use the timestamp column as the index; the resampling further down needs a datetime index.
df_FB = df_FB.set_index("Date")

In [None]:
# Number of 10-minute rows in the Dresden frame.
len(df_DD)

In [None]:
# Scratch arithmetic with hard-coded counts.
# NOTE(review): presumably the raw row count divided by a resampled row
# count - confirm, or replace the literals with len(...) expressions.
21888/14640

In [646]:
# Average the Dresden measurements into 30-minute bins.
# NOTE(review): mean() treats every numeric value as a measurement; any -999
# missing-value sentinel still present in df_DD would skew the bin averages,
# so it must have been converted to NaN upstream.
grouped = df_DD.resample("30min").mean()

In [647]:
# Same 30-minute averaging for Freiburg; the sentinel caveat applies here too.
grouped_FB = df_FB.resample("30min").mean()

In [None]:
# Rebuild the Freiburg window mask on the resampled index. Resampling emits
# bins for the whole span between the first and the last timestamp, so bins
# that fall between the two observation windows have to be filtered out again.
FB_bins = grouped_FB.index
FB_dt_flt = (
    ((FB_bins >= "2023-06-01") & (FB_bins < "2023-08-01"))
    | ((FB_bins >= "2024-09-01") & (FB_bins < "2024-11-01"))
)
# Show how many 30-minute bins the mask keeps.
len(grouped_FB[FB_dt_flt])

In [649]:
# Keep only the 30-minute bins inside the two Freiburg windows.
grouped_FB = grouped_FB[FB_dt_flt]

In [None]:
# Summary statistics of the resampled Freiburg data.
grouped_FB.describe()

In [None]:
# Look for bins whose averaged precipitation equals the -999 sentinel.
# NOTE(review): a bin equals -999 only if every reading in it was -999; bins
# mixing sentinel and real readings average to some other negative value and
# are not caught by this check.
grouped_FB[grouped_FB.Precipitation == -999]

In [652]:
# Three 10-minute readings feed each 30-minute bin, so the raw frame must hold
# exactly three times as many rows as the resampled one. Integer comparison
# avoids float division and a ZeroDivisionError on an empty frame; the message
# reports both counts when the check fails.
assert len(grouped_FB) > 0 and len(df_FB) == 3 * len(grouped_FB), (
    f"expected 3 raw rows per 30-min bin, got {len(df_FB)} rows for {len(grouped_FB)} bins"
)

In [None]:
# Last rows of the filtered, resampled Freiburg data.
grouped_FB.tail()

In [None]:
# Number of 30-minute bins produced for Dresden before the window filter is re-applied.
len(df_DD.resample("30min").mean())

In [None]:
# Scratch arithmetic with a hard-coded count.
# NOTE(review): presumably the raw Dresden row count divided by the three
# readings per 30-minute bin - confirm, or use len(df_DD) / 3.
21888/3

In [None]:
# Preview the 10-minute Dresden data.
df_DD.head()

In [None]:
# Display the unfiltered 30-minute Dresden means (recomputed here, not stored).
df_DD.resample("30min").mean()

In [658]:
# Rebuild the Dresden window mask on the resampled index and apply it.
# Resampling emits bins for the whole span between the first and the last
# timestamp, so bins between the two observation windows must be removed.
# Each window is parenthesised explicitly rather than relying on `&` binding
# tighter than `|`; the former mid-cell len(...) expression was never
# displayed and has been dropped.
DD_dt_flt = (
    ((grouped.index >= "2024-01-01") & (grouped.index < "2024-04-01"))
    | ((grouped.index >= "2024-09-01") & (grouped.index < "2024-11-01"))
)
grouped = grouped[DD_dt_flt]

In [659]:
# Three 10-minute readings feed each 30-minute bin, so the raw frame must hold
# exactly three times as many rows as the resampled one. Integer comparison
# avoids float division and a ZeroDivisionError on an empty frame; the message
# reports both counts when the check fails.
assert len(grouped) > 0 and len(df_DD) == 3 * len(grouped), (
    f"expected 3 raw rows per 30-min bin, got {len(df_DD)} rows for {len(grouped)} bins"
)

In [79]:
# Display the 10-minute Dresden frame.
# NOTE(review): this cell's execution count (79) is out of sequence with the
# surrounding cells, so the rendered table below may come from an earlier run.
df_DD

Unnamed: 0_level_0,Temperature,Humidity,Precipitation,Wind
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-01-01 00:00:00,5.3,77.6,0.0,3.5
2024-01-01 00:10:00,5.3,77.8,0.0,3.8
2024-01-01 00:20:00,5.2,78.4,0.0,3.6
2024-01-01 00:30:00,5.2,78.3,0.0,3.6
2024-01-01 00:40:00,5.7,75.7,0.0,3.1
...,...,...,...,...
2024-10-31 23:10:00,8.3,91.9,0.0,3.5
2024-10-31 23:20:00,8.0,92.3,0.0,3.5
2024-10-31 23:30:00,8.0,92.7,0.0,3.5
2024-10-31 23:40:00,7.8,92.9,0.0,3.6


In [660]:
# Timestamp (second resolution) appended to the file names so that each run
# writes new files instead of overwriting earlier exports.
# NOTE(review): the name `time` would shadow the stdlib module of the same
# name if that module were ever imported in this notebook.
time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
# Dresden 30-minute means, as pickle and as CSV.
grouped.to_pickle(f"../data/weather/df_Dresden_weather {time}.pkl")
grouped.to_csv(f"../data/weather/df_Dresden_weather {time}.csv")

# Freiburg 30-minute means, as pickle and as CSV.
grouped_FB.to_pickle(f"../data/weather/df_Freiburg_weather {time}.pkl")
grouped_FB.to_csv(f"../data/weather/df_Freiburg_weather {time}.csv")

