# Packages

In [1]:
import pandas as pd
from tqdm.notebook import tqdm
from functools import reduce

# Functions

In [2]:
def flatten(A):
    """Return a flat list holding the leaves of an arbitrarily nested list.

    Only elements that are ``list`` instances are descended into; every other
    element (tuples, strings, scalars, ...) is kept as a single item. The
    left-to-right order of the leaves is preserved.
    """
    flat = []
    for item in A:
        if not isinstance(item, list):
            flat.append(item)
            continue
        # Nested list: splice in its flattened contents.
        flat += flatten(item)
    return flat

In [3]:
def uniquize(seq):
    """Return the items of ``seq`` as a list with duplicates removed.

    The first occurrence of each item is kept and the original order is
    preserved. Items must be hashable.
    """
    # dict keys are unique and remember insertion order.
    return list(dict.fromkeys(seq))

# Data Reading

In [10]:
# River discharge

# Whitespace-separated text file; the first line is skipped and explicit
# column names are supplied instead. `sep=r'\s+'` is the documented
# replacement for the deprecated `delim_whitespace=True`.
gerinneabfluss_raw = pd.read_csv(
    'gerinneabfluss.txt',
    sep=r'\s+',
    skiprows=1,
    header=None,
    names=['YY', 'MM', 'DD', 'HH', '11502', '10304', '20203'],
)

# Keep only the date and column 11502 (Iller/Kempten).
gerinneabfluss = pd.DataFrame({
    # Assemble the date from its year/month/day component columns;
    # rows that do not form a valid calendar date become NaT.
    'Date': pd.to_datetime(
        gerinneabfluss_raw[['YY', 'MM', 'DD']].rename(columns={'YY': 'year', 'MM': 'month', 'DD': 'day'}),
        errors='coerce',
    ),
    '11502': gerinneabfluss_raw['11502'],
})

In [11]:
# Radiation

# Whitespace-separated text file; the first line is skipped and explicit
# column names are supplied instead. `sep=r'\s+'` is the documented
# replacement for the deprecated `delim_whitespace=True`.
glorad_raw = pd.read_csv(
    'glorad.txt',
    sep=r'\s+',
    skiprows=1,
    header=None,
    names=['YY', 'MM', 'DD', 'HH', '11502', '10304', '20203'],
)

# Keep only the date and column 11502 (Iller/Kempten).
glorad = pd.DataFrame({
    # Assemble the date from its year/month/day component columns;
    # rows that do not form a valid calendar date become NaT.
    'Date': pd.to_datetime(
        glorad_raw[['YY', 'MM', 'DD']].rename(columns={'YY': 'year', 'MM': 'month', 'DD': 'day'}),
        errors='coerce',
    ),
    '11502': glorad_raw['11502'],
})

In [12]:
# Air temperature

# Whitespace-separated text file; the first line is skipped and explicit
# column names are supplied instead. `sep=r'\s+'` is the documented
# replacement for the deprecated `delim_whitespace=True`.
airtmp_raw = pd.read_csv(
    'airtmp.txt',
    sep=r'\s+',
    skiprows=1,
    header=None,
    names=['YY', 'MM', 'DD', 'HH', '11502', '10304', '20203'],
)

# Keep only the date and column 11502 (Iller/Kempten).
airtmp = pd.DataFrame({
    # Assemble the date from its year/month/day component columns;
    # rows that do not form a valid calendar date become NaT.
    'Date': pd.to_datetime(
        airtmp_raw[['YY', 'MM', 'DD']].rename(columns={'YY': 'year', 'MM': 'month', 'DD': 'day'}),
        errors='coerce',
    ),
    '11502': airtmp_raw['11502'],
})

In [13]:
# Relative humidity

# Whitespace-separated text file; the first line is skipped and explicit
# column names are supplied instead. `sep=r'\s+'` is the documented
# replacement for the deprecated `delim_whitespace=True`.
relhum_raw = pd.read_csv(
    'relhum.txt',
    sep=r'\s+',
    skiprows=1,
    header=None,
    names=['YY', 'MM', 'DD', 'HH', '11502', '10304', '20203'],
)

# Keep only the date and column 11502 (Iller/Kempten).
relhum = pd.DataFrame({
    # Assemble the date from its year/month/day component columns;
    # rows that do not form a valid calendar date become NaT.
    'Date': pd.to_datetime(
        relhum_raw[['YY', 'MM', 'DD']].rename(columns={'YY': 'year', 'MM': 'month', 'DD': 'day'}),
        errors='coerce',
    ),
    '11502': relhum_raw['11502'],
})

In [14]:
# Precipitation

# Whitespace-separated text file; the first line is skipped and explicit
# column names are supplied instead. `sep=r'\s+'` is the documented
# replacement for the deprecated `delim_whitespace=True`.
precip_raw = pd.read_csv(
    'precip.txt',
    sep=r'\s+',
    skiprows=1,
    header=None,
    names=['YY', 'MM', 'DD', 'HH', '11502', '10304', '20203'],
)

# Keep only the date and column 11502 (Iller/Kempten).
precip = pd.DataFrame({
    # Assemble the date from its year/month/day component columns;
    # rows that do not form a valid calendar date become NaT.
    'Date': pd.to_datetime(
        precip_raw[['YY', 'MM', 'DD']].rename(columns={'YY': 'year', 'MM': 'month', 'DD': 'day'}),
        errors='coerce',
    ),
    '11502': precip_raw['11502'],
})

# Data Preprocessing

## Aggregate to Monthly Means & Pin Each Month to Its 15th Day

In [15]:
datasets = {'precip': precip, 'relhum': relhum, 'airtmp': airtmp, 'glorad': glorad, 'gerinneabfluss': gerinneabfluss}

# Final column label for the '11502' value column of each variable
column_labels = {
    'precip': 'Precipitation (mm)',
    'relhum': 'Relative Humidity (x100%)',
    'airtmp': 'Air Temperature',
    'glorad': 'Radiation (W/m²)',
    'gerinneabfluss': 'River Discharge (m³/s)',
}

monthly_results = {}

for var_name, df in datasets.items():
    # Mean of the value column(s) per calendar month, rounded to 3 decimals.
    # 'Date' is used only as the grouping key and is removed from the
    # aggregated columns up front, so the result does not depend on how
    # GroupBy.mean treats datetime columns.
    monthly_df = (
        df.drop(columns=['Date'])
        .groupby(df['Date'].dt.to_period('M'))
        .mean()
        .round(3)
        .reset_index()
    )

    # Replace each monthly period by the 15th day of that month (stored as datetime.date)
    monthly_df['Date'] = pd.to_datetime(monthly_df['Date'].dt.strftime('%Y-%m') + '-15').dt.date

    monthly_df = monthly_df.rename(columns={'11502': column_labels[var_name]})

    # Set the result in the dictionary
    monthly_results[var_name + '_monthly'] = monthly_df

precip_monthly = monthly_results['precip_monthly']
relhum_monthly = monthly_results['relhum_monthly']
airtmp_monthly = monthly_results['airtmp_monthly']
glorad_monthly = monthly_results['glorad_monthly']
gerinneabfluss_monthly = monthly_results['gerinneabfluss_monthly']

# Order of this list determines the column order of the merged result
dataframes_monthly = [airtmp_monthly, relhum_monthly, precip_monthly, glorad_monthly, gerinneabfluss_monthly]

# Merge & Store Final Data

In [18]:
# Fold the monthly frames into one table by successively merging on 'Date'
# (default inner join, so only dates present in every frame are kept).
result_df = reduce(
    lambda merged, frame: merged.merge(frame, on='Date'),
    dataframes_monthly,
)
# Render the dates as day/month/year strings for the export
result_df['Date'] = pd.to_datetime(result_df['Date']).dt.strftime('%d/%m/%Y')

In [20]:
# Write the merged monthly table to disk without the row index
result_df.to_csv(
    'MeteorologicalDataFull.csv',
    index=False,
    encoding='utf-8',
)