# Generate a synthetic hourly temperature data set from daily minimum/maximum data (CHMU)


In [1]:
import datetime
import math
import numpy as np
import pandas as pd

In [2]:
station = "Turany"
start_year = 2016
end_year = 2021

In [3]:
def get_factor(x: float) -> float:
    """
    Map a position ``x`` in [0, 1] to a weight in [0, 1] along a smooth half-cosine ramp.

    The ramp rises monotonically with 0 -> 0, 0.5 -> 0.5 and 1 -> 1 (up to floating-point
    rounding) and is flat at both ends.
    """
    # cos((1 - x) * pi) sweeps from -1 at x = 0 to +1 at x = 1; shift and halve it so the
    # result lands in [0, 1].
    cosine = math.cos((1 - x) * math.pi)
    return (cosine + 1) / 2

def read_temperature_csv(filename, metric, first_year=None, last_year=None):
    """
    Load one daily temperature series from a semicolon-delimited CSV with decimal commas.

    The file must provide ``Year``, ``Month``, ``Day`` and ``Value`` columns.

    filename: path or file-like object accepted by ``pandas.read_csv``.
    metric: name given to the value column in the result (e.g. "min").
    first_year, last_year: inclusive year bounds; when omitted, the module-level
        ``start_year`` / ``end_year`` settings are used.

    Returns a DataFrame indexed by date with a single column named ``metric``, restricted
    to the requested years.
    """
    if first_year is None:
        first_year = start_year
    if last_year is None:
        last_year = end_year

    df = pd.read_csv(filename, delimiter=";", decimal=",")
    df['date'] = pd.to_datetime({"year": df["Year"], "month": df["Month"], "day": df["Day"]})
    df = df.set_index('date')
    # The year mask has to come from df's own index: the filtered frame does not exist yet
    # at this point, so it cannot be used to build its own selection.
    years = df.index.year
    val = df.loc[(first_year <= years) & (years <= last_year), ['Value']]
    return val.rename(columns={'Value': metric})


# Load the daily minima and maxima and keep only the dates present in both files.
# (The frames are deliberately not called `min` / `max`, which would shadow the builtins.)
daily_min = read_temperature_csv(station + "-min.csv", "min")
daily_max = read_temperature_csv(station + "-max.csv", "max")
df = daily_min.join(daily_max, how='inner')
# next_min is the following row's minimum, last_max the preceding row's maximum. Where that
# neighbouring value is missing (e.g. the last / first row) fall back to the row's own value.
df['next_min'] = df['min'].shift(-1).fillna(df['min'])
df['last_max'] = df['max'].shift(1).fillna(df['max'])

# Build the 24-row table of hourly blending weights. The day pivots on the minimum at 5:00
# and the maximum at 15:00; within each hour the four weights add up to 1 (up to rounding).
# 0.071 is approximately 1/14, i.e. one step per hour over the 14 hours from 15:00 to 5:00.
factors_15_23 = [get_factor(0.071 * step) for step in range(9)]      # hours 15..23
factors_0_4 = [get_factor(0.071 * step) for step in range(9, 14)]    # hours 0..4, same ramp continued
# Despite its name this list holds ten values, covering hours 5..14 (starting at weight 1).
factors_6_14 = [get_factor(1 - 0.1 * step) for step in range(10)]

# Hours 0..4 blend last_max -> min, hours 5..14 blend min -> max, hours 15..23 blend max -> next_min.
factor_min = factors_0_4 + factors_6_14 + [0] * 9
factor_next_min = [0] * 15 + factors_15_23
factor_last_max = [1 - weight for weight in factors_0_4] + [0] * 19
factor_max = [0] * 5 + [1 - weight for weight in factors_6_14 + factors_15_23]
daily = pd.DataFrame({
    'hour': range(24),
    'factor_min': factor_min,
    'factor_next_min': factor_next_min,
    'factor_last_max': factor_last_max,
    'factor_max': factor_max,
})

# Create the synthetic temperature record as a cross product: every day is paired with
# every row of the hourly factor table.
cross = df.reset_index().merge(daily, how="cross")
# Each hourly temperature is the weighted blend of the four daily extremes.
cross["temperature"] = (
    cross["min"] * cross["factor_min"]
    + cross["next_min"] * cross["factor_next_min"]
    + cross["last_max"] * cross["factor_last_max"]
    + cross["max"] * cross["factor_max"]
)
# Build the timestamp arithmetically. Formatting the date as text and re-parsing it would
# produce strings such as "2016-01-01 00:00:00 5:00" (two time components), which only
# work if the parser happens to be lenient, and is slow on top of that.
cross['datetime'] = cross['date'] + pd.to_timedelta(cross['hour'], unit='h')
cross = cross.set_index('datetime')
result = cross.loc[:, ['temperature']]
result.to_csv(station + '.csv', float_format='%.2f')
