In [8]:
import json
import os
from datetime import timedelta

import pandas as pd

In [9]:
# Gauge whose level is predicted; also the key looked up in network.json
# and the name of the output directory.
target_gauge_id = 3465
# Forecast horizon: TARGET is the target gauge's reading this many hours
# after each sample's DATE.
predict_next_hours = 24
# Number of most recent readings per gauge used as LEVEL_* features.
use_last_hours = 72

In [10]:
# Parse the whole network description first so the file handle is released
# before the result is touched.
with open("../dataset/hydro/network.json", encoding="utf-8") as network_file:
    networks_by_gauge = json.load(network_file)

# Entries are [gauge, distance] pairs; the target gauge itself goes first
# with a distance of 0.0 so it is processed like every other gauge.
network = networks_by_gauge[str(target_gauge_id)]
network.insert(0, [target_gauge_id, 0.0])

In [11]:
def _read_levels(gauge):
    """Return the "H" column of a gauge's aggregated CSV, indexed by the parsed "datum" column."""
    frame = pd.read_csv(f"../dataset/hydro/aggregated/{gauge}.csv")
    return frame.assign(datum=pd.to_datetime(frame["datum"])).set_index("datum")["H"]


# One series and one distance per network entry, kept in network order so
# position i in both lists refers to the same gauge.
dataframes = [_read_levels(gauge) for gauge, _ in network]
distances = [distance for _, distance in network]

In [12]:
# Column layout: DATE, TARGET, one DISTANCE_i per gauge, then use_last_hours
# LEVEL_i_j readings per gauge (j = 0 is the oldest reading of the window,
# j = use_last_hours - 1 the newest).
columns = ["DATE", "TARGET"]
columns.extend(f"DISTANCE_{i}" for i in range(len(distances)))
columns.extend(
    f"LEVEL_{i}_{j}" for i in range(len(dataframes)) for j in range(use_last_hours)
)

# Sample dates are taken positionally from the target gauge (dataframes[0]):
# the first use_last_hours rows are skipped, and the last predict_next_hours
# rows are dropped because their target lies beyond the end of the series.
# `-0` would slice to an empty index, hence the explicit None.
range_start = use_last_hours
range_end = -predict_next_hours if predict_next_hours else None


def _trailing_window(readings, end, size):
    """Return the `size` readings that precede position `end`, oldest first.

    When fewer than `size` readings are available the window is left-padded
    with NaN, so it always has exactly `size` entries and the newest reading
    always sits in the last slot. This keeps every row aligned with `columns`
    even for gauges with a shorter history than the target gauge.
    """
    window = readings[max(end - size, 0):end]
    return [float("nan")] * (size - len(window)) + window


target_levels = dataframes[0]
sample_dates = target_levels.index[range_start:range_end]
horizon = timedelta(hours=predict_next_hours)

# For every gauge, find once (binary search) how many readings lie at or
# before each sample date, instead of re-masking the whole series per date.
gauge_histories = []
for levels in dataframes:
    if not levels.index.is_monotonic_increasing:
        # searchsorted requires a sorted index; a stable sort keeps ties in file order.
        levels = levels.sort_index(kind="mergesort")
    ends = levels.index.searchsorted(sample_dates, side="right")
    gauge_histories.append((levels.tolist(), ends))

rows = []
for position, date in enumerate(sample_dates):
    # TARGET is the target gauge's reading predict_next_hours after DATE;
    # a missing timestamp raises KeyError, as the original lookup did.
    row = [date, target_levels.loc[date + horizon], *distances]
    for readings, ends in gauge_histories:
        row.extend(_trailing_window(readings, ends[position], use_last_hours))
    rows.append(row)

data = pd.DataFrame(rows, columns=columns).set_index("DATE")

In [13]:
# Display the assembled dataset (pandas truncates the rendering of large frames).
data

Unnamed: 0_level_0,TARGET,DISTANCE_0,DISTANCE_1,DISTANCE_2,DISTANCE_3,DISTANCE_4,DISTANCE_5,DISTANCE_6,DISTANCE_7,DISTANCE_8,...,LEVEL_9_62,LEVEL_9_63,LEVEL_9_64,LEVEL_9_65,LEVEL_9_66,LEVEL_9_67,LEVEL_9_68,LEVEL_9_69,LEVEL_9_70,LEVEL_9_71
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-04 00:00:00,150.000000,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,...,78.500000,78.333333,80.166667,81.666667,81.666667,82.500000,84.166667,79.000000,71.166667,69.166667
2023-01-04 01:00:00,143.833333,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,...,78.333333,80.166667,81.666667,81.666667,82.500000,84.166667,79.000000,71.166667,69.166667,68.333333
2023-01-04 02:00:00,137.000000,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,...,80.166667,81.666667,81.666667,82.500000,84.166667,79.000000,71.166667,69.166667,68.333333,62.166667
2023-01-04 03:00:00,131.666667,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,...,81.666667,81.666667,82.500000,84.166667,79.000000,71.166667,69.166667,68.333333,62.166667,59.500000
2023-01-04 04:00:00,127.666667,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,...,81.666667,82.500000,84.166667,79.000000,71.166667,69.166667,68.333333,62.166667,59.500000,59.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-29 19:00:00,138.666667,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,...,89.333333,88.000000,87.000000,83.666667,79.833333,79.333333,79.000000,79.000000,80.833333,84.166667
2024-04-29 20:00:00,139.166667,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,...,88.000000,87.000000,83.666667,79.833333,79.333333,79.000000,79.000000,80.833333,84.166667,86.000000
2024-04-29 21:00:00,140.166667,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,...,87.000000,83.666667,79.833333,79.333333,79.000000,79.000000,80.833333,84.166667,86.000000,86.000000
2024-04-29 22:00:00,141.000000,0.0,47.974465,47.701469,23.750652,29.836259,11.215813,21.741801,40.931579,7.65356,...,83.666667,79.833333,79.333333,79.000000,79.000000,80.833333,84.166667,86.000000,86.000000,81.833333


In [15]:
# Output goes to one directory per target gauge, with one gzip-compressed
# CSV per forecast horizon (the file is named after predict_next_hours).
directory = f"../dataset/processed/simple/{target_gauge_id}"
output_path = f"{directory}/{predict_next_hours}.gz"

os.makedirs(directory, exist_ok=True)
data.to_csv(output_path, compression="gzip")