In [42]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from joblib import load
import holidays
from datetime import datetime

### Location Config

In [27]:
# Slug of the location; it is interpolated into the data, model and prediction file paths used by later cells.
location = "lovrenska-jezera"
# Model revision; zero-padded to two digits when building the scaler/model file names.
revision = 2

### Data Preprocessing

In [28]:
# Input columns, listed in the order in which they are selected from the
# prepared frame and handed to the scaler.
features = [
    "temperature",
    "rain",
    "month",
    "day_of_month",
    "day_of_week",
    "is_national_holiday",
    "is_school_holiday",
]
# Name of the column holding the value to predict.
target = "count"

In [29]:
# Hiker counter data for the location; later cells read its "datum", "vhodi" and "izhodi" columns
# (presumably date, entries and exits - confirm against the CSV).
hikers_data = pd.read_csv(f"../data/hikers/test/{location}.csv")
# Aggregated weather data for the location; each row is expected to carry a "datetime" column
# alongside the "temperature" and "rain" features.
weather_data = pd.read_csv(f"../data/weather/aggregated/{location}.csv")

In [30]:
# National holiday calendar from the `holidays` package for country code SI;
# it is queried with `date in national_holidays` membership checks.
national_holidays = holidays.SI()

In [31]:
# School holiday periods as inclusive [start, end] pairs, plus a helper that
# checks whether a date falls inside any of them.

school_holidays = [
    ["2021-12-25", "2022-01-02"],
    ["2022-02-21", "2022-02-25"],
    ["2022-02-28", "2022-03-04"],
    ["2022-04-27", "2022-05-02"],
    ["2022-06-25", "2022-08-31"],
    ["2022-10-31", "2022-11-04"],
    ["2022-12-26", "2023-01-02"],
    ["2023-01-30", "2023-02-01"],
    ["2023-02-06", "2023-02-10"],
    ["2023-04-27", "2023-05-02"],
    ["2023-06-26", "2023-08-31"],
    ["2023-10-30", "2023-11-3"],
    ["2023-12-25", "2024-01-02"],
    ["2024-02-19", "2024-02-23"],
    # The next two periods previously ended in 2023, i.e. before they started,
    # so no date could ever fall inside them.
    ["2024-02-26", "2024-03-01"],
    ["2024-4-27", "2024-05-02"],
    ["2024-06-26", "2024-08-31"],
]
school_holidays = [[datetime.strptime(start, "%Y-%m-%d"), datetime.strptime(end, "%Y-%m-%d")] for start, end in school_holidays]

# An inverted period silently matches nothing, so fail loudly on such typos.
assert all(start <= end for start, end in school_holidays), "a school holiday period ends before it starts"


def is_school_holiday(date):
    """Return True when `date` lies within any school holiday period.

    Both bounds of a period are inclusive. The bounds are parsed as midnight
    datetimes, so `date` is expected to be a datetime without a time-of-day
    component: a later time on the last day of a period compares greater
    than the end bound and is not matched.
    """
    return any(start <= date <= end for start, end in school_holidays)

In [33]:
# Derive the calendar features for every weather row and attach the observed count


def prepare_data(row):
    """Add calendar features and, when available, the hiker count to one weather row.

    `row` must carry a "datetime" value in ISO format. The row is updated in
    place with "month", "day_of_month", "day_of_week", "is_national_holiday"
    and "is_school_holiday". "count" is set only when `hikers_data` has a row
    whose "datum" equals the row's "datetime"; otherwise the key is left unset.
    """
    day = datetime.fromisoformat(row["datetime"])

    row["month"] = day.month
    row["day_of_month"] = day.day
    row["day_of_week"] = day.weekday()
    # Weekends (weekday 5 and 6) are flagged together with national holidays.
    row["is_national_holiday"] = day in national_holidays or day.weekday() >= 5
    row["is_school_holiday"] = is_school_holiday(day)

    same_day = hikers_data.loc[hikers_data["datum"] == row["datetime"]]
    if same_day.empty:
        # No counter reading for this date: leave "count" unset.
        return row

    # Total count is the sum of the first matching row's two counter columns.
    row["count"] = same_day["vhodi"].values[0] + same_day["izhodi"].values[0]
    return row


# Rows left with any missing value (e.g. no matching count) are dropped.
data = (
    weather_data
    .apply(prepare_data, axis=1)
    .dropna()
)

In [34]:
# Prepared data (all columns) before it is narrowed down to the feature and target columns
data

Unnamed: 0,count,datetime,day_of_month,day_of_week,is_national_holiday,is_school_holiday,location,month,rain,temperature
177,318.0,2022-06-27,27,0,False,True,lovrenska-jezera,6,0.0,19.469444
178,66.0,2022-06-28,28,1,False,True,lovrenska-jezera,6,0.0,17.882639
179,214.0,2022-06-29,29,2,False,True,lovrenska-jezera,6,0.6,18.314583
180,288.0,2022-06-30,30,3,False,True,lovrenska-jezera,6,0.0,17.002778
181,379.0,2022-07-01,1,4,False,True,lovrenska-jezera,7,0.0,18.290278
...,...,...,...,...,...,...,...,...,...,...
815,9.0,2024-03-26,26,1,False,False,lovrenska-jezera,3,0.0,1.080556
816,43.0,2024-03-27,27,2,False,False,lovrenska-jezera,3,11.7,3.449306
817,864.0,2024-03-28,28,3,False,False,lovrenska-jezera,3,15.9,0.959722
818,965.0,2024-03-29,29,4,False,False,lovrenska-jezera,3,0.1,4.364583


In [35]:
# Split the prepared frame into the model inputs (x) and the observed target (y).
# Note: the evaluation loop at the end of the notebook indexes `data` directly
# rather than using these two variables.
x_data = data[features]
y_data = data[target]

In [36]:
# Preview the selected feature columns
x_data

Unnamed: 0,temperature,rain,month,day_of_month,day_of_week,is_national_holiday,is_school_holiday
177,19.469444,0.0,6,27,0,False,True
178,17.882639,0.0,6,28,1,False,True
179,18.314583,0.6,6,29,2,False,True
180,17.002778,0.0,6,30,3,False,True
181,18.290278,0.0,7,1,4,False,True
...,...,...,...,...,...,...,...
815,1.080556,0.0,3,26,1,False,False
816,3.449306,11.7,3,27,2,False,False
817,0.959722,15.9,3,28,3,False,False
818,4.364583,0.1,3,29,4,False,False


In [37]:
# Preview the observed counts
y_data

177    318.0
178     66.0
179    214.0
180    288.0
181    379.0
       ...  
815      9.0
816     43.0
817    864.0
818    965.0
819     30.0
Name: count, Length: 640, dtype: float64

### Prepare Model

In [38]:
class HikerPredictor(nn.Module):
    """Small feed-forward regressor for hiker counts.

    Architecture: Linear -> ReLU -> Dropout(0.2) -> Linear -> ReLU -> Linear
    -> Softplus. The final Softplus keeps every prediction positive. All
    parameters are converted to float64, so inputs must be float64 tensors.

    Args:
        input_size: number of input features per sample.
        hidden_size_1: width of the first hidden layer.
        hidden_size_2: width of the second hidden layer.
        output_size: number of values predicted per sample.
    """

    def __init__(self, input_size, hidden_size_1, hidden_size_2, output_size):
        super().__init__()

        # Attribute names and registration order define the state_dict keys,
        # so they must stay as they are for saved checkpoints to load.
        # First hidden stage (with dropout applied after the activation).
        self.fc1 = nn.Linear(input_size, hidden_size_1)
        self.relu1 = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        # Second hidden stage.
        self.fc2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.relu2 = nn.ReLU()
        # Output stage.
        self.fc3 = nn.Linear(hidden_size_2, output_size)
        self.softplus = nn.Softplus()

        # Work in double precision throughout.
        self.double()

    def forward(self, x):
        """Map a batch of feature vectors to positive predictions."""
        first_hidden = self.dropout(self.relu1(self.fc1(x)))
        second_hidden = self.relu2(self.fc2(first_hidden))
        return self.softplus(self.fc3(second_hidden))

In [39]:
# Instantiate the model.
# The layer sizes must match the checkpoint loaded in the "Load Model" section,
# otherwise the saved weights will not fit the layers.
input_size = len(features)  # one input per feature column
hidden_size_1 = 64
hidden_size_2 = 32
output_size = 1  # a single predicted count per sample
model = HikerPredictor(input_size, hidden_size_1, hidden_size_2, output_size)

In [40]:
# Define loss function and optimizer.
# NOTE(review): neither object is referenced by the later cells shown in this
# notebook, which only load weights and run inference - confirm they are still needed.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

### Load Model

In [47]:
# Load the scaler saved for this location/revision.
# NOTE: joblib files are pickle-based and can execute code on load; only load
# artifacts that come from a trusted source.
scaler = load(f"../models/{location}-{revision:0>2}.bin")

# Load the trained weights. `map_location="cpu"` lets a checkpoint that was
# saved from a GPU session load on a machine without CUDA; the model itself
# lives on the CPU in this notebook.
model.load_state_dict(
    torch.load(f"../models/{location}-{revision:0>2}.pth", map_location="cpu")
)
# Switch to inference mode (disables dropout). Kept as the last expression so
# the cell still displays the model summary.
model.eval()

HikerPredictor(
  (fc1): Linear(in_features=7, out_features=64, bias=True)
  (relu1): ReLU()
  (dropout): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=32, out_features=1, bias=True)
  (softplus): Softplus(beta=1, threshold=20)
)

### Evaluate Model

In [44]:
# Predict the hiker count for every prepared date and write the results to CSV.
#
# Scaling and inference run once over the whole frame. Looking each row up
# again by its date was quadratic and produced a multi-row input (and a
# failing float() conversion) whenever a date appeared more than once.
scaled_features = scaler.transform(data[features])
input_features = torch.tensor(scaled_features, dtype=torch.float64)

# Make sure dropout is disabled even if this cell runs before/without the load cell's eval().
model.eval()
with torch.no_grad():
    # The model returns one column per output; flatten to one float per row.
    predicted_counts = model(input_features).reshape(-1).tolist()

# One "datetime,prediction" line per row, in the same order as `data`.
predictions = [
    f"{date},{predicted_count}\n"
    for date, predicted_count in zip(data["datetime"], predicted_counts)
]

# Use the configured revision so the output name always matches the loaded model.
with open(f"../predictions/{location}-{revision:0>2}.csv", "w") as file:
    file.writelines(predictions)