# Imports

In [2]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
os.environ["KERAS_BACKEND"] = "jax"
from sklearn.metrics import mean_squared_error
import keras

# First try
## Preprocessing and data import

In [4]:
df = pd.read_csv("../data/finalData2.csv")
df['time'] = pd.to_datetime(df['time'])
df.drop(['oerlikon', 'time', 'Unnamed: 0'], inplace=True, axis = 1)
X,y = df.drop('city', axis = 1), df['city']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

Unnamed: 0,city,weekday,minutes_since_midnight,opened,temperature_2m (°C),relative_humidity_2m (%),apparent_temperature (°C),precipitation (mm),cloud_cover (%),wind_speed_10m (km/h),shortwave_radiation (W/m²),is_day ()
0,155,2,1003,True,11.496778,74.594444,7.112444,0.243778,100.0,25.006,49.373333,1.0
1,154,2,1003,True,11.490944,74.636111,7.109111,0.245444,100.0,24.991,49.273333,1.0
2,163,2,1005,True,11.468972,74.793056,7.096556,0.251722,100.0,24.9345,48.896667,1.0
3,163,2,1006,True,11.462944,74.836111,7.093111,0.253444,100.0,24.919,48.793333,1.0
4,162,2,1006,True,11.457111,74.877778,7.089778,0.255111,100.0,24.904,48.693333,1.0


## Model architecture
Just a simple feedforward neural network

In [5]:
model = keras.Sequential()
model.add(keras.layers.InputLayer(shape=(X_train.shape[1],)))
model.add(keras.layers.Dense(1024, activation="relu"))
model.add(keras.layers.Dense(512, activation="relu"))
model.add(keras.layers.Dense(512, activation="relu"))
model.add(keras.layers.Dense(512, activation="relu"))
model.add(keras.layers.Dense(256, activation="relu"))
model.add(keras.layers.Dense(128, activation="relu"))
model.add(keras.layers.Dense(65, activation="relu"))
model.add(keras.layers.Dense(1, activation="linear"))
model.summary()

## Training settings

In [6]:
model.compile(
    loss=keras.losses.MeanSquaredError(name="MSE"),
    optimizer=keras.optimizers.Adam(learning_rate=1e-4)
)

callbacks = [
    keras.callbacks.ModelCheckpoint(filepath="../models/model_at_epoch_{epoch}.keras"),
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=4),
]


## Training

In [7]:

batch_size = 1000
epochs = 100
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.15,
    callbacks=callbacks,
)

[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 66ms/step - loss: 447.3337 - val_loss: 520.3091
Epoch 29/100
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 68ms/step - loss: 433.5539 - val_loss: 425.6593
Epoch 30/100
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 70ms/step - loss: 407.4283 - val_loss: 404.5194
Epoch 31/100
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 70ms/step - loss: 404.4570 - val_loss: 525.0416
Epoch 32/100
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 102ms/step - loss: 458.2545 - val_loss: 419.2563
Epoch 33/100
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 174ms/step - loss: 380.7608 - val_loss: 406.8996
Epoch 34/100
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 208ms/step - loss: 372.0610 - val_loss: 355.7110
Epoch 35/100
[1m236/236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 126ms/step - loss: 363.8622 - val_

<keras.src.callbacks.history.History at 0x1fc5250e8d0>

In [8]:
model.save("../models/NN-all_features.keras")

In [9]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(mse)

[1m4258/4258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step
463.3445786013388


# Second try
## Correcting for correlated features  

Unnamed: 0,Feature1,Feature2,cor,abs_cor
28,temperature_2m (°C),relative_humidity_2m (%),-0.580931,0.580931
35,relative_humidity_2m (%),temperature_2m (°C),-0.580931,0.580931
34,relative_humidity_2m (%),opened,-0.417728,0.417728
20,opened,relative_humidity_2m (%),-0.417728,0.417728
10,minutes_since_midnight,opened,0.410093,0.410093
...,...,...,...,...
27,,,,
36,,,,
45,,,,
54,,,,
