# Imports

In [4]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
os.environ["KERAS_BACKEND"] = "jax"
from sklearn.metrics import mean_squared_error
import keras
import shap

# First try
## Preprocessing and data import

In [11]:
# Read the feature table. The `city` column is used as the prediction target and
# every remaining column is used as a model input.
df = pd.read_csv("../data/uncor_features.csv")
y = df["city"]
X = df.drop(columns="city")

# Hold back 33% of the rows for the final evaluation; the fixed random_state
# keeps the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)


## Model architecture
A simple feedforward neural network: a stack of fully connected ReLU layers followed by a single linear output unit for regression.

In [9]:
# Seed the Python, NumPy and Keras-backend random number generators before any
# layer is created, so that weight initialisation (and any later use of those
# RNGs during training) is repeatable on Restart & Run All. 42 is the same seed
# that is used for the train/test split.
keras.utils.set_random_seed(42)

# Fully connected regression network: one input per feature column, five ReLU
# hidden layers of decreasing width, and a single linear output unit.
# NOTE(review): the last hidden layer has 65 units - possibly meant to be 64;
# kept as-is, confirm with the author.
model = keras.Sequential(
    [
        keras.layers.InputLayer(shape=(X_train.shape[1],)),
        *(
            keras.layers.Dense(units, activation="relu")
            for units in (1024, 512, 256, 128, 65)
        ),
        keras.layers.Dense(1, activation="linear"),
    ]
)
model.summary()

## Training settings

In [10]:
# Regression setup: mean squared error loss, optimised with Adam at a learning
# rate of 1e-4.
model.compile(
    loss=keras.losses.MeanSquaredError(name="MSE"),
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
)

callbacks = [
    # Optional per-epoch checkpointing, currently disabled:
    # keras.callbacks.ModelCheckpoint(filepath="models/model_at_epoch_{epoch}.keras"),
    #
    # Stop once val_loss has gone 4 epochs without improving.
    # restore_best_weights=True rolls the model back to the weights of the epoch
    # with the best val_loss; without it the model that gets saved and evaluated
    # afterwards carries the (possibly worse) weights of the last epoch.
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=4,
        restore_best_weights=True,
    ),
]


## Training

In [11]:

# Mini-batch size and the upper bound on the number of epochs; the early-stopping
# entry in `callbacks` may end the run before that bound is reached.
batch_size, epochs = 1000, 50

# Fit on the training split, with 25% of it set aside by Keras for validation
# (this is what produces the `val_loss` that early stopping monitors).
model.fit(
    x=X_train,
    y=y_train,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=callbacks,
    validation_split=0.25,
)

[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - loss: 773.8617 - val_loss: 707.7769
Epoch 46/50
[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 799.3095 - val_loss: 732.1503
Epoch 47/50
[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 708.1481 - val_loss: 680.7892
Epoch 48/50
[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 740.1608 - val_loss: 766.0272
Epoch 49/50
[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 787.9553 - val_loss: 756.0521
Epoch 50/50
[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - loss: 745.8737 - val_loss: 744.3831


<keras.src.callbacks.history.History at 0x11a703b2290>

In [13]:
# Persist the trained network. Create the target directory first so the save
# does not fail on a fresh checkout where ../models does not exist yet.
os.makedirs("../models", exist_ok=True)
model.save("../models/uncorrel.keras")

In [10]:
# Reload the persisted network so the evaluation reflects exactly what is on
# disk, then score it on the held-out test split.
model = keras.models.load_model("../models/uncorrel.keras")
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
# ".3f" prints three decimal places; the previous spec "3" only set a minimum
# field width of 3 and therefore printed the full float repr.
print(f"{mse:.3f}")

[1m4258/4258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 796us/step
730.7110084732269


# Second try
## Correcting for correlated features  

In [12]:
# Preview the first five rows of the held-out feature frame.
X_test.head(n=5)

Unnamed: 0,weekday,minutes_since_midnight,apparent_temperature (°C),precipitation (mm),cloud_cover (%)
289950,6,395,10.434111,0.482944,97.511667
136749,1,841,5.3705,0.1,100.0
41685,3,425,4.419167,0.0,97.125
16460,1,664,-0.974167,0.0,100.0
254929,1,16,-0.393083,0.1,97.5175
