In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch.nn as nn
import torch 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from tqdm import tqdm 
from training_functions import train_model
from sklearn.metrics import mean_squared_error

In [None]:
from torch.utils.data import DataLoader, TensorDataset
from model import preprocess
from model import lstm

In [4]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


Data Processing

In [5]:
df = pd.read_csv('food_price_inflation.csv')
print(df.head())
print(df.shape)

  REF_AREA REF_AREA_LABEL TIME_PERIOD  OBS_VALUE
0      AFG    Afghanistan  2001-01-01  22.943765
1      AFG    Afghanistan  2001-02-01  24.357903
2      AFG    Afghanistan  2001-03-01  21.202933
3      AFG    Afghanistan  2001-04-01  17.301832
4      AFG    Afghanistan  2001-05-01   9.334624
(59839, 4)


In [6]:
# we keep only data in France 
df = df[df['REF_AREA_LABEL'] == 'France']
df = df.drop('REF_AREA', axis=1)
df = df.drop('REF_AREA_LABEL', axis=1)

print(df.head())
print(df.shape)

      TIME_PERIOD  OBS_VALUE
19470  2001-01-01   3.542484
19471  2001-02-01   3.555150
19472  2001-03-01   4.911412
19473  2001-04-01   5.140790
19474  2001-05-01   6.648663
(294, 2)


Training

In [7]:
# preprocessing 
X_train_scaled,X_val_scaled,X_test_scaled,y_train,y_val,y_test = preprocess(df, split_ratio_train_test=0.8, split_ratio_train_valid=0.8, device=device)
input_size = X_train_scaled.shape[2]                             

In [8]:
# training's hyperparameters
model = lstm(input_size=input_size, mode = 'relu').to(device)
criterion = nn.MSELoss()
learning_rate = 0.001
num_epochs = 200
batch_size = 32
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

train_set = TensorDataset(X_train_scaled, y_train)
train_loader = DataLoader(train_set, batch_size=batch_size)

val_set = TensorDataset(X_val_scaled, y_val)
val_loader = DataLoader(train_set, batch_size=batch_size)

test_set = TensorDataset(X_test_scaled, y_test)
test_loader = DataLoader(test_set, batch_size=batch_size)


In [9]:
history = train_model(model, criterion, optimizer, num_epochs,train_loader, val_loader)

200
Epoch 1/200, Training Loss: 4.6144, Validation Loss: 4.4219
Epoch 2/200, Training Loss: 4.3437, Validation Loss: 4.1628
Epoch 3/200, Training Loss: 4.0745, Validation Loss: 3.8483
Epoch 4/200, Training Loss: 3.7301, Validation Loss: 3.3848
Epoch 5/200, Training Loss: 3.2186, Validation Loss: 2.7209
Epoch 6/200, Training Loss: 2.7181, Validation Loss: 2.6474
Epoch 7/200, Training Loss: 2.6167, Validation Loss: 2.5429
Epoch 8/200, Training Loss: 2.5604, Validation Loss: 2.5372
Epoch 9/200, Training Loss: 2.5626, Validation Loss: 2.5239
Epoch 10/200, Training Loss: 2.5346, Validation Loss: 2.4925
Epoch 11/200, Training Loss: 2.5049, Validation Loss: 2.4803
Epoch 12/200, Training Loss: 2.4979, Validation Loss: 2.4678
Epoch 13/200, Training Loss: 2.4897, Validation Loss: 2.4520
Epoch 14/200, Training Loss: 2.4763, Validation Loss: 2.4388
Epoch 15/200, Training Loss: 2.4612, Validation Loss: 2.4255
Epoch 16/200, Training Loss: 2.4461, Validation Loss: 2.4125
Epoch 17/200, Training Loss: 

In [13]:
y_pred = model(X_test_scaled).cpu().detach().numpy()
mse = mean_squared_error(y_test.detach().cpu().numpy(), y_pred)
print(f'Test MSE: {mse}')
history["final_test_loss"]  = mse

Test MSE: 56.35398483276367
