In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import scipy
import seaborn as sns
from datetime import datetime, timedelta

from tqdm import tqdm

from sklearn.preprocessing import StandardScaler, MinMaxScaler, PolynomialFeatures

In [2]:
from transformer import Transformer
from preprocessing import *
from utils import *
from features import *

In [3]:
if torch.backends.mps.is_available():
    device = torch.device("mps")

In [4]:
torch.backends.mps.is_macos13_or_newer()

True

In [5]:
torch.mps.set_per_process_memory_fraction(.0)

In [6]:
torch.mps.driver_allocated_memory(), torch.mps.current_allocated_memory()

(393216, 0)

In [7]:
torch.mps.profiler.start()

In [8]:
raw_data = pd.read_csv('consumption_and_temperatures.csv')
raw_data['timestamp'] = pd.to_datetime(raw_data['timestamp'])

In [9]:
seq_len = 48
scale_output=True
target_column = 'NO1_consumption'

In [10]:
features_to_add = [
    (   
        pick_location_data,
        { 'loc': [1] }
    ),
    (
        add_season_columns, 
        {}
    ),
    # (
    #     shift_data, 
    #     {
    #         "shift_max": 10,
    #         "column_to_shift": "NO1_temperature",
    #         "new_column_name": "temp"
    #     }
    # ),
    (
        shift_data, 
        {   
            "shift_min": 24,
            "shift_max": 24,
            "column_to_shift": "NO1_consumption",
            "new_column_name": "consum"
        },
    ),
    # (
    #     change_timestamp_to_sin,
    #     {}
    # ),
    (
        add_hour_columns,
        {}
    )
]

In [11]:
for feature in features_to_add:
    print(feature)

(<function pick_location_data at 0x16a6013f0>, {'loc': [1]})
(<function add_season_columns at 0x16a601240>, {})
(<function shift_data at 0x16a601480>, {'shift_min': 24, 'shift_max': 24, 'column_to_shift': 'NO1_consumption', 'new_column_name': 'consum'})
(<function add_hour_columns at 0x16a6012d0>, {})


In [12]:
forecast_len=24

In [13]:
(X_train, y_train), (X_val, y_val), (X_test, y_test), (scalerInputMethod, scalerOutputMethod), (df_target, time_delta_shifting) = general_preprocessing(
        raw_data, 
        features_to_add=features_to_add,
        seq_len=seq_len,
        forecast_len=forecast_len,
        # test_start_index=len(raw_data) * 3 //4,
        scale_output=scale_output
    )

FEATURES:
 Index(['NO1_temperature', 'season_fall', 'season_spring', 'season_summer',
       'season_winter', 'consum_24_previous', 'hour_0', 'hour_1', 'hour_2',
       'hour_3', 'hour_4', 'hour_5', 'hour_6', 'hour_7', 'hour_8', 'hour_9',
       'hour_10', 'hour_11', 'hour_12', 'hour_13', 'hour_14', 'hour_15',
       'hour_16', 'hour_17', 'hour_18', 'hour_19', 'hour_20', 'hour_21',
       'hour_22', 'hour_23'],
      dtype='object')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train[features_to_scale] = scalerInputMethod.fit_transform(X_train[features_to_scale].values) if not is_scaler_fitted else  scalerInputMethod.transform(X_train[features_to_scale].values)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val[features_to_scale] = scalerInputMethod.transform(X_val[features_to_scale])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [14]:
X_train.to(device), y_train.to(device), X_val.to(device), y_val.to(device), X_test.to(device), y_test.to(device)

(tensor([[[-0.5622,  0.0000,  1.0000,  ...,  0.0000,  0.0000,  0.0000],
          [-0.6348,  0.0000,  1.0000,  ...,  0.0000,  0.0000,  0.0000],
          [-0.6348,  0.0000,  1.0000,  ...,  0.0000,  0.0000,  0.0000],
          ...,
          [ 0.3446,  0.0000,  1.0000,  ...,  1.0000,  0.0000,  0.0000],
          [ 0.3204,  0.0000,  1.0000,  ...,  0.0000,  1.0000,  0.0000],
          [ 0.2841,  0.0000,  1.0000,  ...,  0.0000,  0.0000,  1.0000]],
 
         [[-0.6348,  0.0000,  1.0000,  ...,  0.0000,  0.0000,  0.0000],
          [-0.6348,  0.0000,  1.0000,  ...,  0.0000,  0.0000,  0.0000],
          [-0.7073,  0.0000,  1.0000,  ...,  0.0000,  0.0000,  0.0000],
          ...,
          [ 0.3204,  0.0000,  1.0000,  ...,  0.0000,  1.0000,  0.0000],
          [ 0.2841,  0.0000,  1.0000,  ...,  0.0000,  0.0000,  1.0000],
          [ 0.2237,  0.0000,  1.0000,  ...,  0.0000,  0.0000,  0.0000]],
 
         [[-0.6348,  0.0000,  1.0000,  ...,  0.0000,  0.0000,  0.0000],
          [-0.7073,  0.0000,

In [15]:
X_train.shape, y_train.shape

(torch.Size([40861, 48, 30]), torch.Size([40861, 48, 1]))

In [16]:
X_train.shape

torch.Size([40861, 48, 30])

In [17]:
model = Transformer(input_size=X_train.shape[2], output_size=X_train.shape[1])

In [18]:
model.to(device)

Transformer(
  (embedding): Linear(in_features=30, out_features=64, bias=True)
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
        )
        (linear1): Linear(in_features=64, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=64, bias=True)
        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (linear): Linear(in_features=64, out_features=48, bias=True)
)

In [19]:
lr = 1e-3
num_epochs=10

In [20]:
loss_func = nn.MSELoss()# .to(device)
opt = optim.Adam(model.parameters(), lr=lr)

In [21]:
model, losses, val_loss, opt = fit(
    model, 
    train_set=(X_train, y_train),
    val_set=(X_val, y_val),
    opt=opt,
    loss_func=loss_func,
    num_epochs=num_epochs,
    device=device
)

  0%|          | 0/10 [00:00<?, ?it/s]

val loss


  return F.mse_loss(input, target, reduction=self.reduction)


output
lossep
grad


  return F.mse_loss(input, target, reduction=self.reduction)


In [None]:
plt.plot(losses, label="train loss")
plt.plot(val_loss, label="val loss")
plt.legend()

In [None]:
y_test, y_pred = predict(model, scalerOutputMethod, (X_test, y_test))

In [None]:
plot_error_by_hour_for_test_set(y_test, y_pred)

In [None]:
for k in range(5):
    make_forecast(y_pred=y_pred, df_target=df_target, seq_len=seq_len, time_delta_shifting=time_delta_shifting)