<a href="https://colab.research.google.com/github/gkuch22/ml-final/blob/main/final_model_experiment_tft.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab VM; the dataset CSVs loaded below are read from /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import torch # Main PyTorch Library
import matplotlib.pyplot as plt # Used for visualizing the images and plotting the training progress
import pandas as pd # Used to read/create dataframes (csv) and process tabular data
import numpy as np # preprocessing and numerical/mathematical operations
import seaborn as sns

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU
# (on a Mac, swap "cuda" for "mps").
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Device available: ", device)


Device available:  cuda


In [None]:
# Folder on the mounted Drive that holds the four dataset CSVs.
# Kept in one place so relocating the data means editing a single line.
DATA_DIR = '/content/drive/MyDrive/cs231n/assignments/final/datasets'

train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
stores_df = pd.read_csv(f'{DATA_DIR}/stores.csv')
features_df = pd.read_csv(f'{DATA_DIR}/features.csv')
# final_df is the frame the fitted pipeline later predicts on to build the submission.
final_df = pd.read_csv(f'{DATA_DIR}/test.csv')

In [None]:
# Preview train_df as read from train.csv (pandas shows only the head and tail rows).
train_df

Unnamed: 0,Store,Dept,Date,Weekly_Sales,IsHoliday
0,1,1,2010-02-05,24924.50,False
1,1,1,2010-02-12,46039.49,True
2,1,1,2010-02-19,41595.55,False
3,1,1,2010-02-26,19403.54,False
4,1,1,2010-03-05,21827.90,False
...,...,...,...,...,...
421565,45,98,2012-09-28,508.37,False
421566,45,98,2012-10-05,628.10,False
421567,45,98,2012-10-12,1061.02,False
421568,45,98,2012-10-19,760.01,False


In [None]:
def get_wmae(y_true, y_pred, weights):
  """Weighted mean absolute error.

  Computes sum(weights * |y_true - y_pred|) / sum(weights).

  Args:
    y_true: observed values (array-like supporting elementwise arithmetic).
    y_pred: predicted values, elementwise comparable with ``y_true``.
    weights: per-observation weights, same length as the values.

  Returns:
    The weighted mean of the absolute errors.
  """
  abs_errors = np.abs(y_true - y_pred)
  weighted_total = np.sum(weights * abs_errors)
  return weighted_total / np.sum(weights)

In [None]:
# !pip install neuralforecast -q

# MODEL PIPELINE

In [None]:
# train/test already carry an IsHoliday column, so drop the copy in features
# to avoid suffixed duplicate columns after the merge.
features_df = features_df.drop(columns=["IsHoliday"])

# Parse the Date strings so the merge keys share a datetime dtype.
for _frame in (train_df, features_df, final_df):
    _frame['Date'] = pd.to_datetime(_frame['Date'])

# Training frame: sales + per-store weekly features + static store info,
# ordered by store, department and date.
merged_df = (
    train_df
    .merge(features_df, on=['Store', 'Date'], how='left')
    .merge(stores_df, on='Store', how='left')
    .sort_values(by=['Store', 'Dept', 'Date'])
    .reset_index(drop=True)
)

# Same enrichment for the frame to be predicted.
final_df = (
    final_df
    .merge(features_df, on=['Store', 'Date'], how='left')
    .merge(stores_df, on='Store', how='left')
    .sort_values(by=['Store', 'Dept', 'Date'])
    .reset_index(drop=True)
)

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin

class CustomPreprocessorClass(BaseEstimator, TransformerMixin):
  """Stateless transformer that reshapes a merged sales frame into the
  ``unique_id`` / ``ds`` / ``y`` layout passed to NeuralForecast.

  Expects ``Store``, ``Dept``, ``Date`` and ``Type`` columns; ``Weekly_Sales``
  is optional (absent for the frame to be predicted).
  """

  def __init__(self):
    pass

  def fit(self, X, y=None):
    """Nothing to learn; returns self."""
    return self

  def transform(self, X):
    """Return a transformed copy of ``X``; ``X`` itself is not modified.

    Steps: fill missing values with 0, build ``unique_id`` as "<Store>_<Dept>",
    sort by series and date, rename ``Date`` -> ``ds`` and ``Weekly_Sales`` -> ``y``
    (or add an all-NaN ``y`` when there is no target), and drop ``Type``.
    """
    # fillna returns a new frame, so the caller's DataFrame stays untouched.
    frame = X.fillna(0)
    frame['unique_id'] = frame['Store'].astype(str) + '_' + frame['Dept'].astype(str)
    frame = frame.sort_values(['unique_id', 'Date']).rename(columns={'Date': 'ds'})

    if 'Weekly_Sales' not in frame.columns:
      # No target available: add a placeholder y column.
      frame['y'] = np.nan
    else:
      frame = frame.rename(columns={'Weekly_Sales': 'y'})

    # Type is dropped rather than encoded in this pipeline.
    return frame.drop(columns=['Type'])

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
from neuralforecast import NeuralForecast
from neuralforecast.models import TFT


class CustomTFTClass(BaseEstimator, TransformerMixin):
  """Scikit-learn style wrapper around a NeuralForecast TFT model.

  Constructor arguments are stored as-is and forwarded to ``TFT`` in ``fit``:
  H -> h, INPUT_SIZE -> input_size, MAX_STEPS -> max_steps,
  LEARNING_RATE -> learning_rate, BATCH_SIZE -> batch_size,
  DROPOUT -> dropout, N_RNN_LAYERS -> n_rnn_layers.
  """

  def __init__(self, H=100, INPUT_SIZE=52, MAX_STEPS=100, LEARNING_RATE=1e-4, BATCH_SIZE=64, DROPOUT=0.3, N_RNN_LAYERS=3):
    self.H = H
    self.INPUT_SIZE = INPUT_SIZE
    self.MAX_STEPS = MAX_STEPS
    self.LEARNING_RATE = LEARNING_RATE
    self.BATCH_SIZE = BATCH_SIZE
    self.DROPOUT = DROPOUT
    self.N_RNN_LAYERS = N_RNN_LAYERS
    # Set by fit(); transform() relies on it.
    self.neural_forecast = None

  def fit(self, X, y=None):
    """Train a TFT on ``X`` (a frame with unique_id / ds / y columns) at weekly-Friday frequency.

    Returns self.
    """
    tft_kwargs = dict(
        h=self.H,
        input_size=self.INPUT_SIZE,
        max_steps=self.MAX_STEPS,
        learning_rate=self.LEARNING_RATE,
        batch_size=self.BATCH_SIZE,
        dropout=self.DROPOUT,
        n_rnn_layers=self.N_RNN_LAYERS,
    )
    forecaster = NeuralForecast(models=[TFT(**tft_kwargs)], freq='W-FRI')
    forecaster.fit(df=X)

    # Only publish the forecaster once training has completed.
    self.neural_forecast = forecaster
    return self

  def transform(self, X):
    """Forecast the horizon and left-join the forecasts onto ``X``.

    Rows of ``X`` whose (unique_id, ds) has no forecast keep NaN in the
    forecast column.
    """
    key_cols = ['unique_id', 'ds']
    horizon_frame = self.neural_forecast.make_future_dataframe()
    # Attach X's columns to every future (unique_id, ds) pair; gaps become 0.
    exog_frame = horizon_frame.merge(X, on=key_cols, how='left').fillna(0)
    forecasts = self.neural_forecast.predict(futr_df=exog_frame)
    return X.merge(forecasts, on=key_cols, how='left')


In [None]:
class CustomTFTPipelineClass(BaseEstimator, TransformerMixin):
  """End-to-end pipeline: CustomPreprocessorClass followed by CustomTFTClass."""

  def __init__(self):
    self.prep = CustomPreprocessorClass()
    self.model = CustomTFTClass()

  def fit(self, train_df, y=None):
    """Preprocess ``train_df`` and train the TFT model on it.

    Args:
      train_df: merged training frame accepted by CustomPreprocessorClass.transform.
      y: unused; accepted for parity with the other estimators' ``fit(X, y=None)``.

    Returns:
      self, so calls can be chained (``pipeline.fit(df).predict(other)``) as the
      scikit-learn estimator convention expects.
    """
    train_df = self.prep.transform(train_df)
    self.model.fit(train_df)
    return self

  def predict(self, final_df):
    """Preprocess ``final_df`` and return it with the model's forecasts joined on."""
    final_df = self.prep.transform(final_df)
    return self.model.transform(final_df)

In [None]:
# Train on the merged training frame, then forecast for the merged test frame.
pipeline = CustomTFTPipelineClass()
pipeline.fit(train_df=merged_df)
precitions = pipeline.predict(final_df=final_df)

In [None]:
# Inspect the test rows joined with the forecasts (TFT column); rows without a forecast show NaN.
precitions

Unnamed: 0,Store,Dept,ds,IsHoliday,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,Size,unique_id,y,TFT
0,10,1,2012-11-02,False,70.79,4.099,25680.20,6037.06,44.68,17412.04,4223.05,131.236226,6.943,126512,10_1,,33613.625000
1,10,1,2012-11-09,False,70.28,3.780,9208.40,2501.11,364.14,679.96,6831.32,131.279355,6.943,126512,10_1,,33350.562500
2,10,1,2012-11-16,False,58.82,3.703,13459.00,120.76,128.41,1802.25,5503.42,131.325800,6.943,126512,10_1,,33163.273438
3,10,1,2012-11-23,True,63.95,3.759,1789.75,0.10,146394.44,787.24,1005.18,131.376667,6.943,126512,10_1,,33032.355469
4,10,1,2012-11-30,False,64.13,3.719,4304.00,0.00,8935.00,175.68,12676.61,131.427533,6.943,126512,10_1,,32942.593750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115059,9,98,2013-01-04,False,34.65,3.161,894.63,5462.31,11.72,0.00,1750.05,227.882682,5.049,125833,9_98,,17.527309
115060,9,99,2012-12-07,False,61.19,3.198,2521.31,0.00,388.00,171.98,4047.33,227.456446,4.954,125833,9_99,,
115061,9,99,2013-07-05,False,80.11,3.422,5382.87,886.86,484.64,1899.92,1478.67,0.000000,0.000,125833,9_99,,
115062,9,99,2013-07-19,False,75.26,3.556,1040.56,188.09,0.00,178.85,1507.30,0.000000,0.000,125833,9_99,,


In [None]:
# Keep only the series key, date and forecast; rows with no forecast get 0.
precitions = (
    precitions[['unique_id', 'ds', 'TFT']]
    .assign(TFT=lambda d: d['TFT'].fillna(0))
)

In [None]:
# Build the submission frame: Id is "<Store>_<Dept>_<date>", Weekly_Sales is the forecast.
submission = (
    precitions
    .assign(Id=lambda d: d['unique_id'] + '_' + d['ds'].astype(str))
    .rename(columns={'TFT': 'Weekly_Sales'})
    .loc[:, ['Id', 'Weekly_Sales']]
    .reset_index(drop=True)
)

In [None]:
# Preview the two-column submission frame (Id, Weekly_Sales).
submission

Unnamed: 0,Id,Weekly_Sales
0,10_1_2012-11-02,33613.625000
1,10_1_2012-11-09,33350.562500
2,10_1_2012-11-16,33163.273438
3,10_1_2012-11-23,33032.355469
4,10_1_2012-11-30,32942.593750
...,...,...
115059,9_98_2013-01-04,17.527309
115060,9_99_2012-12-07,0.000000
115061,9_99_2013-07-05,0.000000
115062,9_99_2013-07-19,0.000000


In [None]:
# Write the submission to the current working directory, without the index column.
submission_path = 'tft_submission.csv'
submission.to_csv(submission_path, index=False)

# WITHOUT PIPELINE

In [None]:
# train_df already carries IsHoliday, so the copy in features is dropped before merging.
# errors="ignore" keeps this cell runnable when the column was already removed by the
# pipeline section above; without it a top-to-bottom run fails here with a KeyError.
features_df = features_df.drop(columns=["IsHoliday"], errors="ignore")

# Parse the Date strings so the merge keys share a datetime dtype
# (a no-op if they were already converted).
train_df['Date'] = pd.to_datetime(train_df['Date'])
features_df['Date'] = pd.to_datetime(features_df['Date'])

# Sales + per-store weekly features + static store info.
merged_df = pd.merge(train_df, features_df, on=['Store', 'Date'], how='left')
merged_df = pd.merge(merged_df, stores_df, on='Store', how='left')

merged_df = merged_df.sort_values(by=['Store', 'Dept', 'Date']).reset_index(drop=True)

In [None]:
# One series per (Store, Dept) pair, identified as "<Store>_<Dept>" and ordered by date.
merged_df = merged_df.assign(
    unique_id=lambda d: d['Store'].astype(str) + '_' + d['Dept'].astype(str)
).sort_values(['unique_id', 'Date'])

# Calendar parts of the date (used below for the time-based split),
# then rename to the ds / y column names passed to NeuralForecast.
merged_df = merged_df.assign(
    Year=lambda d: d['Date'].dt.year,
    Month=lambda d: d['Date'].dt.month,
    Week=lambda d: d['Date'].dt.isocalendar().week.astype(int),
).rename(columns={'Date': 'ds', 'Weekly_Sales': 'y'})

In [None]:
# Replace every missing value in the merged frame with 0.
merged_df = merged_df.fillna(value=0)

In [None]:
# Chronological split: everything before year_split trains; the split year is
# divided at month_split into validation (earlier months) and test (the rest).
year_split = 2012
month_split = 6

in_split_year = merged_df['Year'] == year_split
before_split_month = merged_df['Month'] < month_split
from_split_month = merged_df['Month'] >= month_split

train_df = merged_df[merged_df['Year'] < year_split]
valid_df = merged_df[in_split_year & before_split_month]
test_df = merged_df[in_split_year & from_split_month]

In [None]:
from sklearn.preprocessing import OneHotEncoder

# Fit the encoder on the training split only, then apply it to all three splits.
onehot_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
onehot_encoder.fit(train_df[['Type']])
encoded_cols = onehot_encoder.get_feature_names_out(['Type'])

# Encoded arrays, one per split.
train_type_encoded = onehot_encoder.transform(train_df[['Type']])
valid_type_encoded = onehot_encoder.transform(valid_df[['Type']])
test_type_encoded = onehot_encoder.transform(test_df[['Type']])

# Wrap each array in a frame that shares its split's index so concat aligns row by row.
train_type_df = pd.DataFrame(train_type_encoded, columns=encoded_cols, index=train_df.index)
valid_type_df = pd.DataFrame(valid_type_encoded, columns=encoded_cols, index=valid_df.index)
test_type_df = pd.DataFrame(test_type_encoded, columns=encoded_cols, index=test_df.index)

# Swap the raw Type column for its one-hot columns.
train_df = pd.concat([train_df.drop(columns='Type'), train_type_df], axis=1)
valid_df = pd.concat([valid_df.drop(columns='Type'), valid_type_df], axis=1)
test_df = pd.concat([test_df.drop(columns='Type'), test_type_df], axis=1)


In [None]:
# Inspect the training split after Type has been replaced by its one-hot columns.
train_df

Unnamed: 0,Store,Dept,ds,y,IsHoliday,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,...,CPI,Unemployment,Size,unique_id,Year,Month,Week,Type_A,Type_B,Type_C
87524,10,1,2010-02-05,40212.84,False,54.34,2.962,0.00,0.00,0.00,...,126.442065,9.765,126512,10_1,2010,2,5,0.0,1.0,0.0
87525,10,1,2010-02-12,67699.32,True,49.96,2.828,0.00,0.00,0.00,...,126.496258,9.765,126512,10_1,2010,2,6,0.0,1.0,0.0
87526,10,1,2010-02-19,49748.33,False,58.22,2.915,0.00,0.00,0.00,...,126.526286,9.765,126512,10_1,2010,2,7,0.0,1.0,0.0
87527,10,1,2010-02-26,33601.22,False,52.77,2.825,0.00,0.00,0.00,...,126.552286,9.765,126512,10_1,2010,2,8,0.0,1.0,0.0
87528,10,1,2010-03-05,36572.44,False,55.92,2.877,0.00,0.00,0.00,...,126.578286,9.765,126512,10_1,2010,3,9,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87516,9,98,2011-11-25,60.75,True,54.32,3.236,121.57,0.00,44061.13,...,222.138968,6.054,125833,9_98,2011,11,47,0.0,1.0,0.0
87517,9,98,2011-12-02,54.75,False,46.84,3.172,1237.88,0.00,1330.53,...,222.397544,6.054,125833,9_98,2011,12,48,0.0,1.0,0.0
87518,9,98,2011-12-09,75.77,False,37.65,3.158,3075.89,0.00,248.80,...,222.656120,6.054,125833,9_98,2011,12,49,0.0,1.0,0.0
87519,9,98,2011-12-16,66.75,False,47.31,3.159,2755.90,0.00,136.48,...,222.882548,6.054,125833,9_98,2011,12,50,0.0,1.0,0.0


In [None]:
# Hyperparameters forwarded to the TFT constructor in the next cell.
H = 60                # passed as h
INPUT_SIZE = 52       # passed as input_size
MAX_STEPS = 1000      # passed as max_steps
LEARNING_RATE = 1e-4  # passed as learning_rate
BATCH_SIZE = 64       # passed as batch_size
DROPOUT = 0.3         # passed as dropout
N_RNN_LAYERS = 3      # passed as n_rnn_layers

In [None]:
from neuralforecast import NeuralForecast
from neuralforecast.models import TFT

# Collect the constructor arguments from the hyperparameter cell above.
tft_kwargs = dict(
    h=H,
    input_size=INPUT_SIZE,
    max_steps=MAX_STEPS,
    learning_rate=LEARNING_RATE,
    batch_size=BATCH_SIZE,
    dropout=DROPOUT,
    n_rnn_layers=N_RNN_LAYERS,
)
model = TFT(**tft_kwargs)

# Train on the training split only, at weekly (Friday) frequency.
neural_forecast = NeuralForecast(models=[model], freq='W-FRI')
neural_forecast.fit(df=train_df, verbose=False)

# valid_forecast_df = neural_forecast.predict()

In [None]:
# Template of the (unique_id, ds) pairs to forecast.
future_df_template = neural_forecast.make_future_dataframe()

# Attach the columns of merged_df to each future row; anything missing becomes 0.
futr_df = (
    future_df_template
    .merge(merged_df, on=['unique_id', 'ds'], how='left')
    .fillna(0)
)
valid_forecast_df = neural_forecast.predict(futr_df=futr_df, verbose=False)

In [None]:
# Preview the per-series forecasts returned by predict (columns: unique_id, ds, TFT).
valid_forecast_df

Unnamed: 0,unique_id,ds,TFT
0,10_1,2012-01-06,44388.031250
1,10_1,2012-01-13,43274.722656
2,10_1,2012-01-20,41739.539062
3,10_1,2012-01-27,40314.335938
4,10_1,2012-02-03,39252.929688
...,...,...,...
198115,9_98,2013-01-18,45.384136
198116,9_98,2013-01-25,49.578171
198117,9_98,2013-02-01,54.116993
198118,9_98,2013-02-08,58.802185


In [None]:
# Keep only forecasts that have a matching validation row, and weight holiday weeks 5x.
resvalid_df = (
    valid_forecast_df
    .merge(valid_df, on=['unique_id', 'ds'], how='inner')
    .assign(weight=lambda d: np.where(d['IsHoliday'], 5, 1))
)

In [None]:
# Holiday-weighted MAE of the TFT forecasts on the validation window.
valid_wmae = get_wmae(
    resvalid_df['y'],
    resvalid_df['TFT'],
    resvalid_df['weight'],
)
# end="\n\n" reproduces the trailing blank line of the original print() call.
print(f"valid wmae: {valid_wmae:.2f}", end="\n\n")

valid wmae: 2555.75



# MLFLOW TRACKING

In [None]:
# !pip install mlflow==2.2.2 -q

In [None]:
# !pip install dagshub -q

In [None]:
# import dagshub
# import mlflow

# dagshub.init(repo_owner='gkuch22', repo_name='ml-final', mlflow=True)

Output()



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=343dc09a-312a-428b-9f6c-8c75877026d1&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=17a6affe84581ee376ac0ce68e329f0daaf0bd0b5858e0ed8906323f79019e51




In [None]:
# experiment_name = "TFT_train"
# run_name = "hyperparameter_tuning4"

In [None]:
# import mlflow

# mlflow.set_experiment(experiment_name)

# with mlflow.start_run(run_name=run_name):

#     mlflow.log_param("model_type", "TFT")
#     mlflow.log_param("h", H)
#     mlflow.log_param("input_size", INPUT_SIZE)
#     mlflow.log_param("max_steps", MAX_STEPS)
#     mlflow.log_param("learning_rate", LEARNING_RATE)
#     mlflow.log_param("batch_size", BATCH_SIZE)
#     mlflow.log_param("dropout", DROPOUT)

#     # mlflow.log_metric("train_wmae", train_wmae)
#     mlflow.log_metric("valid_wmae", valid_wmae)

#     # mlflow.sklearn.log_model(pipeline, "model")
