In [91]:
import pandas as pd
import numpy as np
from sklearn.metrics import(
    mean_squared_error,
    mean_absolute_error,
    r2_score
)
from timeit import default_timer as timer
from sklearn.model_selection import ParameterGrid
from sklearn.base import BaseEstimator, RegressorMixin
from HoltRegressor import HoltWintersTripleExponentialSmoothing
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.model_selection import GridSearchCV
from sklearn.utils.validation import check_array, check_is_fitted

#For Model Export
import joblib

In [76]:
df = pd.read_csv("FB.csv")
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-12-08,76.180000,77.250000,75.400002,76.519997,76.519997,25733900
1,2014-12-09,75.199997,76.930000,74.779999,76.839996,76.839996,25358600
2,2014-12-10,76.650002,77.550003,76.070000,76.180000,76.180000,32210500
3,2014-12-11,76.519997,78.519997,76.480003,77.730003,77.730003,33462100
4,2014-12-12,77.160004,78.879997,77.019997,77.830002,77.830002,28091600
...,...,...,...,...,...,...,...
1254,2019-12-02,202.130005,202.179993,198.050003,199.699997,199.699997,11503400
1255,2019-12-03,197.600006,198.929993,195.080002,198.820007,198.820007,11595300
1256,2019-12-04,200.000000,200.029999,198.050003,198.710007,198.710007,8456300
1257,2019-12-05,199.860001,201.289993,198.210007,199.360001,199.360001,9740400


## Feature Engineering

In [77]:
df["Date"] = pd.to_datetime(df["Date"])
df["MA_5"] = df["Close"].rolling(5).mean()
df["MA_50"] = df["Close"].rolling(50).mean()
df["MA_Spread"] = df["MA_5"] - df["MA_50"]
df["MA_Spread_Z"] = (df['MA_Spread'] - df['MA_Spread'].rolling(20).mean()) / df['MA_Spread'].rolling(20).std()

df['MA_20'] = df['Close'].rolling(20).mean()
df['MA_Diff'] = df['Close'] - df['MA_20']
df['MA_Diff_Pct'] = df['MA_Diff'] / df['MA_20']
df['Close_detrended'] = df['Close'] - df['Close'].rolling(20).mean()
df['Norm_Close'] = df['Close_detrended'] / df['Close'].rolling(20).std()


df["Return"] = df["Close"].pct_change()
df["Z_score"] = (df["Close"] - df["Close"].rolling(20).mean()) / df["Close"].rolling(20).std()
df["mean_reversion_signal"] = -df["Z_score"]  # negative zscore = buy signal
df["rolling_mean"] = df["Close"].rolling(20).mean()
df["rolling_std"] = df["Close"].rolling(20).std()

## Model Metrics

In [16]:
def modelEvaluation(y_true, y_pred):
    def mean_absolute_percentage_error(y_true, y_pred):
        y_true, y_pred = np.array(y_true), np.array(y_pred)
        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    print(f"Model Evaluation Results: \nMean Squared Error: {round(mean_squared_error(y_true, y_pred), 2)}\nMean Absolute Error: {round(mean_absolute_error(y_true, y_pred), 2)}\nR2 Score: {round(r2_score(y_true, y_pred), 2)}")

## Parameters To be used for Grid Search

In [85]:
param_grid = {
    "trend": ["add", "mul", None],                 # None for no trend (good baseline)
    "seasonal": ["add", "mul", None],              # None for deseasonalized data
    "seasonal_periods": [3, 5, 6, 7, 10, 12],      # Covers weekly & monthly cycles
    "damped": [True, False],                       # Dampening stabilizes trends
}

In [86]:
param = list(ParameterGrid(param_grid))

## Training And Testing

In [81]:
# Assume df has 'Date' and 'Close'
df = df.sort_values('Date')
df = df.set_index('Date')
y = df['Close']

# Split (e.g. 80% train, 20% test)
split = int(len(df) * 0.8)
train, test = df.iloc[:split], df.iloc[split:]

y_train, y_test = train["Close"], test["Close"]

KeyError: "None of ['Date'] are in the columns"

## Model Testing

In [87]:
model = HoltWintersTripleExponentialSmoothing()
grid = GridSearchCV(model, param_grid, n_jobs=-1, cv=3)

grid.fit(np.arange(len(train)), y_train)

             nan             nan -6.59393530e+05             nan
             nan -6.58780826e+05             nan             nan
 -6.56529527e+05             nan             nan -6.54736451e+05
             nan             nan             nan             nan
             nan             nan             nan             nan
             nan             nan             nan             nan
             nan             nan             nan             nan
             nan             nan             nan             nan
 -6.65396319e+05             nan             nan -6.65396319e+05
             nan             nan -6.65396319e+05             nan
             nan -6.65396319e+05             nan             nan
 -6.65396319e+05             nan             nan -6.65396319e+05
             nan             nan -8.09452837e+09             nan
 -6.13814544e+05 -1.48908305e+08             nan -6.36095059e+05
 -1.15272579e+06             nan -6.33172335e+05 -6.14133020e+07
             nan -6.35755

GridSearchCV(cv=3, estimator=HoltWintersTripleExponentialSmoothing(), n_jobs=-1,
             param_grid={'damped': [True, False],
                         'seasonal': ['add', 'mul', None],
                         'seasonal_periods': [3, 5, 6, 7, 10, 12],
                         'trend': ['add', 'mul', None]})

In [88]:
#getting the best parameters
print(f"The Best model Parameters: {grid.best_params_}")

The Best model Parameters: {'damped': False, 'seasonal': 'add', 'seasonal_periods': 3, 'trend': None}


## Actual Model Creation

In [89]:
model_1 = HoltWintersTripleExponentialSmoothing(seasonal="add", seasonal_periods=3, trend=None, damped=False)
model_1.fit(y_train)

y_pred = model_1.predict(y_test)

modelEvaluation(y_test, y_pred)

Model Evaluation Results: 
Mean Squared Error: 1780.34
Mean Absolute Error: 38.61
R2 Score: -4.23




In [93]:
#model export
with open("HoltModel.pkl", "wb") as model:
    joblib.dump(model_1, model)