<a href="https://colab.research.google.com/github/bankehsaz/Holt-Winters-vs-Naive/blob/main/10_Holt_Winters_Model_for_Trading_Gold.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import plotly.express as px
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import yfinance as yf
from sklearn import metrics

In [2]:
# Get data from yahoo finance
df = yf.download('GLD', start='2022-01-01')
df.tail()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-06-26,179.050003,179.080002,178.419998,178.509995,178.509995,3222100
2023-06-27,179.070007,179.289993,177.429993,177.690002,177.690002,5036900
2023-06-28,177.229996,177.690002,176.699997,177.279999,177.279999,5866700
2023-06-29,175.830002,177.630005,175.789993,177.089996,177.089996,6862500
2023-06-30,177.690002,178.529999,177.320007,178.270004,178.270004,6420600


In [3]:
# Info
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 375 entries, 2022-01-03 to 2023-06-30
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       375 non-null    float64
 1   High       375 non-null    float64
 2   Low        375 non-null    float64
 3   Close      375 non-null    float64
 4   Adj Close  375 non-null    float64
 5   Volume     375 non-null    int64  
dtypes: float64(5), int64(1)
memory usage: 20.5 KB


In [4]:
# plot
px.line(df, x=df.index, y='Close')

In [5]:
# Log Transform
df['Log-Close'] = np.log(df['Close'])

In [6]:
# plot
px.line(df, x=df.index, y='Log-Close')

In [7]:
# Split data into train and test
n_test = 20
train = df.iloc[: -n_test]
test = df.iloc[-n_test:]

In [8]:
# Set frequency to 'Business day' not working
# df.index.freq = 'B'

In [9]:
# Instantiation
model = ExponentialSmoothing(train['Log-Close'], trend='add', seasonal=None)


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.



In [10]:
# Fit the model
res = model.fit()

In [11]:
# Set index for 'Holt-Winters' train and test Column
train_idx = df.index <= train.index[-1]
test_idx = df.index > train.index[-1]

In [12]:
# Calculate Train value and Forecast value for Holt-Winters Columns
df.loc[train_idx, 'Holt-Winters-Train'] = res.fittedvalues
df.loc[test_idx, 'Holt-Winters-Test'] = res.forecast(n_test).to_numpy()


No supported index is available. Prediction results will be given with an integer index beginning at `start`.



In [13]:
# Plot
px.line(df, x=df.index, y=['Log-Close', 'Holt-Winters-Train', 'Holt-Winters-Test'])

In [14]:
y_true_Holt = df.iloc[-n_test:]['Log-Close']
y_pred_Holt = res.forecast(n_test)


No supported index is available. Prediction results will be given with an integer index beginning at `start`.



## Compare Holt-Winters Model with Naive Model

In [15]:
# close prediction
df['Log_Close_Prediction'] = df['Log-Close'].shift(1)
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Log-Close,Holt-Winters-Train,Holt-Winters-Test,Log_Close_Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-01-03,168.860001,169.009995,168.0,168.330002,168.330002,9014400,5.125926,5.125901,,
2022-01-04,168.899994,169.720001,168.729996,169.570007,169.570007,6965600,5.133266,5.12617,,5.125926
2022-01-05,170.619995,170.929993,168.899994,169.059998,169.059998,8715600,5.130254,5.13347,,5.133266
2022-01-06,167.160004,167.75,166.860001,166.990005,166.990005,10902700,5.117934,5.130516,,5.130254
2022-01-07,167.369995,168.009995,166.860001,167.75,167.75,8191900,5.122475,5.118248,,5.117934


In [16]:
# Plot
px.line(df, x=df.index, y=['Log-Close', 'Log_Close_Prediction'])

In [17]:
# Compare Holt-Winters Model with Naive Model
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(rows=2, cols=1, start_cell="top-left", subplot_titles=("Holt-Winters", "Naive"))

fig.add_trace(go.Scatter(x=df.index, y=df['Log-Close'], name='Log-Close'), row=1, col=1)
fig.add_trace(go.Scatter(x=df.index, y=df['Holt-Winters-Train'], name='Holt-Winters Train'), row=1, col=1)
fig.add_trace(go.Scatter(x=df.index, y=df['Holt-Winters-Test'], name='Holt-Winters Test'), row=1, col=1)

# fig.update_layout(title='Log-Close, Holt-Winters Train and Holt-Winters Test',
#                   xaxis_title='Date',
#                   yaxis_title='Value')

fig.add_trace(go.Scatter(x=df.index, y=df['Log-Close'], name='Log-Close'), row=2, col=1)
fig.add_trace(go.Scatter(x=df.index, y=df['Log_Close_Prediction'], name='Log_Close_Prediction'), row=2, col=1)

# fig.update_layout(title='Log-Close, Log_Close_Prediction',
#                   xaxis_title='Date',
#                   yaxis_title='Value')

fig.show()

In [18]:
# assign values to y_true and y_pred for Naive Model
# df.iloc[1:] ---> Because first row is NaN
y_true_Naive = df.iloc[1:]['Log-Close']
y_pred_Naive = df.iloc[1:]['Log_Close_Prediction']

In [19]:
# The Mean absolute percentage error(MAPE)
MAPE_Naive = metrics.mean_absolute_percentage_error(y_true_Naive, y_pred_Naive)
MAPE_Holt = metrics.mean_absolute_percentage_error(y_true_Holt, y_pred_Holt)
print('Naive MAPE: ', MAPE_Naive)
print('Holt MAPE: ', MAPE_Holt)

Naive MAPE:  0.0014188653227571403
Holt MAPE:  0.00443518160560495


In [20]:
# The R^2
R2_Naive = metrics.r2_score(y_true_Naive, y_pred_Naive)
R2_Holt = metrics.r2_score(y_true_Holt, y_pred_Holt)
print('Naive R2: ', R2_Naive)
print('Holt R2: ', R2_Holt)

Naive R2:  0.9724038907836284
Holt R2:  -5.33102704330965
