In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import plotly.express as px
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import yfinance as yf
from sklearn import metrics

In [2]:
# Get data from yahoo finance
df = yf.download('GLD', start='2023-01-01')
df.tail()

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-08-17,176.529999,176.559998,174.929993,175.300003,175.300003,7937700
2023-08-18,175.830002,176.059998,175.100006,175.330002,175.330002,6251000
2023-08-21,175.899994,175.940002,174.960007,175.809998,175.809998,4206400
2023-08-22,175.830002,176.25,175.309998,176.100006,176.100006,5892200
2023-08-23,177.320007,178.070007,177.074997,177.835007,177.835007,2262936


In [3]:
# Info
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 161 entries, 2023-01-03 to 2023-08-23
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       161 non-null    float64
 1   High       161 non-null    float64
 2   Low        161 non-null    float64
 3   Close      161 non-null    float64
 4   Adj Close  161 non-null    float64
 5   Volume     161 non-null    int64  
dtypes: float64(5), int64(1)
memory usage: 8.8 KB


In [4]:
# plot
px.line(df, x=df.index, y='Close')

In [5]:
# Log Transform
df['Log-Close'] = np.log(df['Close'])

In [6]:
# plot
px.line(df, x=df.index, y='Log-Close')

In [7]:
# Split data into train and test
n_test = 20
train = df.iloc[: -n_test]
test = df.iloc[-n_test:]

In [8]:
# Set frequency to 'Business day' not working
# df.index.freq = 'B'

In [9]:
# Instantiation
model = ExponentialSmoothing(train['Log-Close'], trend='add', seasonal=None)


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.



In [10]:
# Fit the model
res = model.fit()

In [11]:
# Set index for 'Holt-Winters' train and test Column
train_idx = df.index <= train.index[-1]
test_idx = df.index > train.index[-1]

In [12]:
# Calculate Train value and Forecast value for Holt-Winters Columns
df.loc[train_idx, 'Holt-Winters-Train'] = res.fittedvalues
df.loc[test_idx, 'Holt-Winters-Test'] = res.forecast(n_test).to_numpy()


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.



In [13]:
# Plot
px.line(df, x=df.index, y=['Log-Close', 'Holt-Winters-Train', 'Holt-Winters-Test'])

In [14]:
y_true_Holt = df.iloc[-n_test:]['Log-Close']
y_pred_Holt = res.forecast(n_test)


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.



## Compare Holt-Winters Model with Naive Model

In [15]:
# close prediction
df['Log_Close_Prediction'] = df['Log-Close'].shift(1)
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Log-Close,Holt-Winters-Train,Holt-Winters-Test,Log_Close_Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-01-03,171.320007,172.089996,170.020004,171.059998,171.059998,7992900,5.142014,5.142296,,
2023-01-04,172.860001,173.089996,171.850006,172.669998,172.669998,7574600,5.151382,5.142514,,5.142014
2023-01-05,170.820007,171.0,169.789993,170.520004,170.520004,4692600,5.138853,5.151575,,5.151382
2023-01-06,171.759995,173.960007,171.229996,173.710007,173.710007,7889000,5.157387,5.13977,,5.138853
2023-01-09,174.509995,175.039993,173.880005,174.100006,174.100006,5134500,5.15963,5.157286,,5.157387


In [16]:
# Plot
px.line(df, x=df.index, y=['Log-Close', 'Log_Close_Prediction'])

In [17]:
# Compare Holt-Winters Model with Naive Model
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(rows=2, cols=1, start_cell="top-left", subplot_titles=("Holt-Winters", "Naive"))

fig.add_trace(go.Scatter(x=df.index, y=df['Log-Close'], name='Log-Close'), row=1, col=1)
fig.add_trace(go.Scatter(x=df.index, y=df['Holt-Winters-Train'], name='Holt-Winters Train'), row=1, col=1)
fig.add_trace(go.Scatter(x=df.index, y=df['Holt-Winters-Test'], name='Holt-Winters Test'), row=1, col=1)

# fig.update_layout(title='Log-Close, Holt-Winters Train and Holt-Winters Test',
#                   xaxis_title='Date',
#                   yaxis_title='Value')

fig.add_trace(go.Scatter(x=df.index, y=df['Log-Close'], name='Log-Close'), row=2, col=1)
fig.add_trace(go.Scatter(x=df.index, y=df['Log_Close_Prediction'], name='Log_Close_Prediction'), row=2, col=1)

# fig.update_layout(title='Log-Close, Log_Close_Prediction',
#                   xaxis_title='Date',
#                   yaxis_title='Value')

fig.show()

In [18]:
# assign values to y_true and y_pred for Naive Model
# df.iloc[1:] ---> Because first row is NaN
y_true_Naive = df.iloc[1:]['Log-Close']
y_pred_Naive = df.iloc[1:]['Log_Close_Prediction']

In [19]:
# The Mean absolute percentage error(MAPE)
MAPE_Naive = metrics.mean_absolute_percentage_error(y_true_Naive, y_pred_Naive)
MAPE_Holt = metrics.mean_absolute_percentage_error(y_true_Holt, y_pred_Holt)
print('Naive MAPE: ', MAPE_Naive)
print('Holt MAPE: ', MAPE_Holt)

Naive MAPE:  0.0012385281776511952
Holt MAPE:  0.006350752359897781


In [20]:
# The R^2
R2_Naive = metrics.r2_score(y_true_Naive, y_pred_Naive)
R2_Holt = metrics.r2_score(y_true_Holt, y_pred_Holt)
print('Naive R2: ', R2_Naive)
print('Holt R2: ', R2_Holt)

Naive R2:  0.9115816456635506
Holt R2:  -8.179154271014683
