In [2]:
from prophet import Prophet
from prophet.plot import plot_plotly
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import math

import plotly.express as px

In [3]:
data = pd.read_csv('/Users/Selma/dev/STAT3007-timeseries_forecasting/data/dataeqnr_max_daily.csv')
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Scaled_close
0,2001-06-18 00:00:00-04:00,2.507967,2.514673,2.504614,2.504614,7189500,0.0,0.0,-0.973381
1,2001-06-19 00:00:00-04:00,2.531437,2.591789,2.531437,2.554907,1410700,0.0,0.0,-0.970356
2,2001-06-20 00:00:00-04:00,2.575024,2.578377,2.551554,2.561612,550400,0.0,0.0,-0.969953
3,2001-06-21 00:00:00-04:00,2.538142,2.548201,2.531436,2.538142,643600,0.0,0.0,-0.971364
4,2001-06-22 00:00:00-04:00,2.521378,2.531436,2.507966,2.518025,822600,0.0,0.0,-0.972574


In [4]:
# Converting date column from str to timestamp
# Remove a specific substring from the end
def remove_suffix(s, suffix='00:00:00-04:00'):
    if s.endswith(suffix):
        return s[:-len(suffix)]
    return s

data['Date'] = data['Date'].apply(lambda x: remove_suffix(x, '00:00:00-04:00'))
data['Date'] = data['Date'].apply(lambda x: remove_suffix(x, '00:00:00-05:00'))

In [5]:
data['Date'] = pd.to_datetime(data['Date'])
data = data.drop(columns=["Close"])
data.head()

Unnamed: 0,Date,Open,High,Low,Volume,Dividends,Stock Splits,Scaled_close
0,2001-06-18,2.507967,2.514673,2.504614,7189500,0.0,0.0,-0.973381
1,2001-06-19,2.531437,2.591789,2.531437,1410700,0.0,0.0,-0.970356
2,2001-06-20,2.575024,2.578377,2.551554,550400,0.0,0.0,-0.969953
3,2001-06-21,2.538142,2.548201,2.531436,643600,0.0,0.0,-0.971364
4,2001-06-22,2.521378,2.531436,2.507966,822600,0.0,0.0,-0.972574


In [6]:
data=data[['Date','Scaled_close']]
data.head()

data.columns=['ds','y']
data.head()

Unnamed: 0,ds,y
0,2001-06-18,-0.973381
1,2001-06-19,-0.970356
2,2001-06-20,-0.969953
3,2001-06-21,-0.971364
4,2001-06-22,-0.972574


In [7]:
fig = px.line(data, x=data['ds'], y=data['y'],labels={'ds':'Date','y':'Scaled Close Stock'})
fig.update_traces(marker_line_width=2, opacity=0.6)
fig.update_layout(title_text='Stock close price chart', plot_bgcolor='white', font_size=15, font_color='black')
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

In [8]:
# Split data into training and test sets
train_size = int(0.8 * len(data))
train_df = data[:train_size]
test_df = data[train_size:]

In [9]:
prophet= Prophet(daily_seasonality=True)
prophet.fit(train_df)

12:04:00 - cmdstanpy - INFO - Chain [1] start processing
12:04:02 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x12c453200>

In [10]:
future_dates=prophet.make_future_dataframe(periods=len(test_df))
predictions=prophet.predict(future_dates)
plot_plotly(prophet, predictions)

In [11]:
metric_df = predictions.set_index('ds')[['yhat']].join(data.set_index('ds').y).reset_index()

In [12]:
metric_df.tail()

Unnamed: 0,ds,yhat,y
5741,2022-11-13,0.363356,
5742,2022-11-14,0.35543,0.667566
5743,2022-11-15,0.355286,0.740945
5744,2022-11-16,0.354742,0.684619
5745,2022-11-17,0.355336,0.679452


In [13]:
metric_df.dropna(inplace=True)

In [14]:
r2 = r2_score(metric_df.y, metric_df.yhat)


In [15]:
mse = mean_squared_error(metric_df.y, metric_df.yhat)

In [16]:
mae = mean_absolute_error(metric_df.y, metric_df.yhat)

In [17]:
print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'RMSE: {math.sqrt(mse)}')
print(f'R2 Score: {r2}')

MAE: 0.0997995772214288
MSE: 0.025392619512740522
RMSE: 0.1593506181749557
R2 Score: 0.7804334792064539
