In [55]:
# Mount Google Drive inside the Colab runtime at the standard mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [56]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [57]:
def split_data(df, choose):
  """Split `df`, in order, into consecutive train, test and validation slices.

  No shuffling is done: the leading rows form the training slice, the next
  rows the test slice, and everything that remains the validation slice.
  The three sizes are also printed.

  Args:
    df: Any object supporting len() and slicing (a DataFrame in this notebook).
    choose: Split preset (train:test:val) -- 1 for 7:2:1, 2 for 6:3:1,
      3 for 5:3:2.

  Returns:
    Tuple (train_data, test_data, val_data).

  Raises:
    ValueError: If `choose` is not one of the supported presets. Previously
      an unknown preset fell through every branch and failed later with an
      UnboundLocalError.
  """
  # (train, test) fractions per preset; validation takes the remainder so the
  # three sizes always add up to len(df).
  split_ratios = {
    1: (0.7, 0.2),  # 7:2:1
    2: (0.6, 0.3),  # 6:3:1
    3: (0.5, 0.3),  # 5:3:2
  }
  try:
    train_ratio, test_ratio = split_ratios[choose]
  except (KeyError, TypeError):
    raise ValueError(
      "choose must be one of %s, got %r" % (sorted(split_ratios), choose)
    ) from None

  total = len(df)
  train_size = int(total * train_ratio)
  test_size = int(total * test_ratio)
  val_size = total - train_size - test_size

  train_data = df[:train_size]
  test_data = df[train_size:train_size + test_size]
  val_data = df[train_size + test_size:]

  print("Train: ", train_size)
  print("Test: ", test_size)
  print("Val: ", val_size)

  return train_data, test_data, val_data
  

In [58]:
# Read the TSLA price CSV from the mounted Drive, asking pandas to parse the
# 'Date' column as dates, then display the frame.
TSLA_CSV_PATH = '/content/drive/MyDrive/IS403.N22/Project/dataset/TSLA_Stock.csv'
df = pd.read_csv(TSLA_CSV_PATH, parse_dates=['Date'])
df

Unnamed: 0,Date,Price
0,2018-01-02,21.37
1,2018-01-03,21.15
2,2018-01-04,20.97
3,2018-01-05,21.11
4,2018-01-08,22.43
...,...,...
1330,2023-04-17,187.04
1331,2023-04-18,184.31
1332,2023-04-19,180.59
1333,2023-04-20,162.99


In [59]:
# Sum the remaining columns over rows that share a Date; the grouped 'Date'
# values become the index of the result.
rows_by_date = df.groupby(by='Date')
df = rows_by_date.sum()
df

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2018-01-02,21.37
2018-01-03,21.15
2018-01-04,20.97
2018-01-05,21.11
2018-01-08,22.43
...,...
2023-04-17,187.04
2023-04-18,184.31
2023-04-19,180.59
2023-04-20,162.99


In [None]:
# Re-sample the daily series into monthly buckets (daily -> monthly).
# Each bucket is aggregated with mean(), not sum(): a sum of daily prices
# scales with the number of trading days in the month, so a partially observed
# month (the data stops on 2023-04-20) would show up as an artificial drop.
# 'M' is the month-end alias; newer pandas releases spell it 'ME'.
df = df.resample(rule='M').mean()

df[['Price']].plot(title='Price of Close', figsize=(10,6))

<Axes: title={'center': 'Price of Close'}, xlabel='Date'>

In [None]:
# Preset 1 is the 7:2:1 train/test/validation split.
train_data, test_data, val_data = split_data(df, choose=1)

In [None]:
# Additive seasonal decomposition of the Price series with a period of 12.
price_series = df['Price']
decompose_result = seasonal_decompose(price_series, model='additive', period=12)
decompose_result.plot()
plt.show()

In [None]:
# Holt-Winters exponential smoothing fitted on the training prices only:
# additive trend, multiplicative seasonality, 12 observations per season.
hw_options = dict(trend='add', seasonal='mul', seasonal_periods=12)
model = ExponentialSmoothing(train_data['Price'], **hw_options)
model_fit = model.fit()

In [None]:
def rmse_mape(true, predict):
  """Score predictions against the true values.

  Args:
    true: Observed values.
    predict: Predicted values aligned with `true`.

  Returns:
    Tuple (rmse, mape): the square root of sklearn's mean squared error, and
    sklearn's mean absolute percentage error (a fraction, not multiplied
    by 100).
  """
  return (
    np.sqrt(mean_squared_error(true, predict)),
    mean_absolute_percentage_error(true, predict),
  )

In [None]:
# Predict over the exact date span of each hold-out slice (first to last index label).
test_start, test_end = test_data.index[0], test_data.index[-1]
test_pred = model_fit.predict(start=test_start, end=test_end)

val_start, val_end = val_data.index[0], val_data.index[-1]
val_pred = model_fit.predict(start=val_start, end=val_end)

In [None]:
# (RMSE, MAPE) of the predictions on the test slice.
rmse_mape(true=test_data['Price'], predict=test_pred)

In [None]:
# (RMSE, MAPE) of the predictions on the validation slice.
rmse_mape(true=val_data['Price'], predict=val_pred)

In [None]:
# Overlay the observed train/test/validation prices with the model's predictions.
train_data['Price'].plot(legend=True, label='Train', figsize=(10,6))

test_data['Price'].plot(legend=True, label='Test')
test_pred.plot(legend=True, label='predict_test')

val_data['Price'].plot(legend=True, label='val')
val_pred.plot(legend=True, label='predict_val')

# Forecast beyond the last validation observation.
# MonthEnd offsets keep the dates on month-end stamps; adding
# DateOffset(months=1) to e.g. 04-30 gives 05-30, which is not on that grid.
# Assumes the index is the month-end index produced by the monthly resample.
forecast_start = val_data.index[-1] + pd.offsets.MonthEnd(1)
forecast_end = forecast_start + pd.offsets.MonthEnd(11)
# start .. start + 11 month-ends inclusive = 12 monthly steps, so the legend
# says 12 months (it used to say '3 months'). The variable name is kept as-is
# in case other cells reference it.
forecast_30months = model_fit.predict(start=forecast_start, end=forecast_end)
forecast_30months.plot(legend=True, label='12 months')