In [None]:
!pip install pycaret

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

In [None]:
# Load the data
data = pd.read_csv('/content/train.csv')
test_data = pd.read_csv('/content/test.csv')

In [None]:


# Define the start date
start_date = pd.to_datetime('2010-01-01')

# Convert 'id' to datetime in the train set
data['date'] = pd.to_datetime(data['id'], origin=start_date, unit='D')

# Convert 'id' to datetime in the test set
test_data['date'] = pd.to_datetime(test_data['id'], origin=start_date, unit='D')


In [None]:
data.drop(columns=['id'], inplace =True)
data

Unnamed: 0,price,date
0,10.383,2010-01-02
1,6.163,2010-01-03
2,8.812,2010-01-04
3,7.994,2010-01-05
4,6.910,2010-01-06
...,...,...
4995,0.683,2023-09-06
4996,0.679,2023-09-07
4997,0.697,2023-09-08
4998,0.687,2023-09-09


In [None]:
# create 12 month moving average
data['MA12'] = data['price'].rolling(12).mean()

# plot the data and MA
import plotly.express as px
def plot_line_graph(data, x_col, y_cols, template='plotly_dark'):
    fig = px.line(data, x=x_col, y=y_cols, template=template)
    fig.show()

In [None]:
plot_line_graph(data, "date", ["price","MA12"])

In [None]:
# extract month and year from dates**
data['day'] = [i.day for i in data['date']]
data['Month'] = [i.month for i in data['date']]
data['Year'] = [i.year for i in data['date']]

# create a sequence of numbers
data['Series'] = np.arange(1,len(data)+1)

In [None]:
##Feature Engineering

# 7 day moving average
data['7_day_MA'] = data['price'].rolling(window=7).mean()

# 30 day moving average
data['30_day_MA'] = data['price'].rolling(window=30).mean()

# 7 day standard deviation
data['7_day_std'] = data['price'].rolling(window=7).std()

# 30 day standard deviation
data['30_day_std'] = data['price'].rolling(window=30).std()

delta = data['price'].diff()
up = delta.clip(lower=0)
down = -1*delta.clip(upper=0)
ema_up = up.ewm(com=13, adjust=False).mean()
ema_down = down.ewm(com=13, adjust=False).mean()
rs = ema_up/ema_down

data['RSI'] = 100 - (100/(1 + rs))

data['price_change'] = data['price'].diff()
data['price_pct_change'] = data['price'].pct_change()

In [None]:
data.dropna(inplace = True)
data

Unnamed: 0,price,date,Month,Series,7_day_MA,30_day_MA,7_day_std,30_day_std,RSI,price_change,price_pct_change,day,Year
58,2.884,2010-03-01,3,30,2.854286,3.255133,0.104589,0.524235,49.362757,-0.031,-0.010635,1,2010
59,2.984,2010-03-02,3,31,2.897000,3.213967,0.082694,0.493526,52.550524,0.100,0.034674,2,2010
60,3.090,2010-03-03,3,32,2.944857,3.170900,0.083748,0.441740,55.731784,0.106,0.035523,3,2010
61,3.052,2010-03-04,3,33,2.953000,3.134833,0.091819,0.402854,54.325623,-0.038,-0.012298,4,2010
62,2.966,2010-03-05,3,34,2.970857,3.097067,0.077435,0.360199,51.178464,-0.086,-0.028178,5,2010
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,0.683,2023-09-06,9,4967,0.682000,0.607467,0.006481,0.084962,60.105973,0.008,0.011852,6,2023
4996,0.679,2023-09-07,9,4968,0.681286,0.613067,0.006499,0.083914,59.043226,-0.004,-0.005857,7,2023
4997,0.697,2023-09-08,9,4969,0.682571,0.619467,0.008600,0.082701,62.275670,0.018,0.026510,8,2023
4998,0.687,2023-09-09,9,4970,0.684857,0.625567,0.006986,0.080613,59.467662,-0.010,-0.014347,9,2023


In [None]:
data.drop(columns=['MA12','Year'],inplace = True)

KeyError: ignored

In [None]:
data

Unnamed: 0,price,date,Month,Series,7_day_MA,30_day_MA,7_day_std,30_day_std,RSI,price_change,price_pct_change,day,Year
58,2.884,2010-03-01,3,30,2.854286,3.255133,0.104589,0.524235,49.362757,-0.031,-0.010635,1,2010
59,2.984,2010-03-02,3,31,2.897000,3.213967,0.082694,0.493526,52.550524,0.100,0.034674,2,2010
60,3.090,2010-03-03,3,32,2.944857,3.170900,0.083748,0.441740,55.731784,0.106,0.035523,3,2010
61,3.052,2010-03-04,3,33,2.953000,3.134833,0.091819,0.402854,54.325623,-0.038,-0.012298,4,2010
62,2.966,2010-03-05,3,34,2.970857,3.097067,0.077435,0.360199,51.178464,-0.086,-0.028178,5,2010
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,0.683,2023-09-06,9,4967,0.682000,0.607467,0.006481,0.084962,60.105973,0.008,0.011852,6,2023
4996,0.679,2023-09-07,9,4968,0.681286,0.613067,0.006499,0.083914,59.043226,-0.004,-0.005857,7,2023
4997,0.697,2023-09-08,9,4969,0.682571,0.619467,0.008600,0.082701,62.275670,0.018,0.026510,8,2023
4998,0.687,2023-09-09,9,4970,0.684857,0.625567,0.006986,0.080613,59.467662,-0.010,-0.014347,9,2023


In [None]:
# import pycaret time series and init setup
from pycaret.time_series import *
s = setup(data,target='price', fh = 365, session_id = 512)

Unnamed: 0,Description,Value
0,session_id,512
1,Target,price
2,Approach,Univariate
3,Exogenous Variables,Present
4,Original data shape,"(4942, 13)"
5,Transformed data shape,"(4942, 13)"
6,Transformed train set shape,"(4577, 13)"
7,Transformed test set shape,"(365, 13)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


In [None]:
# check statistical tests on original data
check_stats()

Unnamed: 0,Test,Test Name,Data,Property,Setting,Value
0,Summary,Statistics,Transformed,Length,,4942.0
1,Summary,Statistics,Transformed,# Missing Values,,0.0
2,Summary,Statistics,Transformed,Mean,,2.071337
3,Summary,Statistics,Transformed,Median,,1.986
4,Summary,Statistics,Transformed,Standard Deviation,,1.251487
5,Summary,Statistics,Transformed,Variance,,1.566219
6,Summary,Statistics,Transformed,Kurtosis,,0.235222
7,Summary,Statistics,Transformed,Skewness,,0.817985
8,Summary,Statistics,Transformed,# Distinct Values,,2776.0
9,White Noise,Ljung-Box,Transformed,Test Statictic,"{'alpha': 0.05, 'K': 24}",111102.963819


In [None]:
models()

Unnamed: 0_level_0,Name,Reference,Turbo
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
naive,Naive Forecaster,sktime.forecasting.naive.NaiveForecaster,True
grand_means,Grand Means Forecaster,sktime.forecasting.naive.NaiveForecaster,True
snaive,Seasonal Naive Forecaster,sktime.forecasting.naive.NaiveForecaster,True
arima,ARIMA,sktime.forecasting.arima.ARIMA,True
auto_arima,Auto ARIMA,sktime.forecasting.arima.AutoARIMA,True
stlf,STLF,sktime.forecasting.trend.STLForecaster,True
croston,Croston,sktime.forecasting.croston.Croston,True
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,pycaret.containers.models.time_series.BaseCdsD...,True
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,pycaret.containers.models.time_series.BaseCdsD...,True
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,pycaret.containers.models.time_series.BaseCdsD...,True


In [None]:
# compare baseline models
best = compare_models()

Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
stlf,STLF,0.7753,0.6672,0.2907,0.3722,0.2498,0.2005,-0.2746,0.2
croston,Croston,0.786,0.6779,0.2942,0.3776,0.2563,0.2052,-0.3596,0.0467
snaive,Seasonal Naive Forecaster,0.843,0.7252,0.3146,0.4027,0.2786,0.2221,-0.7707,0.0667
naive,Naive Forecaster,0.8879,0.7588,0.3324,0.4228,0.2832,0.2214,-0.6462,0.23
grand_means,Grand Means Forecaster,2.6539,1.9028,0.9761,1.0417,0.9351,0.567,-24.9202,0.07


Processing:   0%|          | 0/101 [00:00<?, ?it/s]

In [None]:
# plot forecast for 952 days in future
plot_model(best, plot = 'forecast', data_kwargs = {'fh' : 1317})

In [None]:
# predict on test set
holdout_pred = predict_model(best)

Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,STLF,0.3378,0.296,0.1627,0.202,0.1573,0.1654,0.2977


In [None]:
# plot forecast for 952 days in future
plot_model(best, plot = 'forecast', data_kwargs = {'fh' : 1317})

In [None]:
# generate forecast period in future
result = predict_model(tuned, fh = 1317)

ValueError: ignored

In [None]:
# save pipeline
save_model(tuned, 'my_tuned_pipeline')

Transformation Pipeline and Model Successfully Saved


(ForecastingPipeline(steps=[('forecaster',
                             TransformedTargetForecaster(steps=[('model',
                                                                 NaiveForecaster(sp=60))]))]),
 'my_tuned_pipeline.pkl')

In [None]:
# load pipeline
loaded_best_pipeline = load_model('my_tuned_pipeline')
loaded_best_pipeline

Transformation Pipeline and Model Successfully Loaded


In [None]:
result

Unnamed: 0,y_pred
4635,1.045
4636,1.059
4637,1.058
4638,1.069
4639,1.046
...,...
5947,0.930
5948,0.916
5949,0.901
5950,0.888


In [None]:
result.to_csv('result_snaive_no-tuned.csv',index = False)