# 1. Import libraries

In [None]:
# data manipulation
import pandas as pd

# pipeline
from sklearn.pipeline import Pipeline

# transformers
from sklearn.impute import SimpleImputer

# model
from sklearn.tree import DecisionTreeRegressor

# save model
from joblib import dump

import warnings
# NOTE(review): this silences every warning for the rest of the run, including
# deprecation warnings emitted by the libraries used below — confirm that a
# blanket 'ignore' is really intended.
warnings.filterwarnings('ignore')

# 2. Load data

In [None]:
# Read the prepared data and parse the 'Date' column (ISO 'YYYY-MM-DD' strings).
dataset = pd.read_csv('../data/prepared_data.csv')
dataset['Date'] = pd.to_datetime(dataset['Date'], format='%Y-%m-%d')

# Feature: mean of 'Volume' over the 7 preceding rows. Shifting by one row
# first keeps the current row's own 'Volume' out of its feature value.
# NOTE(review): assumes the rows are already in chronological order — confirm.
previous_volume = dataset['Volume'].shift(1)
dataset['Volume_ma_7'] = previous_volume.rolling(7).mean()

# Drop every row that contains a missing value in any column (this includes
# the first rows, which have no complete 7-row window), then renumber the index.
dataset = dataset.dropna().reset_index(drop=True)

dataset

In [None]:
# Print a summary of the prepared frame as a quick sanity check.
dataset.info()

# 3. Train Test Split

In [None]:
# First date (inclusive) of the hold-out period: rows dated strictly before it
# form the training set, rows on or after it form the test set.
# `pd.datetime` was deprecated in pandas 1.0 and removed in pandas 2.0, so the
# cut-off is built with pd.to_datetime, the same way the 'Date' column is parsed.
first_test_date = pd.to_datetime('2020-01-01', format='%Y-%m-%d')

# The single model input is 'Volume_ma_7'; the target is 'Volume'.
# Double brackets keep X as a one-column DataFrame (2-D), y as a Series.
train_dataset = dataset.loc[dataset['Date'] < first_test_date, :].copy()
X_train, y_train = train_dataset[['Volume_ma_7']].copy(), train_dataset['Volume'].copy()

test_dataset = dataset.loc[dataset['Date'] >= first_test_date, :].copy()
X_test, y_test = test_dataset[['Volume_ma_7']].copy(), test_dataset['Volume'].copy()

# 4. Feature Engineering

In [None]:
# Preprocessing stage: a single step that fills missing feature values using
# SimpleImputer's default settings.
feature_engineering = Pipeline(steps=[('missing_imputer', SimpleImputer())])

# 5. Modelling

In [None]:
# Full model: the preprocessing pipeline followed by a decision-tree regressor
# limited to depth 7 with at least 20 training samples per leaf.
model_pipeline = Pipeline(
    steps=[
        ('feature_engineering', feature_engineering),
        ('model', DecisionTreeRegressor(max_depth=7, min_samples_leaf=20)),
    ],
)

In [None]:
# Fit the preprocessing step and the regressor on the training split only.
model_pipeline.fit(X_train, y_train)

In [None]:
# Forecast 'Volume' for the test period. The result is not assigned to a
# variable here, so it is not reused by later cells.
model_pipeline.predict(X_test)

# 6. Save Output

In [None]:
# Persist the test features and targets as CSV files without the index column.
# NOTE(review): the target directory is presumably read by the Dash app — confirm.
X_test.to_csv(path_or_buf='../Dash_app/assets/X_test.csv', index=False)
y_test.to_csv(path_or_buf='../Dash_app/assets/y_test.csv', index=False)

# Serialise the fitted pipeline (preprocessing + regressor) with joblib.
dump(model_pipeline, '../Dash_app/assets/fitted_model.pkl')