In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import warnings
warnings.filterwarnings("ignore")
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.base import BaseEstimator, TransformerMixin
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
from springstone.data import get_data, create_df_for_prophet, create_train_test
df = get_data('TSLA')
df.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-02-21,6.974,6.974,6.762,6.9,5679000
2012-02-22,6.9,6.944,6.5,6.844,8273000
2012-02-23,6.798,6.994,6.712,6.906,4102000
2012-02-24,6.846,6.904,6.654,6.75,4799500
2012-02-27,6.682,6.8,6.6,6.724,3030000


In [3]:
df_prophet = create_df_for_prophet(df)
df_prophet.head()

Unnamed: 0,ds,y
0,2012-02-21,6.9
1,2012-02-22,6.844
2,2012-02-23,6.906
3,2012-02-24,6.75
4,2012-02-27,6.724


In [4]:
data_train, data_test = create_train_test(df_prophet)
print(data_train.head())
data_test.head()

          ds      y
0 2012-02-21  6.900
1 2012-02-22  6.844
2 2012-02-23  6.906
3 2012-02-24  6.750
4 2012-02-27  6.724


Unnamed: 0,ds,y
2014,2020-02-24,166.757996
2015,2020-02-25,159.981995
2016,2020-02-26,155.759995
2017,2020-02-27,135.800003
2018,2020-02-28,133.598007


In [5]:
from springstone.utils import moving_average, bollinger_bands
df_ = moving_average(df, 'Close', 7,new_columns_only=True)
df_.tail()

Unnamed: 0_level_0,Close_ma7
Date,Unnamed: 1_level_1
2022-02-14,903.567147
2022-02-15,903.440002
2022-02-16,905.732858
2022-02-17,899.211426
2022-02-18,888.49428


In [6]:
class MovingAverageTransformer(BaseEstimator, TransformerMixin):
    """
        Computes the Average price over a specified period or a given column
        Returns a copy of the DataFrame X with only one column: {column}_ma.
    """
    def __init__(self, column='Close',period=7):
        self.column = column
        self.period = period

    def fit(self, X, y=None):
        return self
    
    def transform(self, X, y=None):
        assert isinstance(X, pd.DataFrame)
        X_ = X.copy()
        column_name = f'{self.column}_ma_{str(self.period)}'
        X_[column_name] = moving_average(
            X,
            self.column,
            self.period,
            new_columns_only=True
        )
        return X_[[column_name]]
        

In [7]:
#X_train = df.drop(columns=['Close'])
moving_average_trans = MovingAverageTransformer('Close',20)
close_ma = moving_average_trans.fit_transform(df,None)
close_ma.tail()

Unnamed: 0_level_0,Close_ma_20
Date,Unnamed: 1_level_1
2022-02-14,920.866501
2022-02-15,915.4625
2022-02-16,911.8495
2022-02-17,905.853497
2022-02-18,901.507495


In [8]:
class BollingerBandsTransformer(BaseEstimator, TransformerMixin):
    """
        Computes the Bollinger band over a specified period
        Returns a copy of the DataFrame X with only one column: {column}_bb_{period}_{standard_deviation}.
    """
    def __init__(self, 
                 column='Close',
                 period=7,
                 standard_deviation=2
                ):
        self.column = column
        self.period = period
        self.standard_deviation = standard_deviation

    def fit(self, X, y=None):
        return self
    
    def transform(self, X, y=None):
        assert isinstance(X, pd.DataFrame)
        X_ = X.copy()
        column_name = f'{self.column}_bb_{str(self.period)}_{str(self.standard_deviation)}'
        X_[column_name] = bollinger_bands(
            X,
            self.column,
            self.period,
            self.standard_deviation,
            new_columns_only=True
        )
        return X_[[column_name]]

In [9]:
bb_trans = BollingerBandsTransformer('Close',20,3)
close_bb = bb_trans.fit_transform(df,None)
close_bb.tail()

Unnamed: 0_level_0,Close_bb_20_3
Date,Unnamed: 1_level_1
2022-02-14,1068.417036
2022-02-15,1041.165194
2022-02-16,1024.372656
2022-02-17,1003.536366
2022-02-18,1000.546792


In [10]:
class TimeFeaturesEncoder(BaseEstimator, TransformerMixin):
    """
        Extracts the day of week (dow), the month and the year from a time column.
        Returns a copy of the DataFrame X with only four columns: 'dow', 'month', 'year'.
    """

    def __init__(self):
        !PASS
    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        assert isinstance(X, pd.DataFrame)
        X_ = X.copy()
        X_["dow"] = X_.index.weekday+1
        X_["month"] = X_.index.month
        X_["year"] = X_.index.year
        return X_[['dow', 'month', 'year']]

In [11]:
time_trans = TimeFeaturesEncoder()
time_features = time_trans.fit_transform(df,None)
time_features.tail()

zsh:1: command not found: PASS


Unnamed: 0_level_0,dow,month,year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-02-14,1,2,2022
2022-02-15,2,2,2022
2022-02-16,3,2,2022
2022-02-17,4,2,2022
2022-02-18,5,2,2022


In [12]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-02-21,6.974000,6.974000,6.762000,6.900000,5679000
2012-02-22,6.900000,6.944000,6.500000,6.844000,8273000
2012-02-23,6.798000,6.994000,6.712000,6.906000,4102000
2012-02-24,6.846000,6.904000,6.654000,6.750000,4799500
2012-02-27,6.682000,6.800000,6.600000,6.724000,3030000
...,...,...,...,...,...
2022-02-14,861.570007,898.880005,853.150024,875.760010,22585500
2022-02-15,900.000000,923.000000,893.380005,922.429993,19216500
2022-02-16,914.049988,926.429993,901.210022,923.390015,17098100
2022-02-17,913.260010,918.500000,874.099976,876.349976,18392800


In [13]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer
from sklearn.compose import ColumnTransformer
from sklearn import set_config; set_config(display='diagram')

column='Close'
period=7

ma_pipe = Pipeline([
    ('ma_trans', FunctionTransformer(lambda df: moving_average(df,column,period,new_columns_only=True)))])
ma_pipe
# bb_pipe = Pipeline([
#     ('ma_trans', FunctionTransformer(lambda df: bollinger_bands(df,column,period,new_columns_only=True))),
#     ('ma_stdscaler', StandardScaler())])
time_pipe = Pipeline([
    ('time_enc', TimeFeaturesEncoder()),
    ('ohe', OneHotEncoder(handle_unknown='ignore', sparse=False))
    ])
# time_pipe

preproc_pipe = ColumnTransformer([
    ('ma', ma_pipe, df.columns),
    ('time', time_pipe, df.columns)
], remainder="drop")

# # display preprocessing pipeline
# preproc_pipe

# pipe = Pipeline([
#     ('preproc', preproc_pipe),
#     ('linear_model', LinearRegression())
# ])

# # display the pipeline with model
# pipe

zsh:1: command not found: PASS


In [14]:
ma_transformed = preproc_pipe.fit_transform(df)
pd.DataFrame(ma_transformed)
#TimeFeaturesEncoder.get_feature_names_out = (lambda self, names=None: self.feature_names_out)
#preproc_pipe.get_feature_names_out()

zsh:1: command not found: PASS


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19,20,21,22,23,24,25,26,27,28
0,,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2513,903.567147,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2514,903.440002,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2515,905.732858,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2516,899.211426,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [15]:
from prophet import Prophet

In [16]:
m = Prophet()
m.fit(data_train)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Initial log joint probability = -109.46


<prophet.forecaster.Prophet at 0x7f40de6fd4f0>

    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       4540.77      0.265068       1006.65           1           1      117   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       4561.79     0.0177532       251.929       7.159      0.7159      246   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       4567.25    0.00139966       126.275      0.2801      0.2801      370   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     340       4568.45   0.000831228       231.496   1.047e-05       0.001      511  LS failed, Hessian reset 
     399       4569.37    0.00184802       75.1581      0.3747           1      588   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     483       4569.74    0.00011813       107.135   1.713e-06       0.001      7

In [17]:
future = m.make_future_dataframe(periods=10)
future.tail()

Unnamed: 0,ds
2019,2020-02-27
2020,2020-02-28
2021,2020-02-29
2022,2020-03-01
2023,2020-03-02


In [18]:
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
2019,2020-02-27,75.339183,62.066364,89.455582
2020,2020-02-28,74.600358,60.914687,88.238026
2021,2020-02-29,75.546408,63.110453,90.866001
2022,2020-03-01,75.018603,60.849118,89.931806
2023,2020-03-02,72.925707,58.845944,86.502868


In [19]:
data_train

Unnamed: 0,ds,y
0,2012-02-21,6.900000
1,2012-02-22,6.844000
2,2012-02-23,6.906000
3,2012-02-24,6.750000
4,2012-02-27,6.724000
...,...,...
2009,2020-02-14,160.005997
2010,2020-02-18,171.679993
2011,2020-02-19,183.483994
2012,2020-02-20,179.882004


In [20]:
data_train, data_test = create_train_test(df)

In [45]:
class ProphetWrapper(BaseEstimator):
    def __init__(self, non_business_days):
        super().__init__()
        self.non_business_days = non_business_days
        self.prophet = Prophet(holidays=self.non_business_days)

    def fit(self, X, y=0):
        self.prophet.fit(X)
        return self

In [51]:
from springstone.utils import prophet_preprocessing, prophet_non_business_days
df_non_bd = prophet_non_business_days(data_train)
#df_non_bd
df_ph = prophet_preprocessing(data_train,'Close')
#print(df_ph)
#prophet = Prophet(holidays=df_non_bd)
#prophet.fit(df_ph)
pipe_ph = Pipeline([
            ('prophet_preproc', FunctionTransformer(lambda df: prophet_preprocessing(df,'Close'))),
            ('prophet_model', ProphetWrapper(df_non_bd))
        ])

pipe_ph

In [41]:
#prophet_preprocessing(data_train,'Close')
#test = pipe_ph.fit(data_train)
#test

In [48]:
from sklearn.metrics import mean_absolute_error
#pipe_ph['prophet_model']

In [52]:
pipe_ph.fit(data_train)

n_days_prediction=90
future = pipe_ph['prophet_model'].prophet.make_future_dataframe(periods=n_days_prediction)
future
forecast = pipe_ph['prophet_model'].prophet.predict(future)
forecast = forecast[- n_days_prediction:][['ds', 'yhat', 'yhat_lower', 'yhat_upper', 'non business day']]
mae_prophet = mean_absolute_error(data_test[:n_days_prediction]['Close'], forecast['yhat'])
mae_prophet

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Initial log joint probability = -158.309
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       6691.14     0.0197837       254.371           1           1      123   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       6735.59    0.00532877       276.371      0.4334      0.9463      248   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       6755.73    0.00411112       334.618      0.1918           1      367   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     315       6756.89    7.5583e-05       92.5677   4.069e-07       0.001      424  LS failed, Hessian reset 
     335       6757.58   0.000184734       152.414   2.067e-06       0.001      482  LS failed, Hessian reset 
     392       6758.57   0.000247642       269.265   1.101e-06       0.001      584  LS failed, Hessian rese

76.23555686704827