In [1]:
import pandas as pd
from fbprophet import Prophet
from etl_resources import sqlite_connection
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
plt.style.use('fivethirtyeight')

In [2]:
def ticker_list():
    
    '''
    This function returns the distinct tickers stored in weekly_prices_clean
    as a plain list (first column of every result row).
    '''
    
    con = sqlite_connection()
    try:
        cur = con.cursor()
        # Plain string: the query has no placeholders, so no f-string is needed
        cur.execute("select distinct ticker from weekly_prices_clean")
        res = [row[0] for row in cur.fetchall()]
    finally:
        # Release the connection even when the query fails
        # (assumes sqlite_connection() hands out a new connection on each call)
        con.close()
    
    return res

In [3]:
def base_df(ticker, start_date='2017-12-31'):
    
    '''
    This function returns the base time series dataframe for one ticker.

    Parameters:
        ticker: value matched against the ticker column of weekly_prices_clean
        start_date: only rows with date strictly greater than this are returned

    Returns:
        DataFrame with columns ds (the date column) and y (the close column),
        ordered by date descending, i.e. newest row first.
    '''
    
    con = sqlite_connection()
    try:
        # Bound parameters instead of string formatting: a ticker containing a
        # quote character can no longer break (or alter) the statement.
        df = pd.read_sql(
            "select date as ds, close as y from weekly_prices_clean "
            "where ticker = ? and date > ? order by date desc",
            con=con,
            params=(ticker, start_date),
        )
    finally:
        # Release the connection even when the query fails
        # (assumes sqlite_connection() hands out a new connection on each call)
        con.close()
        
    return df

In [4]:
def train_test_split(df, split=0.2):
    
    '''
    Cut a dataframe into a train part and a test part by row position.

    The leading rows of df (the rounded share given by split) form the test
    set and the trailing rows form the train set. Both are returned as
    independent copies, in the order (train, test).
    '''
    
    total_rows = df.shape[0]
    n_test = int(round(total_rows * split, 0))
    n_train = total_rows - n_test
    
    # Leading rows are held out, trailing rows are used for fitting
    holdout = df.head(n_test).copy()
    fitting = df.tail(n_train).copy()
    return fitting, holdout

In [5]:
def train_prophet(split=0.1, future_start='2018-01-05', future_periods=250,
                  output_dir='../data/visualization/prophet'):
    
    '''
    This function fits one Prophet model per ticker, scores it on the held-out
    rows and saves a forecast chart for each ticker.

    Parameters:
        split: share of rows handed to train_test_split as the test set
        future_start: first date of the weekly (W-FRI) range that is forecast
        future_periods: number of weekly dates in that range
        output_dir: directory receiving one <ticker>.png per ticker

    Returns:
        DataFrame indexed by ticker with the hold-out mse, mae and mape (in %)
        of every ticker that had enough rows to be modelled.
    '''
    
    records = []
    
    for ticker in ticker_list():
        
        print(f"Building model for {ticker}")
        
        # Build the dataset and split
        df = base_df(ticker)
        train, test = train_test_split(df, split=split)
        
        # Prophet refuses to fit fewer than 2 rows and to predict on an empty frame
        if train.shape[0] < 2 or test.empty:
            print(f"Skipping {ticker}: not enough rows to train and evaluate")
            continue
        
        # Train & Test Prophet
        # NOTE(review): the table and the W-FRI range suggest weekly data, where a
        # daily seasonal component cannot be identified - kept to preserve the
        # existing forecasts, consider dropping it.
        model = Prophet(daily_seasonality=True)
        model.fit(train)
        forecast = model.predict(test)
        
        # Score the hold-out. Both frames are sorted by date first so that each
        # actual value is paired with the prediction for the same date.
        actual = np.asarray(
            test.assign(ds=pd.to_datetime(test['ds'])).sort_values('ds')['y'], dtype=float)
        predicted = np.asarray(forecast.sort_values('ds')['yhat'], dtype=float)
        errors = actual - predicted
        records.append({
            'ticker': ticker,
            'mse': np.mean(errors ** 2),
            'mae': np.mean(np.abs(errors)),
            'mape': np.mean(np.abs(errors / actual)) * 100,
        })
        
        # Create a DF with future dates to predict (weekly cadence)
        future = pd.DataFrame()
        future['ds'] = pd.date_range(future_start, periods=future_periods, freq='W-FRI')

        # Make the predictions
        future_fcast = model.predict(future)

        # Plot and Save, then close: pyplot keeps every figure alive until it is
        # closed explicitly, so a long ticker list would otherwise pile them up
        fig = model.plot(future_fcast)
        fig.savefig(f'{output_dir}/{ticker}.png')
        plt.close(fig)
    
    return pd.DataFrame(records, columns=['ticker', 'mse', 'mae', 'mape']).set_index('ticker')


In [None]:
# Fit a Prophet model for every ticker and write one forecast PNG per ticker.
train_prophet()

Building model for MMM
Initial log joint probability = -6.89131
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       959.181    0.00556373       761.103           1           1      119   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     135       962.746    0.00061616       342.832   3.202e-06       0.001      189  LS failed, Hessian reset 
     199       966.031    0.00304705       162.676           1           1      267   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       968.795   0.000339307       76.3189           1           1      388   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     325       968.898   0.000130408       107.889   1.379e-06       0.001      458  LS failed, Hessian reset 
     378       968.953   0.000324626       102.321   3.735e-06       0.001      556  

  fig = plt.figure(facecolor='w', figsize=figsize)


Building model for CAT
Initial log joint probability = -13.0085
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       959.191    0.00151437        165.59      0.3906      0.8453      119   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199        970.73   0.000608303       196.859        0.31      0.7145      232   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     233       972.531   9.51912e-05        144.58   8.617e-07       0.001      315  LS failed, Hessian reset 
     257       973.694   0.000770607       388.522    7.08e-06       0.001      375  LS failed, Hessian reset 
     299       975.745      0.020309       203.561           1           1      423   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     362       977.649   6.94568e-05       106.115   5.238e-07       0.001      537  

In [None]:
# `test` and `forecast` are only ever assigned inside train_prophet(), so they do not
# exist at notebook scope on a fresh kernel. Build one hold-out evaluation here.
EVAL_TICKER = "MMM"  # example ticker; any value returned by ticker_list() can be used
train, test = train_test_split(base_df(EVAL_TICKER), split=0.1)
model = Prophet(daily_seasonality=True).fit(train)
forecast = model.predict(test)

# base_df() returns rows newest-first (order by date desc). Sort both frames by date
# so y and yhat are paired per date rather than by whatever order each frame is in.
test = test.assign(ds=pd.to_datetime(test["ds"])).sort_values("ds").reset_index(drop=True)
forecast = forecast.sort_values("ds").reset_index(drop=True)

print("Mean Squared Error (MSE):", mean_squared_error(y_true = test["y"], y_pred = forecast['yhat']))
print("Mean Absolute Error (MAE):", mean_absolute_error(y_true = test["y"], y_pred = forecast['yhat']))

def mean_abs_perc_err(y_true, y_pred):
    '''
    This function returns the mean absolute percentage error, in percent.

    Parameters:
        y_true: actual values (array, list or pandas Series)
        y_pred: predicted values, same length and order as y_true

    Both inputs are converted to float arrays first, so values are compared by
    position; pandas Series are not re-aligned on their index labels.
    A zero in y_true makes the result inf or nan (division by zero).
    '''
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((actual - predicted) / actual)) * 100

# Pair actuals with predictions by date: base_df() yields rows newest-first, so comparing
# test and forecast row-by-row without sorting can match a price with the wrong week.
mape_actual = np.asarray(test.assign(ds=pd.to_datetime(test["ds"])).sort_values("ds")["y"])
mape_predicted = np.asarray(forecast.sort_values("ds")["yhat"])
print("Mean Absolute % Error (MAPE): ", mean_abs_perc_err(y_true = mape_actual, y_pred = mape_predicted))

In [None]:
# Scratch inspection of objects from the modelling step.
# NOTE(review): `model`, `forecast` and `future_fcast` are assigned inside train_prophet();
# unless another cell binds them at notebook scope, this cell raises NameError after
# Restart & Run All.
#future_fcast
#forecast
model