In [1]:
import pandas as pd
import numpy as np
from ipynb.fs.defs.add_features import add_features
import os
import pandas_datareader as pdr
import time
import chart_studio.plotly as py
import plotly.graph_objects as go
from plotly import tools
from datetime import datetime, timedelta

# Time Series / ARIMA
from statsmodels.tsa.stattools import pacf
from statsmodels.tsa.arima_model import ARIMA
from pandas.plotting import autocorrelation_plot
from sklearn import metrics
from datetime import datetime, timedelta
from pmdarima import auto_arima

# Gradient Boosting
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, normalize
import random 

In [2]:
stock_list = ['RACE', 'COST', 'WMT', 'AMZN', 'BA']
api = '73d3a6aeca232c1374f0c610b84ad43e5700afd1'

def Rand(start, end, num): 
    """ Return: List of random integers
    
    Parameters:
    start(int): Minimum list point
    end(int): Maximum list point
    num(int): Length of list"""
    
    res = [] 
    for j in range(num): 
        res.append(random.randint(start, end)) 
    return res 

# Number of trees in random forest
n_estimators = Rand(5, 1000, 10)
n_estimators.append(10)

# Maximum number of levels in tree
max_depth = Rand(1, 10, 3)
max_depth.append(None)

# Minimum number of samples required to split a node
learning_rate = [.01, .1, .2]

# Minimum number of samples required to split a node
min_samples_split = Rand(2, 10, 3)

# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 3, 4]


# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_depth': max_depth,
               'learning_rate': learning_rate,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf
              }

print(random_grid)

{'n_estimators': [350, 852, 244, 574, 797, 375, 548, 782, 710, 457, 10], 'max_depth': [6, 6, 2, None], 'learning_rate': [0.01, 0.1, 0.2], 'min_samples_split': [4, 5, 4], 'min_samples_leaf': [1, 2, 3, 4]}


In [3]:
def ideal_arima(stock_close, max_p=20, max_q=20):
    """Function to run auto_arima for ideal parameters
    Parameters:
    stock_close(dataframe): df of the stock close price
    max_p(int): max number of p
    max_q(int): max number of q
    
    Returns the ideal ARIMA parameters
    """
    
    # Run auto ARIMA based on the parameteres below
    stepwise_model = auto_arima(stock_close, start_p=1, start_q=1,
                           max_p=max_p, max_q=max_q, m=12, start_P=0, seasonal=True,
                           d=None, max_d=1, D=1, error_action='ignore',  
                           suppress_warnings=True, stepwise=True)
    
    # Append ideal auto ARIMA parameters to list
    tmp = stepwise_model.get_params('order')
    order = tmp.get('order')
    
    return order

In [4]:
def forecast_arima(stock_close, n_days=10):
    """Run ideal ARIMA function and forecast closing stock price 
    
    Parameters:
    stock_close(dataframe): df of the stock close price
    n_days(int): number of days to forecast
    """
    
    # Create list of all stock close values
    df1 = stock_close
    X = df1.values
    history = [x for x in X]
    
    # Create forecast prediction list
    predictions = list()
    #order = ideal_arima(stock_close)
    #print(order)
    
    # Run ARIMA for each day to forecast
    for t in range(n_days):
        model = ARIMA(history, order=(0,1,0))
        model_fit = model.fit()
        output = model_fit.forecast()
        
        # Append forecast to predictions and list of stock values
        yhat = output[0]
        predictions.append(yhat)
        history.append(yhat)
        
    # Get last date from dataframe
    last_date = df1.index[-1] 
    
    # create date index for forecast
    date_index = []
    for x in range(1, n_days+1):
        x = date_by_adding_business_days(last_date, x)
        date_index.append(x)
        
    # Create Dataframe with predictions
    df_pred = pd.DataFrame(predictions)

    # Add prediction dates to index
    df_pred.index = date_index
    df_pred.rename(columns={0:'pred_close'}, inplace=True)
    df_forecast = pd.concat([stock_close, df_pred], axis=1)
    
    return df_forecast

In [5]:
def date_by_adding_business_days(last_date, add_days):
    """Adds and returns the business date based on parameters
    
    Parameters:
    last_date(date): current date
    add_days(int): number of days to add to last date
    """

    business_days_to_add = add_days
    current_date = last_date
    while business_days_to_add > 0:
        current_date += timedelta(days=1)
        weekday = current_date.weekday()
        if weekday >= 5: # sunday = 6
            continue
        business_days_to_add -= 1
    
    return current_date

In [6]:
def forecast_chart(stock, api, n_days):
    """ Returns generated chart of historical prices with the forecasted price

    Parameters:
    stock(str): stock ticker
    api(str): api for Tiingo
    n_days(int): Number of days in the forecast
    """
    
    df = pdr.get_data_tiingo(stock, api_key=api)
    df = df.tail(100)
    df.reset_index(inplace=True)
    df.index = df['date']
    df_forecast = forecast_arima(df['close'], n_days)
    
    # Historical prices and forecasted prices
    trace0 = go.Scatter(x=df_forecast.index, y=df_forecast['close'],
                        mode='lines', name='Historical', 
                        line=dict(color='blue', width=1), yaxis='y')

    trace1 = go.Scatter(x=df_forecast.index, y=df_forecast['pred_close'],
                        mode='lines', name='Prediction', 
                        line=dict(color='red', width=1), yaxis='y')
    
    data = [trace0, trace1]
    
    # Additional layout and format of the chart
    layout = go.Layout(
        yaxis=dict(domain=[0, 1], title='Price (USD)'),

        title='{} Stock Forecast Chart'.format(stock),

        xaxis=dict(
            title='Date',
            
            # Add range selector buttons to the chart
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label='1m',
                         step='month',
                         stepmode='backward'),
                    dict(count=3,
                         label='3m',
                         step='month',
                         stepmode='backward'),
                    dict(step='all')
                ])
            ),
            rangeslider=dict(
                visible = False
            ),
            type='date'
        )
    )
    
    # Generate chart based on data and layout features
    fig = go.Figure(data=data, layout=layout)
    
    return fig.show()

In [7]:
def roi_calc(stock_list, api, random_grid):
    """Iterates through stock list and calculates ROI based
    on the ARIMA forecast.
    
    Parameters:
    stock_list(list): list of stocks to iterate
    api(str): api for Tiingo
    random_grid(dict): Dictionary of parameters for the grid
    to iterate over
    
    Returns list of top three stocks, forecast, and predicted ROI
    """

    start_time = time.time()
    roi = {}
    forecasts = {}
    
    for x in stock_list:
        # Create dataframe for each ticker
        df = pdr.get_data_tiingo(x, api_key=api)
        df = df.tail(1000)
        df.reset_index(inplace=True)
        df.index = df['date']
        stock_close = df['close']
        
        # Drop unneeded columns
        df.drop(['symbol', 'date', 'adjClose', 'adjHigh', 'adjLow', 
         'adjOpen', 'adjVolume', 'splitFactor'], axis=1, inplace=True)
        
        # Create new variable for stationary transformation of closing price
        df['close_diff'] = df['close'] - df['close'].shift()
        
        # Get the forecast for each ticker
        Y_pred = gbr_forecast(df, random_grid)
        
        # Calculate ROI
        last_close = df['close'][-1]
        forecast = Y_pred + last_close
        calc_roi = ((forecast - last_close) / last_close) * 100
        
        # Append to forecast and ROI dictionaries
        roi.update({x:calc_roi})
        forecasts.update({x:forecast})
        
    # Print ROI rates for top 3
    best_roi = sorted(roi, key=roi.get, reverse=True)[:3]  
    print('Top 3 Forecasted ROI:')
    
    for x in best_roi:
        print('{}: {}% Forecasted gain'.format(x, roi[x]))
        print('Forecasted Price: {}'.format(x, forecasts[x]))
    
    print("\n--- %s seconds ---" % (time.time() - start_time))  
    return roi, forecasts

In [8]:
def split_train(df):
    """Map training data to X and Y variables
    
    Parameters:
    df(dataframe): stock historical data
    
    Returns mapped training data"""
    
    # Map data to training features and target variable
    train_features = df.drop('close_diff', axis=1).iloc[:-1]
    X_train = StandardScaler().fit_transform(train_features)
    Y_train = df['close_diff'].iloc[1:]
    
    return X_train, Y_train

In [9]:
def gbr_hyperparams(random_grid, X_train, Y_train):
    """Perform a grid search based on the inputted
    parameters.
    
    Parameters:
    random_grid(dict): Dictionary of parameters for the grid
    to iterate over
    X_train(dataframe): features for predicting target variable
    Y_train(array): Target variables
    
    Returns ideal parameters for Gradient Boosting Regressor"""
    
    # Use the random grid to search for best hyperparameters
    gbr = GradientBoostingRegressor()

    # Random search of parameters, using 2 fold cross validation, 
    # Search across all combinations, and use all available cores
    gbr_random = GridSearchCV(estimator=gbr, param_grid=random_grid, 
                             cv=2, verbose=0, n_jobs=-1)

    # Fit the random search model
    gbr_random.fit(X_train, Y_train)

    params = gbr_random.best_params_
    
    return params

In [10]:
def gbr_forecast(df, random_grid):
    """Returns stock price forecast for next day
    
    Parameters:
    df(dataframe): stock historical data
    random_grid(dict): Dictionary of parameters for the grid
    to iterate over"""

    # Fit best parameters to GBR and predict stock price change
    X_train, Y_train = split_train(df)
    params = gbr_hyperparams(random_grid, X_train, Y_train)
    
    gbr = GradientBoostingRegressor(**params)

    gbr.fit(X_train, Y_train)
    
    # Create test set from last day in dataframe
    X_test = df.drop('close_diff', axis=1).iloc[-1]
    X_test = X_test.values.reshape(1, -1)
    Y_pred = gbr.predict(X_test)
    Y_pred = np.array(Y_pred).ravel()
    
    return Y_pred