In [1]:
##
## CETM46 Data Science Product Prototype
## By Yuk Kuen Chan
## 
## Date: 18 Oct 2021
##
## Application: Stock Prediction Analysis Platform
##

In [2]:
#load required libraries
import dash
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import numpy as np
import plotly.graph_objs as go
from dash import html
from dash import dcc
from dash import dash_table
from dash.dash_table.Format import Group
from dash.dependencies import Input, Output, State
from keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from sklearn.linear_model import LinearRegression
from pmdarima.arima import auto_arima
from fastai.tabular.all import *
from plotly.subplots import make_subplots
import statistics 

In [3]:
#function to read the stock data
def getStockData(value):
    """Download the historical price CSV for one of the supported tickers.

    Parameters
    ----------
    value : str
        Ticker symbol: "AAPL", "TSM", "MSFT", "V" or "JNJ".

    Returns
    -------
    pandas.DataFrame
        The parsed ``<ticker>.csv`` file from the project's GitHub repository.

    Raises
    ------
    ValueError
        If ``value`` is not a supported ticker. Previously the function fell
        through and returned ``None``, which made callers fail later with an
        unrelated-looking ``TypeError``.
    """
    base_url = "https://raw.githubusercontent.com/capwin3/UoS/main/"
    supported_tickers = ("AAPL", "TSM", "MSFT", "V", "JNJ")
    if value not in supported_tickers:
        raise ValueError(
            "Unsupported ticker {!r}; expected one of {}".format(
                value, ", ".join(supported_tickers)
            )
        )
    return pd.read_csv("{}{}.csv".format(base_url, value))

In [4]:
#build the Plotly Dash dashboard
app = dash.Dash(__name__)


def _build_layout():
    """Return the dashboard's root component.

    The page consists of a centred title and two tabs: 'Prediction' (stock
    dropdown, Submit button, KPI cards and five chart placeholders wrapped in a
    full-screen loading indicator) and 'Raw Data' (combo chart and data table
    placeholders).
    """
    # Stocks offered in the dropdown; 'AAPL' is the default selection below.
    stock_choices = [
        {'label': 'Apple', 'value': 'AAPL'},
        {'label': 'Taiwan Semiconductor Mfg', 'value': 'TSM'},
        {'label': 'Microsoft', 'value': 'MSFT'},
        {'label': 'Visa', 'value': 'V'},
        {'label': 'Johnson & Johnson', 'value': 'JNJ'}
    ]
    stock_picker = dcc.Dropdown(
        id='demo-dropdown',
        options=stock_choices,
        value='AAPL',
        multi=False
    )

    # Controls plus the placeholders that the callbacks fill in.
    prediction_body = html.Div([
        stock_picker,
        html.Button('Submit', id='submit-val', n_clicks=0),
        html.Div(id='kpi2', className="six columns"),
        html.Div(id='chart1'),
        html.Div(id='chart2'),
        html.Div(id='chart3'),
        html.Div(id='chart4'),
        html.Div(id='chart5')
    ])

    prediction_tab = dcc.Tab(label='Prediction', value='tab-1', children=[
        html.Div(id='Message', children='Select a stock and press submit, it takes approximately 3 mins to finish.'),
        # Loading component shows the loading status while callbacks run.
        dcc.Loading(
            id='loading1',
            fullscreen=True,
            children=prediction_body
        )
    ])

    raw_data_tab = dcc.Tab(label='Raw Data', value='tab-2', children=[
        html.Div(id='combo'),
        html.Div(id='table')
    ])

    return html.Div([
        html.H1('Stock Prediction Analysis Dashboard', style={"textAlign": "center"}),
        dcc.Tabs(id='tabs-example', value='tab-1', children=[prediction_tab, raw_data_tab])
    ])


app.layout = _build_layout()


In [5]:
#generate the raw data table
@app.callback(
    Output('table', 'children'),
    Input('demo-dropdown', 'value'),
    Input('submit-val', 'n_clicks'))
def update_table(value, n_clicks):
    """Build the raw-data table for the selected stock.

    Parameters
    ----------
    value : str
        Ticker currently selected in the dropdown.
    n_clicks : int
        Click counter of the Submit button (only used as a trigger).

    Returns
    -------
    dash_table.DataTable or None
        A paginated table of every row of the stock data when the callback was
        triggered by the Submit button; ``None`` otherwise.
    """
    changed_id = [p['prop_id'] for p in dash.callback_context.triggered][0]
    if 'submit-val' not in changed_id:
        return None

    df = getStockData(value)
    df['Ticker'] = value
    df["Volume"] = pd.to_numeric(df["Volume"])

    # The original sent only the first 25 rows with page_action='custom' but no
    # callback handled page changes, so later rows were unreachable. Native
    # paging lets the browser page through the complete data set.
    page_size = 25
    return dash_table.DataTable(
        id='table1',
        columns=[{"name": col, "id": col} for col in df.columns],
        data=df.to_dict('records'),
        page_current=0,
        page_size=page_size,
        page_action='native')

In [6]:
#generate KPI cards
@app.callback(
     Output('kpi2', 'children'),
     Input('demo-dropdown', 'value'),
     Input('submit-val', 'n_clicks')
    )
def update_kpi(value, n_clicks):
    """Build the KPI cards (max, min, mean and standard deviation of Close).

    Parameters
    ----------
    value : str
        Ticker currently selected in the dropdown.
    n_clicks : int
        Click counter of the Submit button (only used as a trigger).

    Returns
    -------
    dcc.Graph or None
        A figure with four number indicators laid out in one row when the
        callback was triggered by the Submit button; ``None`` otherwise.
    """
    changed_id = [p['prop_id'] for p in dash.callback_context.triggered][0]
    if 'submit-val' not in changed_id:
        return None

    df = getStockData(value)
    close = df["Close"]

    # idxmax/idxmin return the first matching row, like the original
    # query(...)['Date'].values[0] lookup.
    max_price_date = df.loc[close.idxmax(), 'Date']
    min_price_date = df.loc[close.idxmin(), 'Date']

    # Series.std() is the sample standard deviation, as statistics.stdev was.
    cards = [
        ("Max Closing Price<br>at {}".format(max_price_date), close.max()),
        ("Min Closing Price<br>at {}".format(min_price_date), close.min()),
        ("Mean Closing Price", close.mean()),
        ("Standard Deviation", close.std()),
    ]

    fig = go.Figure()
    for position, (caption, number) in enumerate(cards):
        fig.add_trace(go.Indicator(
            mode="number",
            value=number,
            title={'text': caption},
            domain={'row': 0, 'column': position}
        ))

    # First/last rows are taken as the covered period; this assumes the CSV is
    # stored in chronological order -- TODO confirm.
    first_date = df['Date'].iloc[0]
    last_date = df['Date'].iloc[-1]
    fig.update_layout(
        grid={'rows': 1, 'columns': len(cards), 'pattern': "independent"},
        title={"text": "Historical Data from {} to {}".format(first_date, last_date)}
    )
    return dcc.Graph(figure=fig)

In [7]:
#app callback to generate LSTM prediction chart
@app.callback(
    Output('chart5', 'children'),
    Input('demo-dropdown', 'value'),
    Input('submit-val', 'n_clicks')
)
def Predict_LSTM(value, n_clicks):
    """Dash callback: fill 'chart5' with the LSTM prediction chart.

    Does the work only when the Submit button triggered the callback;
    returns None for any other trigger.
    """
    trigger = dash.callback_context.triggered[0]['prop_id']
    if 'submit-val' not in trigger:
        return None

    print(value)
    train_part, valid_part = LSTM_Prediction(getStockData(value))
    return generateLSTMPredict(train_part, valid_part)

In [8]:
#perform LSTM prediction
def LSTM_Prediction(value, train_size=987, lookback=60):
    """Train a two-layer LSTM on closing prices and predict the hold-out rows.

    Parameters
    ----------
    value : pandas.DataFrame
        Stock data with a 'Date' column in ``%m/%d/%Y`` format and a numeric
        'Close' column. The frame is not modified.
    train_size : int, optional
        Number of chronologically first rows used for training (default 987).
    lookback : int, optional
        Number of preceding closes fed to the network per sample (default 60).

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``train`` and ``valid``, both indexed by date with a 'Close' column;
        ``valid`` also has a 'Predictions' column.

    Raises
    ------
    ValueError
        If the data does not leave at least one training window and one
        validation row for the given ``train_size`` / ``lookback``.
    """
    # Work on a copy of the two needed columns so the caller's frame is untouched.
    prices = value[["Date", "Close"]].copy()
    prices["Date"] = pd.to_datetime(prices["Date"], format="%m/%d/%Y")
    new_data = prices.set_index("Date").sort_index(ascending=True)

    dataset = new_data.values.astype("float64")
    if not lookback < train_size < len(dataset):
        raise ValueError(
            "need lookback < train_size < number of rows, got lookback={}, "
            "train_size={}, rows={}".format(lookback, train_size, len(dataset))
        )

    # NOTE(review): as in the original, the scaler is fitted on the full series
    # (training and validation rows) -- confirm this is intended.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset)

    # Each training sample is the previous `lookback` scaled closes; the target
    # is the scaled close that follows them.
    x_train = np.array(
        [scaled_data[i - lookback:i, 0] for i in range(lookback, train_size)]
    )
    y_train = scaled_data[lookback:train_size, 0]
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
    model.add(LSTM(units=50))
    model.add(Dense(1))

    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(x_train, y_train, epochs=1, batch_size=1, verbose=2)

    # Validation windows need the last `lookback` training rows as context.
    inputs = scaler.transform(dataset[train_size - lookback:].reshape(-1, 1))
    X_test = np.array(
        [inputs[i - lookback:i, 0] for i in range(lookback, inputs.shape[0])]
    )
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    closing_price = scaler.inverse_transform(model.predict(X_test))

    train = new_data[:train_size]
    # Copy before adding a column so we never write into a slice of new_data.
    valid = new_data[train_size:].copy()
    valid['Predictions'] = closing_price.ravel()

    return train, valid

In [9]:
#generate LSTM chart
def generateLSTMPredict(train,valid):
    """Return a dcc.Graph with the LSTM train, predicted and actual close lines.

    `train` supplies the 'Close' line over its index; `valid` supplies the
    'Predictions' and 'Close' lines over its index.
    """
    print("generateLSTMPredict")

    # (legend name, x values, y values) for each line, in drawing order.
    line_specs = (
        ('Train Data', train.index, train['Close']),
        ('Predicted Close', valid.index, valid['Predictions']),
        ('Actual Close', valid.index, valid['Close']),
    )
    traces = [
        go.Scatter(name=label, x=xs, y=ys, mode='lines')
        for label, xs, ys in line_specs
    ]
    layout = go.Layout(
        title='LSTM Prediction Plot',
        xaxis={'title':'Date'},
        yaxis={'title':'Price'}
    )
    return dcc.Graph(id='graph2', figure={"data": traces, "layout": layout})

In [10]:
@app.callback(
    Output('chart4', 'children'),
    Input('demo-dropdown', 'value'),
    Input('submit-val', 'n_clicks')
)
def Predict_ARIMA(value, n_clicks):
    """Dash callback: fill 'chart4' with the ARIMA prediction chart.

    Does the work only when the Submit button triggered the callback;
    returns None for any other trigger.
    """
    trigger = dash.callback_context.triggered[0]['prop_id']
    if 'submit-val' not in trigger:
        return None

    print(value)
    stock_frame = getStockData(value)
    train_part, valid_part, forecast_part = ARIMA_Prediction(stock_frame)
    return generateARIMAPredict(train_part, valid_part, forecast_part)

In [11]:
def generateARIMAPredict(train,valid,forecast):
    """Return a dcc.Graph with the ARIMA train, forecast and actual close lines.

    `train` and `valid` supply 'Close' over their indexes; `forecast`
    supplies 'Predictions' over its index.
    """
    print("generateARIMAPredict")

    train_line = go.Scatter(
        name='Train Data', x=train.index, y=train['Close'], mode='lines'
    )
    forecast_line = go.Scatter(
        name='Predicted Close', x=forecast.index, y=forecast['Predictions'], mode='lines'
    )
    actual_line = go.Scatter(
        name='Actual Close', x=valid.index, y=valid['Close'], mode='lines'
    )

    figure = {
        "data": [train_line, forecast_line, actual_line],
        "layout": go.Layout(
            title='ARIMA Prediction Plot',
            xaxis={'title':'Date'},
            yaxis={'title':'Price'}
        ),
    }
    return dcc.Graph(id='graph4', figure=figure)

In [12]:
def ARIMA_Prediction(value, train_size=987):
    """Fit a seasonal auto-ARIMA on the training closes and forecast the rest.

    Parameters
    ----------
    value : pandas.DataFrame
        Stock data with a 'Date' column in ``%m/%d/%Y`` format and a 'Close'
        column. The frame is not modified.
    train_size : int, optional
        Number of chronologically first rows used for training (default 987).

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame, pandas.DataFrame)
        ``train`` and ``valid`` (all input columns, indexed by date) and
        ``forecast`` (a 'Predictions' column on ``valid``'s index).

    Raises
    ------
    ValueError
        If the split leaves no training rows or no validation rows.
    """
    # Copy so that parsing dates / re-indexing does not leak into the caller's frame.
    df = value.copy()
    df["Date"] = pd.to_datetime(df["Date"], format="%m/%d/%Y")
    # Keep 'Date' as a column as well as the index, as the original did.
    data = df.set_index("Date", drop=False).sort_index(ascending=True)

    train = data[:train_size]
    valid = data[train_size:]
    if train.empty or valid.empty:
        raise ValueError(
            "train_size={} leaves {} training and {} validation rows".format(
                train_size, len(train), len(valid)
            )
        )

    # auto_arima returns the best model already fitted on the series it was
    # given, so no separate fit() call is needed afterwards.
    model = auto_arima(train['Close'], start_p=1, start_q=1, max_p=3, max_q=3, m=12,
                       start_P=0, seasonal=True, d=1, D=1, trace=True,
                       error_action='ignore', suppress_warnings=True)

    # Forecast exactly as many periods as there are validation rows instead of
    # a hard-coded 271, so any data length works.
    predicted = model.predict(n_periods=len(valid))
    # np.asarray drops any index the library attached to the forecast; otherwise
    # DataFrame construction could realign on it and yield NaN.
    forecast = pd.DataFrame({'Predictions': np.asarray(predicted)}, index=valid.index)

    return train, valid, forecast

In [13]:
@app.callback(
    Output('chart3', 'children'),
    Input('demo-dropdown', 'value'),
    Input('submit-val', 'n_clicks')
)
def Predict_Linear(value, n_clicks):
    """Dash callback: fill 'chart3' with the linear-regression prediction chart.

    Does the work only when the Submit button triggered the callback;
    returns None for any other trigger.
    """
    trigger = dash.callback_context.triggered[0]['prop_id']
    if 'submit-val' not in trigger:
        return None

    print(value)
    train_part, valid_part = Linear_Prediction(getStockData(value))
    return generateLinearPredict(train_part, valid_part)

In [14]:
def generateLinearPredict(train,valid):
    """Return a dcc.Graph with the linear-model train, predicted and actual lines.

    `train` supplies the 'Close' line over its index; `valid` supplies the
    'Predictions' and 'Close' lines over its index.
    """
    print("generateLinearPredict")

    def as_line(label, frame, column):
        # One line trace of `column` plotted against the frame's index.
        return go.Scatter(name=label, x=frame.index, y=frame[column], mode='lines')

    chart = {
        "data": [
            as_line('Train Data', train, 'Close'),
            as_line('Predicted Close', valid, 'Predictions'),
            as_line('Actual Close', valid, 'Close'),
        ],
        "layout": go.Layout(
            title='Linear Prediction Plot',
            xaxis={'title':'Date'},
            yaxis={'title':'Price'}
        ),
    }
    return dcc.Graph(id='graph5', figure=chart)

In [15]:
from fastai import *
def Linear_Prediction(value, train_size=987):
    """Predict closing prices with a linear regression on calendar features.

    The 'Date' column is expanded with ``add_datepart``, a 'mon_fri' flag is
    added, and a ``LinearRegression`` is fitted on the training rows.

    Parameters
    ----------
    value : pandas.DataFrame
        Stock data with a 'Date' column in ``%m/%d/%Y`` format and a numeric
        'Close' column. The frame is not modified.
    train_size : int, optional
        Number of chronologically first rows used for training (default 987).

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``train`` and ``valid`` with 'Close' plus the date-part feature
        columns; ``valid`` also has 'Predictions'. Both are indexed by date so
        that plotting against the index gives a real date axis (the original
        returned integer positions).

    Raises
    ------
    ValueError
        If the split leaves no training rows or no validation rows.
    """
    # Copy the two needed columns so the caller's frame is untouched.
    prices = value[["Date", "Close"]].copy()
    prices["Date"] = pd.to_datetime(prices["Date"], format="%m/%d/%Y")
    new_data = prices.sort_values("Date").reset_index(drop=True)

    # Remember the dates: add_datepart replaces the 'Date' column with features.
    dates = pd.DatetimeIndex(new_data["Date"], name="Date")

    add_datepart(new_data, 'Date')
    new_data = new_data.drop('Elapsed', axis=1)

    # 1 when Dayofweek is 0 or 4, else 0. Assigned by column name rather than
    # the original hard-coded column position 13.
    new_data['mon_fri'] = new_data['Dayofweek'].isin([0, 4]).astype(int)

    # Copies, so adding columns / re-indexing below never touches new_data.
    train = new_data[:train_size].copy()
    valid = new_data[train_size:].copy()
    if train.empty or valid.empty:
        raise ValueError(
            "train_size={} leaves {} training and {} validation rows".format(
                train_size, len(train), len(valid)
            )
        )

    model = LinearRegression()
    model.fit(train.drop('Close', axis=1), train['Close'])

    # Predict before adding the 'Predictions' column so it is not used as a feature.
    valid['Predictions'] = model.predict(valid.drop('Close', axis=1))

    train.index = dates[:train_size]
    valid.index = dates[train_size:]

    return train, valid

In [16]:
@app.callback(
    Output('chart1', 'children'),
    Input('demo-dropdown', 'value'),
    Input('submit-val', 'n_clicks')
)
def retStockData(value, n_clicks):
    """Dash callback: fill 'chart1' with the candlestick chart of the stock.

    The selected ticker is printed on every invocation. The chart is built
    only when the Submit button triggered the callback; otherwise None is
    returned.
    """
    print(value)
    trigger = dash.callback_context.triggered[0]['prop_id']
    if 'submit-val' not in trigger:
        return None

    stock_frame = getStockData(value)
    # Parse the dates and use them as the index, which becomes the chart's x axis.
    stock_frame["Date"] = pd.to_datetime(stock_frame.Date, format="%m/%d/%Y")
    stock_frame.index = stock_frame['Date']
    return generateCandleGraphs(stock_frame, value)

In [17]:
def generateCandleGraphs(df,value):
    """Return a candlestick chart of the stock with 30- and 50-period averages.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data with 'Open', 'High', 'Low' and 'Close' columns; its index is
        used as the x axis.
    value : str
        Ticker symbol, used as the candlestick trace name and in the title.

    Returns
    -------
    dcc.Graph
        Graph with id 'graph6' containing the candlestick trace and two
        moving-average lines.
    """
    def moving_average_trace(window, color):
        # Rolling mean of Close; min_periods=1 gives a value from the first row on.
        return {
            'x': df.index,
            'y': df.Close.rolling(window=window, min_periods=1).mean(),
            'type': 'scatter',
            'mode': 'lines',
            'line': {
                'width': 1,
                'color': color
            },
            'name': 'Moving Average of {} periods'.format(window)
        }

    candles = {
        'x': df.index,
        'open': df.Open,
        'close': df.Close,
        'high': df.High,
        'low': df.Low,
        'type': 'candlestick',
        'name': value,
        'showlegend': False
    }

    return dcc.Graph(
        id='graph6',
        figure={
            "data": [
                candles,
                moving_average_trace(30, 'blue'),
                moving_average_trace(50, 'red'),
            ],
            "layout": go.Layout({
                'title': {
                    # Spelling fixed: the title previously read "Historial".
                    'text': '"{}" Historical Closing Price'.format(value),
                    'font': {
                        'size': 15
                    }
                }
            })
        })

In [18]:
@app.callback(
    Output('chart2', 'children'),
    Input('demo-dropdown', 'value'),
    Input('submit-val', 'n_clicks')
)
def Predict_Moving(value, n_clicks):
    """Dash callback: fill 'chart2' with the moving-average prediction chart.

    Does the work only when the Submit button triggered the callback;
    returns None for any other trigger.
    """
    trigger = dash.callback_context.triggered[0]['prop_id']
    if 'submit-val' not in trigger:
        return None

    train_part, valid_part = Moving_Average_Prediction(getStockData(value))
    return generateMovingPredict(train_part, valid_part)

In [19]:
def Moving_Average_Prediction(value, train_size=987, window=248):
    """Forecast closing prices with a rolling moving average.

    Each prediction is the mean of the most recent ``window`` values, where
    "values" are the training closes followed by the predictions made so far.

    Parameters
    ----------
    value : pandas.DataFrame
        Stock data with a 'Date' column in ``%m/%d/%Y`` format and a numeric
        'Close' column. The frame is not modified.
    train_size : int, optional
        Number of chronologically first rows used as training data (default 987).
    window : int, optional
        Number of most recent values averaged per prediction (default 248).

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``mov_train`` and ``mov_valid`` with 'Date' and 'Close' columns on a
        0-based integer index; ``mov_valid`` also has 'Predictions'.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1 or the split leaves no training rows.
    """
    if window < 1:
        raise ValueError("window must be at least 1, got {}".format(window))

    # Copy the two needed columns so the caller's frame is untouched.
    prices = value[["Date", "Close"]].copy()
    prices["Date"] = pd.to_datetime(prices["Date"], format="%m/%d/%Y")
    new_mov_data = prices.sort_values("Date").reset_index(drop=True)

    mov_train = new_mov_data[:train_size].copy()
    mov_valid = new_mov_data[train_size:].copy()
    if mov_train.empty:
        raise ValueError("train_size={} leaves no training rows".format(train_size))

    # `history` holds the training tail followed by every prediction so far.
    # Averaging only its last `window` entries fixes the original behaviour,
    # which, past `window` steps, summed ALL earlier predictions yet still
    # divided by the window size, inflating the later forecasts.
    history = mov_train['Close'].tolist()[-window:]
    preds = []
    for _ in range(len(mov_valid)):
        recent = history[-window:]
        # Divide by the real count so a training set shorter than `window` still averages correctly.
        estimate = sum(recent) / len(recent)
        preds.append(estimate)
        history.append(estimate)

    mov_valid['Predictions'] = preds

    return mov_train, mov_valid

In [20]:
def generateMovingPredict(train,valid):
    """Return a dcc.Graph with the moving-average train, predicted and actual lines.

    Unlike the other chart builders, the x values come from each frame's
    'Date' column rather than from its index.
    """
    print("generateMovingPredict")

    # (legend name, source frame, y column) for each line, in drawing order.
    line_specs = [
        ('Train Data', train, 'Close'),
        ('Predicted Close', valid, 'Predictions'),
        ('Actual Close', valid, 'Close'),
    ]
    traces = []
    for label, frame, column in line_specs:
        traces.append(
            go.Scatter(name=label, x=frame['Date'], y=frame[column], mode='lines')
        )

    layout = go.Layout(
        title='Moving Average Prediction Plot',
        xaxis={'title':'Date'},
        yaxis={'title':'Price'}
    )
    return dcc.Graph(id='graph3', figure={"data": traces, "layout": layout})

In [21]:
@app.callback(
    dash.dependencies.Output('combo', 'children'),
    dash.dependencies.Input('demo-dropdown', 'value'),
    dash.dependencies.Input('submit-val', 'n_clicks'))
def update_output(value, n_clicks):
    """Build the combined volume (bars) and closing price (line) chart.

    Parameters
    ----------
    value : str
        Ticker currently selected in the dropdown.
    n_clicks : int
        Click counter of the Submit button (only used as a trigger).

    Returns
    -------
    dcc.Graph or None
        A dual-axis figure (Volume on the left axis, Close on the right) when
        the callback was triggered by the Submit button; ``None`` otherwise.
    """
    changed_id = [p['prop_id'] for p in dash.callback_context.triggered][0]
    if 'submit-val' not in changed_id:
        return None

    df = getStockData(value)

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(go.Bar(x=df['Date'], y=df['Volume'], name='Volume'),
                  secondary_y=False)
    fig.add_trace(go.Scatter(x=df['Date'], y=df['Close'], name='Close'),
                  secondary_y=True)

    fig.update_layout(autosize=True,
                      title="Historical Closing and Volume",
                      legend=dict(x=1.15, y=1),
                      hovermode='x')
    fig.update_xaxes(tickangle=-45, autorange=True)

    # Axis ranges run from 0 to the column maximum. The original computed these
    # maxima but then passed the whole Series as the upper bound by mistake.
    max_volume = float(df["Volume"].max())
    max_close = float(df["Close"].max())

    fig.update_yaxes(range=[0, max_volume],  # left y axis
                     title='Volume', secondary_y=False)
    fig.update_yaxes(range=[0, max_close],  # right y axis
                     showgrid=False,
                     title='Close',
                     secondary_y=True)
    return dcc.Graph(figure=fig)

In [None]:
if __name__ == '__main__':
    # Start the Dash server on port 8080 with debug mode on and the reloader
    # off (presumably because the reloader does not work from a notebook
    # kernel -- confirm).
    server_options = {'debug': True, 'use_reloader': False, 'port': 8080}
    app.run_server(**server_options)

Dash is running on http://127.0.0.1:8080/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on
AAPL
AAPL
AAPL
AAPL
AAPL
Performing stepwise search to minimize aic
generateMovingPredict
 ARIMA(1,1,1)(0,1,1)[12]             : AIC=inf, Time=2.90 sec
 ARIMA(0,1,0)(0,1,0)[12]             : AIC=3442.570, Time=0.18 sec
generateLinearPredict
 ARIMA(1,1,0)(1,1,0)[12]             : AIC=3200.306, Time=0.47 sec
 ARIMA(0,1,1)(0,1,1)[12]             : AIC=inf, Time=0.83 sec
 ARIMA(1,1,0)(0,1,0)[12]             : AIC=3415.216, Time=0.13 sec
 ARIMA(1,1,0)(2,1,0)[12]             : AIC=3106.771, Time=0.63 sec
 ARIMA(1,1,0)(2,1,1)[12]             : AIC=inf, Time=2.55 sec
 ARIMA(1,1,0)(1,1,1)[12]             : AIC=inf, Time=1.03 sec
 ARIMA(0,1,0)(2,1,0)[12]             : AIC=3144.340, Time=0.41 sec
 ARIMA(2,1,0)(2,1,0)[12]             : AIC=3106.991, Time=0.78 sec
 ARIMA(1,1,1)(2,1,0)[12]             : AIC=3106.964, Time=1.08