# Machine Learning Script to evaluate the potential of a stock for the next 10 days using multiple ML algorithms
A script that pulls price data from a chosen source. It reads the closing values of the last year and tries to predict and plot the closing values for the next 10 days.

It will use the data for RO Stocks (erased, because of laws), International Stock, or from local CSV file. 

<b>Version 20230109a</b>

Source and ideas from https://www.kaggle.com/code/ysthehurricane/advanced-stock-pred-using-svr-rfr-knn-lstm-gru
    
* TODO: To use the functions declared only in last cell
* TODO: Use a list to compare
* TODO: Create a new function to study the json files with the report


# Configurations:
## Start by choosing what you search:
Choose the dataset source by uncommenting the matching `source` line in the cell below:
- ("bvb",\<stock_symbol\>)
- ("stooq",\<international_symbol\>)
- ("csv",\<local_csv_file\>)


In [None]:
# Pick exactly one data source as a (kind, identifier) pair; import_dataset()
# dispatches on the first element ("bvb", "stooq" or "csv").
# source = ["bvb","SNN"]
source = ["stooq", "msft.us"]
# source = ("csv", "./kaggle/input/stockstudy/msft_us_d.csv")
# Look-back window: number of consecutive closes used as input for one prediction.
time_step = 15

<a name="libs"></a>
### Import libraries and packages

In [None]:
!pip install --upgrade pip
!pip install pandas plotly scikit-learn
import datetime as dt
import glob
import json
import math
import os
import time
from itertools import cycle

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tensorflow as tf
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM, GRU
from tensorflow.keras.models import Sequential

start = time.time()


## Initialize & checks 
This will show what devices are available and will be used!

In [None]:
# detect and init the TPU
import tensorflow as tf

def check_resources():
    """Print the TPU, GPU and CPU devices TensorFlow reports, then list files under /kaggle.

    Connecting to a TPU is best-effort: any exception is printed and the
    remaining checks still run.
    """
    import os

    list_devices = tf.config.experimental.list_physical_devices

    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
        # The strategy object is created but not used any further in this function.
        tpu_strategy = tf.distribute.experimental.TPUStrategy(resolver)
        print("Num TPUs Available: ", len(list_devices('TPU')), list_devices('TPU'))
    except Exception as e:
        print("TPU not working", e)

    for message, device_type in (
        ("Num GPUs Available: ", 'GPU'),
        ("Num CPUs Available: ", 'CPU'),
    ):
        print(message, len(list_devices(device_type)), list_devices(device_type))

    # Show every file available under the Kaggle working tree.
    for folder, _, file_names in os.walk('/kaggle'):
        for file_name in file_names:
            print(os.path.join(folder, file_name))

check_resources()

<a name="dataset"></a>
# Import dataset 

In [None]:
import datetime as dt

def download_stooq(stock):
    """Fetch daily quotes for *stock* from stooq.com as a DataFrame.

    The requested window ends today and starts 365 days earlier. The request
    URL is printed before pandas downloads and parses the CSV.
    """
    window_end = dt.date.today()
    window_start = window_end - dt.timedelta(days=365)
    d1 = window_start.strftime("%Y%m%d")
    d2 = window_end.strftime("%Y%m%d")
    # stock = "ogn.us"
    link = f'https://stooq.com/q/d/l/?s={stock}&d1={d1}&d2={d2}&i=d'
    print(link)
    return pd.read_csv(link)


In [None]:

def import_dataset(source):
    """Load the raw price table described by *source*.

    Parameters
    ----------
    source : sequence
        A ``(kind, identifier)`` pair. ``kind`` selects the loader
        (``"bvb"``, ``"stooq"`` or ``"csv"``); ``identifier`` is the stock
        symbol or, for ``"csv"``, the path given to ``pandas.read_csv``.

    Returns
    -------
    The table produced by the selected loader.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported values. Previously the
        function fell through and returned ``None``, which only failed later
        when the result was used.
    """
    print("Importing", source)
    kind = source[0]
    if kind == "bvb":
        # NOTE(review): download_bvb is not defined in the visible part of
        # this file; this branch needs it to be provided elsewhere.
        return download_bvb(source[1])
    if kind == "stooq":
        return download_stooq(source[1])
    if kind == "csv":
        return pd.read_csv(source[1])
    raise ValueError(
        f"Unknown dataset source {kind!r}; expected 'bvb', 'stooq' or 'csv'"
    )


bist100 = import_dataset(source)
bist100.head()

<a name="cname"></a>
### Rename columns
/kaggle/input/reliance-industries-limited-stocks-202021

<a name="nullna"></a>
### Checking null and na value

<a name="coldt"></a>

### Checking datatype of each column

<a name="dateformat"></a>

### Convert date from string to date format

<a name="sortdate"></a>

### Sorting dataset by date format


In [None]:
# Rename columns
def process_data(bist100):
    """Clean the raw quote table in place.

    Steps, all applied to the passed DataFrame itself (nothing is returned):

    1. rename ``Date/Open/High/Low/Close`` to lower case;
    2. drop every row that contains a missing value;
    3. print the Python type of the first remaining value of each price column;
    4. parse ``date`` into datetimes and sort the rows by it.
    """
    bist100.rename(
        columns={"Date": "date", "Open": "open", "High": "high", "Low": "low", "Close": "close"},
        inplace=True,
    )
    bist100.dropna(inplace=True)

    # Use positional access: after dropna() the row labelled 0 may no longer
    # exist, so a label lookup like bist100['date'][0] would raise KeyError.
    if not bist100.empty:
        for label, column in (
            ("Date", "date"),
            ("Open", "open"),
            ("Close", "close"),
            ("High", "high"),
            ("Low", "low"),
        ):
            print(f"{label} column data type: ", type(bist100[column].iloc[0]))

    # Convert the date field from string to datetime, then order chronologically.
    bist100['date'] = pd.to_datetime(bist100.date)
    bist100.sort_values(by='date', inplace=True)
    
process_data(bist100)

<a name="eda"></a>

### EDA - Exploratory Data Analysis

<a name="duration"></a>

### Get the duration of dataset

<a name="month_op_close"></a>

### Monthwise comparison between stock open and close price

<a name="month_high_low"></a>

### Monthwise High and Low stock price 

In [None]:
def exploratory_analysis(bist100):
    """Print the date range of the data and chart mean open/close per month.

    Expects ``bist100`` to have a datetime ``date`` column plus ``open`` and
    ``close`` columns, with rows already in chronological order (the first and
    last rows are taken as start and end). Months are grouped by month name
    and ordered by their mean close price.
    """
    first_day = bist100.iloc[0]['date']
    last_day = bist100.iloc[-1]['date']
    print("Starting date: ", first_day)
    print("Ending date: ", last_day)
    print("Duration: ", last_day - first_day)

    month_name = bist100['date'].dt.strftime('%B')
    monthvise = bist100.groupby(month_name)[['open', 'close']].mean().sort_values(by='close')

    fig = go.Figure()
    for column, legend, colour in (
        ('open', 'Stock Open Price', 'crimson'),
        ('close', 'Stock Close Price', 'lightsalmon'),
    ):
        fig.add_trace(go.Bar(
            x=monthvise.index,
            y=monthvise[column],
            name=legend,
            marker_color=colour,
        ))

    fig.update_layout(barmode='group', xaxis_tickangle=-45,
                      title='Monthwise comparision between Stock actual, open and close price')

    # fig.write_image("images/exploratory.jpeg")

    fig.show()
    
exploratory_analysis(bist100)

In [None]:
def high_and_low(bist100):
    """Show a grouped bar chart of each month's highest high and lowest low.

    Rows are grouped by the month name of the ``date`` column.
    """
    by_month = bist100.groupby(bist100['date'].dt.strftime('%B'))
    monthly_peak = by_month['high'].max()
    monthly_floor = by_month['low'].min()

    bars = (
        go.Bar(
            x=monthly_peak.index,
            y=monthly_peak,
            name='Stock high Price',
            marker_color='rgb(0, 153, 204)',
        ),
        go.Bar(
            x=monthly_floor.index,
            y=monthly_floor,
            name='Stock low Price',
            marker_color='rgb(255, 128, 0)',
        ),
    )

    fig = go.Figure()
    for bar in bars:
        fig.add_trace(bar)

    fig.update_layout(barmode='group',
                      title=' Monthwise High and Low stock price')
    fig.show()
    
high_and_low(bist100)

<a name="trend"></a>

### Trend comparison between open price, close price, high price and low price

In [None]:
def trend_evolution(bist100):
    """Draw open, close, high and low prices against the date on one line chart."""
    price_columns = ('open', 'close', 'high', 'low')
    # Legend names are handed out to the traces in the same order as price_columns.
    legend_names = cycle(['Stock Open Price','Stock Close Price','Stock High Price','Stock Low Price'])

    fig = px.line(
        bist100,
        x=bist100.date,
        y=[bist100[column] for column in price_columns],
        labels={'date': 'Date','value':'Stock value'},
    )
    fig.update_layout(
        title_text='Stock analysis chart',
        font_size=15,
        font_color='black',
        legend_title_text='Stock Parameters',
    )
    fig.for_each_trace(lambda trace: trace.update(name=next(legend_names)))
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)

    fig.show()
    
trend_evolution(bist100)

<a name="closepred"></a>

### Close price prediction preparation and preprocessing

<a name="sepclose"></a>

### Make separate dataframe with close price

<a name="plotclose"></a>

### Plotting stock close price chart

<a name="norm"></a>

### Normalizing / scaling close value between 0 to 1

In [None]:
def prepare_data(bist100):
    """Plot the close price and scale it to the 0..1 range.

    Returns
    -------
    close_stock : DataFrame
        Copy of the ``date`` and ``close`` columns in original units.
    closedf : ndarray
        Close prices as a single-column array after min-max scaling.
    scaler : MinMaxScaler
        The fitted scaler, needed later to convert predictions back.
    """
    close_stock = bist100[['date','close']].copy()
    print("Shape of close dataframe:", close_stock.shape)

    fig = px.line(close_stock, x=close_stock.date, y=close_stock.close,
                  labels={'date':'Date','close':'Close Stock'})
    fig.update_traces(marker_line_width=2, opacity=0.6)
    fig.update_layout(title_text='Stock close price chart', plot_bgcolor='white',
                      font_size=15, font_color='black')
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.show()

    # Only the close column is scaled; it is shaped as one feature column.
    close_column = close_stock[['close']].to_numpy().reshape(-1, 1)
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_close = scaler.fit_transform(close_column)
    print(scaled_close.shape)
    return close_stock, scaled_close, scaler
    
close_stock, closedf, scaler = prepare_data(bist100)


<a name="splitdata"></a>

### Split data for training and testing
Ratio for training and testing data is 65:35

In [None]:
def split_data(closedf, train_ratio=0.65):
    """Split a 2-D array chronologically into a training and a testing part.

    Parameters
    ----------
    closedf : ndarray
        Two-dimensional array, one row per time step.
    train_ratio : float, optional
        Fraction of rows (from the start) used for training. Defaults to
        0.65, the ratio this notebook has always used.

    Returns
    -------
    (train_data, test_data)
        The first ``int(len(closedf) * train_ratio)`` rows and the remaining
        rows. As before, the test part keeps only the first column.

    Raises
    ------
    ValueError
        If ``train_ratio`` is not strictly between 0 and 1.
    """
    if not 0 < train_ratio < 1:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    training_size = int(len(closedf) * train_ratio)
    train_data = closedf[:training_size, :]
    test_data = closedf[training_size:, :1]
    print("train_data: ", train_data.shape)
    print("test_data: ", test_data.shape)
    return train_data, test_data

train_data, test_data = split_data(closedf)

<a name="tsp"></a>

### Create new dataset according to requirement of time-series prediction 

In [None]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, time_step):
    """Build supervised samples from a single-column series.

    Sample ``k`` pairs the ``time_step`` consecutive values starting at row
    ``k`` (column 0) with the value that immediately follows them.

    Exactly ``len(dataset) - time_step - 1`` samples are produced, i.e. the
    final possible window is not emitted. The plotting code in this file
    positions predictions using that count, so it is kept as is.

    Returns
    -------
    (X, y) : tuple of ndarray
        ``X`` has one window per row, ``y`` the matching next values.
    """
    sample_count = len(dataset) - time_step - 1
    starts = range(sample_count)
    windows = [dataset[start:start + time_step, 0] for start in starts]
    targets = [dataset[start + time_step, 0] for start in starts]
    return np.array(windows), np.array(targets)

In [None]:

# Turn each split into (window, next value) samples; every window holds
# time_step consecutive scaled closes.
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

# Report the resulting array shapes.
print("X_train: ", X_train.shape)
print("y_train: ", y_train.shape)
print("X_test: ", X_test.shape)
print("y_test", y_test.shape)

## Compute evaluation metrics

In [None]:
def compute_metrics(original_ytrain, train_predict, original_ytest, test_predict):
    """Print and return error metrics for the train and test predictions.

    Every reported value is "lower is better". To achieve that, the explained
    variance score and the R2 score are reported as ``1 - score``.

    Parameters
    ----------
    original_ytrain, original_ytest
        True target values for the training and testing samples.
    train_predict, test_predict
        Model predictions for the same samples.

    Returns
    -------
    dict
        Metric label -> value, for RMSE, MSE, MAE, inverted explained
        variance, inverted R2, mean gamma deviance (MGD) and mean Poisson
        deviance (MPD), each for train and test.
    """
    def _scores(y_true, y_pred):
        # Each metric is evaluated once and reused for printing and the result dict.
        mse = mean_squared_error(y_true, y_pred)
        return {
            "RMSE": math.sqrt(mse),
            "MSE": mse,
            "MAE": mean_absolute_error(y_true, y_pred),
            "EVS": 1 - explained_variance_score(y_true, y_pred),
            "R^2": 1 - r2_score(y_true, y_pred),
            "MGD": mean_gamma_deviance(y_true, y_pred),
            "MPD": mean_poisson_deviance(y_true, y_pred),
        }

    train = _scores(original_ytrain, train_predict)
    test = _scores(original_ytest, test_predict)

    print("Evaluation metrics... lower is better:")
    for tag in ("RMSE", "MSE", "MAE", "EVS", "R^2", "MGD", "MPD"):
        print(f"{tag} -> train:{train[tag]:.4f}, test:{test[tag]:.4f}")

    # Keys (including their trailing spaces/colons) are kept exactly as before
    # because callers may look them up by label.
    metrics = {
        'Train data RMSE: ': train["RMSE"],
        'Train data MSE: ': train["MSE"],
        'Train data MAE: ': train["MAE"],
        'Test data RMSE: ': test["RMSE"],
        "Test data MSE: ": test["MSE"],
        "Test data MAE: ": test["MAE"],
        "Train data explained variance regression score:": train["EVS"],
        "Test data explained variance regression score:": test["EVS"],
        "Train data R2 score:": train["R^2"],
        "Test data R2 score:": test["R^2"],
        "Train data MGD: ": train["MGD"],
        "Test data MGD: ": test["MGD"],
        "Train data MPD: ": train["MPD"],
        "Test data MPD: ": test["MPD"],
    }

    return metrics
    

<a name="algo"></a>

# Algorithms

<a name="svr"></a>

### Support vector regression - SVR

<a name="svrevalmat"></a>

#### Evaluation metrices RMSE, MSE and MAE

Root Mean Square Error (RMSE), Mean Square Error (MSE) and Mean absolute Error (MAE) are a standard way to measure the error of a model in predicting quantitative data.

<a name="svrevariance"></a>

#### Explained variance regression score


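The explained variance score measures how much of the variance of the actual values is accounted for by the predictions: $\mathrm{EVS}(y, \hat{y}) = 1 - \frac{\mathrm{Var}(y - \hat{y})}{\mathrm{Var}(y)}$. Here $\mathrm{Var}(y - \hat{y})$ and $\mathrm{Var}(y)$ are the variances of the prediction errors and of the actual values respectively. Scores close to 1.0 are highly desired, indicating that the variance of the errors is small. Note that `compute_metrics` in this notebook reports $1 - \mathrm{EVS}$, so in its output values close to 0 are better.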

<a name="svrrsquare"></a>

#### R<sup>2</sup> score for regression

R-squared (R2) is a statistical measure that represents the proportion of the variance for a dependent variable that's explained by an independent variable or variables in a regression model.

1 = Best <br>
0 or < 0 = worse

<a name="svrrloss"></a>

#### Regression Loss Mean Gamma deviance regression loss (MGD) and Mean Poisson deviance regression loss (MPD)

In [None]:
def super_vector_regression(X_train, X_test, y_train, y_test, scaler):
    """Fit an RBF support vector regressor and evaluate it on both splits.

    The function now uses the samples passed in. Previously the four data
    arguments were ignored and rebuilt from the module-level ``train_data``,
    ``test_data`` and ``time_step``.

    Parameters
    ----------
    X_train, X_test
        Input windows for the training and testing samples.
    y_train, y_test
        Matching target values (scaled).
    scaler
        Fitted scaler used to convert predictions and targets back to
        original units before computing metrics.

    Returns
    -------
    (train_predict, test_predict, svr_rbf, SVR_eval)
        Predictions for both splits in original units (column vectors), the
        fitted model, and the metrics dict from ``compute_metrics``.
    """
    from sklearn.svm import SVR

    svr_rbf = SVR(kernel= 'rbf', C= 1e2, gamma= 0.1)
    svr_rbf.fit(X_train, y_train)

    # Predict on both splits and shape the results as single-column arrays,
    # which is what the scaler expects.
    train_predict = svr_rbf.predict(X_train).reshape(-1, 1)
    test_predict = svr_rbf.predict(X_test).reshape(-1, 1)

    print("Train data prediction:", train_predict.shape)
    print("Test data prediction:", test_predict.shape)

    # Back to original units.
    train_predict = scaler.inverse_transform(train_predict)
    test_predict = scaler.inverse_transform(test_predict)
    original_ytrain = scaler.inverse_transform(y_train.reshape(-1, 1))
    original_ytest = scaler.inverse_transform(y_test.reshape(-1, 1))

    SVR_eval = compute_metrics(original_ytrain, train_predict, original_ytest, test_predict)

    return train_predict, test_predict, svr_rbf, SVR_eval
    
    
train_predict, test_predict, svr_rbf, SVR_eval = super_vector_regression(X_train, X_test, y_train, y_test, scaler)
# train_predict, test_predict, svr_rbf, SVR_eval = super_vector_regression(scaler)

<a name="svrcomparechart"></a>

#### Comparision between original stock close price vs predicted close price
Show SVR eval!

In [None]:
def compare_original(closedf, time_step, train_predict, test_predict, close_stock):
    """Plot the original close price together with train and test predictions.

    Predictions are written into NaN-filled arrays shaped like ``closedf`` so
    that each predicted value lines up with the date it refers to. The train
    predictions start ``time_step`` rows in; the test predictions start at
    ``len(train_predict) + 2 * time_step + 1`` and stop one row before the end.
    """
    def _nan_canvas():
        # Array shaped like the full series with every entry set to NaN.
        canvas = np.empty_like(closedf)
        canvas[:, :] = np.nan
        return canvas

    train_count = len(train_predict)

    # shift train predictions for plotting
    train_canvas = _nan_canvas()
    train_canvas[time_step:train_count + time_step, :] = train_predict
    print("Train predicted data: ", train_canvas.shape)

    # shift test predictions for plotting
    test_canvas = _nan_canvas()
    test_start = train_count + (time_step * 2) + 1
    test_canvas[test_start:len(closedf) - 1, :] = test_predict
    print("Test predicted data: ", test_canvas.shape)

    plotdf = pd.DataFrame({
        'date': close_stock['date'],
        'original_close': close_stock['close'],
        'train_predicted_close': train_canvas.reshape(1, -1)[0].tolist(),
        'test_predicted_close': test_canvas.reshape(1, -1)[0].tolist(),
    })

    plotted = [
        plotdf[column]
        for column in ('original_close', 'train_predicted_close', 'test_predicted_close')
    ]
    legend_names = cycle(['Original close price','Train predicted close price','Test predicted close price'])

    fig = px.line(plotdf, x=plotdf['date'], y=plotted,
                  labels={'value':'Stock price','date': 'Date'})
    fig.update_layout(
        title_text='Comparision between original close price vs predicted close price',
        plot_bgcolor='white',
        font_size=15,
        font_color='black',
        legend_title_text='Close Price',
    )
    fig.for_each_trace(lambda trace: trace.update(name=next(legend_names)))

    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.show()
    
compare_original(closedf, time_step, train_predict, test_predict, close_stock)
# compare_original(closedf, time_step, close_stock, scaler)

<a name="svrpred10"></a>

#### Predicting next 10 days

<a name="svrlast25"></a>

#### Plotting last 15 days and next predicted 10 days

In [None]:
def predict_next_10(test_data, time_step, svr_rbf, pred_days=10):
    """Forecast the next ``pred_days`` closes recursively and plot them.

    The forecast starts from the last ``time_step`` values of ``test_data``.
    Each predicted value is appended to the history and the next prediction
    is made from the most recent ``time_step`` values.

    NOTE: like the original implementation, this reads the module-level
    ``scaler`` and ``closedf`` to build the chart.

    Parameters
    ----------
    test_data : ndarray
        Scaled test series; its last ``time_step`` values seed the forecast.
    time_step : int
        Window length the model was trained with.
    svr_rbf
        Fitted model exposing ``predict``.
    pred_days : int, optional
        Number of future steps to predict (default 10).

    Returns
    -------
    list
        The predicted values, still in scaled units.
    """
    history = test_data[len(test_data) - time_step:].reshape(1, -1)[0].tolist()

    lst_output = []
    for _ in range(pred_days):
        # Always predict from the newest time_step values.
        window = np.array(history[-time_step:]).reshape(1, -1)
        yhat = svr_rbf.predict(window)
        history.extend(yhat.tolist())
        lst_output.extend(yhat.tolist())

    print("Output of predicted next days: ", len(lst_output))

    recent_close = scaler.inverse_transform(
        closedf[len(closedf) - time_step:]
    ).reshape(1, -1).tolist()[0]
    forecast_close = scaler.inverse_transform(
        np.array(lst_output).reshape(-1, 1)
    ).reshape(1, -1).tolist()[0]

    # Two independent lists: the previous code bound both names to the same
    # list object, so both traces showed identical data.
    last_original_days_value = recent_close + [np.nan] * len(forecast_close)
    next_predicted_days_value = [np.nan] * len(recent_close) + forecast_close

    new_pred_plot = pd.DataFrame({
        'last_original_days_value': last_original_days_value,
        'next_predicted_days_value': next_predicted_days_value
    })

    # Labels follow the actual window sizes (identical to the old text for 15/10).
    names = cycle([f'Last {time_step} days close price',
                   f'Predicted next {pred_days} days close price'])

    fig = px.line(new_pred_plot, x=new_pred_plot.index,
                  y=[new_pred_plot['last_original_days_value'],
                     new_pred_plot['next_predicted_days_value']],
                  labels={'value': 'Stock price','index': 'Timestamp'})
    fig.update_layout(title_text=f'Compare last {time_step} days vs next {pred_days} days',
                      plot_bgcolor='white', font_size=15, font_color='black',
                      legend_title_text='Close Price')
    fig.for_each_trace(lambda t: t.update(name=next(names)))
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.show()

    return lst_output
    
    
lst_output = predict_next_10(test_data, time_step, svr_rbf)

<a name="svrwholepred"></a>

#### Plotting whole closing stock price with prediction

In [None]:
def final_svr(closedf, lst_output):
    """Plot the whole close series followed by the predicted values.

    The scaled history and the scaled predictions are joined, converted back
    to original units with the module-level ``scaler`` and drawn as one line.

    Returns
    -------
    list
        The combined series in original units.
    """
    predicted_rows = np.array(lst_output).reshape(-1, 1).tolist()
    combined_rows = closedf.tolist() + predicted_rows
    svrdf = scaler.inverse_transform(combined_rows).reshape(1, -1).tolist()[0]

    legend_names = cycle(['Close Price'])

    fig = px.line(svrdf, labels={'value': 'Stock price','index': 'Timestamp'})
    fig.update_layout(
        title_text='Plotting whole closing stock price with prediction',
        plot_bgcolor='white',
        font_size=15,
        font_color='black',
        legend_title_text='Stock',
    )
    fig.for_each_trace(lambda trace: trace.update(name=next(legend_names)))
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.show()

    return svrdf
    
svrdf = final_svr(closedf, lst_output)

<a name="rf"></a>

### Random Forest Regressor - RF

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Fit a 100-tree random forest on the windowed training samples;
# random_state is fixed so repeated runs build the same forest.
regressor = RandomForestRegressor(n_estimators = 100, random_state = 0)
regressor.fit(X_train, y_train)

In [None]:
# Predict both splits with the fitted random forest.

train_predict=regressor.predict(X_train)
test_predict=regressor.predict(X_test)

# Reshape to single-column arrays, the layout passed to scaler.inverse_transform below.
train_predict = train_predict.reshape(-1,1)
test_predict = test_predict.reshape(-1,1)

print("Train data prediction:", train_predict.shape)
print("Test data prediction:", test_predict.shape)


In [None]:
# Transform predictions and targets back to original price units
# (undoes the min-max scaling applied in prepare_data).

train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)
original_ytrain = scaler.inverse_transform(y_train.reshape(-1,1)) 
original_ytest = scaler.inverse_transform(y_test.reshape(-1,1)) 

<a name="rfevalmat"></a>

#### Evaluation metrices RMSE, MSE and MAE

Root Mean Square Error (RMSE), Mean Square Error (MSE) and Mean absolute Error (MAE) are a standard way to measure the error of a model in predicting quantitative data.

<a name="rfevariance"></a>

#### Explained variance regression score - EVS
##### Modified to inverse the score

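The explained variance score measures how much of the variance of the actual values is accounted for by the predictions: $\mathrm{EVS}(y, \hat{y}) = 1 - \frac{\mathrm{Var}(y - \hat{y})}{\mathrm{Var}(y)}$. Here $\mathrm{Var}(y - \hat{y})$ and $\mathrm{Var}(y)$ are the variances of the prediction errors and of the actual values respectively. Scores close to 1.0 are highly desired, indicating that the variance of the errors is small. Because the score is inverted here, the notebook reports $1 - \mathrm{EVS}$, so in its output values close to 0 are better.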

<a name="rfrsquare"></a>

#### R<sup>2</sup> score for regression
##### Modified to inverse the score

R-squared (R2) is a statistical measure that represents the proportion of the variance for a dependent variable that's explained by an independent variable or variables in a regression model.

1 = Best <br>
0 or < 0 = worse

<a name="rfrloss"></a>

#### Regression Loss Mean Gamma deviance regression loss (MGD) and Mean Poisson deviance regression loss (MPD)

In [None]:
# Print and keep the random forest's metrics (all values in original price units).
RF = compute_metrics(original_ytrain, train_predict, original_ytest, test_predict)

<a name="rfcomparechart"></a>

#### Comparision between original stock close price vs predicted close price

In [None]:
# shift train predictions for plotting
# Predictions are written into NaN-filled arrays shaped like closedf so each
# value lines up with the date it refers to.

look_back=time_step
trainPredictPlot = np.empty_like(closedf)
trainPredictPlot[:, :] = np.nan
# The first prediction refers to the row right after the first window.
trainPredictPlot[look_back:len(train_predict)+look_back, :] = train_predict
print("Train predicted data: ", trainPredictPlot.shape)

# shift test predictions for plotting
testPredictPlot = np.empty_like(closedf)
testPredictPlot[:, :] = np.nan
# The +1 / -1 offsets match create_dataset(), which emits
# len(split) - time_step - 1 samples per split.
testPredictPlot[len(train_predict)+(look_back*2)+1:len(closedf)-1, :] = test_predict
print("Test predicted data: ", testPredictPlot.shape)

# Legend names, consumed in trace order below.
names = cycle(['Original close price','Train predicted close price','Test predicted close price'])


plotdf = pd.DataFrame({'date': close_stock['date'],
                       'original_close': close_stock['close'],
                      'train_predicted_close': trainPredictPlot.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot.reshape(1,-1)[0].tolist()})

fig = px.line(plotdf,x=plotdf['date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','date': 'Date'})
fig.update_layout(title_text='Comparision between original close price vs predicted close price',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name="rfpred10"></a>

#### Predicting next 10 days

In [None]:
# Seed the recursive forecast with the last time_step scaled values of the test split.
x_input=test_data[len(test_data)-time_step:].reshape(1,-1)
temp_input=list(x_input)
temp_input=temp_input[0].tolist()

# NOTE(review): `array` and `n_steps` are not used in the visible cells.
from numpy import array

lst_output=[]
n_steps=time_step
i=0
pred_days = 10
# One prediction per iteration; each prediction is appended to temp_input so
# the following step can use it as input.
while(i<pred_days):
    
    if(len(temp_input)>time_step):
        
        # Drop the oldest value so the window is time_step long again.
        x_input=np.array(temp_input[1:])
        #print("{} day input {}".format(i,x_input))
        x_input=x_input.reshape(1,-1)
        
        yhat = regressor.predict(x_input)
        #print("{} day output {}".format(i,yhat))
        temp_input.extend(yhat.tolist())
        temp_input=temp_input[1:]
       
        lst_output.extend(yhat.tolist())
        i=i+1
        
    else:
        # First iteration: temp_input holds exactly time_step values, predict from the seed window.
        yhat = regressor.predict(x_input)
        
        temp_input.extend(yhat.tolist())
        lst_output.extend(yhat.tolist())
        
        i=i+1
        
print("Output of predicted next days: ", len(lst_output))

<a name="rflast25"></a>

#### Plotting last 15 days and next predicted 10 days

In [None]:
# 1-based positions of the last time_step known days and of the pred_days predicted days.
last_days=np.arange(1,time_step+1)
day_pred=np.arange(time_step+1,time_step+pred_days+1)
# print(last_days)
# print(day_pred)

In [None]:
# Build the two series for the "known days vs predicted days" chart.
recent_close = scaler.inverse_transform(closedf[len(closedf)-time_step:]).reshape(1,-1).tolist()[0]
forecast_close = scaler.inverse_transform(np.array(lst_output).reshape(-1,1)).reshape(1,-1).tolist()[0]

# NaN template spanning the known days followed by the predicted days.
temp_mat = [np.nan] * (len(recent_close) + len(forecast_close))

# Each series gets its own copy. Binding both names to temp_mat itself made
# them the same list, so both traces showed identical data.
last_original_days_value = list(temp_mat)
next_predicted_days_value = list(temp_mat)

last_original_days_value[:len(recent_close)] = recent_close
next_predicted_days_value[len(recent_close):] = forecast_close

# Labels follow the actual window sizes (identical to the old text for 15/10).
names = cycle([f'Last {time_step} days close price', f'Predicted next {pred_days} days close price'])

new_pred_plot = pd.DataFrame({
    'last_original_days_value':last_original_days_value,
    'next_predicted_days_value':next_predicted_days_value
})

fig = px.line(new_pred_plot,x=new_pred_plot.index, y=[new_pred_plot['last_original_days_value'],
                                                      new_pred_plot['next_predicted_days_value']],
              labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(title_text=f'Compare last {time_step} days vs next {pred_days} days',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name="rfwholepred"></a>

#### Plotting whole closing stock price with prediction

In [None]:
# Join the scaled history with the scaled random forest forecast, then
# convert the combined series back to original price units.
rfdf=closedf.tolist()
rfdf.extend((np.array(lst_output).reshape(-1,1)).tolist())
rfdf=scaler.inverse_transform(rfdf).reshape(1,-1).tolist()[0]

names = cycle(['Close price'])

fig = px.line(rfdf,labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(title_text='Plotting whole closing stock price with prediction',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Stock')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
# The figure is built but not displayed: the show() call is commented out.
# fig.show()

<a name="knn"></a>

### K-nearest neighbour - KNN

In [None]:
from sklearn import neighbors

# The number of neighbours is set to the look-back window length.
K = time_step
neighbor = neighbors.KNeighborsRegressor(n_neighbors = K)
neighbor.fit(X_train, y_train)

In [None]:
# Predict both splits with the fitted KNN regressor.

train_predict=neighbor.predict(X_train)
test_predict=neighbor.predict(X_test)

# Reshape to single-column arrays, the layout passed to scaler.inverse_transform below.
train_predict = train_predict.reshape(-1,1)
test_predict = test_predict.reshape(-1,1)

print("Train data prediction:", train_predict.shape)
print("Test data prediction:", test_predict.shape)


In [None]:
# Transform predictions and targets back to original price units
# (undoes the min-max scaling applied in prepare_data).

train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)
original_ytrain = scaler.inverse_transform(y_train.reshape(-1,1)) 
original_ytest = scaler.inverse_transform(y_test.reshape(-1,1)) 

<a name="rfevalmat"></a>

#### Evaluation metrices RMSE, MSE and MAE

Root Mean Square Error (RMSE), Mean Square Error (MSE) and Mean absolute Error (MAE) are a standard way to measure the error of a model in predicting quantitative data.

<a name="rfevariance"></a>

#### Explained variance regression score - EVS
##### Modified to inverse the score

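The explained variance score measures how much of the variance of the actual values is accounted for by the predictions: $\mathrm{EVS}(y, \hat{y}) = 1 - \frac{\mathrm{Var}(y - \hat{y})}{\mathrm{Var}(y)}$. Here $\mathrm{Var}(y - \hat{y})$ and $\mathrm{Var}(y)$ are the variances of the prediction errors and of the actual values respectively. Scores close to 1.0 are highly desired, indicating that the variance of the errors is small. Because the score is inverted here, the notebook reports $1 - \mathrm{EVS}$, so in its output values close to 0 are better.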

<a name="rfrsquare"></a>

#### R<sup>2</sup> score for regression
##### Modified to inverse the score

R-squared (R2) is a statistical measure that represents the proportion of the variance for a dependent variable that's explained by an independent variable or variables in a regression model.

1 = Best <br>
0 or < 0 = worse

<a name="rfrloss"></a>

#### Regression Loss Mean Gamma deviance regression loss (MGD) and Mean Poisson deviance regression loss (MPD)

In [None]:
# Print and keep the KNN regressor's metrics (all values in original price units).
KNN = compute_metrics(original_ytrain, train_predict, original_ytest, test_predict)

<a name="knncomparechart"></a>

#### Comparision between original stock close price vs predicted close price

In [None]:
# shift train predictions for plotting
# Predictions are written into NaN-filled arrays shaped like closedf so each
# value lines up with the date it refers to.

look_back=time_step
trainPredictPlot = np.empty_like(closedf)
trainPredictPlot[:, :] = np.nan
# The first prediction refers to the row right after the first window.
trainPredictPlot[look_back:len(train_predict)+look_back, :] = train_predict
print("Train predicted data: ", trainPredictPlot.shape)

# shift test predictions for plotting
testPredictPlot = np.empty_like(closedf)
testPredictPlot[:, :] = np.nan
# The +1 / -1 offsets match create_dataset(), which emits
# len(split) - time_step - 1 samples per split.
testPredictPlot[len(train_predict)+(look_back*2)+1:len(closedf)-1, :] = test_predict
print("Test predicted data: ", testPredictPlot.shape)

# Legend names, consumed in trace order below.
names = cycle(['Original close price','Train predicted close price','Test predicted close price'])

plotdf = pd.DataFrame({'date': close_stock['date'],
                       'original_close': close_stock['close'],
                      'train_predicted_close': trainPredictPlot.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot.reshape(1,-1)[0].tolist()})

fig = px.line(plotdf,x=plotdf['date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','date': 'Date'})
fig.update_layout(title_text='Comparision between original close price vs predicted close price',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name="knnpred10"></a>

#### Predicting next 10 days

In [None]:
# Seed the recursive forecast with the last time_step scaled values of the test split.
x_input=test_data[len(test_data)-time_step:].reshape(1,-1)
temp_input=list(x_input)
temp_input=temp_input[0].tolist()

# NOTE(review): `array` and `n_steps` are not used in the visible cells.
from numpy import array

lst_output=[]
n_steps=time_step
i=0
pred_days = 10
# One prediction per iteration; each prediction is appended to temp_input so
# the following step can use it as input.
while(i<pred_days):
    
    if(len(temp_input)>time_step):
        
        # Drop the oldest value so the window is time_step long again.
        x_input=np.array(temp_input[1:])
        #print("{} day input {}".format(i,x_input))
        x_input=x_input.reshape(1,-1)
        
        yhat = neighbor.predict(x_input)
        #print("{} day output {}".format(i,yhat))
        temp_input.extend(yhat.tolist())
        temp_input=temp_input[1:]
       
        lst_output.extend(yhat.tolist())
        i=i+1
        
    else:
        # First iteration: temp_input holds exactly time_step values, predict from the seed window.
        yhat = neighbor.predict(x_input)
        
        temp_input.extend(yhat.tolist())
        lst_output.extend(yhat.tolist())
        
        i=i+1
        
# print("Output of predicted next days: ", len(lst_output))

<a name="knnlast25"></a>

#### Plotting last 15 days and next predicted 10 days

In [None]:
# 1-based day numbers: the observed window first, the forecast days after it.
last_days = np.arange(time_step) + 1
day_pred = np.arange(pred_days) + (time_step + 1)
# print(last_days)
# print(day_pred)

In [None]:
# Chart the last `time_step` observed closes next to the KNN forecast.
# Both series are mapped back from the scaled range to prices first.
last_close = scaler.inverse_transform(closedf[len(closedf)-time_step:]).reshape(1,-1).tolist()[0]
predicted_close = scaler.inverse_transform(np.array(lst_output).reshape(-1,1)).reshape(1,-1).tolist()[0]

# Each series gets its OWN list, padded with NaN where it has no data.
# If both names pointed at one shared list, the two DataFrame columns (and
# therefore the two plotted lines) would be identical.
last_original_days_value = last_close + [np.nan] * len(predicted_close)
next_predicted_days_value = [np.nan] * len(last_close) + predicted_close

new_pred_plot = pd.DataFrame({
    'last_original_days_value':last_original_days_value,
    'next_predicted_days_value':next_predicted_days_value
})

names = cycle(['Last 15 days close price','Predicted next 10 days close price'])

fig = px.line(new_pred_plot,x=new_pred_plot.index, y=[new_pred_plot['last_original_days_value'],
                                                      new_pred_plot['next_predicted_days_value']],
              labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(title_text='Compare last 15 days vs next 10 days',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name="knnwholepred"></a>

#### Plotting whole closing stock price with prediction

In [None]:
# Append the forecast (still scaled) to the scaled close series, then map the
# whole column back to prices. `knndf` is reused by the final comparison.
knndf = np.concatenate([closedf, np.array(lst_output).reshape(-1, 1)])
knndf = scaler.inverse_transform(knndf).ravel().tolist()

fig = px.line(knndf, labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(
    title_text='Plotting whole closing stock price with prediction',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
    legend_title_text='Stock',
)

names = cycle(['Close price'])
fig.for_each_trace(lambda trace: trace.update(name=next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
# fig.show()

<a name="lstm"></a>

### LSTM

In [None]:
# The LSTM layers expect 3-D input: [samples, time steps, features].
# Add a trailing feature axis of size 1 to both splits.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

for label, split in (("X_train: ", X_train), ("X_test: ", X_test)):
    print(label, split.shape)

<a name="lstmevalmat"></a>

#### LSTM model structure

In [None]:
# Start from a clean Keras state, then stack three 32-unit LSTM layers
# followed by a single-output dense layer.
tf.keras.backend.clear_session()
model = Sequential([
    LSTM(32, return_sequences=True, input_shape=(time_step, 1)),
    LSTM(32, return_sequences=True),
    LSTM(32),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Print the layer-by-layer summary of the LSTM model built in the previous cell.
model.summary()

In [None]:
# model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=200,batch_size=32,verbose=1)

# model.fit
## Better computing

We save the best model using ModelCheckpoint...

https://medium.com/zero-equals-false/early-stopping-to-avoid-overfitting-in-neural-network-keras-b68c96ed05d9#:~:text=Too%20many%20epochs%20can%20lead,improving%20on%20the%20validation%20dataset.

TODO: check the evaluation


In [None]:
# Import from tensorflow.keras, the same package the model and its layers
# come from, instead of mixing in the standalone `keras` package.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model

# Keep only the weights of the epoch with the lowest validation loss.
# `callbacks` is a list, which is the form Model.fit documents.
# callbacks = [EarlyStopping(monitor='val_loss', patience=200, restore_best_weights=True)]
callbacks = [ModelCheckpoint(filepath='lstm_best.h5', monitor='val_loss', save_best_only=True)]

# NOTE(review): the test split doubles as validation data, so the checkpoint
# is selected on the same data that is scored later. Confirm this is intended.
model.fit(X_train, y_train, validation_data=(X_test, y_test),
          epochs=200, batch_size=5, verbose=1, callbacks=callbacks)

# Continue with the best checkpoint rather than the last-epoch weights.
model = load_model('lstm_best.h5')

# TODO: evaluate the restored model. It is compiled without metrics, so
# model.evaluate() returns the loss only.



In [None]:
# Run the restored LSTM over both splits. The trailing tuple is the cell's
# displayed output (the two prediction shapes).
train_predict, test_predict = model.predict(X_train), model.predict(X_test)
(train_predict.shape, test_predict.shape)

In [None]:
# Map the scaled predictions and the scaled targets back to price units.
train_predict, test_predict = (
    scaler.inverse_transform(scaled) for scaled in (train_predict, test_predict)
)
original_ytrain, original_ytest = (
    scaler.inverse_transform(target.reshape(-1, 1)) for target in (y_train, y_test)
)

<a name="rfevalmat"></a>

#### Evaluation metrics RMSE, MSE and MAE

Root Mean Square Error (RMSE), Mean Square Error (MSE) and Mean absolute Error (MAE) are a standard way to measure the error of a model in predicting quantitative data.

<a name="rfevariance"></a>

#### Explained variance regression score - EVS
##### Modified to inverse the score

The explained variance score measures how much of the variance of the actual values is captured by the predictions:

$$\mathrm{EVS}(y, \hat{y}) = 1 - \frac{\mathrm{Var}(y - \hat{y})}{\mathrm{Var}(y)}$$

Here $\mathrm{Var}(y - \hat{y})$ and $\mathrm{Var}(y)$ are the variances of the prediction errors and of the actual values, respectively. Scores close to 1.0 are highly desired, indicating a smaller variance (squared standard deviation) of the errors.

<a name="rfrsquare"></a>

#### R<sup>2</sup> score for regression
##### Modified to inverse the score

R-squared (R2) is a statistical measure that represents the proportion of the variance for a dependent variable that's explained by an independent variable or variables in a regression model.

1 = Best <br>
0 or < 0 = worse

<a name="rfrloss"></a>

#### Regression Loss Mean Gamma deviance regression loss (MGD) and Mean Poisson deviance regression loss (MPD)

In [None]:
# Metrics for the LSTM train/test predictions; `LSTM_eval` is handed to
# scores() in the final comparison cell.
LSTM_eval = compute_metrics(original_ytrain, train_predict, original_ytest, test_predict)

<a name="lstmcomparechart"></a>

#### Comparison between original stock close price vs predicted close price

In [None]:
# Overlay the LSTM train/test predictions on the original close prices.
# Both prediction arrays have the shape of `closedf` and stay NaN wherever
# no prediction exists, so all three curves share one date axis.
look_back = time_step

# Train predictions start right after the first look-back window.
trainPredictPlot = np.empty_like(closedf)
trainPredictPlot.fill(np.nan)
trainPredictPlot[look_back:look_back + len(train_predict), :] = train_predict
print("Train predicted data: ", trainPredictPlot.shape)

# Test predictions follow the train block and a second look-back window.
testPredictPlot = np.empty_like(closedf)
testPredictPlot.fill(np.nan)
testPredictPlot[len(train_predict) + 2 * look_back + 1:len(closedf) - 1, :] = test_predict
print("Test predicted data: ", testPredictPlot.shape)

plotdf = pd.DataFrame({
    'date': close_stock['date'],
    'original_close': close_stock['close'],
    'train_predicted_close': trainPredictPlot.ravel().tolist(),
    'test_predicted_close': testPredictPlot.ravel().tolist(),
})

fig = px.line(
    plotdf,
    x=plotdf['date'],
    y=[plotdf[column] for column in ('original_close',
                                     'train_predicted_close',
                                     'test_predicted_close')],
    labels={'value':'Stock price','date': 'Date'},
)
fig.update_layout(
    title_text=f'LSTM - Comparision between original close price vs predicted close price {source[0], source[1]}',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
    legend_title_text='Close Price',
)

# Give the traces readable legend names, in the order they were added.
names = cycle(['Original close price','Train predicted close price','Test predicted close price'])
fig.for_each_trace(lambda trace: trace.update(name=next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name="lstmpred10"></a>

#### Predicting next 10 days

In [None]:
from numpy import array

# Seed the rolling window with the last `time_step` scaled values of the
# test data, then forecast one day at a time, feeding each prediction back in.
x_input = test_data[len(test_data) - time_step:].reshape(1, -1)
temp_input = x_input[0].tolist()

lst_output = []
n_steps = time_step
pred_days = 10
i = 0
while i < pred_days:
    # From the second step on the window holds one value too many:
    # drop the oldest value and rebuild the model input from the rest.
    if len(temp_input) > time_step:
        temp_input = temp_input[1:]
        x_input = np.array(temp_input)

    # The recurrent model takes [samples, time steps, features].
    x_input = x_input.reshape((1, n_steps, 1))
    yhat = model.predict(x_input, verbose=0)

    temp_input.extend(yhat[0].tolist())
    lst_output.extend(yhat.tolist())
    i += 1

# print("Output of predicted next days: ", len(lst_output))

<a name="lstmlast25"></a>

#### Plotting last 15 days and next predicted 10 days

In [None]:
# 1-based day numbers: the observed window first, the forecast days after it.
last_days = np.arange(time_step) + 1
day_pred = np.arange(pred_days) + (time_step + 1)
# print(last_days)
# print(day_pred)

In [None]:
# Chart the last `time_step` observed closes next to the LSTM forecast.
# Both series are mapped back from the scaled range to prices first.
last_close = scaler.inverse_transform(closedf[len(closedf)-time_step:]).reshape(1,-1).tolist()[0]
predicted_close = scaler.inverse_transform(np.array(lst_output).reshape(-1,1)).reshape(1,-1).tolist()[0]

# Each series gets its OWN list, padded with NaN where it has no data.
# If both names pointed at one shared list, the two DataFrame columns (and
# therefore the two plotted lines) would be identical.
last_original_days_value = last_close + [np.nan] * len(predicted_close)
next_predicted_days_value = [np.nan] * len(last_close) + predicted_close

new_pred_plot = pd.DataFrame({
    'last_original_days_value':last_original_days_value,
    'next_predicted_days_value':next_predicted_days_value
})

names = cycle(['Last 15 days close price','Predicted next 10 days close price'])

fig = px.line(new_pred_plot,x=new_pred_plot.index, y=[new_pred_plot['last_original_days_value'],
                                                      new_pred_plot['next_predicted_days_value']],
              labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(title_text='Compare last 15 days vs next 10 days',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Close Price')

fig.for_each_trace(lambda t:  t.update(name = next(names)))
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name="lstmwholepred"></a>

#### Plotting whole closing stock price with prediction

In [None]:
# Append the forecast (still scaled) to the scaled close series, then map the
# whole column back to prices. `lstmdf` is reused by the final comparison.
lstmdf = np.concatenate([closedf, np.array(lst_output).reshape(-1, 1)])
lstmdf = scaler.inverse_transform(lstmdf).ravel().tolist()

fig = px.line(lstmdf, labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(
    title_text='Plotting whole closing stock price with prediction',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
    legend_title_text='Stock',
)

names = cycle(['Close price'])
fig.for_each_trace(lambda trace: trace.update(name=next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
# fig.show()

<a name="gru"></a>

### GRU

In [None]:
# The GRU layers expect 3-D input: [samples, time steps, features].
# Make sure both splits carry a trailing feature axis of size 1.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

for label, split in (("X_train: ", X_train), ("X_test: ", X_test)):
    print(label, split.shape)


#### GRU model structure

In [None]:
# Start from a clean Keras state, then stack four 32-unit GRU layers
# followed by a single-output dense layer.
tf.keras.backend.clear_session()
model = Sequential([
    GRU(32, return_sequences=True, input_shape=(time_step, 1)),
    GRU(32, return_sequences=True),
    GRU(32, return_sequences=True),
    GRU(32),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Print the layer-by-layer summary of the GRU model built in the previous cell.
model.summary()

In [None]:
# Keep only the weights of the epoch with the lowest validation loss.
# `callbacks` is a list, which is the form Model.fit documents.
callbacks = [ModelCheckpoint(filepath='gru_best.h5', monitor='val_loss', save_best_only=True)]

# NOTE(review): the test split doubles as validation data, so the checkpoint
# is selected on the same data that is scored later. Confirm this is intended.
model.fit(X_train, y_train, validation_data=(X_test, y_test),
          epochs=200, batch_size=5, verbose=1, callbacks=callbacks)

# The best checkpoint ('gru_best.h5') is reloaded in the next cell.


In [None]:
# Reload the best GRU checkpoint written during training. Loading
# 'lstm_best.h5' here would make every GRU metric and forecast below
# come from the LSTM model instead.
model = load_model('gru_best.h5')


In [None]:
# Run the restored model over both splits. The trailing tuple is the cell's
# displayed output (the two prediction shapes).
train_predict, test_predict = model.predict(X_train), model.predict(X_test)
(train_predict.shape, test_predict.shape)

In [None]:
# Map the scaled predictions and the scaled targets back to price units.
train_predict, test_predict = (
    scaler.inverse_transform(scaled) for scaled in (train_predict, test_predict)
)
original_ytrain, original_ytest = (
    scaler.inverse_transform(target.reshape(-1, 1)) for target in (y_train, y_test)
)

<a name="rfevalmat"></a>

#### Evaluation metrics RMSE, MSE and MAE

Root Mean Square Error (RMSE), Mean Square Error (MSE) and Mean absolute Error (MAE) are a standard way to measure the error of a model in predicting quantitative data.

<a name="rfevariance"></a>

#### Explained variance regression score - EVS
##### Modified to inverse the score

The explained variance score measures how much of the variance of the actual values is captured by the predictions:

$$\mathrm{EVS}(y, \hat{y}) = 1 - \frac{\mathrm{Var}(y - \hat{y})}{\mathrm{Var}(y)}$$

Here $\mathrm{Var}(y - \hat{y})$ and $\mathrm{Var}(y)$ are the variances of the prediction errors and of the actual values, respectively. Scores close to 1.0 are highly desired, indicating a smaller variance (squared standard deviation) of the errors.

<a name="rfrsquare"></a>

#### R<sup>2</sup> score for regression
##### Modified to inverse the score

R-squared (R2) is a statistical measure that represents the proportion of the variance for a dependent variable that's explained by an independent variable or variables in a regression model.

1 = Best <br>
0 or < 0 = worse

<a name="rfrloss"></a>

#### Regression Loss Mean Gamma deviance regression loss (MGD) and Mean Poisson deviance regression loss (MPD)

In [None]:
# Metrics for the GRU-section train/test predictions; `GRU_eval` is handed to
# scores() in the final comparison cell.
GRU_eval = compute_metrics(original_ytrain, train_predict, original_ytest, test_predict)

<a name="grucomparechart"></a>

#### Comparison between original stock close price vs predicted close price

In [None]:
# Overlay the GRU-section train/test predictions on the original close prices.
# Both prediction arrays have the shape of `closedf` and stay NaN wherever
# no prediction exists, so all three curves share one date axis.
look_back = time_step

# Train predictions start right after the first look-back window.
trainPredictPlot = np.empty_like(closedf)
trainPredictPlot.fill(np.nan)
trainPredictPlot[look_back:look_back + len(train_predict), :] = train_predict
print("Train predicted data: ", trainPredictPlot.shape)

# Test predictions follow the train block and a second look-back window.
testPredictPlot = np.empty_like(closedf)
testPredictPlot.fill(np.nan)
testPredictPlot[len(train_predict) + 2 * look_back + 1:len(closedf) - 1, :] = test_predict
print("Test predicted data: ", testPredictPlot.shape)

plotdf = pd.DataFrame({
    'date': close_stock['date'],
    'original_close': close_stock['close'],
    'train_predicted_close': trainPredictPlot.ravel().tolist(),
    'test_predicted_close': testPredictPlot.ravel().tolist(),
})

fig = px.line(
    plotdf,
    x=plotdf['date'],
    y=[plotdf[column] for column in ('original_close',
                                     'train_predicted_close',
                                     'test_predicted_close')],
    labels={'value':'Stock price','date': 'Date'},
)
fig.update_layout(
    title_text='GRU - Comparision between original close price vs predicted close price',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
    legend_title_text='Close Price',
)

# Give the traces readable legend names, in the order they were added.
names = cycle(['Original close price','Train predicted close price','Test predicted close price'])
fig.for_each_trace(lambda trace: trace.update(name=next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name="grupred10"></a>

#### Predicting next 10 days

In [None]:
from numpy import array

# Seed the rolling window with the last `time_step` scaled values of the
# test data, then forecast one day at a time, feeding each prediction back in.
x_input = test_data[len(test_data) - time_step:].reshape(1, -1)
temp_input = x_input[0].tolist()

lst_output = []
n_steps = time_step
pred_days = 10
i = 0
while i < pred_days:
    # From the second step on the window holds one value too many:
    # drop the oldest value and rebuild the model input from the rest.
    if len(temp_input) > time_step:
        temp_input = temp_input[1:]
        x_input = np.array(temp_input)

    # The recurrent model takes [samples, time steps, features].
    x_input = x_input.reshape((1, n_steps, 1))
    yhat = model.predict(x_input, verbose=0)

    temp_input.extend(yhat[0].tolist())
    lst_output.extend(yhat.tolist())
    i += 1

# print("Output of predicted next days: ", len(lst_output))

<a name="grulast25"></a>

#### Plotting last 15 days and next predicted 10 days

In [None]:
# 1-based day numbers: the observed window first, the forecast days after it.
last_days = np.arange(time_step) + 1
day_pred = np.arange(pred_days) + (time_step + 1)
# print(last_days)
# print(day_pred)

In [None]:
# Chart the last `time_step` observed closes next to the GRU-section forecast.
# Both series are mapped back from the scaled range to prices first.
last_close = scaler.inverse_transform(closedf[len(closedf)-time_step:]).reshape(1,-1).tolist()[0]
predicted_close = scaler.inverse_transform(np.array(lst_output).reshape(-1,1)).reshape(1,-1).tolist()[0]

# Each series gets its OWN list, padded with NaN where it has no data.
# If both names pointed at one shared list, the two DataFrame columns (and
# therefore the two plotted lines) would be identical.
last_original_days_value = last_close + [np.nan] * len(predicted_close)
next_predicted_days_value = [np.nan] * len(last_close) + predicted_close

new_pred_plot = pd.DataFrame({
    'last_original_days_value':last_original_days_value,
    'next_predicted_days_value':next_predicted_days_value
})
names = cycle(['Last 15 days close price','Predicted next 10 days close price'])

fig = px.line(new_pred_plot,x=new_pred_plot.index, y=[new_pred_plot['last_original_days_value'],
                                                      new_pred_plot['next_predicted_days_value']],
              labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(title_text='Compare last 15 days vs next 10 days',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name="gruwholepred"></a>

#### Plotting whole closing stock price with prediction

In [None]:
# Append the forecast (still scaled) to the scaled close series, then map the
# whole column back to prices. `grudf` is reused by the final comparison.
grudf = np.concatenate([closedf, np.array(lst_output).reshape(-1, 1)])
grudf = scaler.inverse_transform(grudf).ravel().tolist()

fig = px.line(grudf, labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(
    title_text='Plotting whole closing stock price with prediction',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
    legend_title_text='Stock',
)

names = cycle(['Close price'])
fig.for_each_trace(lambda trace: trace.update(name=next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
# fig.show()

<a name="lstm_gru"></a>

### LSTM + GRU

In [None]:
# The LSTM/GRU layers expect 3-D input: [samples, time steps, features].
# Make sure both splits carry a trailing feature axis of size 1.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

for label, split in (("X_train: ", X_train), ("X_test: ", X_test)):
    print(label, split.shape)

#### Model structure

In [None]:
# Start from a clean Keras state, then stack two 32-unit LSTM layers and two
# 32-unit GRU layers, followed by a single-output dense layer.
tf.keras.backend.clear_session()
model = Sequential([
    LSTM(32, return_sequences=True, input_shape=(time_step, 1)),
    LSTM(32, return_sequences=True),
    GRU(32, return_sequences=True),
    GRU(32),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Print the layer-by-layer summary of the LSTM + GRU model built in the previous cell.
model.summary()

In [None]:
# Keep only the weights of the epoch with the lowest validation loss.
# `callbacks` is a list, which is the form Model.fit documents.
callbacks = [ModelCheckpoint(filepath='ltsmgru_best.h5', monitor='val_loss', save_best_only=True)]

# NOTE(review): the test split doubles as validation data, so the checkpoint
# is selected on the same data that is scored later. Confirm this is intended.
model.fit(X_train, y_train, validation_data=(X_test, y_test),
          epochs=200, batch_size=5, verbose=1, callbacks=callbacks)


In [None]:
# Reload the checkpoint file that the ModelCheckpoint callback wrote during
# training (the epoch with the lowest val_loss) and continue with that model.
model = load_model('ltsmgru_best.h5')

In [None]:
# Run the restored LSTM + GRU model over both splits. The trailing tuple is
# the cell's displayed output (the two prediction shapes).
train_predict, test_predict = model.predict(X_train), model.predict(X_test)
(train_predict.shape, test_predict.shape)

In [None]:
# Map the scaled predictions and the scaled targets back to price units.
train_predict, test_predict = (
    scaler.inverse_transform(scaled) for scaled in (train_predict, test_predict)
)
original_ytrain, original_ytest = (
    scaler.inverse_transform(target.reshape(-1, 1)) for target in (y_train, y_test)
)

<a name="rfevalmat"></a>

#### Evaluation metrics RMSE, MSE and MAE

Root Mean Square Error (RMSE), Mean Square Error (MSE) and Mean absolute Error (MAE) are a standard way to measure the error of a model in predicting quantitative data.

<a name="rfevariance"></a>

#### Explained variance regression score - EVS
##### Modified to inverse the score

The explained variance score measures how much of the variance of the actual values is captured by the predictions:

$$\mathrm{EVS}(y, \hat{y}) = 1 - \frac{\mathrm{Var}(y - \hat{y})}{\mathrm{Var}(y)}$$

Here $\mathrm{Var}(y - \hat{y})$ and $\mathrm{Var}(y)$ are the variances of the prediction errors and of the actual values, respectively. Scores close to 1.0 are highly desired, indicating a smaller variance (squared standard deviation) of the errors.

<a name="rfrsquare"></a>

#### R<sup>2</sup> score for regression
##### Modified to inverse the score

R-squared (R2) is a statistical measure that represents the proportion of the variance for a dependent variable that's explained by an independent variable or variables in a regression model.

1 = Best <br>
0 or < 0 = worse

<a name="rfrloss"></a>

#### Regression Loss Mean Gamma deviance regression loss (MGD) and Mean Poisson deviance regression loss (MPD)

In [None]:
# Metrics for the LSTM + GRU train/test predictions; `LSTM_GRU` is handed to
# scores() in the final comparison cell.
LSTM_GRU = compute_metrics(original_ytrain, train_predict, original_ytest, test_predict)

<a name="lstm_grucomparechart"></a>

#### Comparison between original stock close price vs predicted close price

In [None]:
# Overlay the LSTM + GRU train/test predictions on the original close prices.
# Both prediction arrays have the shape of `closedf` and stay NaN wherever
# no prediction exists, so all three curves share one date axis.
look_back = time_step

# Train predictions start right after the first look-back window.
trainPredictPlot = np.empty_like(closedf)
trainPredictPlot.fill(np.nan)
trainPredictPlot[look_back:look_back + len(train_predict), :] = train_predict
print("Train predicted data: ", trainPredictPlot.shape)

# Test predictions follow the train block and a second look-back window.
testPredictPlot = np.empty_like(closedf)
testPredictPlot.fill(np.nan)
testPredictPlot[len(train_predict) + 2 * look_back + 1:len(closedf) - 1, :] = test_predict
print("Test predicted data: ", testPredictPlot.shape)

plotdf = pd.DataFrame({
    'date': close_stock['date'],
    'original_close': close_stock['close'],
    'train_predicted_close': trainPredictPlot.ravel().tolist(),
    'test_predicted_close': testPredictPlot.ravel().tolist(),
})

fig = px.line(
    plotdf,
    x=plotdf['date'],
    y=[plotdf[column] for column in ('original_close',
                                     'train_predicted_close',
                                     'test_predicted_close')],
    labels={'value':'Stock price','date': 'Date'},
)
fig.update_layout(
    title_text='Comparision between original close price vs predicted close price',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
    legend_title_text='Close Price',
)

# Give the traces readable legend names, in the order they were added.
names = cycle(['Original close price','Train predicted close price','Test predicted close price'])
fig.for_each_trace(lambda trace: trace.update(name=next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name="lstm_grupred10"></a>

#### Predicting next 10 days

In [None]:
from numpy import array

# Seed the rolling window with the last `time_step` scaled values of the
# test data, then forecast one day at a time, feeding each prediction back in.
x_input = test_data[len(test_data) - time_step:].reshape(1, -1)
temp_input = x_input[0].tolist()

lst_output = []
n_steps = time_step
pred_days = 10
i = 0
while i < pred_days:
    # From the second step on the window holds one value too many:
    # drop the oldest value and rebuild the model input from the rest.
    if len(temp_input) > time_step:
        temp_input = temp_input[1:]
        x_input = np.array(temp_input)

    # The recurrent model takes [samples, time steps, features].
    x_input = x_input.reshape((1, n_steps, 1))
    yhat = model.predict(x_input, verbose=0)

    temp_input.extend(yhat[0].tolist())
    lst_output.extend(yhat.tolist())
    i += 1

# print("Output of predicted next days: ", len(lst_output))

<a name="lstm_grulast25"></a>

#### Plotting last 15 days and next predicted 10 days

In [None]:
# 1-based day numbers: the observed window first, the forecast days after it.
last_days = np.arange(time_step) + 1
day_pred = np.arange(pred_days) + (time_step + 1)
# print(last_days)
# print(day_pred)

In [None]:
# Chart the last `time_step` observed closes next to the LSTM + GRU forecast.
# Both series are mapped back from the scaled range to prices first.
last_close = scaler.inverse_transform(closedf[len(closedf)-time_step:]).reshape(1,-1).tolist()[0]
predicted_close = scaler.inverse_transform(np.array(lst_output).reshape(-1,1)).reshape(1,-1).tolist()[0]

# Each series gets its OWN list, padded with NaN where it has no data.
# If both names pointed at one shared list, the two DataFrame columns (and
# therefore the two plotted lines) would be identical.
last_original_days_value = last_close + [np.nan] * len(predicted_close)
next_predicted_days_value = [np.nan] * len(last_close) + predicted_close

new_pred_plot = pd.DataFrame({
    'last_original_days_value':last_original_days_value,
    'next_predicted_days_value':next_predicted_days_value
})
names = cycle(['Last 15 days close price','Predicted next 10 days close price'])

fig = px.line(new_pred_plot,x=new_pred_plot.index, y=[new_pred_plot['last_original_days_value'],
                                                      new_pred_plot['next_predicted_days_value']],
              labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(title_text='Compare last 15 days vs next 10 days',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

<a name="lstm_gruwholepred"></a>

#### Plotting whole closing stock price with prediction

In [None]:
# Append the forecast (still scaled) to the scaled close series, then map the
# whole column back to prices. `lstmgrudf` is reused by the final comparison.
lstmgrudf = np.concatenate([closedf, np.array(lst_output).reshape(-1, 1)])
lstmgrudf = scaler.inverse_transform(lstmgrudf).ravel().tolist()

fig = px.line(lstmgrudf, labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(
    title_text='Plotting whole closing stock price with prediction',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
    legend_title_text='Stock',
)

names = cycle(['Close price'])
fig.for_each_trace(lambda trace: trace.update(name=next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

In [None]:
def generate_finaldf(svr, rf, knn, lstm, gru, lstm_gru):
    """Combine the per-model price series into one comparison table.

    Each argument is one model's series of prices (history followed by the
    forecast); all six must have the same length. The series actually passed
    in are used, rather than same-named notebook globals.

    Returns a DataFrame with one column per model plus a leading ``mean``
    column holding the row-wise average over the six models.
    """
    finaldf = pd.DataFrame({
        'svr': svr,
        'rf': rf,
        'knn': knn,
        'lstm': lstm,
        'gru': gru,
        'lstm_gru': lstm_gru,
    })
    # Computed before insertion, so the mean covers the six model columns only.
    finaldf.insert(0, "mean", finaldf.mean(axis=1))

    return finaldf

# Build the comparison table from the six per-model series produced by the
# "whole closing stock price" cells of each model section.
finaldf = generate_finaldf(svrdf, rfdf, knndf, lstmdf, grudf, lstmgrudf)

# finaldf.tail(11)

<a name="final_chart"></a>

### Conclusion Chart

In [None]:
def conclusion_chart(finaldf):
    """Plot every model's series, plus the mean, from row 225 onwards.

    The traces are renamed to the numbered algorithm labels in the order the
    columns are passed to Plotly. The title names the global ``source``.
    """
    series_columns = ('svr', 'rf', 'knn', 'lstm', 'gru', 'lstm_gru', 'mean')
    names = cycle(['SVR[0]', 'RF[1]','KNN[2]','LSTM[3]','GRU[4]','LSTM + GRU[5]', 'MEAN[6]'])

    fig = px.line(
        finaldf[225:],
        x=finaldf.index[225:],
        y=[finaldf[column][225:] for column in series_columns],
        labels={'x': 'Timestamp','value':'Stock close price'},
    )
    fig.update_layout(
        title_text=f'Final stock analysis chart: {source[0]}/{source[1]}',
        font_size=15,
        font_color='black',
        legend_title_text='Algorithms',
    )
    fig.for_each_trace(lambda trace: trace.update(name=next(names)))
    fig.update_xaxes(showgrid=True)
    fig.update_yaxes(showgrid=False)
    fig.show()


conclusion_chart(finaldf)

def conclusion_chart(finaldf):
    """Plot every model's series, plus the mean, from row 225 onwards.

    Prints the algorithm labels and the head of ``finaldf`` first, then draws
    one line per column. Each trace gets its numbered label and a dash style
    so the lines stay distinguishable. The title names the global ``source``.
    """
    list_names = ['SVR[0]', 'RF[1]','KNN[2]','LSTM[3]','GRU[4]','LSTM + GRU[5]', 'MEAN[6]']
    print(list_names)
    print(finaldf.head())

    series_columns = ('svr', 'rf', 'knn', 'lstm', 'gru', 'lstm_gru', 'mean')
    names = cycle(list_names)
    # Dash styles are applied per trace below. Materialising the endless
    # `cycle` with list() would never terminate.
    dash_styles = cycle(['solid', 'dot', 'dash', 'longdash', 'dashdot', 'longdashdot'])

    fig = px.line(
        finaldf[225:],
        x=finaldf.index[225:],
        y=[finaldf[column][225:] for column in series_columns],
        labels={'x': 'Timestamp','value':'Stock close price'},
    )
    fig.update_layout(
        title_text=f'Final stock analysis chart: {source[0]}/{source[1]}',
        font_size=15,
        font_color='black',
        legend_title_text='Algorithms',
    )
    fig.for_each_trace(
        lambda trace: trace.update(name=next(names), line={'dash': next(dash_styles)})
    )
    fig.update_xaxes(showgrid=True)
    fig.update_yaxes(showgrid=False)
    fig.show()


conclusion_chart(finaldf)

# Stock price evolution per ml

In [None]:
# Try to make a table with plotly
def conclusion_table(finaldf):
    """Show the last 11 rows of ``finaldf`` as a colour-coded Plotly table.

    Every cell is shaded on a 100-step gradient according to its position
    between the smallest and the largest value of the whole 11-row window
    (global min-max normalisation, not per column). The title names the
    global ``source``.
    """
    from plotly.colors import n_colors

    data = finaldf.tail(11)

    # Global min-max normalisation. A constant window has no spread, so it
    # maps to 0 everywhere instead of dividing by zero.
    values = data.to_numpy()
    lowest = values.min()
    spread = values.max() - lowest
    df_norm = (data - lowest) / spread if spread else data * 0.0

    palette = np.array(
        n_colors('rgb(172, 255, 117)', 'rgb(233, 116, 237)', 100, colortype='rgb')
    )
    # 99.9 keeps the largest value at palette index 99.
    shade_index = (df_norm * 99.9).astype(int)

    header = {
        'values': list(data.columns),
        'fill_color': 'paleturquoise',
        'align': 'center',
    }

    # Cell values and cell colours are both built by walking data.columns,
    # so each column is shaded with its own values.
    cells = {
        'values': [data[column].round(2).tolist() for column in data.columns],
        'fill_color': [palette[shade_index[column].to_numpy()] for column in data.columns],
        'align': 'right',
        'height': 25,
    }

    fig = go.Figure(data=[go.Table(header=header, cells=cells)])

    fig.update_layout(
        title=f"Prediction for {source[0]}/{source[1]}",
        font={
            'family': "Arial",
            'size': 15,
        },
    )

    fig.show()


conclusion_table(finaldf)

In [None]:
def scores(data):
    """Build the model ranking tables from six metric records.

    ``data`` holds one record per model, in the order SVR, RF, KNN, LSTM,
    GRU, LSTM_GRU. A leading "Tests Score" column is added as the sum of the
    seven test-data metrics, and the rows are sorted by it in ascending order.

    Returns ``(percent, raw)``: the sorted table multiplied by 100 and rounded
    to two decimals, and the sorted table itself.
    """
    model_labels = ['SVR', 'RF', 'KNN','LSTM', 'GRU', 'LSTM_GRU']
    test_columns = [
        'Test data RMSE: ',
        'Test data MSE: ',
        'Test data MAE: ',
        'Test data explained variance regression score:',
        'Test data R2 score:',
        'Test data MGD: ',
        'Test data MPD: ',
    ]

    table = pd.DataFrame.from_records(data, index=model_labels)
    table.insert(0, "Tests Score", table[test_columns].sum(axis=1))
    table = table.sort_values("Tests Score")
    return (table * 100).round(2), table

# Rank the six models by their summed test metrics. `score_table` is the
# x100, rounded view shown below; `df` is the same table without that scaling.
score_table, df = scores([SVR_eval, RF, KNN, LSTM_eval, GRU_eval, LSTM_GRU])

print("LOWER IS BETTER")
score_table

Save to file

In [None]:
def save_to_file(source, df, file = 'stock_reports.json', predictions=None):
    """Store the report for one symbol in a JSON file, keyed by the symbol.

    Args:
        source: pair ``(source_name, symbol)``; ``source[1]`` is the JSON key.
        df: score table, stored under ``"scores"``.
        file: path of the JSON report file; created when missing.
        predictions: table whose last 11 rows are stored under ``"predict"``.
            Defaults to the notebook-level ``finaldf``, so existing calls
            keep working.

    Entries already in the file for other symbols are kept; an entry for the
    same symbol is replaced.
    """
    from datetime import datetime

    if predictions is None:
        predictions = finaldf

    report = {
        source[1]: {
            "date": datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
            "source": source[0],
            "predict": predictions.tail(11).to_dict(),
            "scores": df.to_dict()
        }
    }

    print(file)
    try:
        with open(file, 'r') as f:
            current_json = json.load(f)
    except FileNotFoundError:
        # First run: nothing to merge with.
        current_json = {}
    except json.JSONDecodeError as e:
        # Empty or corrupt file: report it and start a fresh report.
        print(f"Error reading file {file}, {e}")
        current_json = {}

    current_json.update(report)
    with open(file, 'w') as f:
        json.dump(current_json, f)


# Persist this run's forecast and score table under the current symbol in the
# default report file.
save_to_file(source, df)


In [None]:
import time

# Seconds elapsed since `start` was recorded in the import cell at the top
# of the notebook.
end = time.time()
print(int(end - start))