# Importing Libraries

In [38]:


import pandas as pd
import numpy as np
import csv
import datetime
import time
import math
import matplotlib.pyplot as plot
import plotly.graph_objects as go
from itertools import cycle
import plotly.express as px
from plotly.subplots import make_subplots


from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score 
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler


import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM

from sklearn import preprocessing
from numpy import array


# Building Model (XGBoost and LSTM)

In [39]:
class Crypto:
    """Bitcoin close-price forecasting pipeline built on XGBoost and an LSTM.

    Every stage is a static method that is called on the class itself
    (``Crypto.load_data()``, ``Crypto.clean_data()``, ...). Stages hand their
    results to each other through module-level globals (``data``, ``closedf``,
    ``scaler``, ``close_stock``, ``X_train``, ...), so they have to be executed
    in order.

    ``splitData`` (XGBoost, 21-step window) and ``splitting_data`` (LSTM,
    15-step window) both write the ``X_train``/``X_test``/``test_data``/
    ``time_step`` globals. Finish the XGBoost stages before calling
    ``splitting_data``, otherwise the XGBoost stages will read LSTM-sized
    windows.
    """

    def __init__(self, data, closedf):
        """Store the price frame and the close series on the instance.

        The pipeline stages below do not read these attributes; they work on
        the module-level globals instead.
        """
        self.data = data
        self.closedf = closedf

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _create_dataset(dataset, time_step=1):
        """Turn a 2-D series into sliding windows and next-step targets.

        Args:
            dataset: 2-D array; only column 0 is read.
            time_step: number of consecutive values in every input window.

        Returns:
            ``(X, y)`` where ``X[i]`` is ``dataset[i:i + time_step, 0]`` and
            ``y[i]`` is ``dataset[i + time_step, 0]``.
        """
        windows, targets = [], []
        # The extra ``- 1`` leaves the last possible window unused. It is kept
        # so the window counts match the shapes recorded in the notebook output.
        for start in range(len(dataset) - time_step - 1):
            windows.append(dataset[start:start + time_step, 0])
            targets.append(dataset[start + time_step, 0])
        return np.array(windows), np.array(targets)

    @staticmethod
    def _split_series(series, train_fraction=0.70):
        """Split a 2-D series chronologically into (train, test) parts."""
        training_size = int(len(series) * train_fraction)
        return series[0:training_size, :], series[training_size:len(series), :1]

    @staticmethod
    def _roll_forecast(predict_one, seed_window, horizon):
        """Forecast ``horizon`` steps, feeding every prediction back as input.

        Args:
            predict_one: callable that receives the current window (a list of
                floats) and returns the next value as a float.
            seed_window: the most recent observed values, oldest first.
            horizon: number of future steps to produce.

        Returns:
            List with ``horizon`` predicted values.
        """
        window = list(seed_window)
        forecast = []
        for _ in range(horizon):
            next_value = predict_one(window)
            forecast.append(next_value)
            # Slide the window: drop the oldest value, append the prediction.
            window = window[1:] + [next_value]
        return forecast

    @staticmethod
    def _forecast_frame(last_values, predicted_values, last_col, pred_col):
        """Build the two-column frame used to plot history next to a forecast.

        The observed values occupy the first rows of ``last_col`` and the
        predictions occupy the following rows of ``pred_col``; every other cell
        is NaN so each trace is only drawn over its own range. The two columns
        are independent lists (sharing one list would make both traces equal).
        """
        n_last, n_pred = len(last_values), len(predicted_values)
        last_series = list(last_values) + [np.nan] * n_pred
        pred_series = [np.nan] * n_last + list(predicted_values)
        return pd.DataFrame({last_col: last_series, pred_col: pred_series})

    @staticmethod
    def _style_and_show(fig, title, legend_title, trace_names):
        """Apply the notebook's common plotly styling, rename traces and show."""
        fig.update_layout(title_text=title,
                          plot_bgcolor='white', font_size=15, font_color='black',
                          legend_title_text=legend_title)
        names = cycle(trace_names)
        fig.for_each_trace(lambda t: t.update(name=next(names)))
        fig.update_xaxes(showgrid=False)
        fig.update_yaxes(showgrid=False)
        fig.show()

    # ------------------------------------------------------------------
    # Data loading and preparation
    # ------------------------------------------------------------------
    @staticmethod
    def load_data():
        """Read ``Bitcoin.csv`` into the global ``data`` frame and return it."""
        global data
        data = pd.read_csv('Bitcoin.csv')
        return data

    @staticmethod
    def clean_data():
        """Report missing values and parse the ``Date`` column of ``data``.

        Returns:
            The result of ``data.info()`` (the summary itself is printed).
        """
        global data
        print('Total Null Values:', data.isnull().values.sum())
        # This second line prints a boolean flag (any missing value), not a count.
        print('Total NA values:', data.isnull().values.any())
        data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m-%d')
        return data.info()

    @staticmethod
    def model_preparation():
        """Select the close prices after 2020-09-01 and scale them to [0, 1].

        Sets the globals ``close_stock`` (unscaled ``Date``/``Close`` frame),
        ``scaler`` (fitted ``MinMaxScaler``) and ``closedf`` (scaled ``(n, 1)``
        array).

        NOTE: the scaler is fitted on the whole filtered series, i.e. before
        the train/test split, so the test period influences the scaling.
        """
        global closedf
        global scaler
        global close_stock
        recent = data.loc[data['Date'] > '2020-09-01', ['Date', 'Close']]
        close_stock = recent.copy()
        print("Total data for prediction: ", recent.shape[0])

        # Normalization (on the Close column only; no in-place column deletion)
        scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
        closedf = scaler.fit_transform(recent[['Close']].to_numpy().reshape(-1, 1))
        print(closedf.shape)

    # ------------------------------------------------------------------
    # XGBoost
    # ------------------------------------------------------------------
    @staticmethod
    def splitData():
        """Split ``closedf`` 70/30 and build 21-step windows for XGBoost.

        Sets the globals ``training_data``, ``test_data``, ``time_step``,
        ``X_train``, ``Y_train``, ``X_test`` and ``Y_test``.

        Returns:
            The window-building function (``dataset, time_step=1`` ->
            ``(X, y)``).
        """
        global X_train, Y_train
        global X_test, Y_test
        global time_step, test_data, training_data
        # 70 percent for training data, remaining 30 percent for test data
        training_data, test_data = Crypto._split_series(closedf)
        print("Training Data: ", training_data.shape)
        print("Test Data: ", test_data.shape)

        time_step = 21
        X_train, Y_train = Crypto._create_dataset(training_data, time_step)
        X_test, Y_test = Crypto._create_dataset(test_data, time_step)

        print("X_train: ", X_train.shape)
        print("Y_train: ", Y_train.shape)
        print("X_test: ", X_test.shape)
        print("Y_test", Y_test.shape)

        return Crypto._create_dataset

    @staticmethod
    def plotTrainTest():
        """Plot the unscaled close price, train part in black and test in red.

        Uses the same 70/30 boundary as the split stages instead of a fixed
        row number, and the file's ``plot`` alias for ``matplotlib.pyplot``.
        """
        training_size = int(len(close_stock) * 0.70)
        train_part = close_stock.iloc[:training_size]
        test_part = close_stock.iloc[training_size:]

        fig, ax = plot.subplots(figsize=(15, 6))
        ax.plot(train_part['Date'], train_part['Close'], color='black', label='train')
        ax.plot(test_part['Date'], test_part['Close'], color='red', label='test')

        # Adding graph title etc.
        ax.set_title('Train & Test data', fontsize=20, loc='center', fontdict=dict(weight='bold'))
        ax.set_xlabel('Date', fontsize=16, fontdict=dict(weight='bold'))
        ax.set_ylabel('Close price', fontsize=16, fontdict=dict(weight='bold'))
        ax.tick_params(axis='y', which='major', labelsize=16)
        ax.tick_params(axis='x', which='major', labelsize=16)
        ax.legend(loc='upper right')
        plot.show()

    @staticmethod
    def xgboostModel():
        """Fit an ``XGBRegressor`` on the windows, print metrics and plot the fit.

        Reads the globals written by ``model_preparation`` and ``splitData``
        and stores the fitted regressor in the global ``xg_model``.
        """
        global xg_model

        # Building XGBoost Model
        xg_model = XGBRegressor(n_estimators=1000)
        xg_model.fit(X_train, Y_train, verbose=False)

        scaled_train_pred = xg_model.predict(X_train)
        scaled_test_pred = xg_model.predict(X_test)

        # Error metrics are computed on the scaled values.
        print("Mean Absolute Error - MAE : " + str(mean_absolute_error(Y_test, scaled_test_pred)))
        print("Root Mean squared Error - RMSE : " + str(math.sqrt(mean_squared_error(Y_test, scaled_test_pred))))

        # Transform back to original form
        training_predict = scaler.inverse_transform(scaled_train_pred.reshape(-1, 1))
        test_predict = scaler.inverse_transform(scaled_test_pred.reshape(-1, 1))
        original_Ytrain = scaler.inverse_transform(Y_train.reshape(-1, 1))
        original_Ytest = scaler.inverse_transform(Y_test.reshape(-1, 1))
        print("Train data explained variance regression score:", explained_variance_score(original_Ytrain, training_predict))
        print("Test data explained variance regression score:", explained_variance_score(original_Ytest, test_predict))
        print("Train data R2 score:", r2_score(original_Ytrain, training_predict))
        print("Test data R2 score:", r2_score(original_Ytest, test_predict))

        # shift train predictions for plotting: the first prediction belongs
        # to the row that follows the first full window
        look_back = time_step
        trainPredictPlot = np.full_like(closedf, np.nan)
        trainPredictPlot[look_back:len(training_predict) + look_back, :] = training_predict
        print("Train predicted data: ", trainPredictPlot.shape)

        # shift test predictions for plotting
        testPredictPlot = np.full_like(closedf, np.nan)
        testPredictPlot[len(training_predict) + (look_back * 2) + 1:len(closedf) - 1, :] = test_predict
        print("Test predicted data: ", testPredictPlot.shape)

        plotdf = pd.DataFrame({'date': close_stock['Date'],
                               'original_close': close_stock['Close'],
                               'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
                               'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist()})

        fig = px.line(plotdf, x=plotdf['date'], y=[plotdf['original_close'], plotdf['train_predicted_close'],
                                                   plotdf['test_predicted_close']],
                      labels={'value': 'Close price', 'date': 'Date'})
        Crypto._style_and_show(
            fig,
            'Comparison between Original Close Price Vs Predicted Close Price',
            'Close Price',
            ['Original close price', 'Train predicted close price', 'Test predicted close price'])

        return

    @staticmethod
    def xgboostPrediction():
        """Forecast the next 10 days with the fitted XGBoost model and plot them.

        Reads ``xg_model``, ``test_data``, ``time_step``, ``closedf`` and
        ``scaler``; the fitted model in ``xg_model`` is left untouched so the
        stage can be re-run. The last window fed to the model is kept in the
        global ``X_input``.
        """
        pred_days = 10

        def predict_one(window):
            global X_input
            X_input = np.array(window).reshape(1, -1)
            return float(xg_model.predict(X_input)[0])

        # Prediction of next 10 days, seeded with the last ``time_step`` values
        seed_window = test_data[len(test_data) - time_step:].reshape(1, -1)[0].tolist()
        lst_output = Crypto._roll_forecast(predict_one, seed_window, pred_days)
        print("Output of next predicted days: ", len(lst_output))

        last_values = scaler.inverse_transform(closedf[len(closedf) - time_step:]).reshape(1, -1).tolist()[0]
        predicted_values = scaler.inverse_transform(np.array(lst_output).reshape(-1, 1)).reshape(1, -1).tolist()[0]
        new_pred_plot = Crypto._forecast_frame(last_values, predicted_values,
                                               'Original_Last_Days_Value',
                                               'Predicted_Next_Days_Value')

        fig = px.line(new_pred_plot, x=new_pred_plot.index,
                      y=[new_pred_plot['Original_Last_Days_Value'], new_pred_plot['Predicted_Next_Days_Value']],
                      labels={'value': 'Close price', 'index': 'Timestamp'})
        # Titles are derived from the actual window/horizon (21 and 10 here).
        Crypto._style_and_show(
            fig,
            'Compare last {} days Vs Next {} days'.format(time_step, pred_days),
            'Close Price',
            ['Last {} days close price'.format(time_step),
             'Predicted next {} days close price'.format(pred_days)])

        # Whole series followed by the forecast, back in price units.
        full_series = closedf.tolist()
        full_series.extend((np.array(lst_output).reshape(-1, 1)).tolist())
        full_series = scaler.inverse_transform(full_series).reshape(1, -1).tolist()[0]

        fig = px.line(full_series, labels={'value': 'Close price', 'index': 'Timestamp'})
        Crypto._style_and_show(fig, 'Plotting whole closing price with prediction',
                               'Stock', ['Close Price'])
        return

    # ------------------------------------------------------------------
    # LSTM
    # ------------------------------------------------------------------
    @staticmethod
    def splitting_data():
        """Split ``closedf`` 70/30 and build 15-step windows for the LSTM.

        Sets the globals ``train_data``, ``test_data``, ``time_step``,
        ``X_train``, ``y_train``, ``X_test`` and ``y_test`` (overwriting the
        ``X_train``/``X_test``/``test_data``/``time_step`` written by
        ``splitData``).

        Returns:
            The window-building function (``dataset, time_step=1`` ->
            ``(X, y)``).
        """
        global train_data
        global test_data
        global X_train
        global X_test
        global y_train
        global y_test
        global time_step
        train_data, test_data = Crypto._split_series(closedf)
        print("Train_data: ", train_data.shape)
        print("Test_data: ", test_data.shape)

        time_step = 15
        X_train, y_train = Crypto._create_dataset(train_data, time_step)
        X_test, y_test = Crypto._create_dataset(test_data, time_step)
        print("X_train: ", X_train.shape)
        print("y_train: ", y_train.shape)
        print("X_test: ", X_test.shape)
        print("y_test", y_test.shape)

        return Crypto._create_dataset

    @staticmethod
    def lstm_model(epochs=200, batch_size=32, seed=None):
        """Train a single-layer LSTM on the windows and print its metrics.

        Args:
            epochs: number of training epochs.
            batch_size: mini-batch size passed to ``fit``.
            seed: when given, seeds NumPy and TensorFlow before the model is
                built so repeated runs start from the same random state.

        Sets the globals ``model``, ``train_predict`` and ``test_predict``
        (predictions in price units) and reshapes the global ``X_train`` /
        ``X_test`` to ``[samples, time steps, 1]``.
        """
        global X_train
        global X_test
        global train_predict
        global test_predict
        global model

        if seed is not None:
            np.random.seed(seed)
            tf.random.set_seed(seed)

        # reshape input to be [samples, time steps, features] which is required for LSTM
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
        print("X_train: ", X_train.shape)
        print("X_test: ", X_test.shape)
        print("y_train: ", y_train.shape)
        print("y_test: ", y_test.shape)

        model = Sequential()
        model.add(LSTM(10, input_shape=(None, 1), activation="relu"))
        model.add(Dense(1))
        model.compile(loss="mean_squared_error", optimizer="adam")
        model.fit(X_train, y_train, validation_data=(X_test, y_test),
                  epochs=epochs, batch_size=batch_size, verbose=1)

        train_predict = model.predict(X_train)
        test_predict = model.predict(X_test)
        # Error metrics are computed on the scaled values.
        print("Mean Absolute Error - MAE : " + str(mean_absolute_error(y_test, test_predict)))
        print("Root Mean squared Error - RMSE : " + str(math.sqrt(mean_squared_error(y_test, test_predict))))

        # Transform back to original form
        train_predict = scaler.inverse_transform(train_predict)
        test_predict = scaler.inverse_transform(test_predict)
        original_ytrain = scaler.inverse_transform(y_train.reshape(-1, 1))
        original_ytest = scaler.inverse_transform(y_test.reshape(-1, 1))

        print("Train data explained variance regression score:", explained_variance_score(original_ytrain, train_predict))
        print("Test data explained variance regression score:", explained_variance_score(original_ytest, test_predict))
        print("Train data R2 score:", r2_score(original_ytrain, train_predict))
        print("Test data R2 score:", r2_score(original_ytest, test_predict))
        return

    @staticmethod
    def predict_next_10_days():
        """Forecast the next 10 scaled values with the trained LSTM.

        Sets the globals ``pred_days``, ``last_days`` and ``lst_output``
        (a list of one-element lists, one per forecast step).
        """
        global last_days
        global pred_days
        global lst_output
        pred_days = 10
        n_steps = time_step

        def predict_one(window):
            batch = np.array(window).reshape((1, n_steps, 1))
            return model.predict(batch, verbose=0)[0].tolist()[0]

        seed_window = test_data[len(test_data) - time_step:].reshape(1, -1)[0].tolist()
        forecast = Crypto._roll_forecast(predict_one, seed_window, pred_days)
        lst_output = [[value] for value in forecast]
        print("Output of predicted next days: ", len(lst_output))
        last_days = np.arange(1, time_step + 1)
        return

    @staticmethod
    def final_lstm_prediction_next10_days():
        """Plot the LSTM forecast produced by ``predict_next_10_days``.

        Draws the last observed window next to the forecast, then the whole
        close series followed by the forecast, both in price units.
        """
        window = len(last_days)
        last_values = scaler.inverse_transform(closedf[len(closedf) - window:]).reshape(1, -1).tolist()[0]
        predicted_values = scaler.inverse_transform(np.array(lst_output).reshape(-1, 1)).reshape(1, -1).tolist()[0]
        new_pred_plot = Crypto._forecast_frame(last_values, predicted_values,
                                               'last_original_days_value',
                                               'next_predicted_days_value')

        fig = px.line(new_pred_plot, x=new_pred_plot.index,
                      y=[new_pred_plot['last_original_days_value'],
                         new_pred_plot['next_predicted_days_value']],
                      labels={'value': 'Stock price', 'index': 'Timestamp'})
        Crypto._style_and_show(
            fig,
            'Compare last {} days vs next {} days'.format(window, pred_days),
            'Close Price',
            ['Last {} days close price'.format(window),
             'Predicted next {} days close price'.format(pred_days)])

        lstmdf = closedf.tolist()
        lstmdf.extend((np.array(lst_output).reshape(-1, 1)).tolist())
        lstmdf = scaler.inverse_transform(lstmdf).reshape(1, -1).tolist()[0]

        fig = px.line(lstmdf, labels={'value': 'Stock price', 'index': 'Timestamp'})
        Crypto._style_and_show(fig, 'Plotting whole closing stock price with prediction',
                               'Stock', ['Close price'])
        return
        
        
        

        
        
        
        
        
        
       
        
        
        

# Results

In [40]:

# Run the shared preparation stages in order; each one stores its result in
# module-level globals that the next stage reads.
Crypto.load_data()          # reads Bitcoin.csv into `data`
Crypto.clean_data()         # prints missing-value checks, parses the Date column
Crypto.model_preparation()  # keeps Close after 2020-09-01 and scales it to [0, 1]
Crypto.splitData()          # 70/30 split and 21-step windows for XGBoost





Total Null Values: 0
Total NA values: False
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2683 entries, 0 to 2682
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       2683 non-null   datetime64[ns]
 1   Open       2683 non-null   float64       
 2   High       2683 non-null   float64       
 3   Low        2683 non-null   float64       
 4   Close      2683 non-null   float64       
 5   Adj Close  2683 non-null   float64       
 6   Volume     2683 non-null   int64         
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 146.9 KB
Total data for prediction:  506
(506, 1)
Training Data:  (354, 1)
Test Data:  (152, 1)
X_train:  (332, 21)
Y_train:  (332,)
X_test:  (130, 21)
Y_test (130,)


<function __main__.Crypto.splitData.<locals>.create_dataset(dataset, time_step=1)>

In [41]:
# Fit the XGBoost regressor on the windows from splitData, print the metrics
# and plot original vs. predicted close price.
Crypto.xgboostModel()


Mean Absolute Error - MAE : 0.04548250335926976
Root Mean squared Error - RMSE : 0.058035861968792964
Train data explained variance regression score: 0.9999898464094282
Test data explained variance regression score: 0.8465988716948324
Train data R2 score: 0.9999898464094282
Test data R2 score: 0.8083899018870204
Train predicted data:  (506, 1)
Test predicted data:  (506, 1)


In [42]:
# Forecast the next 10 days with the fitted XGBoost model and plot the result.
Crypto.xgboostPrediction()

Output of next predicted days:  10


In [43]:
# LSTM
# Rebuild the train/test windows with a 15-step look-back. This overwrites the
# X_train / X_test / time_step globals that the XGBoost stages used.
Crypto.splitting_data()


Train_data:  (354, 1)
Test_data:  (152, 1)
X_train:  (338, 15)
y_train:  (338,)
X_test:  (136, 15)
y_test (136,)


<function __main__.Crypto.splitting_data.<locals>.create_dataset(dataset, time_step=1)>

In [44]:
# Train the LSTM for 200 epochs (progress is logged per epoch) and print the
# evaluation metrics.
Crypto.lstm_model()


X_train:  (338, 15, 1)
X_test:  (136, 15, 1)
y_train:  (338,)
y_test:  (136,)
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72

Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/

Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
Mean Absolute Error - MAE : 0.0320272057888394
Root Mean squared Error - RMSE : 0.04038631063484652
Train data explained variance regression score: 0.9784090616204417
Test data explained variance regression score: 0.9064185524819662
Train data R2 score: 0.9783186620810232
Test data R2 score: 0.9055418640659157


In [45]:
# Roll the trained LSTM forward 10 steps; the scaled forecast is kept in the
# global `lst_output` for the plotting stage.
Crypto.predict_next_10_days()


Output of predicted next days:  10


In [46]:
# Plot the LSTM forecast next to the last observed days, then the whole close
# series followed by the forecast.
Crypto.final_lstm_prediction_next10_days()