<a href="https://colab.research.google.com/github/bivash20/Credit-Card-Fraud-Detection/blob/main/8thSemProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import pandas as pd
import numpy as np
import math
import datetime as dt

from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM, GRU

from itertools import cycle
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [None]:
# Load the price history from MSFT.csv (path is relative to the notebook's
# working directory) and preview the first rows.
stock_df = pd.read_csv('MSFT.csv')
stock_df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2018-06-11,101.010002,101.589996,100.669998,101.050003,95.572853,23490900
1,2018-06-12,101.099998,101.449997,100.75,101.309998,95.818779,18325200
2,2018-06-13,101.720001,102.010002,100.559998,100.849998,95.383698,29492900
3,2018-06-14,101.650002,102.029999,101.0,101.419998,95.922813,25691800
4,2018-06-15,101.510002,101.529999,100.07,100.129997,94.702713,65738600


In [None]:
# Normalise the column headers to lower snake_case so later cells can use
# short, consistent names.
column_renames = {
    'Date': 'date',
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Adj Close': 'adj_close',
    'Volume': 'volume',
}
stock_df = stock_df.rename(columns=column_renames)
stock_df.head()

Unnamed: 0,date,open,high,low,close,adj_close,volume
0,2018-06-11,101.010002,101.589996,100.669998,101.050003,95.572853,23490900
1,2018-06-12,101.099998,101.449997,100.75,101.309998,95.818779,18325200
2,2018-06-13,101.720001,102.010002,100.559998,100.849998,95.383698,29492900
3,2018-06-14,101.650002,102.029999,101.0,101.419998,95.922813,25691800
4,2018-06-15,101.510002,101.529999,100.07,100.129997,94.702713,65738600


In [None]:
# DataFrame.shape is (rows, columns); rows are reported as days here.
n_days, n_fields = stock_df.shape
print("Total number of days: ", n_days)
print("Total number of fields: ", n_fields)

Total number of days:  1259
Total number of fields:  7


In [None]:
# Build the missing-value mask once, then report the count of missing cells
# and whether any cell is missing at all.
missing_mask = stock_df.isnull()

print("Null values:", missing_mask.values.sum())

print("NA values:", missing_mask.values.any())

Null values: 0
NA values: False


In [None]:
# Column dtypes and non-null counts before any type conversion.
stock_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1259 entries, 0 to 1258
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   date       1259 non-null   object 
 1   open       1259 non-null   float64
 2   high       1259 non-null   float64
 3   low        1259 non-null   float64
 4   close      1259 non-null   float64
 5   adj_close  1259 non-null   float64
 6   volume     1259 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 69.0+ KB


In [None]:
# Parse the 'date' strings into timezone-aware (UTC) timestamps.
parsed_dates = pd.to_datetime(stock_df['date'], utc=True)
stock_df = stock_df.assign(date=parsed_dates)

In [None]:
# Re-inspect dtypes to confirm 'date' is now a datetime column.
stock_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1259 entries, 0 to 1258
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype              
---  ------     --------------  -----              
 0   date       1259 non-null   datetime64[ns, UTC]
 1   open       1259 non-null   float64            
 2   high       1259 non-null   float64            
 3   low        1259 non-null   float64            
 4   close      1259 non-null   float64            
 5   adj_close  1259 non-null   float64            
 6   volume     1259 non-null   int64              
dtypes: datetime64[ns, UTC](1), float64(5), int64(1)
memory usage: 69.0 KB


In [None]:
# Read the first and last entries of the 'date' column directly.
# The previous `stock_df.iloc[0][0]` built a whole row Series and then indexed
# it with an integer, which relies on the deprecated positional fallback of
# Series.__getitem__ and silently depends on 'date' being the first column.
first_date = stock_df['date'].iloc[0]
last_date = stock_df['date'].iloc[-1]

print("Starting date: ", first_date)

print("Ending date: ", last_date)

print("Duration: ", last_date - first_date)

Starting date:  2018-06-11 00:00:00+00:00
Ending date:  2023-06-09 00:00:00+00:00
Duration:  1824 days 00:00:00


In [None]:
# Plot the four daily price series on a single chart.
price_columns = ['open', 'close', 'high', 'low']

# Legend labels, listed in the same order as price_columns.
names = cycle(['Stock Open Price','Stock Close Price','Stock High Price','Stock Low Price'])

fig = px.line(
    stock_df,
    x=stock_df['date'],
    y=[stock_df[column] for column in price_columns],
    labels={'date': 'Date','value':'Stock value'},
)
fig.update_layout(
    title_text='Stock analysis chart',
    font_size=15,
    font_color='black',
    legend_title_text='Stock Parameters',
)

# Swap the default trace names for the readable labels above.
fig.for_each_trace(lambda trace: trace.update(name=next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

In [None]:
# Keep only the date and the closing price; the model uses 'close' alone.
close_columns = ['date', 'close']
stock_df_close = stock_df[close_columns]

print("Shape of close dataframe:", stock_df_close.shape)

Shape of close dataframe: (1259, 2)


In [None]:
# Restrict the data to rows dated after 2021-06-11.
is_in_period = stock_df_close['date'] > '2021-06-11'
stock_df_close_last_2_year = stock_df_close[is_in_period]

# Independent copy kept as the ground truth (date + close) for later plots.
stock_df_close_last_2_year_original_gt = stock_df_close_last_2_year.copy()

print("Total data for prediction: ", len(stock_df_close_last_2_year))

Total data for prediction:  502


In [None]:
# Line chart of the closing price over the period selected for modelling.
fig = px.line(
    stock_df_close_last_2_year,
    x=stock_df_close_last_2_year['date'],
    y=stock_df_close_last_2_year['close'],
    labels={'date':'Date','close':'Close Stock'},
)
fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange')
fig.update_layout(
    title_text='Considered period to predict Stock close price',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

In [None]:
# Chronological 80/20 split: the first 80% of rows train the model and the
# remaining rows are held out for testing (no shuffling).
n_samples = len(stock_df_close_last_2_year)
training_size = int(n_samples*0.80)
test_size = n_samples - training_size

train_data = stock_df_close_last_2_year[:training_size]
test_data = stock_df_close_last_2_year[training_size:]

print("train_data: ", train_data.shape)

print("test_data: ", test_data.shape)

train_data:  (401, 2)
test_data:  (101, 2)


In [None]:
# Keep only the 'close' column of each split.
# Selecting the column (instead of `del`-ing 'date' in place) leaves the same
# single-column frames but can be re-run safely: the in-place `del` raised
# KeyError the second time because 'date' was already gone.
train_data = train_data[['close']]

test_data = test_data[['close']]

In [None]:
# Scale closes into [0, 1]. The scaler is fitted on the training split only and
# then applied unchanged to the test split.
# NOTE: this cell replaces train_data/test_data (frames) with scaled arrays of
# the same name, so re-run the split cells before re-running this one.
train_values = np.array(train_data).reshape(-1,1)
test_values = np.array(test_data).reshape(-1,1)

scaler = MinMaxScaler(feature_range=(0,1))
train_data = scaler.fit_transform(train_values)
test_data = scaler.transform(test_values)

print(train_data.shape)

(401, 1)


In [None]:
# convert an array of values into a dataset matrix

def create_sliding_window(dataset, time_step=1, drop_last=True):
    """Turn a column of values into (window, next value) training pairs.

    For each start index ``i`` the input window is ``dataset[i:i+time_step, 0]``
    and the target is the value that follows it, ``dataset[i+time_step, 0]``.

    Parameters
    ----------
    dataset : 2-D array-like indexable as ``dataset[rows, 0]``
        Only column 0 is read.
    time_step : int, default 1
        Number of consecutive values in each input window.
    drop_last : bool, default True
        The historical behaviour stops one window early (it iterates
        ``len(dataset) - time_step - 1`` times), discarding the last valid
        (window, target) pair. That stays the default because other cells
        derive plot offsets from the resulting sample count. Pass ``False``
        to use every available pair.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` with shape ``(n_windows, time_step)`` and ``y`` with shape
        ``(n_windows,)``. When the dataset is too short to form any window,
        ``X`` is returned with shape ``(0, time_step)`` so callers can still
        read ``X.shape[1]``.
    """
    n_windows = len(dataset) - time_step - (1 if drop_last else 0)
    if n_windows <= 0:
        # Keep a well-formed 2-D shape instead of a shapeless empty array.
        return np.empty((0, time_step)), np.empty((0,))

    dataX = np.array([dataset[start:start + time_step, 0] for start in range(n_windows)])
    dataY = np.array([dataset[start + time_step, 0] for start in range(n_windows)])
    return dataX, dataY

In [None]:
# Number of consecutive past closes fed to the model for each prediction.
time_step = 15

X_train, y_train = create_sliding_window(train_data, time_step=time_step)
X_test, y_test = create_sliding_window(test_data, time_step=time_step)

# Report the shape of every array produced above.
for label, values in (
    ("X_train: ", X_train),
    ("y_train: ", y_train),
    ("X_test: ", X_test),
    ("y_test", y_test),
):
    print(label, values.shape)

X_train:  (385, 15)
y_train:  (385,)
X_test:  (85, 15)
y_test (85,)


In [None]:
# Recurrent Keras layers expect 3-D input: [samples, time steps, features]
# (equivalently [batch_size, time steps, input_dim]). There is a single feature
# here, so a trailing axis of length 1 is appended.
X_train = X_train.reshape(*X_train.shape[:2], 1)
X_test = X_test.reshape(*X_test.shape[:2], 1)

print("X_train: ", X_train.shape)
print("X_test: ", X_test.shape)

X_train:  (385, 15, 1)
X_test:  (85, 15, 1)


In [None]:
# Seed Python, NumPy and TensorFlow before the weights are created so that
# initialisation, dropout masks and batch shuffling are repeatable from run to
# run. (Some GPU kernels may still introduce small nondeterminism.)
tf.keras.utils.set_random_seed(42)

# Three stacked GRU layers of 32 units. The first two return the full sequence
# so the next GRU receives one vector per time step; the last returns only its
# final state, which passes through dropout into a single-unit regression head.
model = Sequential([
    GRU(32, return_sequences=True, input_shape=(time_step, 1)),
    GRU(32, return_sequences=True),
    GRU(32),
    Dropout(0.20),
    Dense(1),
])

model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 15, 32)            3360      
                                                                 
 gru_1 (GRU)                 (None, 15, 32)            6336      
                                                                 
 gru_2 (GRU)                 (None, 32)                6336      
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 16,065
Trainable params: 16,065
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train on the windowed training set; `history` keeps the per-epoch loss.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=200,
    verbose=1,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [None]:
# Predict on the training windows first, then on the test windows
# (outputs are still in the scaler's 0-1 range).
train_predict, test_predict = (
    model.predict(features) for features in (X_train, X_test)
)

(train_predict.shape, test_predict.shape)



((385, 1), (85, 1))

In [None]:
# Map predictions and targets from the 0-1 range back to price units.
# NOTE: train_predict/test_predict are overwritten, so re-running this cell
# without re-running the prediction cell would un-scale them a second time.
to_price = scaler.inverse_transform

train_predict = to_price(train_predict)
test_predict = to_price(test_predict)

# The targets are 1-D; the scaler expects a single column.
original_ytrain = to_price(y_train.reshape(-1,1))
original_ytest = to_price(y_test.reshape(-1,1))

In [None]:
# Error metrics in price units. MSE is computed once per split and reused for
# the RMSE.
train_mse = mean_squared_error(original_ytrain, train_predict)
test_mse = mean_squared_error(original_ytest, test_predict)

print("Train data RMSE: ", math.sqrt(train_mse))

print("Train data MSE: ", train_mse)

print("Train data MAE: ", mean_absolute_error(original_ytrain,train_predict))

print("-------------------------------------------------------------------------------------")

print("Test data RMSE: ", math.sqrt(test_mse))

print("Test data MSE: ", test_mse)

print("Test data MAE: ", mean_absolute_error(original_ytest,test_predict))

Train data RMSE:  5.514419787482583
Train data MSE:  30.408825592579458
Train data MAE:  4.278449419640322
-------------------------------------------------------------------------------------
Test data RMSE:  5.116271575170866
Test data MSE:  26.17623483090137
Test data MAE:  3.8924228626838264


In [None]:
# Explained variance score for each split.
train_explained_variance = explained_variance_score(original_ytrain, train_predict)
test_explained_variance = explained_variance_score(original_ytest, test_predict)

print("Train data explained variance regression score:", train_explained_variance)

print("Test data explained variance regression score:", test_explained_variance)

Train data explained variance regression score: 0.96850816515362
Test data explained variance regression score: 0.9632907376545191


In [None]:
# Coefficient of determination (R2) for each split.
train_r2 = r2_score(original_ytrain, train_predict)
test_r2 = r2_score(original_ytest, test_predict)

print("Train data R2 score:", train_r2)
print("Test data R2 score:", test_r2)

Train data R2 score: 0.9685074762512087
Test data R2 score: 0.960283493537848


In [None]:
# Mean gamma deviance (MGD) and mean Poisson deviance (MPD) for each split.
train_mgd = mean_gamma_deviance(original_ytrain, train_predict)
test_mgd = mean_gamma_deviance(original_ytest, test_predict)
train_mpd = mean_poisson_deviance(original_ytrain, train_predict)
test_mpd = mean_poisson_deviance(original_ytest, test_predict)

print("Train data MGD: ", train_mgd)

print("Test data MGD: ", test_mgd)
print("----------------------------------------------------------------------")

print("Train data MPD: ", train_mpd)

print("Test data MPD: ", test_mpd)

Train data MGD:  0.00041178956769663105
Test data MGD:  0.0003227677812820694
----------------------------------------------------------------------
Train data MPD:  0.111203223762813
Test data MPD:  0.09162248342075159


In [None]:
# Align the train/test predictions with the original dates for plotting.
# Each prediction belongs to the day right after its input window, so every
# block of predictions starts `look_back` rows after the start of its split.

# Reduce the working frame to the single 'close' column, as before, but with an
# idempotent drop: the previous in-place `del` raised KeyError on a re-run.
stock_df_close_last_2_year = stock_df_close_last_2_year.drop(columns='date', errors='ignore')

look_back = time_step
n_points = len(stock_df_close_last_2_year)

# NaN-filled column; only the rows that have a train prediction are filled.
train_predict_look_ahead = np.full((n_points, 1), np.nan)
train_start = look_back
train_predict_look_ahead[train_start:train_start + len(train_predict), :] = train_predict

print("Train predicted data: ", train_predict_look_ahead.shape)

####################### TEST #############################
# The test split starts at row `training_size`, so its first prediction lands
# `look_back` rows later. Deriving the slice from the split boundary and the
# number of predictions (rather than a hand-tuned `+1` / `-1`) keeps it correct
# no matter how many windows each split produced.
test_predict_look_ahead = np.full((n_points, 1), np.nan)
test_start = training_size + look_back
test_predict_look_ahead[test_start:test_start + len(test_predict), :] = test_predict

print("Test predicted data: ", test_predict_look_ahead.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])

# The two Series carry the original row index; the prediction arrays are
# aligned to it by position.
plotdf = pd.DataFrame({
    'date': stock_df_close_last_2_year_original_gt['date'],
    'original_close': stock_df_close_last_2_year_original_gt['close'],
    'train_predicted_close': train_predict_look_ahead.ravel(),
    'test_predicted_close': test_predict_look_ahead.ravel(),
})

fig = px.line(
    plotdf,
    x=plotdf['date'],
    y=[plotdf['original_close'], plotdf['train_predicted_close'], plotdf['test_predicted_close']],
    labels={'value':'Stock price','date': 'Date'},
)

fig.update_layout(
    title_text='Comparison between original close price vs predicted close price',
    plot_bgcolor='white',
    font_size=15,
    font_color='black',
    legend_title_text='Close Price',
)

# Swap the default trace names for the readable labels above.
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (502, 1)
Test predicted data:  (502, 1)


In [None]:
# The forecast is seeded with the last `time_step` (15) scaled test values,
# laid out as a single row.
window_start = len(test_data) - time_step
x_input = test_data[window_start:].reshape(1, -1)
print("x_input.shape ", x_input.shape)
print("x_input", x_input)

print('###########################################################################')

# One-element list holding that row as an array.
temp_input = [row for row in x_input]
print(temp_input)

x_input.shape  (1, 15)
x_input [[0.80777594 0.82981535 0.78387414 0.772932   0.86659961 0.92068934
  0.90765175 0.88576772 0.91828341 0.94016769 0.94435834 0.92681986
  0.84688823 0.86147775 0.87335109]]
###########################################################################
[array([0.80777594, 0.82981535, 0.78387414, 0.772932  , 0.86659961,
       0.92068934, 0.90765175, 0.88576772, 0.91828341, 0.94016769,
       0.94435834, 0.92681986, 0.84688823, 0.86147775, 0.87335109])]


In [None]:
# Flat Python list of the last `time_step` scaled test values.
# Built straight from test_data rather than by rebinding temp_input to its own
# first element: that version raised AttributeError when the cell was re-run,
# because temp_input[0] was by then a float rather than an array.
temp_input = test_data[len(test_data) - time_step:, 0].tolist()
len(temp_input)

15

In [None]:

from numpy import array  # not used in this cell; kept in case other cells rely on the name

n_steps = time_step
pred_days = 30

# Recursive multi-step forecast: predict one day ahead, append the prediction
# to the window, and feed the most recent `n_steps` values back into the model.
#
# The window is seeded directly from the scaled test data, so the cell gives
# the same result on every run. The earlier version took its first input from
# an `x_input` left over from another cell and consumed `temp_input` in place,
# so a second run started from already-forecast values.
window = test_data[len(test_data) - n_steps:, 0].tolist()

lst_output = []
for _ in range(pred_days):
    # Shape [samples, time steps, features] as required by the recurrent layers.
    x_input = np.array(window[-n_steps:]).reshape((1, n_steps, 1))
    y_pred = model.predict(x_input, verbose=0)

    window.extend(y_pred[0].tolist())
    lst_output.extend(y_pred.tolist())  # one single-element list per forecast day

print("Output of predicted next days: ", len(lst_output))

Output of predicted next days:  30


In [None]:
# Forecast values, still in the scaler's 0-1 range (one single-element list per day).
lst_output

[[0.8587257862091064],
 [0.8410202264785767],
 [0.8236505389213562],
 [0.807488203048706],
 [0.7924630045890808],
 [0.7783589959144592],
 [0.764994740486145],
 [0.7522556781768799],
 [0.7400766015052795],
 [0.7284170389175415],
 [0.7172494530677795],
 [0.7065523266792297],
 [0.6963070034980774],
 [0.6864978075027466],
 [0.6771055459976196],
 [0.6681119799613953],
 [0.6595007181167603],
 [0.6512563228607178],
 [0.6433637142181396],
 [0.6358084678649902],
 [0.6285767555236816],
 [0.6216550469398499],
 [0.6150305867195129],
 [0.6086909770965576],
 [0.602624237537384],
 [0.5968191027641296],
 [0.5912646055221558],
 [0.5859501361846924],
 [0.5808656811714172],
 [0.5760015845298767]]

In [None]:
# Day numbers for the chart: 1..time_step for the observed window, followed by
# the next `pred_days` numbers for the forecast.
first_pred_day = time_step + 1
last_days = np.arange(1, first_pred_day)
day_pred = np.arange(first_pred_day, first_pred_day + pred_days)
print(last_days)
print(day_pred)

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
[16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
 40 41 42 43 44 45]


In [None]:
# NaN-filled column with one slot per observed day, one per forecast day, plus
# one spare slot.
n_slots = len(last_days) + pred_days + 1
temp_matrix = np.full((n_slots, 1), np.nan)

temp_matrix

array([[nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan]])

In [None]:
# NOTE: this cell repeats an earlier cell verbatim and recomputes the same
# arrays; it can be removed without affecting the results.
forecast_start = time_step + 1
last_days = np.arange(1, forecast_start)
day_pred = np.arange(forecast_start, forecast_start + pred_days)
print(last_days)
print(day_pred)

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
[16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
 40 41 42 43 44 45]


In [None]:
# NOTE: this cell repeats an earlier cell verbatim and rebuilds the same
# NaN-filled column; it can be removed without affecting the results.
slot_count = len(last_days) + pred_days + 1
temp_matrix = np.full((slot_count, 1), np.nan)

temp_matrix

array([[nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan]])

In [None]:
# Confirm the placeholder is a single column with time_step + pred_days + 1 rows.
temp_matrix.shape

(46, 1)

In [None]:
# Flatten the placeholder column into a plain Python list.
# np.asarray accepts both the original ndarray and an already-flattened list,
# so re-running the cell no longer fails with
# "'list' object has no attribute 'reshape'".
temp_matrix = np.asarray(temp_matrix, dtype=float).ravel().tolist()

In [None]:
# Build the two chart columns as SEPARATE lists of equal length.
#
# Previously both names were bound to the same `temp_matrix` list, so writing
# the forecast into one also overwrote the other and the two DataFrame columns
# came out identical. The slice assignments also changed the list length along
# the way (15 values written into a 16-slot slice).
#
# Layout (length = time_step + 1 + number of forecast days):
#   last_original_days_value : [observed closes ..., NaN, NaN, ..., NaN]
#   next_predicted_days_value: [NaN, ..., NaN, NaN, forecast prices ...]
history_tail = stock_df_close_last_2_year_original_gt['close'].tail(time_step).tolist()

# Forecasts are in the scaler's 0-1 range; convert them back to price units.
forecast_prices = scaler.inverse_transform(np.array(lst_output).reshape(-1,1)).ravel().tolist()

last_original_days_value = history_tail + [np.nan] * (len(forecast_prices) + 1)

next_predicted_days_value = [np.nan] * (time_step + 1) + forecast_prices

In [None]:
# First time_step + 1 entries of the observed-price column.
last_original_days_value[0:time_step+1]

[318.339996,
 321.179993,
 315.26001,
 313.850006,
 325.920013,
 332.890015,
 331.209991,
 328.390015,
 332.579987,
 335.399994,
 335.940002,
 333.679993,
 323.380005,
 325.26001,
 326.790009,
 nan]

In [None]:
# Entries of the forecast column from index time_step + 1 onward.
next_predicted_days_value[time_step+1:]

[324.90539193001865,
 322.623853768726,
 320.3855960906479,
 318.3029177325332,
 316.3667708844039,
 314.5493285381523,
 312.82721078412357,
 311.1856554060376,
 309.6162597688213,
 308.11380870865884,
 306.6747537635723,
 305.2963222176007,
 303.9761100261572,
 302.71209717733683,
 301.5018105006701,
 300.3428997161457,
 299.23325264401495,
 298.1708799949873,
 297.1538385636938,
 296.18026963195564,
 295.2483912881303,
 294.35646002384334,
 293.50283217921765,
 292.6859101782978,
 291.90415020970374,
 291.15610062989924,
 290.44034819861594,
 289.7555257595074,
 289.10034296276365,
 288.4735555424962]

In [None]:
# Number of entries in the leading slice shown above.
len(last_original_days_value[0:time_step+1])

16

In [None]:
# Number of entries in the trailing forecast slice.
len(next_predicted_days_value[time_step+1:])

30

In [None]:
# Put the observed and forecast columns side by side for plotting.
forecast_columns = {
    'last_original_days_value': last_original_days_value,
    'next_predicted_days_value': next_predicted_days_value,
}
new_pred_plot = pd.DataFrame(forecast_columns)

new_pred_plot.tail()

Unnamed: 0,last_original_days_value,next_predicted_days_value
41,291.156101,291.156101
42,290.440348,290.440348
43,289.755526,289.755526
44,289.100343,289.100343
45,288.473556,288.473556


In [None]:
# (rows, columns) of the frame built for the forecast chart.
new_pred_plot.shape

(46, 2)