In [None]:
#Importing all the required packages and libraries in to the dataframe 
import pandas as pd
import numpy as np
import csv
import random
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dropout
from tensorflow.keras.layers import Dense

In [None]:
from sklearn.preprocessing import MinMaxScaler
import sklearn.metrics as metrics

In [None]:
from google.colab import files
uploaded=files.upload()

Saving tweets_scraped_2016_2021.csv to tweets_scraped_2016_2021 (2).csv


In [None]:
import io
#Reading the 2016-2021 tweets file with ticker symbol obtained using snscarpe scraper into a Dataframe
dlf=pd.read_csv(io.BytesIO(uploaded["tweets_scraped_2016_2021.csv"]))

In [None]:
dlf.tail()

Unnamed: 0,Datetime,Text
18695,2021-12-02 04:35:07+00:00,Stock: $AAPL - Apple Inc.\n*** NEWS ***\nProfi...
18696,2021-12-02 04:15:47+00:00,A bunch of people are about to find out that t...
18697,2021-12-02 04:08:57+00:00,$AAPL This guy posting Bear 🐻 posts all last m...
18698,2021-12-02 03:25:10+00:00,$AAPL i'm an apple bull but expect a pullback ...
18699,2021-12-02 00:39:25+00:00,Bought a $140 $aapl put when it was up $167 ye...


In [None]:
dlf.dtypes

Datetime    object
Text        object
dtype: object

In [None]:
#As we only require the Date from Datetime format, we first convert the value into a Datetime format
#Then convert it into Date and store it in the Dataframe
dlf['Datetime']=pd.to_datetime(dlf.Datetime).dt.date

In [None]:
#Grouping tweets into a single row based on date
df=dlf.groupby(by="Datetime", as_index=False)['Text'].apply(' '.join)
df.dropna(inplace=True)

In [None]:
#Changing the date format to string because we need it to merge with the stock price dataset
df['Datetime']=df['Datetime'].astype(str)

In [None]:
df.head()

Unnamed: 0,Datetime,Text
0,2016-01-01,2/n. Apple has peaked in my view. The stock $a...
1,2016-01-02,"AAPL Apple, Inc. % Change\nhttps://t.co/1F3brx..."
2,2016-01-03,"AAPL Apple, Inc. P/E Ratio\nhttps://t.co/iFloX..."
3,2016-01-04,Apple Stock Price: 105.35 #apple $AAPL Apple S...
4,2016-01-05,Remember that time when 40 months passed and A...


In [None]:
#Calling the vader sentiment analyser imported from nltk
#Using Vader as it has high accuracy
nltk.download('vader_lexicon')
dlfpolarityanalyser = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [None]:
#Initialising sentiment column in the dataframe
df['sentiment']=''

In [None]:
#Iteraing through all the tweets by date and then storing the sentiment values in the sentiment column
for j in range(0,len(df)):
    df.sentiment.iloc[j]=dlfpolarityanalyser.polarity_scores(df.Text.iloc[j])['compound']

In [None]:
df.head()

Unnamed: 0,Datetime,Text,sentiment
0,2016-01-01,2/n. Apple has peaked in my view. The stock $a...,-0.9052
1,2016-01-02,"AAPL Apple, Inc. % Change\nhttps://t.co/1F3brx...",-0.6844
2,2016-01-03,"AAPL Apple, Inc. P/E Ratio\nhttps://t.co/iFloX...",0.4098
3,2016-01-04,Apple Stock Price: 105.35 #apple $AAPL Apple S...,-0.5962
4,2016-01-05,Remember that time when 40 months passed and A...,-0.2824


In [None]:
#Since we won't be needing the tweet text after this, we can drop the column
df=df.drop('Text',1)
df

Unnamed: 0,Datetime,sentiment
0,2016-01-01,-0.9052
1,2016-01-02,-0.6844
2,2016-01-03,0.4098
3,2016-01-04,-0.5962
4,2016-01-05,-0.2824
...,...,...
2129,2021-11-28,0
2130,2021-11-29,0.9731
2131,2021-11-30,0.7199
2132,2021-12-01,0.9446


In [None]:
#dropping any null values and checking the shape of the Dataframe
df.dropna(inplace=True)
df.shape

(2134, 2)

In [None]:
from google.colab import files
uploaded1=files.upload()

Saving AAPL.csv to AAPL (1).csv


In [None]:
#Reading the apple historical stock prices downlaoded using Yahoo Finance
aapl_df=pd.read_csv(io.BytesIO(uploaded1["AAPL.csv"]))

In [None]:
aapl_df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2016-01-04,25.6525,26.342501,25.5,26.3375,24.251436,270597600
1,2016-01-05,26.4375,26.4625,25.602501,25.6775,23.643705,223164000
2,2016-01-06,25.139999,25.592501,24.967501,25.174999,23.181009,273829600
3,2016-01-07,24.67,25.032499,24.1075,24.112499,22.202667,324377600
4,2016-01-08,24.637501,24.7775,24.190001,24.24,22.320068,283192000


In [None]:
#Converting Date to string to merge it with the tweet sentiment dataset
aapl_df['Date']=aapl_df['Date'].astype(str)

In [None]:
#Renaming the Datetime column in Tweet sentiment Dataframe to Date to merge with stock dataframe
df.rename(columns={'Datetime':'Date'},inplace=True)

In [None]:
#Merging both stock price and tweet sentiment Dataframe on Date using Inner join of pandas
mg=aapl_df.merge(df,on='Date',how='inner')

In [None]:
#Dropping any null values in the new merged dataset and checking the shape (size) of the dataframe
mg.dropna(inplace=True)
mg.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,sentiment
0,2016-01-04,25.6525,26.342501,25.5,26.3375,24.251436,270597600,-0.5962
1,2016-01-05,26.4375,26.4625,25.602501,25.6775,23.643705,223164000,-0.2824
2,2016-01-06,25.139999,25.592501,24.967501,25.174999,23.181009,273829600,0.7612
3,2016-01-07,24.67,25.032499,24.1075,24.112499,22.202667,324377600,0.9349
4,2016-01-08,24.637501,24.7775,24.190001,24.24,22.320068,283192000,0.7236


In [None]:
#Dropping columns that aren't required anymore
mg=mg.drop('Open',1)
mg=mg.drop('Low',1)
mg=mg.drop('Close',1)
mg=mg.drop('High',1)

In [None]:
#Creating a new column called polarity and storing Positive, Negative or Neutral based on the tweet sentiment Value
mg['polarity']=''
for i in range(0,len(mg['sentiment'])):
    if mg.sentiment.iloc[i]>0:
        mg.polarity.iloc[i]="Positive"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [None]:
for i in range(0,len(mg['sentiment'])):
    if mg.sentiment.iloc[i]<0:
        mg.polarity.iloc[i]="Negative"
    elif mg.sentiment.iloc[i]==0:
        mg.polarity.iloc[i]="Neutral"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [None]:
#dropping any null values
mg.dropna(inplace=True)

In [None]:
mg.head()

Unnamed: 0,Date,Adj Close,Volume,sentiment,polarity
0,2016-01-04,24.251436,270597600,-0.5962,Negative
1,2016-01-05,23.643705,223164000,-0.2824,Negative
2,2016-01-06,23.181009,273829600,0.7612,Positive
3,2016-01-07,22.202667,324377600,0.9349,Positive
4,2016-01-08,22.320068,283192000,0.7236,Positive


In [None]:
#renaming the Adj Close column to Close
mg.rename(columns = {'Adj Close':'close'}, inplace = True)
#Creating a new column for storing the difference in the close values
#Get difference in close values by iterating over all the values 
mg['Stock_Change_Accounted']=''
for i in range(1,len(mg['close'])):
    mg.Stock_Change_Accounted.iloc[i]=mg.close.iloc[i]-mg.close.iloc[i-1]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [None]:
#dropping any null values
mg.dropna(inplace=True)

In [None]:
#Dropping the first row to make sure the Stock_Change_Accounted is not null for any row
mg.drop(mg.index[0],inplace=True)

In [None]:
mg.head()

Unnamed: 0,Date,close,Volume,sentiment,polarity,Stock_Change_Accounted
1,2016-01-05,23.643705,223164000,-0.2824,Negative,-0.607731
2,2016-01-06,23.181009,273829600,0.7612,Positive,-0.462696
3,2016-01-07,22.202667,324377600,0.9349,Positive,-0.978342
4,2016-01-08,22.320068,283192000,0.7236,Positive,0.117401
5,2016-01-11,22.681482,198957600,0.1531,Positive,0.361414


In [None]:
#Creating the label using numpy, if Stock_Change_Accounted is positive then label 1
#Else Label 0
mg['Label']=''
for i in range(0,len(mg['Stock_Change_Accounted'])):
    mg.Label.iloc[i]=np.where(mg.Stock_Change_Accounted.iloc[i]>0,1,0)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [None]:
mg.head()

Unnamed: 0,Date,close,Volume,sentiment,polarity,Stock_Change_Accounted,Label
1,2016-01-05,23.643705,223164000,-0.2824,Negative,-0.607731,0
2,2016-01-06,23.181009,273829600,0.7612,Positive,-0.462696,0
3,2016-01-07,22.202667,324377600,0.9349,Positive,-0.978342,0
4,2016-01-08,22.320068,283192000,0.7236,Positive,0.117401,1
5,2016-01-11,22.681482,198957600,0.1531,Positive,0.361414,1


In [None]:
#Since we won't be needing Stock_Change_Accounted anymore dorp the column
mg=mg.drop('Stock_Change_Accounted',1)

In [None]:
#creating a new dataframe by copying the above dataframe
dlf1=mg.copy()

In [None]:
#As we have a categorical variable polarity, we convert it into a vector using pandad get_dummies

In [None]:
dlf1=pd.get_dummies(dlf1,columns=['polarity'])

In [None]:
dlf1.head()

Unnamed: 0,Date,close,Volume,sentiment,Label,polarity_Negative,polarity_Neutral,polarity_Positive
1,2016-01-05,23.643705,223164000,-0.2824,0,1,0,0
2,2016-01-06,23.181009,273829600,0.7612,0,0,0,1
3,2016-01-07,22.202667,324377600,0.9349,0,0,0,1
4,2016-01-08,22.320068,283192000,0.7236,1,0,0,1
5,2016-01-11,22.681482,198957600,0.1531,1,0,0,1


In [None]:
#setting the date as index in the dataframe
dlf1.set_index('Date',inplace=True)

In [None]:
dlf1.head()

Unnamed: 0_level_0,close,Volume,sentiment,Label,polarity_Negative,polarity_Neutral,polarity_Positive
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-01-05,23.643705,223164000,-0.2824,0,1,0,0
2016-01-06,23.181009,273829600,0.7612,0,0,0,1
2016-01-07,22.202667,324377600,0.9349,0,0,0,1
2016-01-08,22.320068,283192000,0.7236,1,0,0,1
2016-01-11,22.681482,198957600,0.1531,1,0,0,1


In [None]:
dlf1["Label"]=dlf1["Label"].astype(int)

In [None]:
#Creating X features and y target from the Dataframe
#LSTM uses a window of previous values to prevent the current value, so we store a window of values in all the features and target
feature1=0
feature2=2
feature3=4
feature4=5
feature5=6
feature6=3
target1=0

In [None]:
#we define window size that we use
wind=3

In [None]:
#we create new lists for all the features and target to store all the values using the window
Xclose=[]
XSentiment=[]
XLabel=[]
XPolarityNeg=[]
XPolarityNeu=[]
XPolarityPos=[]
y1=[]

In [None]:
#Now we iterate through the dataframe and extract values from it
for i in range(len(dlf1)-wind):
    adjcl=dlf1.iloc[i:(i+wind),0]
    tweet_XSentiment=dlf1.iloc[i:(i+wind),2]
    tweet_XPolarityNeg=dlf1.iloc[i:(i+wind),4]
    tweet_XPolarityPos=dlf1.iloc[i:(i+wind),5]
    tweet_XPolarityNeu=dlf1.iloc[i:(i+wind),6]
    tweet_label=dlf1.iloc[i:(i+wind),3]
    target=dlf1.iloc[(i+wind),3]
    #Adding values into the list
    Xclose.append(adjcl)
    XSentiment.append(tweet_XSentiment)
    XPolarityNeg.append(tweet_XPolarityNeg)
    XPolarityNeu.append(tweet_XPolarityNeu)
    XPolarityPos.append(tweet_XPolarityPos)
    XLabel.append(tweet_label)
    y1.append(target)

In [None]:
#Now, we get the Feature and target vectors
X1=np.hstack((Xclose,XSentiment,XLabel,XPolarityNeg,XPolarityNeu,XPolarityPos))
y=np.array(y1).reshape(-1,1)

In [None]:
X1

array([[23.643704999999997, 23.181009, 22.202667, ..., 0, 0, 0],
       [23.181009, 22.202667, 22.320068, ..., 0, 0, 0],
       [22.202667, 22.320068, 22.681482, ..., 0, 0, 0],
       ...,
       [161.020004, 161.41000400000001, 161.940002, ..., 0, 0, 0],
       [161.41000400000001, 161.940002, 156.809998, ..., 0, 0, 0],
       [161.940002, 156.809998, 160.240005, ..., 0, 0, 0]], dtype=object)

In [None]:
#Now we define split factor to divide the training and testing data
#Here, we are taking 80% of the data as training
Xsplitfact=int(len(X1)*0.8)

In [None]:
#we define the Xtrainig 
Xtraining=X1[:Xsplitfact]

In [None]:
#we define the Xtesting 
Xtesting=X1[Xsplitfact:]

In [None]:
#we define the Ytrainig 
Ytraining=y[:Xsplitfact]

In [None]:
#we define the Ytesting 
Ytesting=y[Xsplitfact:]

In [None]:
#Now we fit the Xtrainig in the min max scaler
#we scale the data as we have a lot values with varying distributions
X_train_Scaler=MinMaxScaler().fit(Xtraining)

In [None]:
#Now we transform the Fitted data
X_train_scaleddata=X_train_Scaler.transform(Xtraining)

In [None]:
#Now we fit the Xtesting in the min max scaler
#we scale the data as we have a lot values with varying distributions
X_test_Scaler=MinMaxScaler().fit(Xtesting)

In [None]:
#Now we transform the Fitted data
X_test_scaleddata=X_test_Scaler.transform(Xtesting)

In [None]:
#Now we again reshape the scaled data for X features
X_test_scaleddata.reshape(X_test_scaleddata.shape[0],X_test_scaleddata.shape[1],1)

array([[[0.08360169],
        [0.        ],
        [0.        ],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.        ],
        [0.01960816],
        [0.07381882],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.01960816],
        [0.09197953],
        [0.12254654],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       ...,

       [[0.98347102],
        [0.99047788],
        [1.        ],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.99047788],
        [1.        ],
        [0.90598931],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[1.        ],
        [0.90783269],
        [0.96884644],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]]])

In [None]:
#Now we again reshape the scaled data for X features
X_train_scaleddata.reshape(X_train_scaleddata.shape[0],X_train_scaleddata.shape[1],1)

array([[[0.02326131],
        [0.01913348],
        [0.01040545],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.01913348],
        [0.01040545],
        [0.01145281],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.01040545],
        [0.01145281],
        [0.01467707],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       ...,

       [[0.78898453],
        [0.75800498],
        [0.7866833 ],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.75800498],
        [0.7866833 ],
        [0.80199608],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.7866833 ],
        [0.80199608],
        [0.76048332],
        ...,
        [0.        ],
        [0.        ],
        [0.        ]]])

In [None]:
#Now we define a sequential LSTM model
LSTMmodel=Sequential()
#we are going to use a dropout fraction of 0.2, i.e. 20 units are dropped randomly in every epoch
#we define the dimensionality of output space to 9

#Now, we add in the first layer into the LSTM Network
LSTMmodel.add(LSTM(units=9,return_sequences=True,input_shape=(X_train_scaleddata.shape[1],1)))
LSTMmodel.add(Dropout(0.2))

#Now we add the 2nd layer 
LSTMmodel.add(LSTM(units=9,return_sequences=True))
LSTMmodel.add(Dropout(0.2))

#Now we define the 3rd layer
LSTMmodel.add(LSTM(units=9,return_sequences=True))
LSTMmodel.add(Dropout(0.2))

#Now we define the final layer
LSTMmodel.add(LSTM(units=9))
LSTMmodel.add(Dropout(0.2))

#Now we add the ouput layer
LSTMmodel.add(Dense(1))

In [None]:
#we compile the LSTM model using adam optimizer and calculate loss using mean sqaure error
LSTMmodel.compile("adam","mse")

In [None]:
#We can get a summary of the LSTM model defined
LSTMmodel.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_4 (LSTM)               (None, 18, 9)             396       
                                                                 
 dropout_4 (Dropout)         (None, 18, 9)             0         
                                                                 
 lstm_5 (LSTM)               (None, 18, 9)             684       
                                                                 
 dropout_5 (Dropout)         (None, 18, 9)             0         
                                                                 
 lstm_6 (LSTM)               (None, 18, 9)             684       
                                                                 
 dropout_6 (Dropout)         (None, 18, 9)             0         
                                                                 
 lstm_7 (LSTM)               (None, 9)                

In [None]:
#Now we fit our training data into the model where we define the number of epochs to 20 
LSTMmodel.fit(X_train_scaleddata,Ytraining,epochs=80,verbose=1)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


<keras.callbacks.History at 0x7f5040491bd0>

In [None]:
#Now get the real stock prices
Real_prices=Ytesting

In [None]:
Real_prices

array([[1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
    

In [None]:
#Next we evaluate our model performance on test data
LSTMmodel.evaluate(X_test_scaleddata,Ytesting)



0.24863119423389435

In [None]:
#Now we can make the predictions on the fitted model using testing data to get the predicted label
Predicted_stock_values_model=LSTMmodel.predict(X_test_scaleddata)

In [None]:
Predicted_stock_values_model

array([[0.5351484 ],
       [0.5342723 ],
       [0.53485566],
       [0.53564674],
       [0.53607297],
       [0.53596044],
       [0.53578365],
       [0.5357811 ],
       [0.53537095],
       [0.5356408 ],
       [0.5353608 ],
       [0.53571093],
       [0.5361167 ],
       [0.53640383],
       [0.5360176 ],
       [0.53572387],
       [0.5358265 ],
       [0.53561383],
       [0.5357454 ],
       [0.53576076],
       [0.5361737 ],
       [0.535679  ],
       [0.5358496 ],
       [0.53597283],
       [0.53618526],
       [0.53569764],
       [0.5348471 ],
       [0.53490925],
       [0.5346292 ],
       [0.5353338 ],
       [0.5355345 ],
       [0.53628385],
       [0.5362033 ],
       [0.5356861 ],
       [0.5358891 ],
       [0.5360167 ],
       [0.5363429 ],
       [0.5361149 ],
       [0.53626114],
       [0.53604007],
       [0.5360905 ],
       [0.5361486 ],
       [0.53610486],
       [0.5358238 ],
       [0.5359092 ],
       [0.5361239 ],
       [0.536171  ],
       [0.536

In [None]:
#Now creating a dataframe with real vs predicted stock prices and storing it in a dataframe
stock_dataframe=pd.DataFrame({'Real':Real_prices.ravel(),'Predicted':Predicted_stock_values_model.ravel()})

In [None]:
stock_dataframe.head()

Unnamed: 0,Real,Predicted
0,1,0.535148
1,1,0.534272
2,0,0.534856
3,1,0.535647
4,1,0.536073


In [None]:
#Because LSTM only outputs a continuous range and not a binary output
#We have to round of the predicted values to either 0 or 1
for i in range(0,len(stock_dataframe['Predicted'])):
    stock_dataframe.Predicted.iloc[i]=np.where(stock_dataframe.Predicted.iloc[i]>0.536,1,0)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [None]:
stock_dataframe['Predicted']=stock_dataframe['Predicted'].astype(int)

In [None]:
stock_dataframe

Unnamed: 0,Real,Predicted
0,1,0
1,1,0
2,0,0
3,1,0
4,1,1
...,...,...
291,1,1
292,1,1
293,0,1
294,1,1


In [None]:
#Now we get the total number of True Positives and True Negatives in the model 
c=0
for i in range(len(stock_dataframe)):
    if stock_dataframe.Real.iloc[i]==stock_dataframe.Predicted.iloc[i]:
        c+=1

In [None]:
print(c)

157


In [None]:
print("Accuracy:",c/len(stock_dataframe))

Accuracy: 0.5304054054054054


In [None]:
#Now we get number of true positives only i.e. only classes with label 1
d=0
for i in range(len(stock_dataframe)):
    if stock_dataframe.Real.iloc[i]==stock_dataframe.Predicted.iloc[i]==1:
        d+=1

In [None]:
#Now we get number of False positives only i.e. label 0 predicted as 1
e=0
for i in range(len(stock_dataframe)):
    if stock_dataframe.Real.iloc[i]==0 and stock_dataframe.Predicted.iloc[i]==1:
        e+=1

In [None]:
#Now we get number of True Negatives only i.e. only classes with label 0
f=0
for i in range(len(stock_dataframe)):
    if stock_dataframe.Real.iloc[i]==stock_dataframe.Predicted.iloc[i]==0:
        f+=1

In [None]:
#Now we get number of False Negatives only i.e. label 1 predicted as 0
g=0
for i in range(len(stock_dataframe)):
    if stock_dataframe.Real.iloc[i]==1 and stock_dataframe.Predicted.iloc[i]==0:
        g+=1

In [None]:
#Number of True Positives
print(d)

142


In [None]:
#Number of True Neagtives
print(f)

15


In [None]:
#Number of False Positives
print(e)

122


In [None]:
#Number of False Negatives
print(g)

17


In [None]:
#Calculating Precision for the model
print("Precision: ",d/(d+e))

Precision:  0.5378787878787878


In [None]:
#Calculating Recall for the model
print("Recall: ",d/(d+g))

Recall:  0.8930817610062893
