# Importing libraries

In [1]:
import yfinance as yf
import pandas as pd
import pandas_ta as ta
import seaborn as sns 
import matplotlib.pyplot as plt
from datetime import datetime, timedelta


# Input phase 

### (User inputs stock ticker symbol )

In [2]:
ticker = "AAPL"

# Data selection (Extracting data from Yahoo finance Api)

In [3]:
start_date = datetime.today() - timedelta(days=10*365)
end_date = datetime.today()

# Use yfinance library to extract historical data
data = yf.download(ticker, start=start_date, end=end_date)

# Convert the data to a Pandas dataframe
df = pd.DataFrame(data)

# Print the first 5 rows of the dataframe
df.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-05-16,15.115714,15.6375,14.960714,15.520714,13.475037,603204000
2013-05-17,15.680357,15.7175,15.393214,15.473571,13.434109,427904400
2013-05-20,15.425357,15.921429,15.360714,15.818929,13.73395,451578400
2013-05-21,15.648214,15.91,15.507143,15.702143,13.632555,456022000
2013-05-22,15.858929,16.012501,15.650714,15.7625,13.684956,443038400


# Data Pre-processing (Dealing with missing/null values)

In [4]:
#Checking any null/missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2516 entries, 2013-05-16 to 2023-05-12
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       2516 non-null   float64
 1   High       2516 non-null   float64
 2   Low        2516 non-null   float64
 3   Close      2516 non-null   float64
 4   Adj Close  2516 non-null   float64
 5   Volume     2516 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 137.6 KB


In [5]:
df.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,2516.0,2516.0,2516.0,2516.0,2516.0,2516.0
mean,67.375261,68.140154,66.652341,67.429807,65.545725,154901000.0
std,50.718077,51.379086,50.099058,50.772477,51.246215,100508700.0
min,13.977143,14.295357,13.888214,14.063571,12.20995,35195900.0
25%,27.936875,28.16125,27.576875,27.9475,25.557104,87910420.0
50%,43.43375,43.7575,43.140001,43.549999,41.424177,123760600.0
75%,119.984999,121.257498,118.434999,119.982502,118.39167,189920600.0
max,182.630005,182.940002,179.119995,182.009995,180.434296,1065523000.0


# Data transformation
##  (Transforming raw daily historical into technical indicators using pandas_ta library )

In [9]:
# Since package pandas_ta doesnt have a function to calculate LWR indicator so we created a function to calculate it
 
def calculate_lwr(data, lookback):
    high = data['High'].rolling(window=lookback).max()
    low = data['Low'].rolling(window=lookback).min()
    close = data['Close']
    lwr = ((high - close) / (high - low)) * 100
    return lwr


In [8]:
# Calculate Technical indicators
#Calculating simple and weighted moving average (Sma and Wma)
df["SMA"] = ta.sma(df["Close"], length=10)
df["WMA"] = ta.wma(df["Close"], length=7)

#Calculating Relative strength index (RSI)
df["RSI"] = ta.rsi(df["Close"], length=14)
df["MOM"] = ta.mom(df['Close'],length=14)

#Calculating Stochastic %K (STCK) and Stochastic %D (STCD)
df.ta.stoch(high=df["High"], low=df["Low"], close=df["Close"], k=14, d=3, fillna=True, append=True)

# Calculating Williams %R
df['lwr'] = calculate_lwr(df,14)

#Calculating the accumlation/distribution indicator (ADO)
df['ADO'] = ta.adosc(high=df['High'], low=df['Low'], close=df['Close'], volume=df['Volume'], fast=3, slow=10)

#Calculating Commodity Channel Inde (CCI)
df['CCI'] = ta.cci(high = df['High'] , low=df['Low'], close=df['Close'], length=20)

# Print the last 20 rows of the transformed data
df.tail(20)


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,SMA,WMA,RSI,MOM,STOCHk_14_3_3,STOCHd_14_3_3,lwr,ADO,CCI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2023-04-17,165.089996,165.389999,164.029999,165.229996,165.00177,41516200,163.915001,163.828215,62.420474,6.949997,86.126492,71.117303,14.825051,56037270.0,88.772481
2023-04-18,166.100006,167.410004,165.649994,166.470001,166.240067,49923000,163.945001,164.603215,64.450551,8.820007,86.167758,80.89164,11.662565,51997160.0,119.408815
2023-04-19,165.800003,168.160004,165.539993,167.630005,167.398468,47720200,164.145001,165.603573,66.285397,6.860001,89.062607,87.118952,6.324564,54658090.0,120.668862
2023-04-20,166.089996,167.869995,165.559998,166.649994,166.4198,52456400,164.434001,166.158928,63.312286,4.289993,87.997891,87.742752,18.019199,49838390.0,99.596961
2023-04-21,165.050003,166.449997,164.490005,165.020004,164.792068,58337300,164.470001,166.097858,58.604038,0.12001,79.395366,85.485288,37.470138,34812650.0,58.268552
2023-04-24,165.0,165.600006,163.889999,165.330002,165.101639,41949600,164.800002,165.938572,59.225082,-0.839996,70.246593,79.213283,33.770885,34633490.0,43.525662
2023-04-25,165.190002,166.309998,163.729996,163.770004,163.543793,48714100,165.097002,165.397502,54.771879,-1.860001,58.790793,69.477584,52.386597,16392470.0,28.376531
2023-04-26,163.059998,165.279999,162.800003,163.759995,163.533798,45498800,165.463,164.905357,54.743435,0.0,53.778824,60.938737,52.506045,4170961.0,-6.076418
2023-04-27,165.190002,168.559998,165.190002,168.410004,168.177383,64902300,165.748001,165.628215,64.076413,3.75,64.466333,59.011983,1.708359,17604530.0,129.773664
2023-04-28,168.490005,169.850006,167.880005,169.679993,169.445618,55209200,166.195,166.599285,66.130765,7.649994,81.36576,66.536972,1.688315,36034210.0,183.621892


### Creating target column


In [10]:
def create_target_column(data):
    # Shift the closing column by one day to get tomorrow's closing price
    data['tomorrow_closing'] = data['Close'].shift(-1)

    # Compare today's closing price with tomorrow's closing price
    data['target'] = (data['tomorrow_closing'] > data['Close']).astype(int)

    return data


In [11]:
df = create_target_column(df)
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,SMA,WMA,RSI,MOM,STOCHk_14_3_3,STOCHd_14_3_3,lwr,ADO,CCI,tomorrow_closing,target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2013-05-16,15.115714,15.637500,14.960714,15.520714,13.475037,603204000,,,,,,,,,,15.473571,0
2013-05-17,15.680357,15.717500,15.393214,15.473571,13.434109,427904400,,,,,,,,,,15.818929,1
2013-05-20,15.425357,15.921429,15.360714,15.818929,13.733950,451578400,,,,,,,,,,15.702143,0
2013-05-21,15.648214,15.910000,15.507143,15.702143,13.632555,456022000,,,,,,,,,,15.762500,1
2013-05-22,15.858929,16.012501,15.650714,15.762500,13.684956,443038400,,,,,,,,,,15.790714,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-08,172.479996,173.850006,172.110001,173.500000,173.260345,55962800,168.405998,170.326784,67.425498,7.029999,74.506075,64.910638,6.956548,3.675335e+07,180.082429,171.770004,0
2023-05-09,173.050003,173.539993,171.600006,171.770004,171.532745,45326900,169.205998,170.836429,62.692404,4.139999,88.231891,75.106071,21.999989,2.801383e+07,138.448386,173.559998,1
2023-05-10,173.020004,174.029999,171.899994,173.559998,173.320267,53724500,170.185999,171.718929,65.398868,6.910004,88.202877,83.646948,6.434830,3.144211e+07,140.550965,173.750000,1
2023-05-11,173.850006,174.589996,172.169998,173.750000,173.510010,49514700,170.719998,172.507143,65.683446,8.729996,88.146842,88.19387,7.124655,3.480381e+07,129.211189,172.570007,0


In [12]:
#storing today's data for the final prediction
last_day = df.tail(1)
last_day

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,SMA,WMA,RSI,MOM,STOCHk_14_3_3,STOCHd_14_3_3,lwr,ADO,CCI,tomorrow_closing,target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2023-05-12,173.619995,174.059998,171.0,172.570007,172.570007,45497800,171.009,172.814288,62.258794,7.240005,89.769145,88.706288,17.13308,33393610.0,94.63131,,0


# Modeling (Data mining phase)

In [13]:
#Importing libraries related to data preparation and the modeling phase
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam


In [14]:
#removing data with null values the first 20 days since some techincal indicators are calculated for the previous 20 days 
df = df.dropna()
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2496 entries, 2013-06-13 to 2023-05-11
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Open              2496 non-null   float64
 1   High              2496 non-null   float64
 2   Low               2496 non-null   float64
 3   Close             2496 non-null   float64
 4   Adj Close         2496 non-null   float64
 5   Volume            2496 non-null   int64  
 6   SMA               2496 non-null   float64
 7   WMA               2496 non-null   float64
 8   RSI               2496 non-null   float64
 9   MOM               2496 non-null   float64
 10  STOCHk_14_3_3     2496 non-null   object 
 11  STOCHd_14_3_3     2496 non-null   object 
 12  lwr               2496 non-null   float64
 13  ADO               2496 non-null   float64
 14  CCI               2496 non-null   float64
 15  tomorrow_closing  2496 non-null   float64
 16  target            2496 n

In [15]:
df['STOCHk_14_3_3'] = df['STOCHk_14_3_3'].astype(float)
df['STOCHd_14_3_3'] = df['STOCHd_14_3_3'].astype(float)
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2496 entries, 2013-06-13 to 2023-05-11
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Open              2496 non-null   float64
 1   High              2496 non-null   float64
 2   Low               2496 non-null   float64
 3   Close             2496 non-null   float64
 4   Adj Close         2496 non-null   float64
 5   Volume            2496 non-null   int64  
 6   SMA               2496 non-null   float64
 7   WMA               2496 non-null   float64
 8   RSI               2496 non-null   float64
 9   MOM               2496 non-null   float64
 10  STOCHk_14_3_3     2496 non-null   float64
 11  STOCHd_14_3_3     2496 non-null   float64
 12  lwr               2496 non-null   float64
 13  ADO               2496 non-null   float64
 14  CCI               2496 non-null   float64
 15  tomorrow_closing  2496 non-null   float64
 16  target            2496 n

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['STOCHk_14_3_3'] = df['STOCHk_14_3_3'].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['STOCHd_14_3_3'] = df['STOCHd_14_3_3'].astype(float)


In [35]:
scaler = MinMaxScaler()
scaled_df = pd.DataFrame(scaler.fit_transform(df),columns=df.columns)
scaled_df

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,SMA,WMA,RSI,MOM,STOCHk_14_3_3,STOCHd_14_3_3,lwr,ADO,CCI,tomorrow_closing,target
0,0.008712,0.007808,0.008620,0.008970,0.007775,0.243259,0.007575,0.008066,0.369606,0.515487,0.155372,0.178865,0.745680,0.303545,0.365993,0.007713,0.0
1,0.009326,0.007628,0.008566,0.007713,0.006685,0.229704,0.007146,0.007605,0.279948,0.509032,0.106856,0.114987,0.945803,0.288922,0.316905,0.008128,1.0
2,0.008487,0.007503,0.008968,0.008128,0.007045,0.217619,0.006737,0.007366,0.322456,0.513128,0.138934,0.109613,0.877623,0.266646,0.343811,0.008079,0.0
3,0.008513,0.007334,0.008936,0.008079,0.007002,0.155125,0.006355,0.007165,0.318933,0.510421,0.091877,0.087608,0.885664,0.254166,0.355533,0.006214,0.0
4,0.008479,0.006648,0.007377,0.006214,0.005386,0.267628,0.005873,0.006567,0.204725,0.499276,0.073658,0.076102,1.000000,0.169845,0.287781,0.004904,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2491,0.930923,0.948768,0.949404,0.949746,0.957770,0.075821,0.934409,0.937342,0.688336,0.688957,0.625194,0.650330,0.063478,0.475228,0.789923,0.949329,0.0
2492,0.939817,0.946100,0.957575,0.949329,0.957355,0.020156,0.939397,0.944142,0.685712,0.662411,0.745464,0.645045,0.069565,0.490868,0.763609,0.939028,0.0
2493,0.943197,0.944261,0.954488,0.939028,0.947085,0.009833,0.944282,0.947225,0.620970,0.603850,0.883886,0.751948,0.220000,0.482260,0.707134,0.949686,1.0
2494,0.943019,0.947167,0.956304,0.949686,0.957711,0.017983,0.950265,0.952565,0.657991,0.659980,0.883593,0.841503,0.064348,0.485637,0.709986,0.950818,1.0


In [36]:
# Split the data into input features (X) and target variable (y)
X = scaled_df[['WMA','SMA','RSI','MOM','STOCHk_14_3_3','STOCHd_14_3_3','lwr','ADO','CCI']].values
y = scaled_df['target'].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=233)

# Reshape the input data for LSTM (assuming a time step of 1)
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])





In [18]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

In [24]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(9, input_shape=(1, 9)))
model.add(Dense(200, activation='relu'))
model.add(Dropout(0.1))  # Add dropout layer with a dropout rate of 0.1
model.add(Dense(50, activation='relu'))
model.add(Dropout(0.1))  # Add dropout layer with a dropout rate of 0.1
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

# Set up early stopping
early_stopping = EarlyStopping(patience=500, restore_best_weights=True)

# Train the model with early stopping
model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test))

# Evaluate the model on the test data
loss, accuracy = model.evaluate(X_test, y_test)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100


Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Test Loss: 0.7038074731826782
Test Accuracy: 0.5600000023841858


# Evaluation

In [25]:
predictions = model.predict(X_test)
predictions



array([[0.75819755],
       [0.46888137],
       [0.45336834],
       [0.49829516],
       [0.4973843 ],
       [0.5997765 ],
       [0.71590877],
       [0.8021576 ],
       [0.53762144],
       [0.4516691 ],
       [0.5943385 ],
       [0.43481812],
       [0.6297167 ],
       [0.4371155 ],
       [0.7512604 ],
       [0.74789464],
       [0.6587478 ],
       [0.46472445],
       [0.5019707 ],
       [0.47576156],
       [0.5031075 ],
       [0.48293036],
       [0.5530876 ],
       [0.4557897 ],
       [0.56284016],
       [0.49207452],
       [0.7440577 ],
       [0.5624216 ],
       [0.52223027],
       [0.53172576],
       [0.57623696],
       [0.4575116 ],
       [0.4892899 ],
       [0.47712687],
       [0.41029403],
       [0.5962682 ],
       [0.6123559 ],
       [0.4792562 ],
       [0.45797333],
       [0.46349034],
       [0.5890817 ],
       [0.566085  ],
       [0.48984915],
       [0.6967471 ],
       [0.4787046 ],
       [0.5964419 ],
       [0.44412678],
       [0.507

In [26]:
from sklearn.metrics import classification_report

new_predictions =[]
for instance in predictions:
    if instance>0.5:
        new_predictions.append(1)
    else:
        new_predictions.append(0) 
        
print(classification_report(y_test,new_predictions))  

              precision    recall  f1-score   support

         0.0       0.57      0.50      0.53       249
         1.0       0.56      0.62      0.59       251

    accuracy                           0.56       500
   macro avg       0.56      0.56      0.56       500
weighted avg       0.56      0.56      0.56       500



In [39]:
scaled_last_day = pd.DataFrame(data=scaler.transform(last_day.values),columns=last_day.columns)
scaled_last_day



Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,SMA,WMA,RSI,MOM,STOCHk_14_3_3,STOCHd_14_3_3,lwr,ADO,CCI,tomorrow_closing,target
0,0.946577,0.947345,0.950857,0.943792,0.953251,0.009999,0.95529,0.959192,0.615039,0.666667,0.899389,0.894552,0.171331,0.487559,0.647698,,0.0


In [51]:
input_for_model= scaled_last_day[['WMA','SMA','RSI','MOM','STOCHk_14_3_3','STOCHd_14_3_3','lwr','ADO','CCI']].values
input_for_model = input_for_model.reshape(input_for_model.shape[0], 1, input_for_model.shape[1])

In [53]:
prediction_for_tommorow = model.predict(input_for_model)
prediction_for_tommorow 



array([[0.4536962]], dtype=float32)

In [57]:
if (prediction_for_tommorow > 0.5):
    print(f'StockScout thinks {ticker} is expected to rise tomorrow, StockScout recommends you should buy/keep this stock')
else:
    print(f'StockScout thinks {ticker} is expected to fall tomorrow, StockScout recommends you should sell/avoid this stock')

StockScout thinks AAPL is expected to fall tomorrow, StockScout recommends you should sell/avoid this stock
