# Predict stock price using machine learning and technical indicator


In [40]:
# in this project we predict stock price base on price and (RSI or EMA)

In [6]:
#import libs
import yfinance as yf
import pandas_ta as ta
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import mean_absolute_error,mean_squared_error
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [12]:
#load in stock ticker price with yfinance library
df=yf.download('AAPL',start="2020-01-01",end="2024-12-12")
df=df[['Open','High','Low','Close','Volume']]

# deleting multi index in columns
df.columns = df.columns.droplevel(1)

df

  df=yf.download('AAPL',start="2020-01-01",end="2024-12-12")
[*********************100%***********************]  1 of 1 completed


Price,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-02,71.627107,72.681304,71.373233,72.620857,135480400
2020-01-03,71.847087,72.676416,71.689927,71.914787,146322800
2020-01-06,71.034717,72.526541,70.783256,72.487854,118387200
2020-01-07,72.497522,72.753816,71.926907,72.146935,108872000
2020-01-08,71.849525,73.609737,71.849525,73.307503,132079200
...,...,...,...,...,...
2024-12-05,243.402850,243.951514,241.547325,242.455124,40033900
2024-12-06,242.325439,244.041301,241.497434,242.255600,36870600
2024-12-09,241.248046,246.645031,241.168237,246.156204,44649200
2024-12-10,246.295865,247.612695,244.749592,247.173752,36914800


In [15]:
# 1-trading day leg structure
# shift data backward by one day to ensure no data leakage

df['Close_Shifted']=df['Close'].shift(1)
df['Previous_Close']=df['Close'].shift(1)


In [16]:
#calculate technical indicators based on shifted data

#ema with period = 50
df['EMA_50']=ta.ema(df['Close_Shifted'],legnth=50)

#rsi with period = 14
df['RSI_14']=ta.rsi(df['Close_Shifted'],legnth=50)



Price,Open,High,Low,Close,Volume,Close_Shifted,Previous_Close,EMA_50,RSI_14
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-02,71.627107,72.681304,71.373233,72.620857,135480400,,,,
2020-01-03,71.847087,72.676416,71.689927,71.914787,146322800,72.620857,72.620857,,
2020-01-06,71.034717,72.526541,70.783256,72.487854,118387200,71.914787,71.914787,,
2020-01-07,72.497522,72.753816,71.926907,72.146935,108872000,72.487854,72.487854,,
2020-01-08,71.849525,73.609737,71.849525,73.307503,132079200,72.146935,72.146935,,
...,...,...,...,...,...,...,...,...,...
2024-12-05,243.402850,243.951514,241.547325,242.455124,40033900,242.425201,242.425201,235.930038,73.407753
2024-12-06,242.325439,244.041301,241.497434,242.255600,36870600,242.455124,242.455124,237.116417,73.441428
2024-12-09,241.248046,246.645031,241.168237,246.156204,44649200,242.255600,242.255600,238.050814,72.779605
2024-12-10,246.295865,247.612695,244.749592,247.173752,36914800,246.156204,246.156204,239.524521,77.120410


In [18]:
# drop rows with missing data 

df.dropna(inplace=True)


# 2.Modeling : backtest

In [22]:
# choose # of days for rolling taining data and choose technical indicators

window_size=20 # 4weeks

#list of indicators to test
indicators=['RSI_14','EMA_50']

# initiate a dict to store predictions, actuals, daily MAE for each indicator
results={indicator:{'predictions':[],'actual':[],'daily_mae':[]}for indicator in indicators}

In [38]:
#loop over multiple 20-day train datasets for model building and next day test datasets for model Evaluation

for i in range(window_size,len(df)-1):
    train_df=df.iloc[i-window_size:i] #trining window
    test_index= i+1 #index of next day's prediction
    actual_close_price= df['Close'].iloc[test_index] #next day's actual closing price
    
    #individual indicators as predictors (plus Previous_Close)
    for indicator in indicators: 
        x_train=train_df[[indicator,'Previous_Close']]
        y_train=train_df['Close']
        x_train=sm.add_constant(x_train) # add constant for intercept
        
        model=sm.OLS(y_train,x_train).fit()
        
        x_test=pd.DataFrame({indicator: [df[indicator].iloc[test_index]],'Previous_close':[df['Previous_Close'].iloc[test_index]]})
        x_test=sm.add_constant(x_test,has_constant='add')
        
        prediction=model.predict(x_test)[0]
        results[indicator]['predictions'].append(prediction)
        results[indicator]['actual'].append(actual_close_price)
        
        daily_mae=mean_absolute_error([actual_close_price],[prediction])
        results[indicator]['daily_mae'].append(daily_mae)
        

# 3. Prediction Evaluation

In [39]:
# calculate accuracy metrics (mae,mse) for each individual indicator and the combined model

accuracy_data={
    'Indicator':[],
    'MAE':[],
    'MSE':[]
}

for indicator in indicators:
    print(indicator)
    if results[indicator]['actual']:#check if there are results for this indicator
        mae=mean_absolute_error(results[indicator]['actual'],results[indicator]['predictions'])
        mse=mean_squared_error(results[indicator]['actual'],results[indicator]['predictions'])
        accuracy_data['Indicator'].append(indicator)
        accuracy_data['MAE'].append(mae)
        accuracy_data['MSE'].append(mse)
        
accuracy_df=pd.DataFrame(accuracy_data).sort_values(by='MAE').reset_index(drop=True)
accuracy_df

RSI_14
EMA_50


Unnamed: 0,Indicator,MAE,MSE
0,EMA_50,2.445565,10.5577
1,RSI_14,2.506056,10.966125
