In [1]:
import pandas as pd
import yfinance as yf
import numpy as np
from operator import *
import math
from sklearn.metrics import f1_score, roc_auc_score

In [2]:
company = "AEL"

In [3]:
df = yf.download(company, start="2021-01-01", end="2024-01-01")

[*********************100%%**********************]  1 of 1 completed


In [4]:
df.reset_index(inplace=True)

In [5]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-01-04,27.790001,28.0,26.610001,26.719999,26.049614,725700
1,2021-01-05,26.77,27.629999,26.700001,27.25,26.566319,649000
2,2021-01-06,27.84,29.99,27.82,29.299999,28.564884,1614400
3,2021-01-07,30.0,30.459999,29.559999,30.299999,29.539795,1323200
4,2021-01-08,30.440001,30.49,29.16,29.690001,28.945103,1208200


In [6]:
def SMA(data, period=30, column='Close'):
  return data[column].rolling(window=period).mean()


In [7]:
def EMA(data, period, column='Close'):

    ema_list = []
    for val in data[column]:
      if np.isnan(val):
        ema_list.append(np.nan)

    sma = sum(df[column][len(ema_list): len(ema_list)+period])/period
    mult = 2 / (1 + period)

    for j in range(period-1):
      ema_list.append(np.nan)

    ema_list.append(sma)

    for close in data[column][len(ema_list):]:
        ema = close*mult +(1-mult)*ema_list[-1]
        ema_list.append(ema)

    return ema_list

In [8]:
def MACD(data, period_long=26, period_short=12, column='Close'):
  shortEMA = EMA(data, period_short, column=column)
  longEMA = EMA(data, period_long, column=column)
  macd = [np.nan for i in range(period_long)]
  for i in range(period_long, len(data)):
    macd.append(shortEMA[i]-longEMA[i])
  data['12_Day_EMA'] = shortEMA
  data['26_Day_EMA'] = longEMA
  data['MACD'] = macd
  data['MACD_Signal'] = EMA(data, period=9, column='MACD')
  data['Histogram'] = data['MACD'] - data['MACD_Signal']

In [9]:
def RSI(data, period=14, column='Close'):
  diff = data[column].diff(1)
  diff = diff[1:]
  up = diff.copy()
  down = diff.copy()
  up[up<0] = 0
  down[down>0] = 0
  data['up'] = up
  data['down'] = abs(down)
  AVG_Gain = SMA(data, period, column='up')
  AVG_Loss = SMA(data, period, column='down')
  RS = AVG_Gain / AVG_Loss
  RSI = 100 - (100/(1+RS))
  data['RSI'] = RSI

In [10]:
def Wilders_Smooth_1(data, column, period=14):
  Smooth_14 = [np.nan for i in range(period)]
  sm_14_1 = sum(data[column][1:period+1])
  Smooth_14.append(sm_14_1)
  for i in data.index[period+1:]:
    sm_14 = Smooth_14[-1] - (Smooth_14[-1]/14) + data[column].iloc[i]
    Smooth_14.append(sm_14)
  return Smooth_14

In [11]:
def Wilders_Smooth_2(data, column, period=14):
  Smooth_14 = [np.nan for i in range(2*period)]
  sm_14_1 = np.mean(data[column][period:2*period])
  Smooth_14.append(sm_14_1)
  for i in data.index[2*period+1:]:
    sm_14 = ((Smooth_14[-1]*13) + data[column].iloc[i])/period
    Smooth_14.append(sm_14)
  return Smooth_14

In [12]:
def TR_14(data, period=14):
  TR = [np.nan,]
  for i in data.index[1:]:
    tr = max((data['High'].iloc[i]-data['Low'].iloc[i]), abs(data['High'].iloc[i]-data['Close'].iloc[i-1]), abs(data['Low'].iloc[i]-data['Close'].iloc[i-1]))
    TR.append(tr)
  data['TR'] = TR
  data['TR_14'] = Wilders_Smooth_1(data, column='TR')

In [13]:
def ADX(data, period=14):
  plus_DMs = [np.nan,]
  minus_DMs = [np.nan,]
  for i in data.index[1:]:
    move_up = data['High'].iloc[i] - data['High'].iloc[i-1]
    move_down = data['Low'].iloc[i-1] - data['Low'].iloc[i]
    if move_up > 0 and move_up > move_down:
      plus_DM = move_up
    else:
      plus_DM = 0

    if move_down > 0 and move_down > move_up:
      minus_DM = move_down
    else:
      minus_DM = 0

    plus_DMs.append(plus_DM)
    minus_DMs.append(minus_DM)
  data['+DM'] = plus_DMs
  data['-DM'] = minus_DMs
  data['Smooth(+DM)'] = Wilders_Smooth_1(data, column='+DM')
  data['Smooth(-DM)'] = Wilders_Smooth_1(data, column='-DM')
  TR_14(data)
  data['+DI'] = 100 * (data['Smooth(+DM)'] / data['TR_14'])
  data['-DI'] = 100 * (data['Smooth(-DM)'] / data['TR_14'])
  data['DX'] = 100 *( abs(data['+DI'] - data['-DI']) / (data['+DI'] + data['-DI']))
  data['ADX'] = Wilders_Smooth_2(data, column='DX')

In [14]:
def AD_Line(data):
  MFV_list = [np.nan,]
  A_D_Line = [np.nan,]
  for i in data.index[1:]:
    MFM = ((data['Close'].iloc[i-1] - data['Low'].iloc[i-1]) - (data['High'].iloc[i-1] - data['Close'].iloc[i-1])) / (data['High'].iloc[i-1] - data['Low'].iloc[i-1])
    MFV = MFM * data['Volume'].iloc[i-1]
    MFV_list.append(MFV)
  data['MFV'] = MFV_list
  for i in data.index[1:]:
    if i == 1:
      A_D_Line.append(data['MFV'].iloc[i])
    else:
      A_D_Line.append(data['MFV'].iloc[i] + A_D_Line[-1])
  data['A/D Line'] = A_D_Line

In [15]:
def BollingerBands(data, period=20, column='Close'):  # investopedia.
  df['Typical_Price'] = (df['High'] + df['Low'] + df['Close'])/3
  df["BB_mid"] = SMA(df, period=20)
  df["BB_+1_SD"] = df['BB_mid'] + df['Typical_Price'].rolling(window=period).std()  # Upper band
  df["BB_-1_SD"] = df['BB_mid'] - df['Typical_Price'].rolling(window=period).std()  # Lower band
  df["BB_+2_SD"] = df['BB_mid'] + 2 * df['Typical_Price'].rolling(window=period).std()  # Upper band
  df["BB_-2_SD"] = df['BB_mid'] - 2 * df['Typical_Price'].rolling(window=period).std()  # Lower band


In [16]:
def Stochastic_Osc(data, lookback=14, smooth=3, column='Close'):
  fast_k = [np.nan for i in range(lookback)]
  for i in data.index[lookback:]:
    low_14 = np.min(data[column][i-lookback: i])
    high_14 = np.max(data[column][i-lookback: i])
    rec_close = data[column].iloc[i-1]
    k = (rec_close - low_14)/(high_14 - low_14) * 100
    fast_k.append(k)
  data["%K_Fast"] = fast_k
  data["%D_Fast"] = SMA(data, period=3, column="%K_Fast")
  data["%K_Slow"] = data["%D_Fast"]
  data["%D_Slow"] = SMA(data, period=3, column="%D_Fast")


In [17]:
MACD(df)
RSI(df)
BollingerBands(df, period=20)
Stochastic_Osc(df)
ADX(df)
AD_Line(df)

In [18]:
df.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,12_Day_EMA,26_Day_EMA,MACD,...,Smooth(+DM),Smooth(-DM),TR,TR_14,+DI,-DI,DX,ADX,MFV,A/D Line
0,2021-01-04,27.790001,28.0,26.610001,26.719999,26.049614,725700,,,,...,,,,,,,,,,
1,2021-01-05,26.77,27.629999,26.700001,27.25,26.566319,649000,,,,...,,,0.929998,,,,,,-610842.311049,-610842.3
2,2021-01-06,27.84,29.99,27.82,29.299999,28.564884,1614400,,,,...,,,2.74,,,,,,118634.666224,-492207.6
3,2021-01-07,30.0,30.459999,29.559999,30.299999,29.539795,1323200,,,,...,,,1.16,,,,,,587730.117034,95522.47
4,2021-01-08,30.440001,30.49,29.16,29.690001,28.945103,1208200,,,,...,,,1.33,,,,,,852729.138154,948251.6
5,2021-01-11,29.120001,30.200001,29.1,29.879999,29.130333,1194500,,,,...,,,1.1,,,,,,-245272.381447,702979.2
6,2021-01-12,30.049999,30.4,29.84,30.030001,29.276571,926500,,,,...,,,0.559999,,,,,,499514.943204,1202494.0
7,2021-01-13,29.620001,30.01,29.290001,29.83,29.081589,762200,,,,...,,,0.74,,,,,,-297801.204696,904693.0
8,2021-01-14,30.040001,30.92,30.01,30.540001,29.773777,893000,,,,...,,,1.09,,,,,,381098.990429,1285792.0
9,2021-01-15,30.02,30.83,29.68,30.5,29.734777,725400,,,,...,,,1.15,,,,,,147199.324254,1432991.0


In [19]:
df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
       '12_Day_EMA', '26_Day_EMA', 'MACD', 'MACD_Signal', 'Histogram', 'up',
       'down', 'RSI', 'Typical_Price', 'BB_mid', 'BB_+1_SD', 'BB_-1_SD',
       'BB_+2_SD', 'BB_-2_SD', '%K_Fast', '%D_Fast', '%K_Slow', '%D_Slow',
       '+DM', '-DM', 'Smooth(+DM)', 'Smooth(-DM)', 'TR', 'TR_14', '+DI', '-DI',
       'DX', 'ADX', 'MFV', 'A/D Line'],
      dtype='object')

In [21]:
df_new = df.dropna()
# Dropping unnecessary columns
df_new.drop(columns=['Adj Close', 'up', 'down', 'MFV', '+DM', '-DM', 'Smooth(+DM)', 'Smooth(-DM)', 'TR', 'TR_14', '+DI', '-DI',
       'DX'], inplace=True)
df_new.reset_index(inplace=True, drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_new.drop(columns=['Adj Close', 'up', 'down', 'MFV', '+DM', '-DM', 'Smooth(+DM)', 'Smooth(-DM)', 'TR', 'TR_14', '+DI', '-DI',


In [22]:
df_new.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Volume,12_Day_EMA,26_Day_EMA,MACD,MACD_Signal,...,BB_+1_SD,BB_-1_SD,BB_+2_SD,BB_-2_SD,%K_Fast,%D_Fast,%K_Slow,%D_Slow,ADX,A/D Line
0,2021-02-23,28.98,29.4,28.01,28.040001,907900,29.240724,29.457416,-0.216692,0.050322,...,30.167436,29.018564,30.741873,28.444127,9.090909,3.030303,3.030303,17.895512,13.781975,-456799.1
1,2021-02-24,28.15,28.76,27.639999,27.950001,1325400,29.042151,29.345756,-0.303605,-0.020463,...,30.170271,28.836729,30.837042,28.169958,0.0,3.030303,3.030303,7.182064,14.733103,-1325508.0
2,2021-02-25,28.200001,28.379999,26.91,27.4,798800,28.789512,29.201625,-0.412113,-0.098793,...,30.209573,28.610427,31.009147,27.810854,0.0,3.030303,3.030303,3.030303,16.238124,-1917202.0
3,2021-02-26,27.26,27.82,26.209999,27.629999,946600,28.611126,29.085209,-0.474083,-0.173851,...,30.255359,28.366641,31.199719,27.422281,0.0,0.0,0.0,2.020202,18.139029,-2183469.0
4,2021-03-01,28.110001,28.32,27.4,28.1,1555900,28.532491,29.01223,-0.479739,-0.235029,...,30.252673,28.260327,31.248847,27.264153,7.516326,2.505442,2.505442,1.845248,19.109606,-1460290.0
5,2021-03-02,28.09,28.41,27.940001,27.959999,1068300,28.444415,28.934287,-0.489872,-0.285998,...,30.202565,28.143435,31.232131,27.11387,24.137961,10.551429,10.551429,4.35229,19.870941,-648514.1
6,2021-03-03,28.16,29.23,28.120001,28.76,1279300,28.492967,28.921377,-0.42841,-0.31448,...,30.123166,28.090834,31.139331,27.074669,21.052613,17.568966,17.568966,10.208612,19.397672,-1625901.0
7,2021-03-04,28.77,30.219999,28.549999,28.93,1163900,28.560203,28.922016,-0.361813,-0.323947,...,30.060762,28.059238,31.061523,27.058477,53.543312,32.911295,32.911295,20.343897,18.214962,-1429972.0
8,2021-03-05,29.52,29.84,28.459999,29.799999,709600,28.750941,28.987052,-0.236111,-0.30638,...,30.020614,28.074386,30.993728,27.101272,60.236226,44.94405,44.94405,31.808104,17.015985,-2064192.0
9,2021-03-08,30.299999,31.290001,30.129999,30.6,833000,29.035411,29.106529,-0.071118,-0.259327,...,30.108091,28.067909,31.128182,27.047818,94.48814,69.422559,69.422559,49.092635,17.256021,-1395729.0


In [23]:
def MACD_Decision(data):
  macd_dec = [np.nan,]
  for i in data.index[1:]:
    if data['MACD'].iloc[i] > data['MACD_Signal'].iloc[i] and data['MACD'].iloc[i-1] < data['MACD_Signal'].iloc[i-1]:
      macd_dec.append(1)
    elif data['MACD'].iloc[i] < data['MACD_Signal'].iloc[i] and data['MACD'].iloc[i-1] > data['MACD_Signal'].iloc[i-1]:
      macd_dec.append(-1)
    else:
      macd_dec.append(0)
  return macd_dec

In [24]:
def RSI_Decision_trend(data):
  rsi_dec = [np.nan,]
  for i in data.index[1:]:
    if data['RSI'].iloc[i] > 70 and data['A/D Line'].iloc[i] < data['A/D Line'].iloc[i-1]:
      rsi_dec.append(-1)
    elif data['RSI'].iloc[i] < 30 and data['A/D Line'].iloc[i] > data['A/D Line'].iloc[i-1]:
      rsi_dec.append(1)
    else:
      rsi_dec.append(0)

  return rsi_dec

In [25]:
def RSI_Decision_range(data):
  rsi_dec = [np.nan,]
  for i in data.index[1:]:
    if data['RSI'].iloc[i] > 70:
      rsi_dec.append(-1)
    elif data['RSI'].iloc[i] < 30:
      rsi_dec.append(1)
    else:
      rsi_dec.append(0)

  return rsi_dec

In [26]:
def ADX_Decision(data):  # using it as a trend filter
  adx_dec = [np.nan,]
  for i in data.index[1:]:
    if data['ADX'].iloc[i] > 25:
      adx_dec.append(2) # 2 for trending market
    elif data['ADX'].iloc[i] < 25:
      adx_dec.append(-2)  # -2 for range/sideways market
  return adx_dec

In [27]:
def BB_Decision(data):
  bol_dec = [np.nan]
  for i in data.index[1:]:
    if data['Close'].iloc[i] > data['BB_+1_SD'].iloc[i] and data['Close'].iloc[i] < data['BB_+2_SD'].iloc[i]:
      bol_dec.append(1)
    elif data['Close'].iloc[i] > data['BB_-2_SD'].iloc[i] and data['Close'].iloc[i] < data['BB_-1_SD'].iloc[i]:
      bol_dec.append(-1)
    else:
      bol_dec.append(0)
  return bol_dec

In [28]:
def Stochastic_Decision(data):
  macd_dec = [np.nan,]
  for i in data.index[1:]:
    if data['%K_Slow'].iloc[i] > data['%D_Slow'].iloc[i] and data['%K_Slow'].iloc[i-1] < data['%D_Slow'].iloc[i-1]:
      macd_dec.append(1)
    elif data['%K_Slow'].iloc[i] < data['%D_Slow'].iloc[i] and data['%K_Slow'].iloc[i-1] > data['%D_Slow'].iloc[i-1]:
      macd_dec.append(-1)
    else:
      macd_dec.append(0)
  return macd_dec

In [30]:
df_new['MACD_Dec'] = MACD_Decision(df_new)
df_new['RSI_Dec_trend'] = RSI_Decision_trend(df_new)
df_new['RSI_Dec_range'] = RSI_Decision_range(df_new)
df_new['ADX_Dec'] = ADX_Decision(df_new)
df_new['BB_Dec'] = BB_Decision(df_new)
df_new['Stoch_Dec'] = Stochastic_Decision(df_new)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_new['MACD_Dec'] = MACD_Decision(df_new)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_new['RSI_Dec_trend'] = RSI_Decision_trend(df_new)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_new['RSI_Dec_range'] = RSI_Decision_range(df_new)
A value is trying to be set on a copy of a slice from a 

In [31]:
df_new.reset_index(inplace=True, drop=True)

In [32]:
def Construct_Trend_Feat(data):
  trend_feat = []
  for i in data.index:
    if data['ADX_Dec'].iloc[i] == 2:
      val = 10
      if data['RSI_Dec_trend'].iloc[i] == -1:
        val -= 5
      elif data['RSI_Dec_trend'].iloc[i] == 1:
        val += 5
    else:
      val = 0
    trend_feat.append(val)
  data['Trend_Feature'] = trend_feat

In [33]:
def Construct_Range_Feat(data):
  range_feat = []
  for i in data.index:
    if data['ADX_Dec'].iloc[i] == -2:
      val = 10
      if data['RSI_Dec_range'].iloc[i] in [-1, 1]:
        val += (data['RSI_Dec_range'].iloc[i] + data['BB_Dec'].iloc[i])*2.5
      elif data['Stoch_Dec'].iloc[i] in [-1, 1] or data['MACD_Dec'].iloc[i] in [-1, 1]:
        val += (data['Stoch_Dec'].iloc[i] + data['MACD_Dec'].iloc[i])*2.5
    else:
      val = 0
    range_feat.append(val)
  data['Range_Feature'] = range_feat


In [34]:
def Construct_Final_Feature(data):
  Construct_Trend_Feat(data)
  Construct_Range_Feat(data)
  final_feat = []
  for i in data.index:
    final_feat.append(data['Trend_Feature'].iloc[i] + data['Range_Feature'].iloc[i])
  data['Final_Feature'] = final_feat

In [35]:
Construct_Final_Feature(df_new)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Trend_Feature'] = trend_feat
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Range_Feature'] = range_feat
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Final_Feature'] = final_feat


In [36]:
def Make_Decision(data):
  Decision = [np.nan,]
  for i in data.index[1:]:
    if data['Final_Feature'].iloc[i] > 10:
      Decision.append(1)
    elif data['Final_Feature'].iloc[i] < 10:
      Decision.append(-1)
    else:
      Decision.append(0)
  return Decision

In [37]:
df_new['Decision'] = Make_Decision(df_new)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_new['Decision'] = Make_Decision(df_new)


In [38]:
df_new.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Volume,12_Day_EMA,26_Day_EMA,MACD,MACD_Signal,...,MACD_Dec,RSI_Dec_trend,RSI_Dec_range,ADX_Dec,BB_Dec,Stoch_Dec,Trend_Feature,Range_Feature,Final_Feature,Decision
0,2021-02-23,28.98,29.4,28.01,28.040001,907900,29.240724,29.457416,-0.216692,0.050322,...,,,,,,,0,0.0,0.0,
1,2021-02-24,28.15,28.76,27.639999,27.950001,1325400,29.042151,29.345756,-0.303605,-0.020463,...,0.0,0.0,1.0,-2.0,0.0,0.0,0,12.5,12.5,1.0
2,2021-02-25,28.200001,28.379999,26.91,27.4,798800,28.789512,29.201625,-0.412113,-0.098793,...,0.0,0.0,1.0,-2.0,0.0,0.0,0,12.5,12.5,1.0
3,2021-02-26,27.26,27.82,26.209999,27.629999,946600,28.611126,29.085209,-0.474083,-0.173851,...,0.0,0.0,1.0,-2.0,-1.0,0.0,0,10.0,10.0,0.0
4,2021-03-01,28.110001,28.32,27.4,28.1,1555900,28.532491,29.01223,-0.479739,-0.235029,...,0.0,1.0,1.0,-2.0,-1.0,1.0,0,10.0,10.0,0.0
5,2021-03-02,28.09,28.41,27.940001,27.959999,1068300,28.444415,28.934287,-0.489872,-0.285998,...,0.0,1.0,1.0,-2.0,-1.0,0.0,0,10.0,10.0,0.0
6,2021-03-03,28.16,29.23,28.120001,28.76,1279300,28.492967,28.921377,-0.42841,-0.31448,...,0.0,0.0,0.0,-2.0,0.0,0.0,0,10.0,10.0,0.0
7,2021-03-04,28.77,30.219999,28.549999,28.93,1163900,28.560203,28.922016,-0.361813,-0.323947,...,0.0,0.0,0.0,-2.0,0.0,0.0,0,10.0,10.0,0.0
8,2021-03-05,29.52,29.84,28.459999,29.799999,709600,28.750941,28.987052,-0.236111,-0.30638,...,1.0,0.0,0.0,-2.0,0.0,0.0,0,12.5,12.5,1.0
9,2021-03-08,30.299999,31.290001,30.129999,30.6,833000,29.035411,29.106529,-0.071118,-0.259327,...,0.0,0.0,0.0,-2.0,1.0,0.0,0,10.0,10.0,0.0


In [39]:
df_new['Decision'].value_counts()

Decision
 0.0    534
-1.0    106
 1.0     78
Name: count, dtype: int64

In [40]:
df_new.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_new.dropna(inplace=True)


In [41]:
df_new.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,12_Day_EMA,26_Day_EMA,MACD,MACD_Signal,...,MACD_Dec,RSI_Dec_trend,RSI_Dec_range,ADX_Dec,BB_Dec,Stoch_Dec,Trend_Feature,Range_Feature,Final_Feature,Decision
1,2021-02-24,28.15,28.76,27.639999,27.950001,1325400,29.042151,29.345756,-0.303605,-0.020463,...,0.0,0.0,1.0,-2.0,0.0,0.0,0,12.5,12.5,1.0
2,2021-02-25,28.200001,28.379999,26.91,27.4,798800,28.789512,29.201625,-0.412113,-0.098793,...,0.0,0.0,1.0,-2.0,0.0,0.0,0,12.5,12.5,1.0
3,2021-02-26,27.26,27.82,26.209999,27.629999,946600,28.611126,29.085209,-0.474083,-0.173851,...,0.0,0.0,1.0,-2.0,-1.0,0.0,0,10.0,10.0,0.0
4,2021-03-01,28.110001,28.32,27.4,28.1,1555900,28.532491,29.01223,-0.479739,-0.235029,...,0.0,1.0,1.0,-2.0,-1.0,1.0,0,10.0,10.0,0.0
5,2021-03-02,28.09,28.41,27.940001,27.959999,1068300,28.444415,28.934287,-0.489872,-0.285998,...,0.0,1.0,1.0,-2.0,-1.0,0.0,0,10.0,10.0,0.0


In [42]:
df_enc = pd.get_dummies(df_new, columns = ['Decision'], dtype=int)
df_enc

Unnamed: 0,Date,Open,High,Low,Close,Volume,12_Day_EMA,26_Day_EMA,MACD,MACD_Signal,...,RSI_Dec_range,ADX_Dec,BB_Dec,Stoch_Dec,Trend_Feature,Range_Feature,Final_Feature,Decision_-1.0,Decision_0.0,Decision_1.0
1,2021-02-24,28.150000,28.760000,27.639999,27.950001,1325400,29.042151,29.345756,-0.303605,-0.020463,...,1.0,-2.0,0.0,0.0,0,12.5,12.5,0,0,1
2,2021-02-25,28.200001,28.379999,26.910000,27.400000,798800,28.789512,29.201625,-0.412113,-0.098793,...,1.0,-2.0,0.0,0.0,0,12.5,12.5,0,0,1
3,2021-02-26,27.260000,27.820000,26.209999,27.629999,946600,28.611126,29.085209,-0.474083,-0.173851,...,1.0,-2.0,-1.0,0.0,0,10.0,10.0,0,1,0
4,2021-03-01,28.110001,28.320000,27.400000,28.100000,1555900,28.532491,29.012230,-0.479739,-0.235029,...,1.0,-2.0,-1.0,1.0,0,10.0,10.0,0,1,0
5,2021-03-02,28.090000,28.410000,27.940001,27.959999,1068300,28.444415,28.934287,-0.489872,-0.285998,...,1.0,-2.0,-1.0,0.0,0,10.0,10.0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
714,2023-12-22,55.889999,55.959999,55.660000,55.830002,405200,55.550322,55.192134,0.358188,0.377992,...,0.0,2.0,1.0,0.0,10,0.0,10.0,0,1,0
715,2023-12-26,55.869999,56.000000,55.810001,55.830002,367200,55.593349,55.239383,0.353966,0.373187,...,0.0,2.0,1.0,1.0,10,0.0,10.0,0,1,0
716,2023-12-27,55.840000,56.000000,55.820000,55.820000,896600,55.628219,55.282392,0.345827,0.367715,...,0.0,2.0,1.0,0.0,10,0.0,10.0,0,1,0
717,2023-12-28,55.810001,55.990002,55.810001,55.889999,431000,55.668493,55.327400,0.341093,0.362390,...,0.0,2.0,1.0,0.0,10,0.0,10.0,0,1,0


In [43]:
df_enc.rename(columns={'Decision_-1.0': 'Decision_Sell', 'Decision_0.0': 'Decision_Hold', 'Decision_1.0': 'Decision_Buy'}, inplace=True)

In [44]:
df_enc.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,12_Day_EMA,26_Day_EMA,MACD,MACD_Signal,...,RSI_Dec_range,ADX_Dec,BB_Dec,Stoch_Dec,Trend_Feature,Range_Feature,Final_Feature,Decision_Sell,Decision_Hold,Decision_Buy
1,2021-02-24,28.15,28.76,27.639999,27.950001,1325400,29.042151,29.345756,-0.303605,-0.020463,...,1.0,-2.0,0.0,0.0,0,12.5,12.5,0,0,1
2,2021-02-25,28.200001,28.379999,26.91,27.4,798800,28.789512,29.201625,-0.412113,-0.098793,...,1.0,-2.0,0.0,0.0,0,12.5,12.5,0,0,1
3,2021-02-26,27.26,27.82,26.209999,27.629999,946600,28.611126,29.085209,-0.474083,-0.173851,...,1.0,-2.0,-1.0,0.0,0,10.0,10.0,0,1,0
4,2021-03-01,28.110001,28.32,27.4,28.1,1555900,28.532491,29.01223,-0.479739,-0.235029,...,1.0,-2.0,-1.0,1.0,0,10.0,10.0,0,1,0
5,2021-03-02,28.09,28.41,27.940001,27.959999,1068300,28.444415,28.934287,-0.489872,-0.285998,...,1.0,-2.0,-1.0,0.0,0,10.0,10.0,0,1,0


In [45]:
df_train = df_enc[: round(0.7*len(df_enc))]
df_test = df_enc[round(0.7*len(df_enc)):]

In [46]:
def calc_probab(z: np.ndarray):
    sum = 0
    for i in range(len(z)):
      sum += math.exp(z[i])

    p = []
    for i in range(len(z)):
      p.append(math.exp(z[i])/sum)

    prob = np.array(p)

    return prob

In [47]:
def calc_cost(X, y, w, b):
    m = X.shape[0]
    cost = 0.0
    for i in range(m):
      z_1 = np.dot(w[0], X[i]) + b[0]
      z_2 = np.dot(w[1], X[i]) + b[1]
      z_3 = np.dot(w[2], X[i]) + b[2]
      z = np.array([z_1, z_2, z_3])
      p = calc_probab(z)
      for k in range(3):
        cost +=  -y[i, k] * np.log(p[k])

    cost = cost / m
    return cost

In [48]:
def calc_gradient(X, y, w, b):
    m, n = X.shape
    dj_dw = np.zeros((3, n))
    dj_db = np.zeros((3,))

    for i in range(m):
      z_1 = np.dot(w[0], X[i]) + b[0]
      z_2 = np.dot(w[1], X[i]) + b[1]
      z_3 = np.dot(w[2], X[i]) + b[2]
      z = np.array([z_1, z_2, z_3])
      prob = calc_probab(z)
      if y[i, 0] == 1:
        err = prob[0] - y[i, 0]
        k = 0
        dj_db[k] = dj_db[k] + err
        for j in range(n):
          dj_dw[k, j] = dj_dw[k, j] + err*X[i, j]
      elif y[i, 1] == 1:
        err = prob[1] - y[i, 1]
        k = 1
        dj_db[k] = dj_db[k] + err
        for j in range(n):
          dj_dw[k, j] = dj_dw[k, j] + err*X[i, j]
      else:
        err = prob[2] - y[i, 2]
        k = 2
        dj_db[k] = dj_db[k] + err
        for j in range(n):
          dj_dw[k, j] = dj_dw[k, j] + err*X[i, j]

    dj_dw /= m
    dj_db /= m

    return dj_dw, dj_db

In [49]:
def gradient_descent(X, y, w_in, b_in, gradient_function, cost_function, alpha, num_iters):
    m = X.shape[0]
    i = 0
    J_history = []
    while i < num_iters:
      i += 1

      dj_dw, dj_db = gradient_function(X, y, w_in, b_in)

      w_in = w_in - alpha * dj_dw
      b_in = b_in - alpha * dj_db

      J = cost_function(X, y, w_in, b_in)

      J_history.append(J)

      if i% math.ceil(num_iters/10) == 0:
          print(f"Iteration {i:4}: Cost {J_history[-1]:5.4f} ")

    return w_in, b_in

In [50]:
def zscore_normalize_features(df, features: list):

    X = df[features].to_numpy()
    mu  = np.mean(X, axis=0)
    sigma  = np.std(X, axis=0)
    X_norm = (X - mu) / sigma

    return (X_norm, mu, sigma)

In [51]:
train_x_norm, mu, sigma = zscore_normalize_features(df_train, ['MACD_Dec', 'RSI_Dec_range', 'RSI_Dec_trend', 'Stoch_Dec', 'ADX_Dec', 'A/D Line', 'BB_Dec', 'Trend_Feature', 'Range_Feature', 'Final_Feature'])

In [52]:
train_y = df_train[['Decision_Sell', 'Decision_Hold', 'Decision_Buy']].to_numpy()

In [53]:
w_in = np.full((3, train_x_norm.shape[1]), 0.1)
b_in = np.full(3, 0.1)


In [54]:
w, b = gradient_descent(train_x_norm, train_y, w_in, b_in, calc_gradient, calc_cost, 0.01, 5000)

Iteration  500: Cost 0.4208 
Iteration 1000: Cost 0.2606 
Iteration 1500: Cost 0.1912 
Iteration 2000: Cost 0.1531 
Iteration 2500: Cost 0.1295 
Iteration 3000: Cost 0.1134 
Iteration 3500: Cost 0.1020 
Iteration 4000: Cost 0.0933 
Iteration 4500: Cost 0.0866 
Iteration 5000: Cost 0.0813 


In [55]:
w

array([[-0.72250906, -0.72303468, -0.23298492, -1.10907122, -0.54910167,
        -0.09994386, -0.19798981, -0.5885725 ,  0.03182149, -2.08536564],
       [-0.02004053,  0.21418762,  0.0268127 ,  0.00613491, -0.32676486,
        -0.22441414,  0.00303377, -0.26227126,  0.56883942,  0.45290539],
       [ 0.96087952,  1.03163184,  0.57252894,  1.34134455, -0.42104581,
        -0.3067428 , -0.16036203, -0.34640464,  1.20945887,  2.11800046]])

In [56]:

def predict(X, w, b):
    m, n = X.shape
    p = np.zeros((m, 3))
    probab = np.zeros((m, 3))

    for i in range(m):
        z = 0
        z_1 = np.dot(w[0], X[i]) + b[0]
        z_2 = np.dot(w[1], X[i]) + b[1]
        z_3 = np.dot(w[2], X[i]) + b[2]

        z = np.array([z_1, z_2, z_3])

        prob = calc_probab(z)
        probab[i] = prob
        max_prob = np.max(prob)
        for j in range(3):
          if prob[j] == max_prob:
            p[i,j] = 1
          else:
            p[i,j] = 0

    return p, probab

In [57]:
df_train[['Prediction_Sell', 'Prediction_Hold', 'Prediction_Buy']], probab = predict(train_x_norm, w, b)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train[['Prediction_Sell', 'Prediction_Hold', 'Prediction_Buy']], probab = predict(train_x_norm, w, b)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train[['Prediction_Sell', 'Prediction_Hold', 'Prediction_Buy']], probab = predict(train_x_norm, w, b)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-co

In [58]:
df_train.sample(10)

Unnamed: 0,Date,Open,High,Low,Close,Volume,12_Day_EMA,26_Day_EMA,MACD,MACD_Signal,...,Stoch_Dec,Trend_Feature,Range_Feature,Final_Feature,Decision_Sell,Decision_Hold,Decision_Buy,Prediction_Sell,Prediction_Hold,Prediction_Buy
209,2021-12-20,36.23,36.41,35.200001,35.91,347700,36.227767,35.691976,0.53579,0.528668,...,0.0,0,10.0,10.0,0,1,0,0.0,1.0,0.0
197,2021-12-02,34.34,35.560001,34.299999,35.279999,280100,35.069899,34.760971,0.308928,0.668624,...,0.0,0,10.0,10.0,0,1,0,0.0,1.0,0.0
234,2022-01-26,41.049999,41.759998,40.029999,40.639999,625500,41.135879,40.299862,0.836017,1.221915,...,0.0,10,0.0,10.0,0,1,0,0.0,1.0,0.0
259,2022-03-03,37.959999,38.549999,37.57,38.41,509600,38.595027,39.571581,-0.976553,-0.585292,...,0.0,10,0.0,10.0,0,1,0,0.0,1.0,0.0
32,2021-04-09,31.209999,31.59,30.93,31.209999,299600,31.284742,30.803653,0.481088,0.436294,...,0.0,0,10.0,10.0,0,1,0,0.0,1.0,0.0
50,2021-05-05,31.35,31.66,30.99,31.52,283600,31.454162,31.300693,0.153469,0.2079,...,1.0,0,12.5,12.5,0,0,1,0.0,0.0,1.0
431,2022-11-07,42.209999,42.66,41.380001,42.0,800400,41.234067,40.454091,0.779975,0.815401,...,0.0,10,0.0,10.0,0,1,0,0.0,1.0,0.0
183,2021-11-11,36.810001,38.099998,36.810001,37.68,466800,34.75741,33.54179,1.21562,0.898129,...,0.0,10,0.0,10.0,0,1,0,0.0,1.0,0.0
134,2021-09-02,32.529999,32.68,32.189999,32.279999,215200,32.118848,32.12566,-0.006811,0.041349,...,0.0,0,10.0,10.0,0,1,0,0.0,1.0,0.0
204,2021-12-13,36.439999,36.450001,35.720001,36.110001,277300,35.710715,35.238079,0.472636,0.446595,...,0.0,0,10.0,10.0,0,1,0,0.0,1.0,0.0


In [59]:
df_train.reset_index(inplace=True, drop=True)

In [60]:
def Calculate_Accuracy(data):
  right_pred = 0
  for idx in data.index:
    if data.Prediction_Sell.iloc[idx]==data.Decision_Sell.iloc[idx] and data.Prediction_Buy.iloc[idx]==data.Decision_Buy.iloc[idx]:
      right_pred += 1
  print("Accuracy is", right_pred/len(data)*100)

In [61]:
Calculate_Accuracy(df_train)

Accuracy is 97.8131212723658


In [62]:
# F1-Score
y_true = df_train[['Prediction_Sell', 'Prediction_Hold', 'Prediction_Buy']].to_numpy()
y_pred = df_train[['Decision_Sell', 'Decision_Hold', 'Decision_Buy']].to_numpy()
print(f1_score(y_pred=y_pred, y_true=y_true, average=None))

[0.95890411 0.98511502 0.95867769]


In [63]:
# AUC-ROC Score
print(roc_auc_score(y_true=y_true, y_score=y_pred, average=None))

[0.96549844 0.96239825 0.96031746]


In [66]:
test_x_norm, mu_test, sigma_test = zscore_normalize_features(df_test, features=['MACD_Dec', 'RSI_Dec_range', 'RSI_Dec_trend', 'Stoch_Dec', 'ADX_Dec', 'A/D Line', 'BB_Dec', 'Trend_Feature', 'Range_Feature', 'Final_Feature'])

In [67]:
df_test[['Prediction_Sell', 'Prediction_Hold', 'Prediction_Buy']], probab_test = predict(test_x_norm, w, b)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test[['Prediction_Sell', 'Prediction_Hold', 'Prediction_Buy']], probab_test = predict(test_x_norm, w, b)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test[['Prediction_Sell', 'Prediction_Hold', 'Prediction_Buy']], probab_test = predict(test_x_norm, w, b)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versu

In [68]:
df_test.reset_index(inplace=True, drop=True)

In [69]:
df_test.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,12_Day_EMA,26_Day_EMA,MACD,MACD_Signal,...,Stoch_Dec,Trend_Feature,Range_Feature,Final_Feature,Decision_Sell,Decision_Hold,Decision_Buy,Prediction_Sell,Prediction_Hold,Prediction_Buy
0,2023-02-23,41.23,41.84,41.09,41.490002,624900,44.454516,45.318482,-0.863966,0.043435,...,0.0,15,0.0,15.0,0,0,1,0.0,0.0,1.0
1,2023-02-24,40.860001,41.57,40.130001,41.52,430000,44.003052,45.037113,-1.034061,-0.172064,...,1.0,15,0.0,15.0,0,0,1,0.0,0.0,1.0
2,2023-02-27,41.529999,42.209999,41.41,41.610001,652600,43.63489,44.783252,-1.148362,-0.367324,...,0.0,15,0.0,15.0,0,0,1,0.0,0.0,1.0
3,2023-02-28,41.540001,42.419998,41.540001,41.650002,776500,43.329523,44.55116,-1.221637,-0.538187,...,0.0,10,0.0,10.0,0,1,0,0.0,1.0,0.0
4,2023-03-01,41.639999,42.41,41.48,41.610001,653200,43.064981,44.333296,-1.268315,-0.684212,...,0.0,10,0.0,10.0,0,1,0,0.0,1.0,0.0


In [70]:
Calculate_Accuracy(df_test)

Accuracy is 97.67441860465115


In [71]:
# F1-Score
y_true_ = df_test[['Prediction_Sell', 'Prediction_Hold', 'Prediction_Buy']].to_numpy()
y_pred_  = df_test[['Decision_Sell', 'Decision_Hold', 'Decision_Buy']].to_numpy()
print(f1_score(y_pred=y_pred_, y_true=y_true_, average=None))

[0.98550725 0.98442368 0.9       ]


In [72]:
# AUC-ROC Score
print(roc_auc_score(y_true=y_true_, y_score=y_pred_, average=None))

[0.99723757 0.97216471 0.94487179]
