In [6]:
import pandas as pd
import glob
import random as r
import ta
from matplotlib import pyplot as plt
from ta.volatility import BollingerBands as BB
import pickle
#7194 Stocks (6009 final)
#one row per 'Stock' label; reset_index() without drop keeps the old row labels as an 'index' column
f = (
    pd.read_csv('Stocks_ML_Indicators_2.csv')
    .drop_duplicates(subset='Stock')
    .reset_index()
)
f

Unnamed: 0,index,MFI,CMF,EMV_simple,EMV_complex,VPT,NVI,BB,KC,DC,...,RSI,stochRSI,TSI,UO,SO,WR,AO,PPO,Stock,Buy
0,0,56.978804,0.166320,True,False,False,True,-0.611453,False,False,...,46.772879,0.259549,False,53.945892,False,-65.457686,False,False,nano_0,Buy
1,1,69.319440,0.445842,True,False,False,True,2.454940,False,False,...,65.447760,1.000000,False,74.529392,False,-1.782976,False,False,nano_1,Pass
2,2,55.226139,0.000301,False,False,False,False,-0.178439,True,False,...,43.922486,0.551292,False,47.075104,False,-52.883675,False,False,nanr_0,Pass
3,3,35.241041,0.219320,False,False,False,True,-0.096035,False,False,...,43.451944,0.861474,False,57.421429,False,-30.385289,False,False,nanr_1,Buy
4,4,58.751702,0.208147,False,False,False,False,-1.662502,True,False,...,40.088347,0.000000,False,49.589151,True,-100.000000,False,False,nao_0,Pass
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12011,12047,54.521430,0.126331,False,False,True,False,1.108396,False,False,...,58.321509,0.628904,False,59.917572,False,-24.105669,False,False,wfig_1,Pass
12012,12048,60.557672,0.231902,False,False,False,False,-2.014059,True,False,...,37.521794,0.118676,False,31.135873,True,-100.000000,False,False,wyde_0,Pass
12013,12049,70.943913,0.519836,False,False,False,False,-1.115488,True,False,...,38.386762,0.745704,False,41.712016,False,-66.195874,False,False,wyde_1,Pass
12014,12050,24.402644,0.069185,False,False,True,True,-1.399046,False,False,...,38.297342,0.338008,False,52.974252,False,-79.347826,False,True,xlnx_0,Buy


In [7]:
#These blocks of comments indicate how the raw technical indicators are used to generate our ML focused features

#Volume: MFI, CMF (plus ADI), EMV, VPT, NVI (5)
    #MFI: Raw Value
    #CMF: Raw Value WRONG? --> CMF > ADI, True : else, False
    #EMV: If EMV_cur > 0 and EMV_prev < 0, buy=True : if P_c > SMA & (EMV_cur > EMV_prev) & ∆V > 0, buy=True [do SMA 20, 50]
    #VPT: If (VPT < VPT_ma)_prev and (VPT > VPT_ma)_cur : buy --> this is a cross over event, line crosses above signal line
    #NVI: If NVI > 255_day_EMA, Bull, else Bear (Bull market vs. Bear market) [do its 255, and 100 day EMA]
#df = the stock data with all of the raw technical indicators (from time-series data)
#f_res = the resultant data frame with features specifically for an ML task
#each class is instantiated and evaluated once per stock case (e.g. Apple over period 1)
class volume: #checked all Volume cases (√)
    """Volume-based features for a single stock case.

    Every feature is read from the last rows of ``df`` by position, so the
    result does not depend on the frame's index labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Indicator frame for one case; the last row is treated as the most
        recent day. Columns read: 'Close', 'Volume', 'trend_sma_slow',
        'volume_mfi', 'volume_cmf', 'volume_em', 'volume_vpt', 'volume_nvi'.
    """
    def __init__(self, df):
        self.df = df
        self.ind = len(self.df)-1 #position of the most recent row (kept for existing users of the attribute)
        return
    def MFI(self): #use raw value of most recent day (√)
        """Return the latest 'volume_mfi' value unchanged."""
        return self.df['volume_mfi'].iloc[-1]
    def CMF(self): #use raw value of most recent day (√)
        """Return the latest 'volume_cmf' value unchanged."""
        return self.df['volume_cmf'].iloc[-1]
    def EMV_simple(self): #if any EMV from prev 3 days is <0 and most recent is >0 : True (√)
        """True when 'volume_em' was negative on any of the 3 prior days and is positive now."""
        recent = self.df['volume_em'].iloc[-4:]
        return bool(any(recent.iloc[:-1] < 0) and recent.iloc[-1] > 0)
    def EMV_complex(self): #complex condition partially based on emv (√)
        """True when close is above the slow SMA and both EMV and volume rose versus the previous day."""
        close = self.df['Close']
        slow_sma = self.df['trend_sma_slow']
        emv = self.df['volume_em']
        traded = self.df['Volume']
        return bool(close.iloc[-1] > slow_sma.iloc[-1] and
                    emv.iloc[-1] > emv.iloc[-2] and
                    traded.iloc[-1] > traded.iloc[-2])
    def VPT(self): #crossover event, if VPT was below average VPT, but is now above : True (√)
        """True when VPT was below its 20-row SMA on any of the 3 prior days and is above it now."""
        vpts = self.df['volume_vpt']
        sma_vpt = ta.trend.SMAIndicator(close=vpts, window=20).sma_indicator()
        #compare the three rows before the last one position by position
        was_below = vpts.iloc[-4:-1].to_numpy() < sma_vpt.iloc[-4:-1].to_numpy()
        return bool(was_below.any() and vpts.iloc[-1] > sma_vpt.iloc[-1])
    def NVI(self): #an NVI > 255 day ema indicates bull market. : True (√)
        """True when the latest NVI is above its 255-row EMA."""
        nvi = self.df['volume_nvi']
        ema_nvi = ta.trend.EMAIndicator(close=nvi, window=255).ema_indicator() #calc ema of nvi
        return bool(nvi.iloc[-1] > ema_nvi.iloc[-1])
    def all_volume(self):
        """Return every volume feature of this instance as a dict keyed by feature name."""
        #use self: the result must come from this instance's frame, not from whatever
        #object a module-level variable happens to reference
        return {
            'MFI': self.MFI(),
            'CMF': self.CMF(),
            'EMV_simple': self.EMV_simple(),
            'EMV_complex': self.EMV_complex(),
            'VPT': self.VPT(),
            'NVI': self.NVI(),
        }
    
#Volatility: BB, KC, DC, UI (4)
    #BB: Value, if P_c <= SMA : abs((P_c - SMA_20)/(BBL - SMA_20)), else (P_c - SMA_20)/(BBH - SMA_20)
    #KC: If P_c < 2*ATR : buy=True
    #DC: If (P_h == DC_h) & (P_h != DC_H for prev 20 days), buy = True
    #UI: skip for now
class volatility: #checked all Volatility cases (√)
    """Volatility-based features for a single stock case.

    Every feature is read from the last rows of ``df`` by position, so the
    result does not depend on the frame's index labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Indicator frame for one case; the last row is treated as the most
        recent day. Columns read: 'Close', 'High', 'volatility_bbm',
        'volatility_bbh', 'volatility_bbl', 'volatility_kcl', 'volatility_dch'.
    """
    def __init__(self, df):
        self.df = df
        self.ind = len(self.df)-1 #position of the most recent row (kept for existing users of the attribute)
        return
    def BB(self): #essentially number of stand. dev. price is away from mean (√)
        """Return the signed distance of the latest close from the middle band, in half-band-widths/2 units.

        The band columns are assumed to sit 2 standard deviations from the
        middle band (the library default), hence the division by 2.
        Returns 0.0 when the band has zero width instead of dividing by zero.
        """
        close = self.df['Close'].iloc[-1]
        sma = self.df['volatility_bbm'].iloc[-1]
        if (close <= sma):
            one_std = abs((self.df['volatility_bbl'].iloc[-1] - sma)/2) #/2 because default is 2 stand. dev
        else:
            one_std = (self.df['volatility_bbh'].iloc[-1] - sma)/2
        if one_std == 0:
            #a zero-width band would otherwise produce NaN/inf in the feature table
            return 0.0
        return (close - sma)/one_std
    def KC(self): #if close drops below lower KC band : True (√)
        """True when the latest close is below the lower Keltner band ('volatility_kcl')."""
        return bool(self.df['Close'].iloc[-1] < self.df['volatility_kcl'].iloc[-1])
    def DC(self): #if high of the day becomes new value of dch for the first time in 20+ days : True (√)
        """True when today's high equals 'volatility_dch' and none of the prior 20 highs did.

        Frames with fewer than 21 rows are checked over however many prior
        rows exist rather than raising.
        """
        lookback = 20
        highs = self.df['High']
        dc_high = self.df['volatility_dch']
        #rows -21..-2: the `lookback` days immediately before the most recent one
        prior_hits = (highs.iloc[-(lookback+1):-1].to_numpy() ==
                      dc_high.iloc[-(lookback+1):-1].to_numpy())
        return bool((not prior_hits.any()) and (highs.iloc[-1] == dc_high.iloc[-1]))
    def all_volatility(self):
        """Return every volatility feature of this instance as a dict keyed by feature name."""
        #use self: the result must come from this instance's frame, not from whatever
        #object a module-level variable happens to reference
        return {
            'BB': self.BB(),
            'KC': self.KC(),
            'DC': self.DC(),
        }
#Trend: SMA, EMA, MACD, ADX, VI (vortex), TRIX, KST, IC (ichimoku), PSAR (7 - SMA, EMA)
    #MACD: If (MACD < MACD_ema)_prev and (MACD > MACD_ema)_cur : buy --> this is a cross over event, line crosses above signal line
    #ADX: Raw Value
    #VI: If (VI+ < VI-)_prev and (VI+ > VI-)_cur : buy --> this is a cross over event, line crosses above signal line
    #TRIX: < 0, oversold (1); If (TRIX <= 0)_prev and (TRIX > 0)_cur : buy --> this is a cross over event, line crosses above signal line (2)
    #KST: >0 bulls (1); If (KST <= KST_ma)_prev and (KST > KST_ma)_cur : buy --> this is a cross over event, line crosses above signal line (2)
    #IC: if P_C < IC_low & (IC_high < IC_low) : buy=True
    #PSAR: PSAR < P_c : buy
class trend: #checked all Trend cases (√)
    """Trend-based features for a single stock case.

    Every feature is read from the last rows of ``df`` by position, so the
    result does not depend on the frame's index labels. "Crossover" features
    look at a 4-row window: the condition must hold on any of the 3 rows
    before the last, and the opposite must hold on the last row.

    Parameters
    ----------
    df : pandas.DataFrame
        Indicator frame for one case; the last row is treated as the most
        recent day. Columns read: 'High', 'Low', 'Close', 'trend_macd',
        'trend_macd_signal', 'trend_adx', 'trend_vortex_ind_pos',
        'trend_vortex_ind_neg', 'trend_trix', 'trend_kst', 'trend_kst_sig',
        'trend_ichimoku_a', 'trend_ichimoku_b'.
    """
    def __init__(self, df):
        self.df = df
        self.ind = len(self.df)-1 #position of the most recent row (kept for existing users of the attribute)
        return
    def _last_four(self, column):
        """Return the last (up to) four values of `column` as a NumPy array."""
        return self.df[column].iloc[-4:].to_numpy()
    def MACD(self): #crossover event, (√)
        """True when MACD was at or below its signal on a prior window day and is above it now."""
        macd = self._last_four('trend_macd')
        signal = self._last_four('trend_macd_signal')
        #note: this is the only crossover that counts equality (<=) as "below"
        return bool((macd[:-1] <= signal[:-1]).any() and macd[-1] > signal[-1])
    def ADX(self): #(√)
        """Return the latest 'trend_adx' value unchanged."""
        return self.df['trend_adx'].iloc[-1]
    def VI(self): #crossover event, (√)
        """True when VI+ was below VI- on a prior window day and is above it now."""
        vi_pos = self._last_four('trend_vortex_ind_pos')
        vi_neg = self._last_four('trend_vortex_ind_neg')
        return bool((vi_pos[:-1] < vi_neg[:-1]).any() and vi_pos[-1] > vi_neg[-1])
    def TRIX_simple(self): #(√)
        """True when the latest TRIX is negative."""
        return bool(self.df['trend_trix'].iloc[-1] < 0)
    def TRIX_cross(self): #crossover event (√)
        """True when TRIX was negative on a prior window day and is positive now."""
        trix = self._last_four('trend_trix')
        return bool((trix[:-1] < 0).any() and trix[-1] > 0)
    def KST_simple(self): #(√)
        """Return the latest 'trend_kst' value unchanged.

        NOTE(review): the planning comments describe this feature as
        "KST > 0", but the code returns the raw value -- confirm which
        one the downstream model expects before changing it.
        """
        return self.df['trend_kst'].iloc[-1]
    def KST_cross(self): #crossover event (√)
        """True when KST was below its signal on a prior window day and is above it now."""
        kst = self._last_four('trend_kst')
        signal = self._last_four('trend_kst_sig')
        return bool((kst[:-1] < signal[:-1]).any() and kst[-1] > signal[-1])
    def IC(self): #relationship event between two lines and closing price (√)
        """True when the latest close is below Ichimoku line B and line B is above line A."""
        close = self.df['Close'].iloc[-1]
        line_a = self.df['trend_ichimoku_a'].iloc[-1]
        line_b = self.df['trend_ichimoku_b'].iloc[-1]
        return bool((close < line_b) and (line_b > line_a))
    def PSAR(self): # (√)
        """True when the latest parabolic SAR value is below the latest close."""
        psars = ta.trend.PSARIndicator(self.df['High'], self.df['Low'], self.df['Close']).psar()
        return bool(psars.iloc[-1] < self.df['Close'].iloc[-1])
    def all_trend(self):
        """Return every trend feature of this instance as a dict keyed by feature name."""
        #use self: the result must come from this instance's frame, not from whatever
        #object a module-level variable happens to reference
        return {
            'MACD': self.MACD(),
            'ADX': self.ADX(),
            'VI': self.VI(),
            'TRIX_simple': self.TRIX_simple(),
            'TRIX_complex': self.TRIX_cross(), #key name kept as-is: downstream files already use it
            'KST_simple': self.KST_simple(),
            'KST_cross': self.KST_cross(),
            'IC': self.IC(),
            'PSAR': self.PSAR(),
        }
    
#Momentum: RSI, stoch_RSI, TSI, UO, stoch, WR, AO, PPO (8)
    #RSI: Raw Value
    #stoch_RSI: Raw Value
    #TSI: If (TSI < 0)_prev and (TSI > 0)_cur : buy --> this is a cross over event, line crosses above signal line
    #UO: Raw Value
    #SO: Raw Values
    #WR: Raw Value
    #AO: If (AO < 0)_prev and (AO >= 0)_cur : buy
    #PPO: If (PPO < PPO_ema)_prev and (PPO > PPO_ema)_cur : buy --> this is a cross over event, line crosses above signal line
class momentum: #checked all Momentum cases (√)
    """Momentum-based features for a single stock case.

    Every feature is read from the last rows of ``df`` by position.

    Parameters
    ----------
    df : pandas.DataFrame
        Indicator frame for one case; the last row is treated as the most
        recent day. Columns read: 'momentum_rsi', 'momentum_stoch_rsi',
        'momentum_tsi', 'momentum_uo', 'momentum_stoch',
        'momentum_stoch_signal', 'momentum_wr', 'momentum_ao',
        'momentum_ppo', 'momentum_ppo_signal'.
    """
    def __init__(self, df):
        self.df = df
        return
    def RSI(self): #relative strength index, raw value applicable (√)
        """Return the latest 'momentum_rsi' value unchanged."""
        return self.df['momentum_rsi'].iloc[-1]
    def stochRSI(self): #stochastic relative strength index, raw value applicable (√)
        """Return the latest 'momentum_stoch_rsi' value unchanged."""
        return self.df['momentum_stoch_rsi'].iloc[-1]
    def TSI(self):  #true strength index (√)
        """True when TSI was negative on any of the 3 prior days and is positive now."""
        tsi = self.df['momentum_tsi']
        return bool(any(tsi.iloc[-4:-1] < 0) and tsi.iloc[-1] > 0)
    def UO(self): #ultimate oscillator, raw value applicable (√)
        """Return the latest 'momentum_uo' value unchanged."""
        return self.df['momentum_uo'].iloc[-1]
    def SO(self): #stochastic oscillator, both values below 20 is a buy usually (√)
        """True when both the stochastic oscillator and its signal are below 20 on the latest day."""
        stoch = self.df['momentum_stoch'].iloc[-1]
        stoch_signal = self.df['momentum_stoch_signal'].iloc[-1]
        return bool((stoch < 20) and (stoch_signal < 20))
    def WR(self): #Williams %R (√)
        """Return the latest 'momentum_wr' value unchanged."""
        return self.df['momentum_wr'].iloc[-1]
    def AO(self): #(√)
        """True when AO was negative on any of the 3 prior days and is positive now."""
        ao = self.df['momentum_ao']
        return bool(any(ao.iloc[-4:-1] < 0) and ao.iloc[-1] > 0) #if any of last three days were below 0 and today is above, TRUE=Buy
    def PPO(self): #percentage price oscillator (√)
        """True when PPO was below its signal on any of the 3 prior days and is above it now."""
        ppo = self.df['momentum_ppo'].iloc[-4:].to_numpy()
        ppo_signal = self.df['momentum_ppo_signal'].iloc[-4:].to_numpy()
        was_below = ppo[:-1] < ppo_signal[:-1] #has PPO been less than emaPPO in any of last 3 days?
        return bool(was_below.any() and ppo[-1] > ppo_signal[-1])
    def all_momentum(self):
        """Return every momentum feature of this instance as a dict keyed by feature name."""
        #use self: the result must come from this instance's frame, not from whatever
        #object a module-level variable happens to reference
        return {
            'RSI': self.RSI(),
            'stochRSI': self.stochRSI(),
            'TSI': self.TSI(),
            'UO': self.UO(),
            'SO': self.SO(),
            'WR': self.WR(),
            'AO': self.AO(),
            'PPO': self.PPO(),
        }

In [8]:
#these are the columns, most of which are necessary, to calculate the various features for our data
columns = [
    name
    for group in (
        #raw daily price / volume fields
        ('Date', 'Open', 'High', 'Low', 'Close', 'Volume'),
        #volume indicators
        ('volume_adi', 'volume_cmf', 'volume_mfi',
         'volume_em', 'volume_sma_em',
         'volume_vpt', 'volume_nvi'),
        #volatility indicators
        ('volatility_atr',
         'volatility_bbm', 'volatility_bbh', 'volatility_bbl',
         'volatility_kcc', 'volatility_kch', 'volatility_kcl',
         'volatility_dcl', 'volatility_dch', 'volatility_dcm',
         'volatility_ui'),
        #trend indicators
        ('trend_macd', 'trend_macd_signal', 'trend_macd_diff',
         'trend_sma_fast', 'trend_sma_slow', 'trend_ema_fast', 'trend_ema_slow',
         'trend_adx', 'trend_adx_pos', 'trend_adx_neg',
         'trend_vortex_ind_pos', 'trend_vortex_ind_neg', 'trend_vortex_ind_diff',
         'trend_trix',
         'trend_kst', 'trend_kst_sig', 'trend_kst_diff',
         'trend_ichimoku_conv', 'trend_ichimoku_base', 'trend_ichimoku_a', 'trend_ichimoku_b',
         'trend_visual_ichimoku_a', 'trend_visual_ichimoku_b',
         'trend_psar_up', 'trend_psar_down', 'trend_psar_up_indicator', 'trend_psar_down_indicator'),
        #momentum indicators
        ('momentum_rsi',
         'momentum_stoch_rsi', 'momentum_stoch_rsi_k', 'momentum_stoch_rsi_d',
         'momentum_tsi',
         'momentum_uo',
         'momentum_stoch', 'momentum_stoch_signal',
         'momentum_wr',
         'momentum_ao',
         'momentum_ppo', 'momentum_ppo_signal', 'momentum_ppo_hist'),
    )
    for name in group
]

In [9]:
"""This is a example adding all technical analysis features implemented in
this library.
"""
def get_indicators(df): #calculates all the stock features - NOTE: I do not keep a lot, should be addressed later
    """Return `df` with every indicator column the `ta` library can add.

    Parameters
    ----------
    df : pandas.DataFrame
        Price history for one case with 'Open', 'High', 'Low', 'Close' and
        'Volume' columns. The bookkeeping columns 'Stock' and 'Case' are
        removed first when present; frames without them are accepted too.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) holding the cleaned price
        columns plus the indicator columns added by `ta`.
    """
    #errors='ignore': raw snippets that have not been tagged with 'Stock'/'Case' yet must not raise
    prices = df.drop(columns=['Stock','Case'], errors='ignore')
    # Clean nan values
    prices = ta.utils.dropna(prices)
    # Add all ta features filling nans values
    with_indicators = ta.add_all_ta_features(
        prices, "Open", "High", "Low", "Close", "Volume", fillna=True
    )
    #the result is intentionally not narrowed to the `columns` list; every indicator column is kept
    return with_indicators

In [10]:
def classify_worthiness(start_price, end_price, strong_buy_pct=3):
    """Label a price move as "Pass", "Buy" or "Strong Buy".

    Parameters
    ----------
    start_price, end_price : number
        Prices at the start and end of the evaluation window.
    strong_buy_pct : number, default 3
        Percent gain at or above which the move is a "Strong Buy".

    Returns
    -------
    str
        "Pass" for a change <= 0 %, "Buy" for a gain below
        `strong_buy_pct` %, otherwise "Strong Buy".

    Raises
    ------
    ValueError
        If the percent change is NaN or infinite (e.g. a missing price).
        Without this check such values fail both comparisons below and
        would be labelled "Strong Buy".
    """
    import math #function-scope import: the notebook's import cell does not bring in math

    percent_change = 100*((end_price-start_price)/start_price) #%change up or down
    if not math.isfinite(percent_change):
        raise ValueError(
            'cannot classify a non-finite price change (start=%r, end=%r)' % (start_price, end_price)
        )
    if (percent_change <= 0): return "Pass"
    elif (percent_change < strong_buy_pct) : return "Buy"
    else: return "Strong Buy"

In [21]:
#load the raw per-case price history; this rebinds f, and the feature loop below selects rows by its 'Case' column
f = pd.read_csv('Stocks_Feb20.csv')

In [22]:
#Build one feature row per stock case.
#  final_results : list of feature dicts, one per case that processed cleanly
#  errs          : names of the cases that raised while being processed
final_results = []
errs = []
#groupby visits every case present in f exactly once (ascending 'Case'), instead of
#re-filtering the whole frame per case and assuming each case has exactly 280 rows
for i, t in f.groupby('Case', sort=True):
    t = t.reset_index(drop=True) #fresh 0..n-1 labels on a new frame (no in-place edit of a slice of f)
    tt = t.iloc[:-5] #last five days are our test period
    try:
        stock = t['Stock'].iloc[0] + '_' + str(t['Case'].iloc[0]%2) #stock name for later
    except Exception:
        stock = 'Balls' + str(i) #placeholder name when the real one cannot be built
    try:
        tt = get_indicators(tt) #calculate all indicators on historical data
        tt = tt.reset_index(drop=True)
        full_features = {} #will store a dict of all features and results
        volume_feats = volume(tt) #get all the volume based indicators
        full_features.update(volume_feats.all_volume())
        volatility_feats = volatility(tt) #get all the volatility based indicators
        full_features.update(volatility_feats.all_volatility())
        trend_feats = trend(tt) #get all the trend based indicators
        full_features.update(trend_feats.all_trend())
        momentum_feats = momentum(tt) #get all the momentum based indicators
        full_features.update(momentum_feats.all_momentum())
        full_features['Stock'] = stock #add name of case
        #label: close on the first vs. the last day of the held-out five-day window
        full_features['Buy'] = classify_worthiness(t['Close'].iloc[-5], t['Close'].iloc[-1]) #classify it as a buy or not
        final_results.append(full_features) #put results in a list of dicts for later dataframe
    except Exception as exc:
        #Exception (not a bare except) so Ctrl-C still stops the run; the reason is reported
        errs.append(stock)
        print('Error: ' + stock + ' (' + repr(exc) + ')')
df = pd.DataFrame(final_results)
cols = list(df.columns)
if 'Stock' in cols: #absent only when no case succeeded
    cols.insert(0, cols.pop(cols.index('Stock'))) #move the case name to the first column
df = df[cols]
#df.to_csv('Stocks_ML_Indicators.csv', index=False)
df

  dip[i] = 100 * (self._dip[i] / self._trs[i])
  din[i] = 100 * (self._din[i] / self._trs[i])
  self._kama[i] = self._kama[i - 1] + smoothing_constant[i] * (


Unnamed: 0,Stock,MFI,CMF,EMV_simple,EMV_complex,VPT,NVI,BB,KC,DC,...,PSAR,RSI,stochRSI,TSI,UO,SO,WR,AO,PPO,Buy
0,a_0,42.603324,0.231363,False,True,False,True,0.024184,False,False,...,True,50.867512,0.306774,False,51.826684,False,-63.492063,False,False,Buy
1,a_1,65.218874,0.171611,True,False,True,True,2.956210,False,False,...,True,61.959414,1.000000,True,61.480063,False,-0.000000,False,True,Buy
2,aa_0,17.246208,-0.138006,True,False,True,True,-0.584108,False,False,...,False,52.037133,0.403444,False,40.135163,False,-64.676056,False,False,Buy
3,aa_1,7.082692,-0.272774,False,False,False,False,-2.169848,True,False,...,False,19.876651,0.000000,False,36.302525,True,-98.817812,False,False,Pass
4,aaap_0,32.685672,0.019475,True,False,True,False,-0.228505,False,False,...,True,51.310771,0.749803,False,59.226952,False,-44.378698,False,False,Pass
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12013,zumz_1,88.658265,0.230060,False,False,False,True,2.101168,False,False,...,True,70.140820,0.989193,False,69.621191,False,-8.625592,False,False,Pass
12014,zx_0,83.458376,-0.113319,False,False,False,False,2.108643,False,False,...,True,71.450269,0.791888,True,45.474751,False,-42.222222,False,False,Strong Buy
12015,zx_1,78.319488,0.243923,False,False,False,False,0.865867,False,False,...,True,54.609336,0.680864,False,49.299581,False,-29.729730,False,False,Buy
12016,zyne_0,68.898249,-0.140991,False,False,True,False,0.038455,False,False,...,True,31.097868,0.902457,False,42.963405,False,-75.609756,False,False,Pass


In [23]:
#write the feature table to disk; index=False leaves the row index out of the CSV
df.to_csv('Stocks_forML_Feb24.csv', index=False)

In [None]:
#2/19/2021
#this block takes the raw stocks and pulls chunks of x length out (here x = 280)
#Collect two random 280-row windows from every raw stock file that has at least 300 rows.
#NOTE(review): the windows are drawn with the unseeded `random` module, so every run
#produces a different sample -- seed it if the extraction has to be reproducible.
snippets = [] #gathered here and concatenated once at the end (concat inside the loop is quadratic)
for file in sorted(glob.glob('/Users/bbhoar/Documents/Stocks/*.us.txt')):
    print(file)
    #ticker = file name without directory and '.us.txt' suffix (no dependence on the directory's length)
    stock = file.rsplit('/', 1)[-1][:-len('.us.txt')]
    try:
        f = pd.read_csv(file)
        if (len(f) < 300): continue
        used = -1
        for i in range(2):
            index = r.randint(0, len(f)-280)
            while (index == used): index = r.randint(0, len(f)-280)
            used = index #remember the start so the second window cannot repeat the first
            #[:, :-1] drops the file's last column; assign() returns a new frame, so no
            #column is written onto a slice of f
            snippets.append(f.iloc[index:index+280,:-1].assign(Stock=stock))
    except Exception as exc:
        print('Error: ' + stock + ' (' + repr(exc) + ')')
if snippets:
    f_res = pd.concat(snippets)
else:
    f_res = pd.DataFrame(columns=['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Stock'])
f_res        

In [None]:
#manual smoke test: build each feature class on one precomputed indicator file and
#print every feature, one per line, in the order the methods are listed
f = pd.read_csv('Stocks_Feb20_Indicators.csv')

print("VOLUME TEST BLOCK")
volume_feats = volume(f)
for feature in (volume_feats.MFI, volume_feats.CMF, volume_feats.EMV_simple,
                volume_feats.EMV_complex, volume_feats.VPT, volume_feats.NVI):
    print(feature())

print("\nVOLATILITY TEST BLOCK")
volatility_feats = volatility(f)
for feature in (volatility_feats.BB, volatility_feats.KC, volatility_feats.DC):
    print(feature())

print("\nTREND TEST BLOCK")
trend_feats = trend(f)
for feature in (trend_feats.MACD, trend_feats.ADX, trend_feats.VI,
                trend_feats.TRIX_simple, trend_feats.TRIX_cross,
                trend_feats.KST_simple, trend_feats.KST_cross,
                trend_feats.IC, trend_feats.PSAR):
    print(feature())

print("\nMOMENTUM TEST BLOCK")
momentum_feats = momentum(f)
for feature in (momentum_feats.RSI, momentum_feats.stochRSI, momentum_feats.TSI,
                momentum_feats.UO, momentum_feats.SO, momentum_feats.WR,
                momentum_feats.AO, momentum_feats.PPO):
    print(feature())