# Create indicators and find signals
This code is designed to calculate the below indicators:
- EMA
- MACD
- MACD mins and maxs
- MACD positive turns
- Support price
- Target price
- Prev week volume Vs 8 week average volume

We will also bring in if the target price was hit within 8 weeks for ML training and validation.

Each share has it's own trading pattern so we will train a different model for each share but use the same set of features, just the hyper paramenters will be tuned differenty.

In [1]:
#Import libraries
import pandas as pd
import numpy as np
import re
import tables
import os
import datetime as dt

In [2]:
#Clean column names
def clean_col_name(str_in):
    str_out = str_in.lower() #Change all to lower case
    str_out = re.sub(r'.*\.','',str_out) #Remove everything before a .
    str_out = re.sub(r' ','_',str_out) #Replace spaces with _
    str_out = re.sub(r'_?&_?','_and_',str_out) #Put _ around & and @ and change to 'and' and 'at'
    str_out = re.sub(r'_?@_?','_at_',str_out) #Put _ around & and @ and change to 'and' and 'at'
    str_out = re.sub(r'[^0-9a-z_]','',str_out) #Remove all non allowed characters
    str_out = re.sub(r'^_','',str_out) #Remove leading _
    return str_out

In [3]:
class process_time:
    def __init__(self,name:str = ''):
        self.st_time = dt.datetime.now()
        self.lap_li = []
        self.en_time = None
        self.name = name
    def calc_el_time(self,st_time,en_time):
        diff_time = en_time - st_time
        duration_in_s = diff_time.total_seconds()
        hours = int(divmod(duration_in_s, 3600)[0])
        duration_in_s += -(hours * 3600)
        minutes = int(divmod(duration_in_s, 60)[0])
        duration_in_s += -(minutes * 60)
        seconds = int(duration_in_s)
        return [hours,minutes,seconds]
    def lap(self):
        self.lap_li.append(dt.datetime.now())
    def end(self):
        self.en_time = dt.datetime.now()
        lap_time = self.calc_el_time(self.st_time,self.en_time)
        if self.name != '':
            print('TOTAL ELAPSED TIME OF {} -> {}:{}:{}'.format(self.name,lap_time[0],lap_time[1],lap_time[2]))
        else:
            print('TOTAL ELAPSED TIME -> {}:{}:{}'.format(lap_time[0],lap_time[1],lap_time[2]))
    def show_lap_times(self):
        tmp_count = 0
        for lap in self.lap_li:
            tmp_count += 1
            lap_time = self.calc_el_time(self.st_time,lap)
            print('LAP {} TIME -> {}:{}:{}'.format(tmp_count,lap_time[0],lap_time[1],lap_time[2]))
    def show_latest_lap_time(self):
        if len(self.lap_li) == 0:
            return
        elif len(self.lap_li) < 2:
            lap_time = self.calc_el_time(self.st_time,self.lap_li[-1])
        else:
            lap_time = self.calc_el_time(self.lap_li[-2],self.lap_li[-1])
        print('LAP {} TIME -> {}:{}:{}'.format(len(self.lap_li),lap_time[0],lap_time[1],lap_time[2]))

In [4]:
#Import the ftse list
path = "C:\\Users\\Robert\\Documents\\python_scripts\\stock_trading_ml_modelling\\historical_prices\\"
tick_ftse = pd.read_csv(path + "tick_ftse.csv")
tick_ftse = tick_ftse.iloc[:,1:]
for col in tick_ftse:
    tick_ftse.rename(columns={col:clean_col_name(col)},inplace=True)
tick_ftse.head()

Unnamed: 0,company,ticker,index
0,3I,III,FTSE100
1,ADMIRAL GROUP,ADM,FTSE100
2,ANGLO AMERICAN PLC,AAL,FTSE100
3,ANTOFAGASTA,ANTO,FTSE100
4,ASHTEAD GROUP,AHT,FTSE100


In [5]:
#Import and combine prices files
# df_prices_w = pd.read_csv(path + tick_ftse["ticker"][1] + "_hist_prices_w.csv")
df_prices_w = pd.read_hdf(path + "all_hist_prices_w.h5")
#Rename columns
for col in df_prices_w:
    df_prices_w.rename(columns={col:clean_col_name(col)},inplace=True)
#Drop unwanted columns
try:
    df_prices_w.drop(columns=["unnamed_0","index"],inplace=True)
except Exception as e:
    print(e)
#Reformat columns where neccessary
df_prices_w["date"] = df_prices_w["date"].astype("datetime64")
print(df_prices_w.shape)
print(df_prices_w.dtypes)
df_prices_w.head()

"['unnamed_0' 'index'] not found in axis"
(258389, 13)
ticker               object
date         datetime64[ns]
high                float64
low                 float64
volume              float64
open                float64
close               float64
change              float64
ema12               float64
ema26               float64
macd_line           float64
signal              float64
macd                float64
dtype: object


Unnamed: 0,ticker,date,high,low,volume,open,close,change,ema12,ema26,macd_line,signal,macd
0,III,2007-12-31,1023.0,964.0,4511565.0,995.0,965.0,-30.0,,,,,
1,III,2008-01-07,989.0,917.5,16056554.0,967.5,924.0,-43.5,,,,,
2,III,2008-01-14,936.0,881.0,21691287.0,917.0,901.0,-16.0,,,,,
3,III,2008-01-21,965.0,847.0,17850580.0,891.0,917.5,26.5,,,,,
4,III,2008-01-28,971.0,903.0,12079245.0,911.0,961.0,50.0,,,,,


In [6]:
#Remove tickers with fewer than 34 entries as this is where the MACD can be calculated
print('START ROW COUNT -> {}'.format(df_prices_w.shape[0]))
print('START TICK COUNT -> {}'.format(tick_ftse.shape[0]))
for tick in tick_ftse.ticker:
    if df_prices_w[df_prices_w.ticker == tick].shape[0] < 34:
        #Remove from dataframe
        print('\nTOO FEW RECORDS FOR {}'.format(tick))
        df_prices_w = df_prices_w.loc[df_prices_w.ticker != tick]
        print('    NEW ROW COUNT -> {}'.format(df_prices_w.shape[0]))
        #Remove from tick_ftse
        tick_ftse = tick_ftse.loc[tick_ftse.ticker != tick]
        print('    NEW TICK COUNT -> {}'.format(tick_ftse.shape[0]))
print('\nEND ROW COUNT -> {}'.format(df_prices_w.shape[0]))
print('END TICK COUNT -> {}'.format(tick_ftse.shape[0]))

START ROW COUNT -> 258389
START TICK COUNT -> 350

TOO FEW RECORDS FOR FLTR
    NEW ROW COUNT -> 258374
    NEW TICK COUNT -> 349

TOO FEW RECORDS FOR AGT
    NEW ROW COUNT -> 258362
    NEW TICK COUNT -> 348

TOO FEW RECORDS FOR NETW
    NEW ROW COUNT -> 258339
    NEW TICK COUNT -> 347

END ROW COUNT -> 258339
END TICK COUNT -> 347


In [7]:
#Get in-row price change
def calc_changes(df_in,var_col,prev_col):
    df_in["change"] = df_in[var_col] - df_in[prev_col]
    df_in["per_change"] = df_in["change"] / df_in[prev_col]
    return (df_in["change"],df_in["per_change"])

#Relabel signal to signal_line
df_prices_w.rename(columns={"signal":"signal_line"},inplace=True)
for col in df_prices_w:
    df_prices_w.rename(columns={col:col.lower()},inplace=True)
    
df_prices_w["change_price"],df_prices_w["per_change_price"] = calc_changes(df_prices_w[["close","open"]].copy(),"close","open")
df_prices_w.drop(columns=["change"],inplace=True)
df_prices_w.head()

Unnamed: 0,ticker,date,high,low,volume,open,close,ema12,ema26,macd_line,signal_line,macd,change_price,per_change_price
0,III,2007-12-31,1023.0,964.0,4511565.0,995.0,965.0,,,,,,-30.0,-0.030151
1,III,2008-01-07,989.0,917.5,16056554.0,967.5,924.0,,,,,,-43.5,-0.044961
2,III,2008-01-14,936.0,881.0,21691287.0,917.0,901.0,,,,,,-16.0,-0.017448
3,III,2008-01-21,965.0,847.0,17850580.0,891.0,917.5,,,,,,26.5,0.029742
4,III,2008-01-28,971.0,903.0,12079245.0,911.0,961.0,,,,,,50.0,0.054885


In [8]:
#Function for calculating ema
def calc_ema(_s_in,_periods):
    #Calc mod val
    _mod = 2/(_periods+1)
    #Calc sma
    _sma_s = [0] * _s_in.shape[0]
    for _i in range(0,_periods):
        _sma_s += _s_in.shift(_i) / _periods
    #Calc ema
    _ema_s = _sma_s.copy()
    _ema_s[(_ema_s > 0) & (np.isnan(_ema_s) == False)] = _mod*(_s_in - _ema_s.shift(1)) + _ema_s.shift(1)
    return _ema_s.copy()

In [9]:
#Function for calculating the MACD
def calc_macd(_ema_lng_s,_ema_sht_s,_sig_period:int):
    #Make a df
    _tmp_df = pd.DataFrame([])
    _tmp_df['ema_lng'] = _ema_lng_s
    _tmp_df['ema_sht'] = _ema_sht_s
    #Calc the signal line
    _tmp_df['macd_line'] = _tmp_df['ema_sht'] - _tmp_df['ema_lng']
    _tmp_df['signal'] = calc_ema(_tmp_df['macd_line'],_sig_period)
    _tmp_df['macd_hist'] = _tmp_df['macd_line'] - _tmp_df['signal']
    return (_tmp_df['macd_line'].copy(),_tmp_df['signal'].copy(),_tmp_df['macd_hist'].copy())

In [10]:
#Calc the ema and macds for the data
def calc_ema_macd(_df_in):
    _tick_df = _df_in.copy()
    try:
        #Add in the ema and macd
        _tick_df = _tick_df.sort_values(by='date')
#         _tick_df = _tick_df.reset_index(drop=True)
        _tick_df['ema12'] = calc_ema(_tick_df['close'],12)
        _tick_df['ema26'] = calc_ema(_tick_df['close'],26)
        _tick_df['macd_line'],_tick_df['signal'],_tick_df['macd'] = calc_macd(_tick_df['ema26'],_tick_df['ema12'],9)
        return _tick_df
    except Exception as e:
        print('ERROR:{}'.format(e))
        return _tick_df

In [11]:
#Create a function which normalises a feature based only on the values which have come before it - avoids time series bias
def norm_time_s(_index,_s_in):
    _min = np.nanmin(_s_in[:_index+1].values)
    _max = np.nanmax(_s_in[:_index+1].values)
    _norm_val = (_s_in[_index] - _min) / (_max - _min)
    return _norm_val

In [12]:
#Runt he functions
def norm_prices(_df_in):
    _df_out = _df_in.copy()
    
    #Normalise the columns which need it
    _norm_cols = [
        #Standard features
        "open"
        ,"close"
        ,"high"
        ,"low"
        ,"volume"
    ]
    #Reset the index
    _df_out.sort_values(['date'],ascending=True,inplace=True)
    #Normalise
    for _col in _norm_cols:
        _tmp_s = _df_out[_col].copy() #Take a copy so as the values are changed this does not affect following calculations
        _df_out[_col] = [norm_time_s(_x,_tmp_s) for _x in _df_out.index]
    
    return _df_out

In [13]:
#Normalize the prices by ticker and time then create emas and macds for each ticker
df_prices_w = df_prices_w.sort_values(['ticker','date'],ascending=[True,True])
df_prices_w.reset_index(inplace=True,drop=True)
print('NORALISING AND CALCULATING EMA & MACD VALUES')
count = 0
error_li = []
run_time = process_time()
run_time.lap()
for tick in tick_ftse.ticker:
    count += 1
    print('\nRUN FOR {} - {}'.format(tick,count))
    try:
        this_tick_df = df_prices_w[df_prices_w.ticker == tick]
        this_tick_df = norm_prices(this_tick_df.copy())
        #Calculate the ema and macd
        this_tick_df = calc_ema_macd(this_tick_df)
        #Append back on to the dataframe
        df_prices_w[df_prices_w.ticker == tick] = this_tick_df.copy()
        print('\tSUCCESS')
        run_time.lap()
        run_time.show_latest_lap_time()
    except Exception as e:
        print('\tERROR -> {}'.format(e))
        error_li.append(e)
run_time.end()
print('\n\nCOMPLETED - ERRORS ENCOUNTERED -> {}'.format(len(error_li)))
if len(error_li) > 0:
    print(error_li)

NORALISING AND CALCULATING EMA & MACD VALUES

RUN FOR III - 1
	SUCCESS
LAP 2 TIME -> 0:0:1

RUN FOR ADM - 2
	SUCCESS
LAP 3 TIME -> 0:0:1

RUN FOR AAL - 3
	SUCCESS
LAP 4 TIME -> 0:0:2

RUN FOR ANTO - 4
	SUCCESS
LAP 5 TIME -> 0:0:2

RUN FOR AHT - 5
	SUCCESS
LAP 6 TIME -> 0:0:2

RUN FOR ABF - 6
	SUCCESS
LAP 7 TIME -> 0:0:2

RUN FOR AZN - 7
	SUCCESS
LAP 8 TIME -> 0:0:2

RUN FOR AUTO - 8
	SUCCESS
LAP 9 TIME -> 0:0:0

RUN FOR AVV - 9
	SUCCESS
LAP 10 TIME -> 0:0:3

RUN FOR AV - 10
	SUCCESS
LAP 11 TIME -> 0:0:3

RUN FOR BA - 11
	SUCCESS
LAP 12 TIME -> 0:0:3

RUN FOR BARC - 12
	SUCCESS
LAP 13 TIME -> 0:0:2

RUN FOR BDEV - 13
	SUCCESS
LAP 14 TIME -> 0:0:2

RUN FOR BKG - 14
	SUCCESS
LAP 15 TIME -> 0:0:2

RUN FOR BHP - 15
	SUCCESS
LAP 16 TIME -> 0:0:1

RUN FOR BP - 16
	SUCCESS
LAP 17 TIME -> 0:0:2

RUN FOR BATS - 17
	SUCCESS
LAP 18 TIME -> 0:0:2

RUN FOR BLND - 18
	SUCCESS
LAP 19 TIME -> 0:0:2

RUN FOR BT-A - 19
	SUCCESS
LAP 20 TIME -> 0:0:2

RUN FOR BNZL - 20
	SUCCESS
LAP 21 TIME -> 0:0:2

RUN FO

  """


	SUCCESS
LAP 101 TIME -> 0:0:1

RUN FOR FOUR - 101
	SUCCESS
LAP 102 TIME -> 0:0:2

RUN FOR ASL - 102
	SUCCESS
LAP 103 TIME -> 0:0:1

RUN FOR ACA - 103
	SUCCESS
LAP 104 TIME -> 0:0:1

RUN FOR AGK - 104
	SUCCESS
LAP 105 TIME -> 0:0:2

RUN FOR ATST - 105
	SUCCESS
LAP 106 TIME -> 0:0:1

RUN FOR AMGO - 106
	SUCCESS
LAP 107 TIME -> 0:0:0

RUN FOR APAX - 107
	SUCCESS
LAP 108 TIME -> 0:0:0

RUN FOR ASCL - 108
	SUCCESS
LAP 109 TIME -> 0:0:0

RUN FOR ASHM - 109
	SUCCESS
LAP 110 TIME -> 0:0:1

RUN FOR AGR - 110
	SUCCESS
LAP 111 TIME -> 0:0:2

RUN FOR AML - 111
	SUCCESS
LAP 112 TIME -> 0:0:0

RUN FOR AVST - 112
	SUCCESS
LAP 113 TIME -> 0:0:0

RUN FOR BAB - 113
	SUCCESS
LAP 114 TIME -> 0:0:2

RUN FOR BGFD - 114
	SUCCESS
LAP 115 TIME -> 0:0:1

RUN FOR BAKK - 115
	SUCCESS
LAP 116 TIME -> 0:0:0

RUN FOR BBY - 116
	SUCCESS
LAP 117 TIME -> 0:0:2

RUN FOR BNKR - 117
	SUCCESS
LAP 118 TIME -> 0:0:1

RUN FOR BAG - 118
	SUCCESS
LAP 119 TIME -> 0:0:2

RUN FOR BBA - 119
	SUCCESS
LAP 120 TIME -> 0:0:2

RUN FOR 

	SUCCESS
LAP 264 TIME -> 0:0:2

RUN FOR PLI - 264
	SUCCESS
LAP 265 TIME -> 0:0:1

RUN FOR PSH - 265
	SUCCESS
LAP 266 TIME -> 0:0:0

RUN FOR PNL - 266
	SUCCESS
LAP 267 TIME -> 0:0:1

RUN FOR PFC - 267
	SUCCESS
LAP 268 TIME -> 0:0:1

RUN FOR PETS - 268
	SUCCESS
LAP 269 TIME -> 0:0:0

RUN FOR PTEC - 269
	SUCCESS
LAP 270 TIME -> 0:0:1

RUN FOR PLUS - 270
	SUCCESS
LAP 271 TIME -> 0:0:0

RUN FOR PCT - 271
	SUCCESS
LAP 272 TIME -> 0:0:1

RUN FOR POLY - 272
	SUCCESS
LAP 273 TIME -> 0:0:1

RUN FOR PLP - 273
	SUCCESS
LAP 274 TIME -> 0:0:0

RUN FOR PPH - 274
	SUCCESS
LAP 275 TIME -> 0:0:1

RUN FOR PMO - 275
	SUCCESS
LAP 276 TIME -> 0:0:2

RUN FOR PHP - 276
	SUCCESS
LAP 277 TIME -> 0:0:2

RUN FOR PFG - 277
	SUCCESS
LAP 278 TIME -> 0:0:2

RUN FOR PRTC - 278
	SUCCESS
LAP 279 TIME -> 0:0:0

RUN FOR PZC - 279
	SUCCESS
LAP 280 TIME -> 0:0:2

RUN FOR QQ - 280
	SUCCESS
LAP 281 TIME -> 0:0:1

RUN FOR QLT - 281
	SUCCESS
LAP 282 TIME -> 0:0:0

RUN FOR RNK - 282
	SUCCESS
LAP 283 TIME -> 0:0:2

RUN FOR RAT - 

In [14]:
df_prices_w = df_prices_w.sort_values(['ticker','date'],ascending=[True,True])
df_prices_w.reset_index(inplace=True,drop=True)
print(df_prices_w.ticker.unique())
df_prices_w.head()

['3IN' 'AAL' 'ABF' 'ACA' 'ADM' 'AGK' 'AGR' 'AHT' 'AJB' 'AMGO' 'AML' 'ANTO'
 'APAX' 'ASCL' 'ASHM' 'ASL' 'ATST' 'AUTO' 'AV' 'AVST' 'AVV' 'AZN' 'BA'
 'BAB' 'BAG' 'BAKK' 'BARC' 'BATS' 'BBA' 'BBGI' 'BBOX' 'BBY' 'BCA' 'BDEV'
 'BEZ' 'BGEO' 'BGFD' 'BGSC' 'BHP' 'BKG' 'BLND' 'BME' 'BNKR' 'BNZL' 'BOY'
 'BP' 'BRBY' 'BRSC' 'BT-A' 'BTEM' 'BVIC' 'BVS' 'BWY' 'BYG' 'CAPC' 'CARD'
 'CBG' 'CCC' 'CCFS' 'CCH' 'CCL' 'CEY' 'CINE' 'CKN' 'CLDN' 'CLI' 'CNA'
 'CNE' 'COA' 'COB' 'CPG' 'CPI' 'CRDA' 'CRH' 'CRST' 'CSP' 'CTEC' 'CTY'
 'CWK' 'CYBG' 'DC' 'DCC' 'DGE' 'DJAN' 'DLG' 'DLN' 'DNLM' 'DOM' 'DPH'
 'DPLM' 'DRX' 'ECM' 'EDIN' 'EIG' 'ELM' 'EMG' 'ENOG' 'EQN' 'ERM' 'ESNT'
 'ETO' 'EVR' 'EXPN' 'EZJ' 'FCH' 'FCPT' 'FCSS' 'FDM' 'FERG' 'FEV' 'FGP'
 'FGT' 'FOUR' 'FRCL' 'FRES' 'FSJ' 'FSV' 'FUTR' 'FXPO' 'GAW' 'GCP' 'GFRD'
 'GFS' 'GFTU' 'GLEN' 'GLO' 'GNC' 'GNK' 'GNS' 'GOG' 'GPOR' 'GRG' 'GRI'
 'GSK' 'GSS' 'GVC' 'HAS' 'HFG' 'HGT' 'HICL' 'HIK' 'HILS' 'HL' 'HLMA'
 'HMSO' 'HOC' 'HRI' 'HSBA' 'HSTG' 'HSV' 'HSX' 'HTG' 'HVPE' 'HWDN' 'IAG'


Unnamed: 0,ticker,date,high,low,volume,open,close,ema12,ema26,macd_line,signal_line,macd,change_price,per_change_price
0,3IN,2007-12-31,,,,,,,,,,,-1.75,-0.011731
1,3IN,2008-01-07,0.0,,1.0,0.0,1.0,,,,,,1.76,0.011967
2,3IN,2008-01-14,1.0,1.0,0.799111,0.668246,1.0,,,,,,0.7,0.004714
3,3IN,2008-01-21,1.0,0.0,1.0,0.336493,1.0,,,,,,4.22,0.028556
4,3IN,2008-01-28,1.0,1.0,0.670041,1.0,1.0,,,,,,2.11,0.014077


# Create buy signals, and sell signals
I am classifying a stock worth buying if it meets all the below criteria:
- The target price (previous max) is hit within x (set as variable target_price_period) periods proceeding this period
- The target price is an increase of x% (set as variable min_gain) over the proceeding day's open price
- There is a drop in the closing price of less x% (set as variable max_drop) between this period and the x periods proceeding this period

I am classifying the sell signals as:
- The close price dips below the target price (previous max) x (set as variable target_price_period) periods proceeding this period
- There is an increase in the closing price of less x% (set as variable max_drop) over the next x periods

In [15]:
#Define the variables
target_price_period = 12
period_high_volatility = 3
period_low_volatility = 1
min_gain = 0.1
max_drop = -0.05

In [16]:
#Programming note
#df.shift(1) looks 1 period into the past
#df.shift(-1) looks 1 period into the future

In [17]:
#Check if the target price is hit within the target_price_period
def min_gain_check(_var_s,_target_s,_periods:int=12):
    _check_s = [False] * _var_s.shape[0]
    for _i in range(1,_periods+1):
        _tmp_check_s = _var_s.shift(-_i) > _target_s #True if price is >= limit
        _check_s = _check_s | _tmp_check_s
    return _check_s

In [18]:
def max_drop_check(_var_s,_target_s,periods:int=12):
    _check_s = [False] * _var_s.shape[0]
    for _i in range(1,periods+1):
        _tmp_check_s = _var_s.shift(-_i) < _target_s #True if price is <= limit
        _check_s = _check_s | _tmp_check_s
    return _check_s

In [19]:
#Create a function for finding buy signals
def get_buys(var_s):
    
    #Check if the target price is hit within the target_price_period
    target_s = var_s * (1+min_gain)
    min_gain_s = min_gain_check(var_s,target_s,target_price_period) == True #Function returns True when min_gain is hit
    print('BUY min_gain_s -> {}'.format(min_gain_s[min_gain_s == True].shape))
    
    #Check if the sell price is hit within the target_price_period
    target_s = var_s * (1+max_drop)
    max_drop_s = max_drop_check(var_s,target_s,target_price_period) == False #Function returns False when does not go below target
    print('BUY max_drop_s -> {}'.format(max_drop_s[max_drop_s == True].shape))
    
    #Find the buy signals
    s_out = min_gain_s & max_drop_s
    print('BUY ALL -> {}'.format(s_out[s_out == True].shape))
    
    return s_out

In [20]:
#Function for finding sell signals
def get_sells(var_s):
    
    #Check if the target price is hit within the target_price_period
    target_s = var_s * (1+max_drop)
    max_drop_s = max_drop_check(var_s,target_s,target_price_period) == True #Function returns True when max_drop is hit
    print('SELL max_drop_s -> {}'.format(max_drop_s[max_drop_s == True].shape))
    
    #Perform if the target is crossed again
    target_s = var_s * (1+min_gain)
    min_gain_s = min_gain_check(var_s,target_s,target_price_period) == False #Function returns False when min_gain is not hit
    print('SELL min_gain_s -> {}'.format(min_gain_s[min_gain_s == True].shape))
    
    #Find the sell signals
    s_out = max_drop_s & min_gain_s
    print('SELL ALL -> {}'.format(s_out[s_out == True].shape))
    
    return s_out

In [21]:
#Get buy signals
df_prices_w['buy'] = get_buys(df_prices_w['close'])

#Get sell signals
df_prices_w['sell'] = get_sells(df_prices_w['close'])

#Get hold signals
df_prices_w["hold"] = (df_prices_w["buy"] == False) & (df_prices_w["sell"] == False)

print('BUY PERCENTAGE -> {:.2f}%'.format(df_prices_w[df_prices_w['buy'] == True].shape[0]/df_prices_w.shape[0]))
print('SELL PERCENTAGE -> {:.2f}%'.format(df_prices_w[df_prices_w['sell'] == True].shape[0]/df_prices_w.shape[0]))
print('HOLD PERCENTAGE -> {:.2f}%'.format(df_prices_w[df_prices_w['hold'] == True].shape[0]/df_prices_w.shape[0]))

BUY min_gain_s -> (138213,)
BUY max_drop_s -> (104310,)
BUY ALL -> (82965,)
SELL max_drop_s -> (154029,)
SELL min_gain_s -> (120126,)
SELL ALL -> (98781,)
BUY PERCENTAGE -> 0.32%
SELL PERCENTAGE -> 0.38%
HOLD PERCENTAGE -> 0.30%


# Create additional features

In [22]:
#Mark minimums and maximums
def flag_mins(s_in,period):
    s_out = 0
    for i in range(1,period):
        s_out += (s_in > s_in.shift(i)) & (s_in.shift(-i).isnull() == False) #Is this value > the previous value
        s_out += (s_in > s_in.shift(-i)) | (s_in.shift(-i).isnull()) #Is this value > the next value OR is it the last available value (IE the next value is null)
        s_out += s_in.isnull()
#     s_out[((s_in.index == 0) & (s_in.isnull() == False)) | ((s_in.shift(1).isnull()) & (s_in.isnull() == False))] = 0 #set the first non-NaN result to 0
    s_out = s_out == 0
    return s_out
def flag_maxs(s_in,period):
    s_out = 0
    for i in range(1,period):
        s_out += (s_in < s_in.shift(i)) & (s_in.shift(-i).isnull() == False) #Is this value < the previous value (if the previous value is not null)
        s_out += (s_in < s_in.shift(-i)) | (s_in.shift(-i).isnull()) #Is this value < the next value OR is it the last available value (IE the next value is null)
        s_out += s_in.isnull()
#     s_out[((s_in.index == 0) & (s_in.isnull() == False)) | ((s_in.shift(1).isnull()) & (s_in.isnull() == False))] = 0 #set the first non-NaN result to 0
    s_out = s_out == 0
    return s_out

In [23]:
#Calc vol as proportion of previous n-rows
def calc_prop_of_prev(s_in,periods:int = 4):
    s_cum = s_in.copy()
    for i in range(1,periods):
        s_cum += s_in.shift(i)
    return s_in / s_cum

In [24]:
#Mark points of macd positive entry
def pos_entry(s_in):
    return (s_in > s_in.shift(1)) & (s_in > 0) & (s_in.shift(1) < 0)
def neg_entry(s_in):
    return (s_in < s_in.shift(1)) & (s_in < 0) & (s_in.shift(1) > 0)

In [25]:
#Function to normalise current price compared to another
def norm_s(s_in,max_in,min_in):
    s_out = (s_in - min_in) / (max_in - min_in)
    s_out.fillna(1.0,inplace=True)
    return s_out

In [26]:
#Function to find last max and mins
def prev_value(df_in,var_col,bool_col):
    df_in["prev_val"] = df_in.loc[df_in[bool_col],var_col]
    df_in["prev_val"] = df_in["prev_val"].fillna(method='ffill')
    df_in["prev_marker_date"] = df_in.loc[df_in[bool_col],"date"]
    df_in["prev_marker_date"] = df_in["prev_marker_date"].fillna(method='ffill')
    return (df_in["prev_val"],df_in["prev_marker_date"])

In [27]:
#Create separate columns for pos and neg values - allows for normalisation
def pos_neg_cols(s_in,gt_lt = "GT"):
    if gt_lt.upper() == "GT":
        bool_s = s_in >= 0
    elif gt_lt.upper() == "LT":
        bool_s = s_in <= 0
    df_out = s_in.to_frame()
    df_out["s_in"] = s_in
    df_out["val"] = abs(s_in[bool_s])
    val_s = df_out["val"].fillna(0,method=None)
    return (bool_s,val_s)

In [28]:
#Function for calculating the max and mins within a period
def max_min_period(s_in,periods:int=4):
    #Copy into max and min
    max_s = s_in.copy()
    min_s = s_in.copy()
    #Go through periods and mark where it is a new max or min
    for i in range(1,(periods+1)):
        #Calc max
        tmp_check_s = s_in.shift(i) >= max_s
        max_s[tmp_check_s] = s_in.shift(i)
        #Calc min
        tmp_check_s = s_in.shift(i) <= min_s
        min_s[tmp_check_s] = s_in.shift(i)
    #Normalise the series
    s_out = norm_s(s_in,max_s,min_s)
    return s_out

In [29]:
#Create a dictionary of max character lengths of fields for use later in h5 file appending
def get_col_lens(_df_in):
    _col_lens = {}
    for c in _df_in:
        _tmp_s = pd.Series([len(str(x)) for x in _df_in[c]])
        _col_lens[c] = _tmp_s.max()
    return _col_lens
col_lens = get_col_lens(df_prices_w)
col_lens

{'ticker': 4,
 'date': 19,
 'high': 22,
 'low': 22,
 'volume': 22,
 'open': 22,
 'close': 22,
 'ema12': 22,
 'ema26': 22,
 'macd_line': 23,
 'signal_line': 23,
 'macd': 23,
 'change_price': 21,
 'per_change_price': 23,
 'buy': 5,
 'sell': 5,
 'hold': 5}

In [30]:
#Create a single function to run each stock through feature creation
def create_features(df_in):  
    
    df_out = df_in.copy() 
    
    #Calc vol as proportion of previous n-rows
    df_out["prop_vol"] = calc_prop_of_prev(df_out["volume"].copy().astype("float"),6)

    #Get period-period changes
    df_out["close_shift1"] = df_out["close"].shift(1)
    df_out["change_close_shift1"],df_out["per_change_close_shift1"] = calc_changes(df_out[["close","close_shift1"]].copy(),"close","close_shift1")

    df_out["vol_shift1"] = df_out["volume"].shift(1)
    df_out["change_vol_shift1"],df_out["per_change_vol_shift1"] = calc_changes(df_out[["volume","vol_shift1"]].copy(),"volume","vol_shift1")

    df_out["macd_shift1"] = df_out["macd"].shift(1)
    df_out["change_macd_shift1"],df_out["per_change_macd_shift1"] = calc_changes(df_out[["macd","vol_shift1"]].copy(),"macd","vol_shift1")

    df_out["ema26_shift1"] = df_out["ema26"].shift(1)
    df_out["change_ema26_shift1"],df_out["per_change_ema26_shift1"] = calc_changes(df_out[["ema26","ema26_shift1"]].copy(),"ema26","ema26_shift1")
    
    #Compare close to the max/mins within 13, 26 and 52 periods
    df_out["close_13_norm"] = max_min_period(df_out["close"],13)
    df_out["close_26_norm"] = max_min_period(df_out["close"],26)
    df_out["close_52_norm"] = max_min_period(df_out["close"],52)
    
    #Compare macd to the max/mins within 13, 26 and 52 periods
    df_out["macd_line_13_norm"] = max_min_period(df_out["macd_line"],13)
    df_out["macd_line_26_norm"] = max_min_period(df_out["macd_line"],26)
    df_out["macd_line_52_norm"] = max_min_period(df_out["macd_line"],52)
        
    #Mark points of macd positive entry
    df_out["macd_pos_ent"] = pos_entry(df_out["macd"])
    df_out["macd_neg_ent"] = neg_entry(df_out["macd"])
    
    #Create separate columns for pos and neg values - allows for normalisation
    df_out["macd_pos_bool"],df_out["macd_pos_val"] = pos_neg_cols(df_out["macd"],gt_lt = "GT")
    df_out["macd_neg_bool"],df_out["macd_neg_val"] = pos_neg_cols(df_out["macd"],gt_lt = "LT")
    df_out["signal_line_pos_bool"],df_out["signal_line_pos_val"] = pos_neg_cols(df_out["signal_line"],gt_lt = "GT")
    df_out["signal_line_neg_bool"],df_out["signal_line_neg_val"] = pos_neg_cols(df_out["signal_line"],gt_lt = "LT")
    df_out["change_price_pos_bool"],df_out["change_price_pos_val"] = pos_neg_cols(df_out["change_price"],gt_lt = "GT")
    df_out["change_price_neg_bool"],df_out["change_price_neg_val"] = pos_neg_cols(df_out["change_price"],gt_lt = "LT")
    df_out["per_change_price_pos_bool"],df_out["per_change_price_pos_val"] = pos_neg_cols(df_out["per_change_price"],gt_lt = "GT")
    df_out["per_change_price_neg_bool"],df_out["per_change_price_neg_val"] = pos_neg_cols(df_out["per_change_price"],gt_lt = "LT")

    #Create max min columns
    def mk_cols_max_min(tmp_df,col,period:int=4):
        tmp_df["{}_min".format(col)] = flag_mins(tmp_df[col],period)
        tmp_df["{}_max".format(col)] = flag_maxs(tmp_df[col],period)
        
    #Find previous max and mins, then look at:
        # - how many positive or negative moves in a row there has been
        # - what the move since the last (n-1) max/min was
        # - what the gradient is since the last (n-1) max/min
        # - what the move since the first max/min was
        # - what the gradient since the first max/min was
    def mk_cols_prev_max_min(tmp_df,col,period:int=4):
        #GETTING THE MAX/MINS
        tmp_df["prev_max_{}".format(col)],tmp_df["prev_max_{}_date".format(col)] = prev_value(tmp_df[["date",col,"{}_max".format(col)]].copy(),col,"{}_max".format(col))
        tmp_df["prev_min_{}".format(col)],tmp_df["prev_min_{}_date".format(col)] = prev_value(tmp_df[["date",col,"{}_min".format(col)]].copy(),col,"{}_min".format(col))
        #Shift the max min columns by n periods to not leak future information
        tmp_df["prev_max_{}".format(col)] = tmp_df["prev_max_{}".format(col)].shift(period)
        tmp_df["prev_min_{}".format(col)] = tmp_df["prev_min_{}".format(col)].shift(period)
        tmp_df["prev_max_{}_date".format(col)] = tmp_df["prev_max_{}_date".format(col)].shift(period)
        tmp_df["prev_min_{}_date".format(col)] = tmp_df["prev_min_{}_date".format(col)].shift(period)
        def mk_prev_move_float(_s_in):
            _s_out = _s_in - _s_in.shift(1)
            _s_out[_s_out == 0] = np.nan
            _s_out = _s_out.fillna(method='ffill')
            return _s_out
        def mk_prev_move_date(_s_in,_periods:int=7):
            _s_out = _s_in - _s_in.shift(1)
            _s_check = pd.Series([np.floor(_x.days) for _x in _s_out])
            _s_check[_s_check == 0] = np.nan
            _s_check = _s_check.fillna(method='ffill')
            _s_check = [np.floor(_x/_periods) for _x in _s_check]
            return _s_check
        #Create features for the cumulative sequential count of max/mins in a certain direction
        def mk_move_cum(_s_in,_gtlt:str='pos'):
            _li_out = []
            _prev_x = None
            #Check which direction we are looking for
            if _gtlt.lower() == 'pos':
                #Loop through each value in _s_in
                for _i,_x in _s_in.iteritems():
                    if _x < 0: #If less than 0 then reset to 0
                        _li_out.append(0)
                    else:
                        if len(_li_out) == 0: #If this is the first value add it to the list
                            _li_out.append(1)
                        else:
                            if _prev_x != _x: #if there has been a change in value from this and the previous value increment it by 1
                                _li_out.append(_li_out[-1] + 1)
                            else: #Otherwise just use the last added value
                                _li_out.append(_li_out[-1])
                    _prev_x = _x
            elif _gtlt.lower() == 'neg':
                for _i,_x in _s_in.iteritems():
                    if _x > 0:
                        _li_out.append(0)
                    else:
                        if len(_li_out) == 0:
                            _li_out.append(1)
                        else:
                            if _prev_x != _x:
                                _li_out.append(_li_out[-1] + 1)
                            else:
                                _li_out.append(_li_out[-1])
                    _prev_x = _x
            return _li_out
        #Create features showing the value change since the first min/max
        def mk_long_prev_move_float(_ref_s,_val_s,_gtlt:str='pos'):
            _li_out = []
            _st_x = None
            #Check which direction we are looking for
            if _gtlt.lower() == 'pos':
                #Loop through each value in _s_in
                for _i,_x in _ref_s.iteritems():
                    if _x < 0: #If less than 0 then reset to 0, and reset _st_x
                        _li_out.append(0)
                        _st_x = None
                    else:
                        if _st_x == None: #If _st_x has not been set yet set it to this value
                            _st_x = _val_s[_i]
                        _li_out.append(_val_s[_i] - _st_x) #Now calculate the difference and add it to the list
            elif _gtlt.lower() == 'neg':
                #Loop through each value in _s_in
                for _i,_x in _ref_s.iteritems():
                    if _x > 0: #If greater than 0 then reset to 0, and reset _st_x
                        _li_out.append(0)
                        _st_x = None
                    else:
                        if _st_x == None: #If _st_x has not been set yet set it to this value
                            _st_x = _val_s[_i]
                        _li_out.append(_val_s[_i] - _st_x) #Now calculate the difference and add it to the list
            return _li_out
        def mk_long_prev_move_date(_ref_s,_val_s,_gtlt:str='pos',_periods:int=7):
            _li_out = []
            _st_x = None
            #Check which direction we are looking for
            if _gtlt.lower() == 'pos':
                #Loop through each value in _s_in
                for _i,_x in _ref_s.iteritems():
                    if _x < 0: #If less than 0 then reset to 0, and reset _st_x
                        _li_out.append(0)
                        _st_x = None
                    else:
                        if _st_x == None: #If _st_x has not been set yet set it to this value
                            _st_x = _val_s[_i]
                        _li_out.append(np.floor((_val_s[_i] - _st_x).days/_periods)) #Now calculate the difference and add it to the list
            elif _gtlt.lower() == 'neg':
                #Loop through each value in _s_in
                for _i,_x in _ref_s.iteritems():
                    if _x > 0: #If less than 0 then reset to 0, and reset _st_x
                        _li_out.append(0)
                        _st_x = None
                    else:
                        if _st_x == None: #If _st_x has not been set yet set it to this value
                            _st_x = _val_s[_i]
                        _li_out.append(np.floor((_val_s[_i] - _st_x).days/_periods)) #Now calculate the difference and add it to the list
            return _li_out
        #WHAT WAS THE MOVE SINCE THE LAST (N-1) MAX/MIN
        tmp_df['prev_max_move_{}'.format(col)] = mk_prev_move_float(tmp_df["prev_max_{}".format(col)])
        tmp_df['prev_max_date_move_{}'.format(col)] = mk_prev_move_date(tmp_df["prev_max_{}_date".format(col)])        
        tmp_df['prev_min_move_{}'.format(col)] = mk_prev_move_float(tmp_df["prev_min_{}".format(col)])
        tmp_df['prev_min_date_move_{}'.format(col)] = mk_prev_move_date(tmp_df["prev_min_{}_date".format(col)])
        #WHAT IS THE GRADIENT SINCE THE LAST (N-1) MAX/MIN
        tmp_df['prev_max_grad_{}'.format(col)] = tmp_df['prev_max_move_{}'.format(col)] / tmp_df['prev_max_date_move_{}'.format(col)]
        tmp_df['prev_min_grad_{}'.format(col)] = tmp_df['prev_min_move_{}'.format(col)] / tmp_df['prev_min_date_move_{}'.format(col)]
        #HOW MANY PROGRESSIVE MAX/MINS IN A ROW HAVE THERE BEEN - UP OR DOWN FOR BOTH OPTIONS
        tmp_df['max_move_cum_pos_{}'.format(col)] = mk_move_cum(tmp_df['prev_max_move_{}'.format(col)],'pos')
        tmp_df['max_move_cum_neg_{}'.format(col)] = mk_move_cum(tmp_df['prev_max_move_{}'.format(col)],'neg')
        tmp_df['min_move_cum_pos_{}'.format(col)] = mk_move_cum(tmp_df['prev_min_move_{}'.format(col)],'pos')
        tmp_df['min_move_cum_neg_{}'.format(col)] = mk_move_cum(tmp_df['prev_min_move_{}'.format(col)],'neg')
        #WHAT WAS THE MOVE SINCE THE FIRST (N=0) MAX/MIN
        tmp_df['long_prev_max_move_pos_{}'.format(col)] = mk_long_prev_move_float(tmp_df['prev_max_move_{}'.format(col)],tmp_df['prev_max_{}'.format(col)],'pos')
        tmp_df['long_prev_max_move_neg_{}'.format(col)] = mk_long_prev_move_float(tmp_df['prev_max_move_{}'.format(col)],tmp_df['prev_max_{}'.format(col)],'neg')
        tmp_df['long_prev_min_move_pos_{}'.format(col)] = mk_long_prev_move_float(tmp_df['prev_min_move_{}'.format(col)],tmp_df['prev_min_{}'.format(col)],'pos')
        tmp_df['long_prev_min_move_neg_{}'.format(col)] = mk_long_prev_move_float(tmp_df['prev_min_move_{}'.format(col)],tmp_df['prev_min_{}'.format(col)],'neg')
        #WHAT WAS THE TIMEDELTA SINCE THE FIRST (N=0) MAX/MIN
        tmp_df['long_prev_max_move_pos_date_{}'.format(col)] = mk_long_prev_move_date(tmp_df['prev_max_move_{}'.format(col)],tmp_df['prev_max_{}_date'.format(col)],'pos')
        tmp_df['long_prev_max_move_neg_date_{}'.format(col)] = mk_long_prev_move_date(tmp_df['prev_max_move_{}'.format(col)],tmp_df['prev_max_{}_date'.format(col)],'neg')
        tmp_df['long_prev_min_move_pos_date_{}'.format(col)] = mk_long_prev_move_date(tmp_df['prev_min_move_{}'.format(col)],tmp_df['prev_min_{}_date'.format(col)],'pos')
        tmp_df['long_prev_min_move_neg_date_{}'.format(col)] = mk_long_prev_move_date(tmp_df['prev_min_move_{}'.format(col)],tmp_df['prev_min_{}_date'.format(col)],'neg')
        #WHAT IS THE GRADIENT SINCE THE FIRST (N=0) MAX/MIN
        tmp_df['long_max_grad_pos_{}'.format(col)] = tmp_df['long_prev_max_move_pos_{}'.format(col)] / tmp_df['long_prev_max_move_pos_date_{}'.format(col)]
        tmp_df['long_max_grad_pos_{}'.format(col)] = tmp_df['long_max_grad_pos_{}'.format(col)].fillna(0)
        tmp_df['long_max_grad_neg_{}'.format(col)] = tmp_df['long_prev_max_move_neg_{}'.format(col)] / tmp_df['long_prev_max_move_neg_date_{}'.format(col)]
        tmp_df['long_max_grad_neg_{}'.format(col)] = tmp_df['long_max_grad_neg_{}'.format(col)].fillna(0)
        tmp_df['long_min_grad_pos_{}'.format(col)] = tmp_df['long_prev_min_move_pos_{}'.format(col)] / tmp_df['long_prev_min_move_pos_date_{}'.format(col)]
        tmp_df['long_min_grad_pos_{}'.format(col)] = tmp_df['long_min_grad_pos_{}'.format(col)].fillna(0)
        tmp_df['long_min_grad_neg_{}'.format(col)] = tmp_df['long_prev_min_move_neg_{}'.format(col)] / tmp_df['long_prev_min_move_neg_date_{}'.format(col)]
        tmp_df['long_min_grad_neg_{}'.format(col)] = tmp_df['long_min_grad_neg_{}'.format(col)].fillna(0)
    
    #Calc the value changes and percentage changes of these movements
    def mk_cols_prev_max_min_change(tmp_df,col):
        tmp_df["max_change_{}".format(col)],tmp_df["max_per_change_{}".format(col)] = calc_changes(tmp_df[[col,"prev_max_{}".format(col)]].copy(),col,"prev_max_{}".format(col))
        tmp_df["min_change_{}".format(col)],tmp_df["min_per_change_{}".format(col)] = calc_changes(tmp_df[[col,"prev_min_{}".format(col)]].copy(),col,"prev_min_{}".format(col))
        
    #Mark date change since max and mins and convert to periods
    def mk_cols_prev_max_min_date_change(tmp_df,col,period:int=7):
        tmp_df["prev_max_{}_date_change".format(col)] = tmp_df["date"] - tmp_df["prev_max_{}_date".format(col)]
        tmp_df["prev_min_{}_date_change".format(col)] = tmp_df["date"] - tmp_df["prev_min_{}_date".format(col)]
        #Convert all to period changes
        tmp_df["prev_max_{}_date_change".format(col)] = [np.floor(x.days/period) for x in tmp_df["prev_max_{}_date_change".format(col)]]
        tmp_df["prev_min_{}_date_change".format(col)] = [np.floor(x.days/period) for x in tmp_df["prev_min_{}_date_change".format(col)]]
    
    #Convert the max min changes into pos and neg columns
    def mk_cols_prev_max_min_change_pos_neg(tmp_df,col):
        #Only do pos changes for mins and neg changes for max (as it can only go below the max and above the min)
        tmp_df["max_change_{}_pos_bool".format(col)],tmp_df["max_change_{}_pos_val".format(col)] = pos_neg_cols(tmp_df["max_change_{}".format(col)],gt_lt = "GT")
        tmp_df["max_change_{}_neg_bool".format(col)],tmp_df["max_change_{}_neg_val".format(col)] = pos_neg_cols(tmp_df["max_change_{}".format(col)],gt_lt = "LT")
        tmp_df["min_change_{}_pos_bool".format(col)],tmp_df["min_change_{}_pos_val".format(col)] = pos_neg_cols(tmp_df["min_change_{}".format(col)],gt_lt = "GT")
        tmp_df["min_change_{}_neg_bool".format(col)],tmp_df["min_change_{}_neg_val".format(col)] = pos_neg_cols(tmp_df["min_change_{}".format(col)],gt_lt = "LT")
        #Drop the old columns
        tmp_df.drop(columns=["max_change_{}".format(col),"min_change_{}".format(col)],inplace=True)

    #Run function for columns - high volatility
    for col in ['close','signal_line']:
        mk_cols_max_min(df_out,col,period_high_volatility)
        mk_cols_prev_max_min(df_out,col,period_high_volatility)
        mk_cols_prev_max_min_change(df_out,col) 
        mk_cols_prev_max_min_date_change(df_out,col,7)
        mk_cols_prev_max_min_change_pos_neg(df_out,col)
    #Run function for columns - low volatility
    for col in ['macd','ema26','macd_line']:
        mk_cols_max_min(df_out,col,period_low_volatility)
        mk_cols_prev_max_min(df_out,col,period_low_volatility)
        mk_cols_prev_max_min_change(df_out,col) 
        mk_cols_prev_max_min_date_change(df_out,col,7)
        mk_cols_prev_max_min_change_pos_neg(df_out,col)
    
    #Check for undefined
    df_out["no_signal"] = (df_out["buy"] == False) & (df_out["hold"] == False) & (df_out["sell"] == False)
    
    #Composite all singals into one column
    df_out["signal"] = None
    df_out.loc[df_out["buy"] == True,"signal"] = "buy"
    df_out.loc[df_out["hold"] == True,"signal"] = "hold"
    df_out.loc[df_out["sell"] == True,"signal"] = "sell"
    df_out.drop(columns=["buy","hold","sell"],inplace=True)
    print("SIGNAL COUNTS: \n{}".format(df_out["signal"].value_counts()))
    
    return df_out

In [31]:
#Define the columns for the output
out_cols = [
    #NON-NORMALISED COLS
    "ticker"
    ,"date"
    #NORMALISED COLS
    #Standard features
    ,"open"
    ,"close"
    ,"high"
    ,"low"
    ,"volume"
    ,"change_price"
    ,"per_change_price"
    #Shifted features
    ,"close_shift1"
    ,"change_close_shift1"
    ,"vol_shift1"
    ,"change_vol_shift1"
    ,"ema26_shift1"
    ,"change_ema26_shift1"
    #change to periodic max mins
    ,"close_13_norm"
    ,"close_26_norm"
    ,"close_52_norm"
    ,"macd_line_13_norm"
    ,"macd_line_26_norm"
    ,"macd_line_52_norm"
    #Pos neg features
    ,"macd_pos_val"
    ,"macd_neg_val"
    ,"signal_line_pos_val"
    ,"signal_line_neg_val"
    ,"change_price_pos_val"
    ,"change_price_neg_val"
    ,"per_change_price_pos_val"
    ,"per_change_price_neg_val"
]
#Append additional columns for key areas
for col in ['close','macd','ema26','signal_line','macd_line']:
    out_cols
    #Prev max/min features
    out_cols.append("prev_max_{}".format(col))
    out_cols.append("prev_min_{}".format(col))
    #date changes
    out_cols.append("prev_max_{}_date_change".format(col))
    out_cols.append("prev_min_{}_date_change".format(col))
    #Min max change features
    out_cols.append("max_change_{}_pos_val".format(col))
    out_cols.append("max_change_{}_neg_val".format(col))
    out_cols.append("min_change_{}_pos_val".format(col))
    out_cols.append("min_change_{}_neg_val".format(col))
    #prev max/mins (n-1) - compared to previous
    out_cols.append('prev_max_grad_{}'.format(col))
    out_cols.append('prev_min_grad_{}'.format(col))
    #prev max/mins (n=0) - compared to first in this run
    out_cols.append('max_move_cum_pos_{}'.format(col))
    out_cols.append('max_move_cum_neg_{}'.format(col))
    out_cols.append('min_move_cum_pos_{}'.format(col))
    out_cols.append('min_move_cum_neg_{}'.format(col))
    out_cols.append('long_prev_max_move_pos_date_{}'.format(col))
    out_cols.append('long_prev_max_move_neg_date_{}'.format(col))
    out_cols.append('long_prev_min_move_pos_date_{}'.format(col))
    out_cols.append('long_prev_min_move_neg_date_{}'.format(col))
    out_cols.append('long_max_grad_pos_{}'.format(col))
    out_cols.append('long_max_grad_neg_{}'.format(col))
    out_cols.append('long_min_grad_pos_{}'.format(col))
    out_cols.append('long_min_grad_neg_{}'.format(col))
#Append signal
out_cols.append("signal")

In [32]:
#Then loop the tickers and combine these into one large dataset
hf_store_name = path+'all_hist_prices_w_ft_eng2_TMP.h5'
h_store = pd.HDFStore(hf_store_name)
count = 0
errors = []
run_time = process_time()
for tick in tick_ftse["ticker"]:
    try:
        run_time.show_latest_lap_time()
        run_time.lap()
        count += 1
        print("\n{}".format(count))
        print("RUN FOR {}".format(tick))
        #Isolate this ticker
        this_tick_df = df_prices_w[df_prices_w["ticker"] == re.sub('[^a-zA-Z0-9\-]','',tick)].copy()
        print("shape before: {}".format(this_tick_df.shape))
        #Create the features
        this_tick_df = create_features(this_tick_df)
#         print(this_tick_df[out_cols])
        #Clarify col_lens with cur cols in data
        col_lens_tmp = {}
        for col in out_cols:
            if col in col_lens:
                col_lens_tmp[col] = col_lens[col]
        print("shape after: {}".format(this_tick_df.shape))
        #Append this data to the group
        try:
            this_tick_df[out_cols].to_hdf(hf_store_name,key='weekly_data',append=True,min_itemsize=col_lens_tmp)
            print('ADDED TO {}'.format(hf_store_name))
        except Exception as e:
            print('ERROR READING TO FILE {}'.format(e))
    except Exception as e:
        h_store.close()
        errors.append({"ticker":tick,"Error":e})
        print('ERROR PROCESSING DATA {}'.format(e))
h_store.close()
print('\n\n')
run_time.end()
print('\nERROR COUNT: {}'.format(len(errors)))
if len(errors) > 0:
    print('    ERRORS -> {}'.format(errors))


1
RUN FOR III
shape before: (612, 17)
SIGNAL COUNTS: 
sell    269
buy     185
hold    158
Name: signal, dtype: int64
shape after: (612, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 1 TIME -> 0:0:0

2
RUN FOR ADM
shape before: (368, 17)
SIGNAL COUNTS: 
buy     137
hold    129
sell    102
Name: signal, dtype: int64
shape after: (368, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 2 TIME -> 0:0:2

3
RUN FOR AAL
shape before: (1012, 17)
SIGNAL COUNTS: 
sell    405
buy     320
hold    287
Name: signal, dtype: int64
shape after: (1012, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 3 TIME -> 0:0:2

4
RUN FOR ANTO
shape before: (1050, 17)
SIGNAL COUNTS: 
sell    405
buy     382
hold    263
Name: signal, dtype: int64
shape af

SIGNAL COUNTS: 
sell    372
hold    350
buy     328
Name: signal, dtype: int64
shape after: (1050, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 29 TIME -> 0:0:3

30
RUN FOR DLG
shape before: (362, 17)
SIGNAL COUNTS: 
sell    150
buy     106
hold    106
Name: signal, dtype: int64
shape after: (362, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 30 TIME -> 0:0:3

31
RUN FOR EVR
shape before: (410, 17)
SIGNAL COUNTS: 
hold    151
sell    144
buy     115
Name: signal, dtype: int64
shape after: (410, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 31 TIME -> 0:0:2

32
RUN FOR EXPN
shape before: (675, 17)
SIGNAL COUNTS: 
sell    246
buy     234
hold    195
Name: signal, dtype: int64
shape after: (675, 253)
ADDED TO C:\Users\R

SIGNAL COUNTS: 
buy     277
hold    240
sell    232
Name: signal, dtype: int64
shape after: (749, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 57 TIME -> 0:0:3

58
RUN FOR MNDI
shape before: (630, 17)
SIGNAL COUNTS: 
sell    248
buy     210
hold    172
Name: signal, dtype: int64
shape after: (630, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 58 TIME -> 0:0:3

59
RUN FOR MRW
shape before: (1050, 17)
SIGNAL COUNTS: 
sell    442
hold    312
buy     296
Name: signal, dtype: int64
shape after: (1050, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 59 TIME -> 0:0:3

60
RUN FOR NG
shape before: (1050, 17)
SIGNAL COUNTS: 
sell    373
hold    347
buy     330
Name: signal, dtype: int64
shape after: (1050, 253)
ADDED TO C:\Users

SIGNAL COUNTS: 
sell    380
buy     345
hold    309
Name: signal, dtype: int64
shape after: (1034, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 85 TIME -> 0:0:4

86
RUN FOR SKG
shape before: (653, 17)
SIGNAL COUNTS: 
sell    282
buy     218
hold    153
Name: signal, dtype: int64
shape after: (653, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 86 TIME -> 0:0:4

87
RUN FOR SPX
shape before: (1049, 17)
SIGNAL COUNTS: 
buy     423
sell    353
hold    273
Name: signal, dtype: int64
shape after: (1049, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 87 TIME -> 0:0:3

88
RUN FOR SSE
shape before: (1050, 17)
SIGNAL COUNTS: 
sell    494
buy     306
hold    250
Name: signal, dtype: int64
shape after: (1050, 253)
ADDED TO C:\User

SIGNAL COUNTS: 
sell    406
buy     325
hold    300
Name: signal, dtype: int64
shape after: (1031, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 113 TIME -> 0:0:3

114
RUN FOR BGFD
shape before: (607, 17)
SIGNAL COUNTS: 
buy     230
sell    211
hold    166
Name: signal, dtype: int64
shape after: (607, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 114 TIME -> 0:0:4

115
RUN FOR BAKK
shape before: (96, 17)
SIGNAL COUNTS: 
hold    43
sell    38
buy     15
Name: signal, dtype: int64
shape after: (96, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 115 TIME -> 0:0:3

116
RUN FOR BBY
shape before: (1050, 17)
SIGNAL COUNTS: 
sell    475
buy     322
hold    253
Name: signal, dtype: int64
shape after: (1050, 253)
ADDED TO C:\Use

SIGNAL COUNTS: 
sell    243
buy     239
hold    165
Name: signal, dtype: int64
shape after: (647, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 141 TIME -> 0:0:3

142
RUN FOR CTY
shape before: (612, 17)
SIGNAL COUNTS: 
hold    380
sell    186
buy      46
Name: signal, dtype: int64
shape after: (612, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 142 TIME -> 0:0:4

143
RUN FOR CKN
shape before: (1032, 17)
SIGNAL COUNTS: 
buy     431
sell    389
hold    212
Name: signal, dtype: int64
shape after: (1032, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 143 TIME -> 0:0:4

144
RUN FOR CBG
shape before: (1050, 17)
SIGNAL COUNTS: 
sell    424
buy     322
hold    304
Name: signal, dtype: int64
shape after: (1050, 253)
ADDED TO C:

SIGNAL COUNTS: 
buy     248
sell    234
hold    128
Name: signal, dtype: int64
shape after: (610, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 169 TIME -> 0:0:1

170
RUN FOR EQN
shape before: (203, 17)
SIGNAL COUNTS: 
sell    81
buy     64
hold    58
Name: signal, dtype: int64
shape after: (203, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 170 TIME -> 0:0:2

171
RUN FOR ESNT
shape before: (745, 17)
SIGNAL COUNTS: 
sell    300
buy     229
hold    216
Name: signal, dtype: int64
shape after: (745, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 171 TIME -> 0:0:2

172
RUN FOR ERM
shape before: (1043, 17)
SIGNAL COUNTS: 
sell    407
buy     371
hold    265
Name: signal, dtype: int64
shape after: (1043, 253)
ADDED TO C:\Use

SIGNAL COUNTS: 
sell    444
buy     362
hold    244
Name: signal, dtype: int64
shape after: (1050, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 197 TIME -> 0:0:3

198
RUN FOR GRG
shape before: (1050, 17)
SIGNAL COUNTS: 
buy     426
sell    408
hold    216
Name: signal, dtype: int64
shape after: (1050, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 198 TIME -> 0:0:3

199
RUN FOR GVC
shape before: (733, 17)
SIGNAL COUNTS: 
sell    269
buy     241
hold    223
Name: signal, dtype: int64
shape after: (733, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 199 TIME -> 0:0:3

200
RUN FOR HMSO
shape before: (1048, 17)
SIGNAL COUNTS: 
sell    437
buy     331
hold    280
Name: signal, dtype: int64
shape after: (1048, 253)
ADDED TO 

SIGNAL COUNTS: 
buy     211
sell    185
hold    159
Name: signal, dtype: int64
shape after: (555, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 225 TIME -> 0:0:3

226
RUN FOR JLG
shape before: (240, 17)
SIGNAL COUNTS: 
hold    93
sell    74
buy     73
Name: signal, dtype: int64
shape after: (240, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 226 TIME -> 0:0:3

227
RUN FOR JII
shape before: (611, 17)
SIGNAL COUNTS: 
sell    246
buy     199
hold    166
Name: signal, dtype: int64
shape after: (611, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 227 TIME -> 0:0:2

228
RUN FOR JAM
shape before: (605, 17)
SIGNAL COUNTS: 
hold    212
buy     200
sell    193
Name: signal, dtype: int64
shape after: (605, 253)
ADDED TO C:\Users\

SIGNAL COUNTS: 
sell    465
buy     324
hold    261
Name: signal, dtype: int64
shape after: (1050, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 253 TIME -> 0:0:3

254
RUN FOR NBLS
shape before: (434, 17)
SIGNAL COUNTS: 
sell    213
buy     111
hold    110
Name: signal, dtype: int64
shape after: (434, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 254 TIME -> 0:0:4

255
RUN FOR NRR
shape before: (480, 17)
SIGNAL COUNTS: 
sell    241
buy     120
hold    119
Name: signal, dtype: int64
shape after: (480, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 255 TIME -> 0:0:3

256
RUN FOR NESF
shape before: (277, 17)
SIGNAL COUNTS: 
sell    97
hold    93
buy     87
Name: signal, dtype: int64
shape after: (277, 253)
ADDED TO C:\Use

SIGNAL COUNTS: 
hold    31
sell    22
buy     11
Name: signal, dtype: int64
shape after: (64, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 281 TIME -> 0:0:4

282
RUN FOR RNK
shape before: (1050, 17)
SIGNAL COUNTS: 
sell    462
buy     300
hold    288
Name: signal, dtype: int64
shape after: (1050, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 282 TIME -> 0:0:3

283
RUN FOR RAT
shape before: (1043, 17)
SIGNAL COUNTS: 
sell    453
buy     364
hold    226
Name: signal, dtype: int64
shape after: (1043, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 283 TIME -> 0:0:5

284
RUN FOR RDW
shape before: (1047, 17)
SIGNAL COUNTS: 
sell    389
buy     351
hold    307
Name: signal, dtype: int64
shape after: (1047, 253)
ADDED TO C:\U

SIGNAL COUNTS: 
sell    430
buy     352
hold    267
Name: signal, dtype: int64
shape after: (1049, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 309 TIME -> 0:0:4

310
RUN FOR SPD
shape before: (655, 17)
SIGNAL COUNTS: 
sell    278
buy     222
hold    155
Name: signal, dtype: int64
shape after: (655, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 310 TIME -> 0:0:4

311
RUN FOR SSPG
shape before: (271, 17)
SIGNAL COUNTS: 
buy     112
hold     85
sell     74
Name: signal, dtype: int64
shape after: (271, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 311 TIME -> 0:0:4

312
RUN FOR SMP
shape before: (1043, 17)
SIGNAL COUNTS: 
sell    415
buy     376
hold    252
Name: signal, dtype: int64
shape after: (1043, 253)
ADDED TO C:

SIGNAL COUNTS: 
hold    41
sell    23
buy      8
Name: signal, dtype: int64
shape after: (72, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 337 TIME -> 0:0:4

338
RUN FOR WEIR
shape before: (1048, 17)
SIGNAL COUNTS: 
buy     376
sell    355
hold    317
Name: signal, dtype: int64
shape after: (1048, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 338 TIME -> 0:0:3

339
RUN FOR JDW
shape before: (1046, 17)
SIGNAL COUNTS: 
sell    411
buy     344
hold    291
Name: signal, dtype: int64
shape after: (1046, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 339 TIME -> 0:0:4

340
RUN FOR SMWH
shape before: (684, 17)
SIGNAL COUNTS: 
sell    240
buy     227
hold    217
Name: signal, dtype: int64
shape after: (684, 253)
ADDED TO C:\U

In [33]:
#close any open h5 files
tables.file._open_files.close_all()

In [34]:
tmp_df = pd.read_hdf(hf_store_name,key='weekly_data',mode='r')
print("")
print("FINAL HDFSTORE SIZE: {}".format(tmp_df.shape))
print("FINAL BUY COUNT: {}".format(len(tmp_df[tmp_df["signal"] == "buy"])))
print("FINAL SELL COUNT: {}".format(len(tmp_df[tmp_df["signal"] == "sell"])))
h_store.close()
tmp_df.head(50)
# tmp_df[(tmp_df["ticker"] == 'ADM') & (tmp_df["date"] > '2013-12-01') & (tmp_df["date"] < '2014-02-01')].head(200)


FINAL HDFSTORE SIZE: (258339, 140)
FINAL BUY COUNT: 82965
FINAL SELL COUNT: 98781


Unnamed: 0,ticker,date,open,close,high,low,volume,change_price,per_change_price,close_shift1,...,min_move_cum_neg_macd_line,long_prev_max_move_pos_date_macd_line,long_prev_max_move_neg_date_macd_line,long_prev_min_move_pos_date_macd_line,long_prev_min_move_neg_date_macd_line,long_max_grad_pos_macd_line,long_max_grad_neg_macd_line,long_min_grad_pos_macd_line,long_min_grad_neg_macd_line,signal
119990,III,2007-12-31,0.837127,0.838986,0.855672,0.85312,0.053557,-30.0,-0.030151,,...,1,,,,,0.0,0.0,0.0,0.0,sell
119991,III,2008-01-07,0.809307,0.802912,0.821356,0.811475,0.23639,-43.5,-0.044961,0.838986,...,2,,,,,0.0,0.0,0.0,0.0,sell
119992,III,2008-01-14,0.75822,0.782676,0.767864,0.778785,0.325624,-16.0,-0.017448,0.802912,...,3,,,,,0.0,0.0,0.0,0.0,sell
119993,III,2008-01-21,0.731917,0.797193,0.797134,0.748334,0.264801,26.5,0.029742,0.782676,...,4,,,,,0.0,0.0,0.0,0.0,sell
119994,III,2008-01-28,0.75215,0.835467,0.803189,0.798488,0.173403,50.0,0.054885,0.797193,...,5,,,,,0.0,0.0,0.0,0.0,sell
119995,III,2008-02-04,0.811836,0.802472,0.819843,0.809236,0.226728,-46.5,-0.047938,0.835467,...,6,,,,,0.0,0.0,0.0,0.0,sell
119996,III,2008-02-11,0.751138,0.798953,0.778462,0.801175,0.175053,9.5,0.01044,0.802472,...,7,,,,,0.0,0.0,0.0,0.0,sell
119997,III,2008-02-18,0.762266,0.766838,0.792087,0.777889,0.183115,-38.0,-0.04126,0.798953,...,8,,,,,0.0,0.0,0.0,0.0,sell
119998,III,2008-02-25,0.732423,0.709648,0.738595,0.712062,0.308995,-73.5,-0.082445,0.766838,...,9,,,,,0.0,0.0,0.0,0.0,hold
119999,III,2008-03-03,0.654527,0.674013,0.659366,0.685194,0.277779,-37.0,-0.045427,0.709648,...,10,,,,,0.0,0.0,0.0,0.0,buy


In [35]:
#close any open h5 files
tables.file._open_files.close_all()

In [36]:
#Delete the old h5 file and rename the TMP
try:
    os.remove(path+'all_hist_prices_w_ft_eng2.h5')
    print('\nSUCCESSFULLY REMOVED {}'.format(path+'all_hist_prices_w_ft_eng2.h5'))
except Exception as e:
    print('\nERROR - REMOVING:{}'.format(e))
try:
    os.rename(path+'all_hist_prices_w_ft_eng2_TMP.h5',path+'all_hist_prices_w_ft_eng2.h5')
    print('\nSUCCESSFULLY RENAMED {} TO {}'.format(path+'all_hist_prices_w_ft_eng2_TMP.h5',path+'all_hist_prices_w_ft_eng2.h5'))
except Exception as e:
    print('\nERROR - RENAMING:{}'.format(e))


SUCCESSFULLY REMOVED C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2.h5

SUCCESSFULLY RENAMED C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5 TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2.h5


In [37]:
#Remove 'date' from out_cols
out_cols.remove('date')

In [39]:
#Export a list of the features for this model
file_object = open(path+r'feature_engineering_feature_list.txt','w')
feature_str = ''
for i in out_cols:
    feature_str += '{},'.format(i)
feature_str = feature_str[:-1]
file_object.write(feature_str)
file_object.close()
feature_str

'ticker,open,close,high,low,volume,change_price,per_change_price,close_shift1,change_close_shift1,vol_shift1,change_vol_shift1,ema26_shift1,change_ema26_shift1,close_13_norm,close_26_norm,close_52_norm,macd_line_13_norm,macd_line_26_norm,macd_line_52_norm,macd_pos_val,macd_neg_val,signal_line_pos_val,signal_line_neg_val,change_price_pos_val,change_price_neg_val,per_change_price_pos_val,per_change_price_neg_val,prev_max_close,prev_min_close,prev_max_close_date_change,prev_min_close_date_change,max_change_close_pos_val,max_change_close_neg_val,min_change_close_pos_val,min_change_close_neg_val,prev_max_grad_close,prev_min_grad_close,max_move_cum_pos_close,max_move_cum_neg_close,min_move_cum_pos_close,min_move_cum_neg_close,long_prev_max_move_pos_date_close,long_prev_max_move_neg_date_close,long_prev_min_move_pos_date_close,long_prev_min_move_neg_date_close,long_max_grad_pos_close,long_max_grad_neg_close,long_min_grad_pos_close,long_min_grad_neg_close,prev_max_macd,prev_min_macd,prev_max_m