# Create indicators and find signals
This code is designed to calculate the features for model training.

We will also bring in if the target price was hit within 8 weeks for ML training and validation.

Each share has it's own trading pattern so we will train a different model for each share but use the same set of features, just the hyper paramenters will be tuned differenty.

V5 build - Normalise vs rolling max 5 year window

In [1]:
#Import libraries
import pandas as pd
import numpy as np
import re
import tables
import os
import datetime as dt
from rf_modules import *

In [2]:
#Import the ftse list
path = r"C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices"
tick_ftse = pd.read_csv(path + r"\tick_ftse.csv")
tick_ftse = tick_ftse.iloc[:,1:]
for col in tick_ftse:
    tick_ftse.rename(columns={col:clean_col_name(col)},inplace=True)
tick_ftse.head()

Unnamed: 0,ticker,company,index
0,III,3I GRP.,FTSE100
1,ABF,A.B.FOOD,FTSE100
2,ADM,ADMIRAL GRP,FTSE100
3,AAL,ANGLO AMERICAN,FTSE100
4,ANTO,ANTOFAGASTA,FTSE100


In [3]:
#Import and combine prices files
df_prices_w = pd.read_hdf(path + r"\all_hist_prices_w.h5")
#Rename columns
for col in df_prices_w:
    df_prices_w.rename(columns={col:clean_col_name(col)},inplace=True)
#Drop unwanted columns
try:
    df_prices_w.drop(columns=["unnamed_0","index"],inplace=True)
except Exception as e:
    print(e)
#Reformat columns where neccessary
df_prices_w["date"] = df_prices_w["date"].astype("datetime64")
print(df_prices_w.shape)
print(df_prices_w.dtypes)
df_prices_w.head()

"['unnamed_0' 'index'] not found in axis"
(275683, 13)
ticker                 object
date           datetime64[ns]
high                  float64
low                   float64
volume                float64
open                  float64
close                 float64
change                float64
ema12                 float64
ema26                 float64
macd_line             float64
signal_line           float64
macd                  float64
dtype: object


Unnamed: 0,ticker,date,high,low,volume,open,close,change,ema12,ema26,macd_line,signal_line,macd
0,III,2007-12-31,1023.0,964.0,4511565.0,995.0,965.0,-30.0,,,,,
1,III,2008-01-07,989.0,917.5,16056554.0,967.5,924.0,-43.5,,,,,
2,III,2008-01-14,936.0,881.0,21691287.0,917.0,901.0,-16.0,,,,,
3,III,2008-01-21,965.0,847.0,17850580.0,891.0,917.5,26.5,,,,,
4,III,2008-01-28,971.0,903.0,12079245.0,911.0,961.0,50.0,,,,,


In [4]:
#Remove tickers with fewer than 34 entries as this is where the MACD can be calculated
print('START ROW COUNT -> {}'.format(df_prices_w.shape[0]))
print('START TICK COUNT -> {}'.format(tick_ftse.shape[0]))
for tick in tick_ftse.ticker:
    print(tick,' ->',df_prices_w[df_prices_w.ticker == tick].shape[0])
    if df_prices_w[df_prices_w.ticker == tick].shape[0] < 34:
        #Remove from dataframe
        print('\nTOO FEW RECORDS FOR {}'.format(tick))
        df_prices_w = df_prices_w.loc[df_prices_w.ticker != tick]
        print('\tNEW ROW COUNT -> {}'.format(df_prices_w.shape[0]))
        #Remove from tick_ftse
        tick_ftse = tick_ftse.loc[tick_ftse.ticker != tick]
        print('\tNEW TICK COUNT -> {}'.format(tick_ftse.shape[0]))
print('\nEND ROW COUNT -> {}'.format(df_prices_w.shape[0]))
print('END TICK COUNT -> {}'.format(tick_ftse.shape[0]))

START ROW COUNT -> 275683
START TICK COUNT -> 351
III  -> 613
ABF  -> 1045
ADM  -> 353
AAL  -> 1008
ANTO  -> 1045
AHT  -> 1023
AZN  -> 1045
AUTO  -> 238
AVV  -> 1045
AV.  -> 1292
BA.  -> 1534
BARC  -> 1045
BDEV  -> 1045
BKG  -> 1045
BHP  -> 524
BP.  -> 1565
BATS  -> 1045
BLND  -> 1045
BT.A  -> 0

TOO FEW RECORDS FOR BT.A
	NEW ROW COUNT -> 275683
	NEW TICK COUNT -> 350
BNZL  -> 1042
BRBY  -> 900
CCL  -> 989
CNA  -> 1045
CCH  -> 336
CPG  -> 1031
CRH  -> 1031
CRDA  -> 1037
DCC  -> 1027
DGE  -> 1045
EVR  -> 414
EXPN  -> 678
FERG  -> 1046
FLTR  -> 18

TOO FEW RECORDS FOR FLTR
	NEW ROW COUNT -> 275665
	NEW TICK COUNT -> 349
FRES  -> 596
GSK  -> 1045
GLEN  -> 438
HLMA  -> 1013
HL.  -> 641
HIK  -> 708
HSX  -> 1012
HSBA  -> 1256
IMB  -> 1180
INF  -> 1066
IHG  -> 844
ITRK  -> 884
IAG  -> 856
ITV  -> 1012
JD.  -> 82
JMAT  -> 1237
JE.  -> 288
KGF  -> 1256
LAND  -> 1264
LGEN  -> 1256
LLOY  -> 1222
LSE  -> 932
MGGT  -> 1475
MRO  -> 812
MNDI  -> 614
MRW  -> 1256
NG.  -> 1243
NXT  -> 1523
NMC  -> 373


In [5]:
#Function for calculating ema
def calc_ema(_s_in,_periods):
    #Calc mod val
    _mod = 2/(_periods+1)
    #Calc sma
    _sma_s = [0] * _s_in.shape[0]
    for _i in range(0,_periods):
        _sma_s += _s_in.shift(_i) / _periods
    #Calc ema
    _ema_s = _sma_s.copy()
    _ema_s[(_ema_s > 0) & (np.isnan(_ema_s) == False)] = _mod*(_s_in - _ema_s.shift(1)) + _ema_s.shift(1)
    return _ema_s.copy()

In [6]:
#Function for calculating the MACD
def calc_macd(_ema_lng_s,_ema_sht_s,_sig_period:int):
    #Make a df
    _tmp_df = pd.DataFrame([])
    _tmp_df['ema_lng'] = _ema_lng_s
    _tmp_df['ema_sht'] = _ema_sht_s
    #Calc the signal line
    _tmp_df['macd_line'] = _tmp_df['ema_sht'] - _tmp_df['ema_lng']
    _tmp_df['signal_line'] = calc_ema(_tmp_df['macd_line'],_sig_period)
    _tmp_df['macd_hist'] = _tmp_df['macd_line'] - _tmp_df['signal_line']
    return (_tmp_df['macd_line'].copy(),_tmp_df['signal_line'].copy(),_tmp_df['macd_hist'].copy())

In [7]:
#Calc the ema and macds for the data
def calc_ema_macd(_df_in):
    _tick_df = _df_in.copy()
    try:
        #Add in the ema and macd
        _tick_df = _tick_df.sort_values(by='date')
#         _tick_df = _tick_df.reset_index(drop=True)
        _tick_df['ema12'] = calc_ema(_tick_df['close'],12)
        _tick_df['ema26'] = calc_ema(_tick_df['close'],26)
        _tick_df['macd_line'],_tick_df['signal_line'],_tick_df['macd'] = calc_macd(_tick_df['ema26'],_tick_df['ema12'],9)
        return _tick_df
    except Exception as e:
        print('ERROR:{}'.format(e))
        return _tick_df

In [8]:
#Create a function which normalises a feature based only on the values which have come before it - avoids time series bias
def norm_time_s(_ind,_s_in,_window):
    _this_ind = _ind - _s_in.index.min()
    if _this_ind < _window:
        _min_ind = 0
    else:
        _min_ind = _this_ind - _window
    _min = np.nanmin(_s_in[_min_ind:_this_ind+1].values)
    _max = np.nanmax(_s_in[_min_ind:_this_ind+1].values)
    _norm_val = (_s_in[_ind] - _min) / (_max - _min)
    return _norm_val

In [9]:
#Runt he functions
def norm_prices(_df_in):
    _df_out = _df_in.copy()
    #Normalise the columns which need it
    _norm_cols = [
        #Standard features
        "open"
        ,"close"
        ,"high"
        ,"low"
        ,"volume"
    ]
    #Reset the index
    _df_out.sort_values(['date'],ascending=True,inplace=True)
    #Calc the 5yr window
    _5yr = 5*52
    #Normalise
    for _col in _norm_cols:
        _tmp_s = _df_out[_col].copy() #Take a copy so as the values are changed this does not affect following calculations
        _df_out[_col] = [norm_time_s(_x,_tmp_s,_5yr) for _x in _df_out.index]
    return _df_out

In [10]:
df_prices_w[df_prices_w.ticker == 'III'].head()

Unnamed: 0,ticker,date,high,low,volume,open,close,change,ema12,ema26,macd_line,signal_line,macd
0,III,2007-12-31,1023.0,964.0,4511565.0,995.0,965.0,-30.0,,,,,
1,III,2008-01-07,989.0,917.5,16056554.0,967.5,924.0,-43.5,,,,,
2,III,2008-01-14,936.0,881.0,21691287.0,917.0,901.0,-16.0,,,,,
3,III,2008-01-21,965.0,847.0,17850580.0,891.0,917.5,26.5,,,,,
4,III,2008-01-28,971.0,903.0,12079245.0,911.0,961.0,50.0,,,,,


In [11]:
#Normalize the prices by ticker and time then create emas and macds for each ticker
df_prices_w = df_prices_w.sort_values(['ticker','date'],ascending=[True,True])
df_prices_w.reset_index(inplace=True,drop=True)
print('NORALISING AND CALCULATING EMA & MACD VALUES')
count = 0
error_li = []
run_time = process_time()
run_time.lap()
for tick in tick_ftse.ticker:
    count += 1
    print('\nRUN FOR {} - {}'.format(tick,count))
    try:
        this_tick_df = df_prices_w[df_prices_w.ticker == tick]
        this_tick_df = norm_prices(this_tick_df.copy())
        #Calculate the ema and macd
        this_tick_df = calc_ema_macd(this_tick_df)
        #Append back on to the dataframe
#         df_prices_w[df_prices_w.ticker == tick] = this_tick_df.copy()
        print('\tSUCCESS')
        run_time.lap()
        run_time.show_latest_lap_time()
    except Exception as e:
        print('\tERROR -> {}'.format(e))
        error_li.append(e)
run_time.end()
print('\n\nCOMPLETED - ERRORS ENCOUNTERED -> {}'.format(len(error_li)))
if len(error_li) > 0:
    print(error_li)

NORALISING AND CALCULATING EMA & MACD VALUES

RUN FOR III - 1


  # Remove the CWD from sys.path while we load stuff.


	SUCCESS
LAP 2 TIME -> 0:0:1

RUN FOR ABF - 2
	SUCCESS
LAP 3 TIME -> 0:0:2

RUN FOR ADM - 3
	SUCCESS
LAP 4 TIME -> 0:0:0

RUN FOR AAL - 4
	SUCCESS
LAP 5 TIME -> 0:0:2

RUN FOR ANTO - 5
	SUCCESS
LAP 6 TIME -> 0:0:2

RUN FOR AHT - 6
	SUCCESS
LAP 7 TIME -> 0:0:2

RUN FOR AZN - 7
	SUCCESS
LAP 8 TIME -> 0:0:2

RUN FOR AUTO - 8
	SUCCESS
LAP 9 TIME -> 0:0:0

RUN FOR AVV - 9
	SUCCESS
LAP 10 TIME -> 0:0:2

RUN FOR AV. - 10
	SUCCESS
LAP 11 TIME -> 0:0:3

RUN FOR BA. - 11
	SUCCESS
LAP 12 TIME -> 0:0:3

RUN FOR BARC - 12
	SUCCESS
LAP 13 TIME -> 0:0:2

RUN FOR BDEV - 13
	SUCCESS
LAP 14 TIME -> 0:0:2

RUN FOR BKG - 14
	SUCCESS
LAP 15 TIME -> 0:0:2

RUN FOR BHP - 15
	SUCCESS
LAP 16 TIME -> 0:0:1

RUN FOR BP. - 16
	SUCCESS
LAP 17 TIME -> 0:0:3

RUN FOR BATS - 17
	SUCCESS
LAP 18 TIME -> 0:0:2

RUN FOR BLND - 18
	SUCCESS
LAP 19 TIME -> 0:0:2

RUN FOR BNZL - 19
	SUCCESS
LAP 20 TIME -> 0:0:2

RUN FOR BRBY - 20
	SUCCESS
LAP 21 TIME -> 0:0:2

RUN FOR CCL - 21
	SUCCESS
LAP 22 TIME -> 0:0:2

RUN FOR CNA - 22


	SUCCESS
LAP 169 TIME -> 0:0:1

RUN FOR EQN - 169
	SUCCESS
LAP 170 TIME -> 0:0:0

RUN FOR ESNT - 170
	SUCCESS
LAP 171 TIME -> 0:0:1

RUN FOR ERM - 171
	SUCCESS
LAP 172 TIME -> 0:0:2

RUN FOR FCIT - 172
	SUCCESS
LAP 173 TIME -> 0:0:1

RUN FOR FDM - 173
	SUCCESS
LAP 174 TIME -> 0:0:0

RUN FOR FXPO - 174
	SUCCESS
LAP 175 TIME -> 0:0:1

RUN FOR FEV - 175
	SUCCESS
LAP 176 TIME -> 0:0:1

RUN FOR FSV - 176
	SUCCESS
LAP 177 TIME -> 0:0:1

RUN FOR FCSS - 177
	SUCCESS
LAP 178 TIME -> 0:0:1

RUN FOR FGT - 178
	SUCCESS
LAP 179 TIME -> 0:0:1

RUN FOR FGP - 179
	SUCCESS
LAP 180 TIME -> 0:0:2

RUN FOR FSJ - 180
	SUCCESS
LAP 181 TIME -> 0:0:2

RUN FOR FSFL - 181
	SUCCESS
LAP 182 TIME -> 0:0:0

RUN FOR FUTR - 182
	SUCCESS
LAP 183 TIME -> 0:0:2

RUN FOR GFS - 183
	SUCCESS
LAP 184 TIME -> 0:0:1

RUN FOR GFRD - 184
	SUCCESS
LAP 185 TIME -> 0:0:3

RUN FOR GAW - 185
	SUCCESS
LAP 186 TIME -> 0:0:2

RUN FOR GCP - 186
	SUCCESS
LAP 187 TIME -> 0:0:1

RUN FOR DIGS - 187
	SUCCESS
LAP 188 TIME -> 0:0:0

RUN FOR GS

	SUCCESS
LAP 332 TIME -> 0:0:0

RUN FOR VOF - 332
	SUCCESS
LAP 333 TIME -> 0:0:1

RUN FOR VVO - 333
	SUCCESS
LAP 334 TIME -> 0:0:0

RUN FOR WEIR - 334
	SUCCESS
LAP 335 TIME -> 0:0:3

RUN FOR JDW - 335
	SUCCESS
LAP 336 TIME -> 0:0:3

RUN FOR SMWH - 336
	SUCCESS
LAP 337 TIME -> 0:0:1

RUN FOR WMH - 337
	SUCCESS
LAP 338 TIME -> 0:0:2

RUN FOR WTAN - 338
	SUCCESS
LAP 339 TIME -> 0:0:1

RUN FOR WIZZ - 339
	SUCCESS
LAP 340 TIME -> 0:0:0

RUN FOR WG. - 340
	SUCCESS
LAP 341 TIME -> 0:0:2

RUN FOR WKP - 341
	SUCCESS
LAP 342 TIME -> 0:0:2

RUN FOR WWH - 342
	SUCCESS
LAP 343 TIME -> 0:0:1
TOTAL ELAPSED TIME -> 0:10:59


COMPLETED - ERRORS ENCOUNTERED -> 0


In [12]:
#Get in-row price change
def calc_changes(df_in,var_col,prev_col):
    df_in["change"] = df_in[var_col] - df_in[prev_col]
    df_in["per_change"] = df_in["change"] / df_in[prev_col]
    return (df_in["change"],df_in["per_change"])

#Relabel col names
for col in df_prices_w:
    df_prices_w.rename(columns={col:col.lower()},inplace=True)
    
df_prices_w["change_price"],df_prices_w["per_change_price"] = calc_changes(df_prices_w[["close","open"]].copy(),"close","open")
df_prices_w.drop(columns=["change"],inplace=True)
df_prices_w.head()

Unnamed: 0,ticker,date,high,low,volume,open,close,ema12,ema26,macd_line,signal_line,macd,change_price,per_change_price
0,3IN,2007-12-31,149.89,147.07,1373801.0,149.18,147.43,,,,,,-1.75,-0.011731
1,3IN,2008-01-07,149.54,147.07,2345191.0,147.07,148.83,,,,,,1.76,0.011967
2,3IN,2008-01-14,150.59,147.43,2150049.0,148.48,149.18,,,,,,0.7,0.004714
3,3IN,2008-01-21,154.82,145.32,3070968.0,147.78,152.0,,,,,,4.22,0.028556
4,3IN,2008-01-28,154.82,148.83,2510972.0,149.89,152.0,,,,,,2.11,0.014077


In [13]:
df_prices_w = df_prices_w.sort_values(['ticker','date'],ascending=[True,True])
df_prices_w.reset_index(inplace=True,drop=True)
print(df_prices_w.ticker.unique())
df_prices_w.head()

['3IN' 'AAL' 'ABF' 'ADM' 'AGK' 'AGR' 'AHT' 'AJB' 'AML' 'ANTO' 'APAX'
 'ASCL' 'ASHM' 'ASL' 'ATST' 'AUTO' 'AV.' 'AVST' 'AVV' 'AZN' 'BA.' 'BAB'
 'BAG' 'BAKK' 'BARC' 'BATS' 'BBA' 'BBGI' 'BBOX' 'BBY' 'BCA' 'BDEV' 'BEZ'
 'BGEO' 'BGFD' 'BGSC' 'BHP' 'BKG' 'BLND' 'BME' 'BNKR' 'BNZL' 'BOY' 'BP.'
 'BRBY' 'BRSC' 'BRW' 'BVIC' 'BVS' 'BWY' 'BYG' 'CAPC' 'CARD' 'CBG' 'CCC'
 'CCFS' 'CCH' 'CCL' 'CEY' 'CINE' 'CKN' 'CLDN' 'CLI' 'CNA' 'CNE' 'COA'
 'COB' 'CPG' 'CPI' 'CRDA' 'CRH' 'CRST' 'CSP' 'CTEC' 'CTY' 'CWK' 'CYBG'
 'DC.' 'DCC' 'DGE' 'DIGS' 'DJAN' 'DLG' 'DLN' 'DNLM' 'DOM' 'DPH' 'DPLM'
 'DRX' 'ECM' 'EDIN' 'EIG' 'ELM' 'EMG' 'ENOG' 'EQN' 'ERM' 'ESNT' 'ETO'
 'EVR' 'EXPN' 'EZJ' 'FCIT' 'FCSS' 'FDM' 'FERG' 'FEV' 'FGP' 'FGT' 'FOUR'
 'FRES' 'FSFL' 'FSJ' 'FSV' 'FUTR' 'FXPO' 'GAW' 'GCP' 'GFRD' 'GFS' 'GFTU'
 'GLEN' 'GLO' 'GNC' 'GNK' 'GNS' 'GOG' 'GPOR' 'GRG' 'GRI' 'GSK' 'GSS' 'GVC'
 'HAS' 'HFG' 'HGT' 'HICL' 'HIK' 'HILS' 'HL.' 'HLMA' 'HMSO' 'HOC' 'HRI'
 'HSBA' 'HSTG' 'HSV' 'HSX' 'HTG' 'HVPE' 'HWDN' 'IAG' 'IBST' 'ICP' 'I

Unnamed: 0,ticker,date,high,low,volume,open,close,ema12,ema26,macd_line,signal_line,macd,change_price,per_change_price
0,3IN,2007-12-31,149.89,147.07,1373801.0,149.18,147.43,,,,,,-1.75,-0.011731
1,3IN,2008-01-07,149.54,147.07,2345191.0,147.07,148.83,,,,,,1.76,0.011967
2,3IN,2008-01-14,150.59,147.43,2150049.0,148.48,149.18,,,,,,0.7,0.004714
3,3IN,2008-01-21,154.82,145.32,3070968.0,147.78,152.0,,,,,,4.22,0.028556
4,3IN,2008-01-28,154.82,148.83,2510972.0,149.89,152.0,,,,,,2.11,0.014077


# Create buy signals, and sell signals
I am classifying a stock worth buying if it meets all the below criteria:
- The target price (previous max) is hit within x (set as variable target_price_period) periods proceeding this period
- The target price is an increase of x% (set as variable min_gain) over the proceeding day's open price
- There is a drop in the closing price of less x% (set as variable max_drop) between this period and the x periods proceeding this period

I am classifying the sell signals as:
- The close price dips below the target price (previous max) x (set as variable target_price_period) periods proceeding this period
- There is an increase in the closing price of less x% (set as variable max_drop) over the next x periods

In [14]:
#Define the variables
target_price_period = 12
period_high_volatility = 3
period_low_volatility = 1
min_gain = 0.1
max_drop = -0.05

In [15]:
#Programming note
#df.shift(1) looks 1 period into the past
#df.shift(-1) looks 1 period into the future

In [16]:
#Check if the target price is hit within the target_price_period
def min_gain_check(_var_s,_target_s,_periods:int=12):
    _check_s = [False] * _var_s.shape[0]
    for _i in range(1,_periods+1):
        _tmp_check_s = _var_s.shift(-_i) > _target_s #True if price is >= limit
        _check_s = _check_s | _tmp_check_s
    return _check_s

In [17]:
def max_drop_check(_var_s,_target_s,periods:int=12):
    _check_s = [False] * _var_s.shape[0]
    for _i in range(1,periods+1):
        _tmp_check_s = _var_s.shift(-_i) < _target_s #True if price is <= limit
        _check_s = _check_s | _tmp_check_s
    return _check_s

In [18]:
def close_vs_close(_var_s,_shift:int=1):
    _check_s = _var_s.shift(_shift) - _var_s
    return _check_s

In [19]:
#Create a function for finding buy signals
def get_buys(var_s):
    
    #Check if the target price is hit within the target_price_period
    target_s = var_s * (1+min_gain)
    min_gain_s = min_gain_check(var_s,target_s,target_price_period) == True #Function returns True when min_gain is hit
    print('BUY min_gain_s -> {}'.format(min_gain_s[min_gain_s == True].shape))
    
    #Check if the sell price is hit within the target_price_period
    target_s = var_s * (1+max_drop)
    max_drop_s = max_drop_check(var_s,target_s,target_price_period) == False #Function returns False when does not go below target
    print('BUY max_drop_s -> {}'.format(max_drop_s[max_drop_s == True].shape))
    
    #Check if the following day is a positive change on today's close price
    close_vs_close_pos_s = close_vs_close(var_s,-1) > 0
    print('BUY close_vs_close_pos_s -> {}'.format(close_vs_close_pos_s[max_drop_s == True].shape))
    
    #Find the buy signals
    s_out = min_gain_s & max_drop_s & close_vs_close_pos_s
    print('BUY ALL -> {}'.format(s_out[s_out == True].shape))
    
    return s_out

In [20]:
#Function for finding sell signals
def get_sells(var_s):
    
    #Check if the target price is hit within the target_price_period
    target_s = var_s * (1+max_drop)
    max_drop_s = max_drop_check(var_s,target_s,target_price_period) == True #Function returns True when max_drop is hit
    print('SELL max_drop_s -> {}'.format(max_drop_s[max_drop_s == True].shape))
    
    #Perform if the target is crossed again
    target_s = var_s * (1+min_gain)
    min_gain_s = min_gain_check(var_s,target_s,target_price_period) == False #Function returns False when min_gain is not hit
    print('SELL min_gain_s -> {}'.format(min_gain_s[min_gain_s == True].shape))
    
    #Check if the following day is a negative change on today's close price
    close_vs_close_neg_s = close_vs_close(var_s,-1) < 0
    print('SELL close_vs_close_pos_s -> {}'.format(close_vs_close_neg_s[max_drop_s == True].shape))
    
    #Find the sell signals
    s_out = max_drop_s & min_gain_s & close_vs_close_neg_s
    print('SELL ALL -> {}'.format(s_out[s_out == True].shape))
    
    return s_out

In [21]:
#Get buy signals
df_prices_w['buy'] = get_buys(df_prices_w['close'])

#Get sell signals
df_prices_w['sell'] = get_sells(df_prices_w['close'])

#Get hold signals
df_prices_w["hold"] = (df_prices_w["buy"] == False) & (df_prices_w["sell"] == False)

print('BUY PERCENTAGE -> {:.2f}%'.format(df_prices_w[df_prices_w['buy'] == True].shape[0]*100/df_prices_w.shape[0]))
print('SELL PERCENTAGE -> {:.2f}%'.format(df_prices_w[df_prices_w['sell'] == True].shape[0]*100/df_prices_w.shape[0]))
print('HOLD PERCENTAGE -> {:.2f}%'.format(df_prices_w[df_prices_w['hold'] == True].shape[0]*100/df_prices_w.shape[0]))

BUY min_gain_s -> (107516,)
BUY max_drop_s -> (141432,)
BUY close_vs_close_pos_s -> (141432,)
BUY ALL -> (59397,)
SELL max_drop_s -> (134117,)
SELL min_gain_s -> (168033,)
SELL close_vs_close_pos_s -> (134117,)
SELL ALL -> (69571,)
BUY PERCENTAGE -> 21.56%
SELL PERCENTAGE -> 25.25%
HOLD PERCENTAGE -> 53.20%


In [22]:
df_prices_w[df_prices_w.ticker=='SBRY'].iloc[-5:]

Unnamed: 0,ticker,date,high,low,volume,open,close,ema12,ema26,macd_line,signal_line,macd,change_price,per_change_price,buy,sell,hold
210943,SBRY,2019-09-23,222.8,212.31,45313711.0,218.9,222.5,207.077519,213.667923,-6.590404,-12.409913,5.819509,3.6,0.016446,False,False,True
210944,SBRY,2019-09-30,223.1,219.8,15534699.0,219.8,222.1,209.38867,214.292521,-4.903851,-10.908701,6.004849,2.3,0.010464,False,False,True
210945,SBRY,2019-09-30,223.1,219.8,15534699.0,219.8,219.8,210.990413,214.700483,-3.71007,-9.468974,5.758905,0.0,0.0,True,False,False
210946,SBRY,2019-09-30,223.1,219.8,15534699.0,221.9,222.1,212.69958,215.248595,-2.549015,-8.084983,5.535968,0.2,0.000901,False,False,True
210947,SBRY,2019-09-30,223.1,219.8,15534699.0,221.9,219.8,213.791953,215.585736,-1.793784,-6.826743,5.032959,-2.1,-0.009464,True,False,False


# Create additional features

In [23]:
#Mark minimums and maximums
def flag_mins(s_in,period):
    s_out = 0
    for i in range(1,period):
        s_out += (s_in > s_in.shift(i)) & (s_in.shift(-i).isnull() == False) #Is this value > the previous value
        s_out += (s_in > s_in.shift(-i)) | (s_in.shift(-i).isnull()) #Is this value > the next value OR is it the last available value (IE the next value is null)
        s_out += s_in.isnull()
#     s_out[((s_in.index == 0) & (s_in.isnull() == False)) | ((s_in.shift(1).isnull()) & (s_in.isnull() == False))] = 0 #set the first non-NaN result to 0
    s_out = s_out == 0
    return s_out
def flag_maxs(s_in,period):
    s_out = 0
    for i in range(1,period):
        s_out += (s_in < s_in.shift(i)) & (s_in.shift(-i).isnull() == False) #Is this value < the previous value (if the previous value is not null)
        s_out += (s_in < s_in.shift(-i)) | (s_in.shift(-i).isnull()) #Is this value < the next value OR is it the last available value (IE the next value is null)
        s_out += s_in.isnull()
#     s_out[((s_in.index == 0) & (s_in.isnull() == False)) | ((s_in.shift(1).isnull()) & (s_in.isnull() == False))] = 0 #set the first non-NaN result to 0
    s_out = s_out == 0
    return s_out

In [24]:
#Calc vol as proportion of previous n-rows
def calc_prop_of_prev(s_in,periods:int = 4):
    s_cum = s_in.copy()
    for i in range(1,periods):
        s_cum += s_in.shift(i)
    return s_in / s_cum

In [25]:
#Mark points of macd positive entry
def pos_entry(s_in):
    return (s_in > s_in.shift(1)) & (s_in > 0) & (s_in.shift(1) < 0)
def neg_entry(s_in):
    return (s_in < s_in.shift(1)) & (s_in < 0) & (s_in.shift(1) > 0)

In [26]:
#Function to normalise current price compared to another
def norm_s(s_in,max_in,min_in):
    s_out = (s_in - min_in) / (max_in - min_in)
    s_out.fillna(1.0,inplace=True)
    return s_out

In [27]:
#Function to find last max and mins
def prev_value(df_in,var_col,bool_col):
    df_in["prev_val"] = df_in.loc[df_in[bool_col],var_col]
    df_in["prev_val"] = df_in["prev_val"].fillna(method='ffill')
    df_in["prev_marker_date"] = df_in.loc[df_in[bool_col],"date"]
    df_in["prev_marker_date"] = df_in["prev_marker_date"].fillna(method='ffill')
    return (df_in["prev_val"],df_in["prev_marker_date"])

In [28]:
#Create separate columns for pos and neg values - allows for normalisation
def pos_neg_cols(s_in,gt_lt = "GT"):
    if gt_lt.upper() == "GT":
        bool_s = s_in >= 0
    elif gt_lt.upper() == "LT":
        bool_s = s_in <= 0
    df_out = s_in.to_frame()
    df_out["s_in"] = s_in
    df_out["val"] = abs(s_in[bool_s])
    val_s = df_out["val"].fillna(0,method=None)
    return (bool_s,val_s)

In [29]:
#Function for calculating the max and mins within a period
def max_min_period(s_in,periods:int=4):
    #Copy into max and min
    max_s = s_in.copy()
    min_s = s_in.copy()
    #Go through periods and mark where it is a new max or min
    for i in range(1,(periods+1)):
        #Calc max
        tmp_check_s = s_in.shift(i) >= max_s
        max_s[tmp_check_s] = s_in.shift(i)
        #Calc min
        tmp_check_s = s_in.shift(i) <= min_s
        min_s[tmp_check_s] = s_in.shift(i)
    #Normalise the series
    s_out = norm_s(s_in,max_s,min_s)
    return s_out

In [30]:
#Create a dictionary of max character lengths of fields for use later in h5 file appending
def get_col_lens(_df_in):
    _col_lens = {}
    for c in _df_in:
        _tmp_s = pd.Series([len(str(x)) for x in _df_in[c]])
        _col_lens[c] = _tmp_s.max()
    return _col_lens
col_lens = get_col_lens(df_prices_w)
col_lens

{'ticker': 4,
 'date': 19,
 'high': 8,
 'low': 8,
 'volume': 13,
 'open': 8,
 'close': 8,
 'ema12': 18,
 'ema26': 18,
 'macd_line': 23,
 'signal_line': 23,
 'macd': 23,
 'change_price': 21,
 'per_change_price': 23,
 'buy': 5,
 'sell': 5,
 'hold': 5}

In [31]:
#Create a single function to run each stock through feature creation
def create_features(df_in):  
    
    df_out = df_in.copy() 
    
    #Calc vol as proportion of previous n-rows
    df_out["prop_vol"] = calc_prop_of_prev(df_out["volume"].copy().astype("float"),6)

    #Get period-period changes
    df_out["close_shift1"] = df_out["close"].shift(1)
    df_out["change_close_shift1"],df_out["per_change_close_shift1"] = calc_changes(df_out[["close","close_shift1"]].copy(),"close","close_shift1")

    df_out["vol_shift1"] = df_out["volume"].shift(1)
    df_out["change_vol_shift1"],df_out["per_change_vol_shift1"] = calc_changes(df_out[["volume","vol_shift1"]].copy(),"volume","vol_shift1")

    df_out["macd_shift1"] = df_out["macd"].shift(1)
    df_out["change_macd_shift1"],df_out["per_change_macd_shift1"] = calc_changes(df_out[["macd","vol_shift1"]].copy(),"macd","vol_shift1")

    df_out["ema26_shift1"] = df_out["ema26"].shift(1)
    df_out["change_ema26_shift1"],df_out["per_change_ema26_shift1"] = calc_changes(df_out[["ema26","ema26_shift1"]].copy(),"ema26","ema26_shift1")
    
    #Compare close to the max/mins within 13, 26 and 52 periods
    df_out["close_13_norm"] = max_min_period(df_out["close"],13)
    df_out["close_26_norm"] = max_min_period(df_out["close"],26)
    df_out["close_52_norm"] = max_min_period(df_out["close"],52)
    
    #Compare macd to the max/mins within 13, 26 and 52 periods
    df_out["macd_line_13_norm"] = max_min_period(df_out["macd_line"],13)
    df_out["macd_line_26_norm"] = max_min_period(df_out["macd_line"],26)
    df_out["macd_line_52_norm"] = max_min_period(df_out["macd_line"],52)
        
    #Mark points of macd positive entry
    df_out["macd_pos_ent"] = pos_entry(df_out["macd"])
    df_out["macd_neg_ent"] = neg_entry(df_out["macd"])
    
    #Create separate columns for pos and neg values - allows for normalisation
    df_out["macd_pos_bool"],df_out["macd_pos_val"] = pos_neg_cols(df_out["macd"],gt_lt = "GT")
    df_out["macd_neg_bool"],df_out["macd_neg_val"] = pos_neg_cols(df_out["macd"],gt_lt = "LT")
    df_out["signal_line_pos_bool"],df_out["signal_line_pos_val"] = pos_neg_cols(df_out["signal_line"],gt_lt = "GT")
    df_out["signal_line_neg_bool"],df_out["signal_line_neg_val"] = pos_neg_cols(df_out["signal_line"],gt_lt = "LT")
    df_out["change_price_pos_bool"],df_out["change_price_pos_val"] = pos_neg_cols(df_out["change_price"],gt_lt = "GT")
    df_out["change_price_neg_bool"],df_out["change_price_neg_val"] = pos_neg_cols(df_out["change_price"],gt_lt = "LT")
    df_out["per_change_price_pos_bool"],df_out["per_change_price_pos_val"] = pos_neg_cols(df_out["per_change_price"],gt_lt = "GT")
    df_out["per_change_price_neg_bool"],df_out["per_change_price_neg_val"] = pos_neg_cols(df_out["per_change_price"],gt_lt = "LT")

    #Create max min columns
    def mk_cols_max_min(tmp_df,col,period:int=4):
        tmp_df["{}_min".format(col)] = flag_mins(tmp_df[col],period)
        tmp_df["{}_max".format(col)] = flag_maxs(tmp_df[col],period)
        
    #Find previous max and mins, then look at:
        # - how many positive or negative moves in a row there has been
        # - what the move since the last (n-1) max/min was
        # - what the gradient is since the last (n-1) max/min
        # - what the move since the first max/min was
        # - what the gradient since the first max/min was
    def mk_cols_prev_max_min(tmp_df,col,period:int=4):
        #GETTING THE MAX/MINS
        tmp_df["prev_max_{}".format(col)],tmp_df["prev_max_{}_date".format(col)] = prev_value(tmp_df[["date",col,"{}_max".format(col)]].copy(),col,"{}_max".format(col))
        tmp_df["prev_min_{}".format(col)],tmp_df["prev_min_{}_date".format(col)] = prev_value(tmp_df[["date",col,"{}_min".format(col)]].copy(),col,"{}_min".format(col))
        #Shift the max min columns by n periods to not leak future information
        tmp_df["prev_max_{}".format(col)] = tmp_df["prev_max_{}".format(col)].shift(period)
        tmp_df["prev_min_{}".format(col)] = tmp_df["prev_min_{}".format(col)].shift(period)
        tmp_df["prev_max_{}_date".format(col)] = tmp_df["prev_max_{}_date".format(col)].shift(period)
        tmp_df["prev_min_{}_date".format(col)] = tmp_df["prev_min_{}_date".format(col)].shift(period)
        def mk_prev_move_float(_s_in):
            _s_out = _s_in - _s_in.shift(1)
            _s_out[_s_out == 0] = np.nan
            _s_out = _s_out.fillna(method='ffill')
            return _s_out
        def mk_prev_move_date(_s_in,_periods:int=7):
            _s_out = _s_in - _s_in.shift(1)
            _s_check = pd.Series([np.floor(_x.days) for _x in _s_out])
            _s_check[_s_check == 0] = np.nan
            _s_check = _s_check.fillna(method='ffill')
            _s_check = [np.floor(_x/_periods) for _x in _s_check]
            return _s_check
        #Create features for the cumulative sequential count of max/mins in a certain direction
        def mk_move_cum(_s_in,_gtlt:str='pos'):
            _li_out = []
            _prev_x = None
            #Check which direction we are looking for
            if _gtlt.lower() == 'pos':
                #Loop through each value in _s_in
                for _i,_x in _s_in.iteritems():
                    if _x < 0: #If less than 0 then reset to 0
                        _li_out.append(0)
                    else:
                        if len(_li_out) == 0: #If this is the first value add it to the list
                            _li_out.append(1)
                        else:
                            if _prev_x != _x: #if there has been a change in value from this and the previous value increment it by 1
                                _li_out.append(_li_out[-1] + 1)
                            else: #Otherwise just use the last added value
                                _li_out.append(_li_out[-1])
                    _prev_x = _x
            elif _gtlt.lower() == 'neg':
                for _i,_x in _s_in.iteritems():
                    if _x > 0:
                        _li_out.append(0)
                    else:
                        if len(_li_out) == 0:
                            _li_out.append(1)
                        else:
                            if _prev_x != _x:
                                _li_out.append(_li_out[-1] + 1)
                            else:
                                _li_out.append(_li_out[-1])
                    _prev_x = _x
            return _li_out
        #Create features showing the value change since the first min/max
        def mk_long_prev_move_float(_ref_s,_val_s,_gtlt:str='pos'):
            _li_out = []
            _st_x = None
            #Check which direction we are looking for
            if _gtlt.lower() == 'pos':
                #Loop through each value in _s_in
                for _i,_x in _ref_s.iteritems():
                    if _x < 0: #If less than 0 then reset to 0, and reset _st_x
                        _li_out.append(0)
                        _st_x = None
                    else:
                        if _st_x == None: #If _st_x has not been set yet set it to this value
                            _st_x = _val_s[_i]
                        _li_out.append(_val_s[_i] - _st_x) #Now calculate the difference and add it to the list
            elif _gtlt.lower() == 'neg':
                #Loop through each value in _s_in
                for _i,_x in _ref_s.iteritems():
                    if _x > 0: #If greater than 0 then reset to 0, and reset _st_x
                        _li_out.append(0)
                        _st_x = None
                    else:
                        if _st_x == None: #If _st_x has not been set yet set it to this value
                            _st_x = _val_s[_i]
                        _li_out.append(_val_s[_i] - _st_x) #Now calculate the difference and add it to the list
            return _li_out
        def mk_long_prev_move_date(_ref_s,_val_s,_gtlt:str='pos',_periods:int=7):
            _li_out = []
            _st_x = None
            #Check which direction we are looking for
            if _gtlt.lower() == 'pos':
                #Loop through each value in _s_in
                for _i,_x in _ref_s.iteritems():
                    if _x < 0: #If less than 0 then reset to 0, and reset _st_x
                        _li_out.append(0)
                        _st_x = None
                    else:
                        if _st_x == None: #If _st_x has not been set yet set it to this value
                            _st_x = _val_s[_i]
                        _li_out.append(np.floor((_val_s[_i] - _st_x).days/_periods)) #Now calculate the difference and add it to the list
            elif _gtlt.lower() == 'neg':
                #Loop through each value in _s_in
                for _i,_x in _ref_s.iteritems():
                    if _x > 0: #If less than 0 then reset to 0, and reset _st_x
                        _li_out.append(0)
                        _st_x = None
                    else:
                        if _st_x == None: #If _st_x has not been set yet set it to this value
                            _st_x = _val_s[_i]
                        _li_out.append(np.floor((_val_s[_i] - _st_x).days/_periods)) #Now calculate the difference and add it to the list
            return _li_out
        #WHAT WAS THE MOVE SINCE THE LAST (N-1) MAX/MIN
        tmp_df['prev_max_move_{}'.format(col)] = mk_prev_move_float(tmp_df["prev_max_{}".format(col)])
        tmp_df['prev_max_date_move_{}'.format(col)] = mk_prev_move_date(tmp_df["prev_max_{}_date".format(col)])        
        tmp_df['prev_min_move_{}'.format(col)] = mk_prev_move_float(tmp_df["prev_min_{}".format(col)])
        tmp_df['prev_min_date_move_{}'.format(col)] = mk_prev_move_date(tmp_df["prev_min_{}_date".format(col)])
        #WHAT IS THE GRADIENT SINCE THE LAST (N-1) MAX/MIN
        tmp_df['prev_max_grad_{}'.format(col)] = tmp_df['prev_max_move_{}'.format(col)] / tmp_df['prev_max_date_move_{}'.format(col)]
        tmp_df['prev_min_grad_{}'.format(col)] = tmp_df['prev_min_move_{}'.format(col)] / tmp_df['prev_min_date_move_{}'.format(col)]
        #HOW MANY PROGRESSIVE MAX/MINS IN A ROW HAVE THERE BEEN - UP OR DOWN FOR BOTH OPTIONS
        tmp_df['max_move_cum_pos_{}'.format(col)] = mk_move_cum(tmp_df['prev_max_move_{}'.format(col)],'pos')
        tmp_df['max_move_cum_neg_{}'.format(col)] = mk_move_cum(tmp_df['prev_max_move_{}'.format(col)],'neg')
        tmp_df['min_move_cum_pos_{}'.format(col)] = mk_move_cum(tmp_df['prev_min_move_{}'.format(col)],'pos')
        tmp_df['min_move_cum_neg_{}'.format(col)] = mk_move_cum(tmp_df['prev_min_move_{}'.format(col)],'neg')
        #WHAT WAS THE MOVE SINCE THE FIRST (N=0) MAX/MIN
        tmp_df['long_prev_max_move_pos_{}'.format(col)] = mk_long_prev_move_float(tmp_df['prev_max_move_{}'.format(col)],tmp_df['prev_max_{}'.format(col)],'pos')
        tmp_df['long_prev_max_move_neg_{}'.format(col)] = mk_long_prev_move_float(tmp_df['prev_max_move_{}'.format(col)],tmp_df['prev_max_{}'.format(col)],'neg')
        tmp_df['long_prev_min_move_pos_{}'.format(col)] = mk_long_prev_move_float(tmp_df['prev_min_move_{}'.format(col)],tmp_df['prev_min_{}'.format(col)],'pos')
        tmp_df['long_prev_min_move_neg_{}'.format(col)] = mk_long_prev_move_float(tmp_df['prev_min_move_{}'.format(col)],tmp_df['prev_min_{}'.format(col)],'neg')
        #WHAT WAS THE TIMEDELTA SINCE THE FIRST (N=0) MAX/MIN
        tmp_df['long_prev_max_move_pos_date_{}'.format(col)] = mk_long_prev_move_date(tmp_df['prev_max_move_{}'.format(col)],tmp_df['prev_max_{}_date'.format(col)],'pos')
        tmp_df['long_prev_max_move_neg_date_{}'.format(col)] = mk_long_prev_move_date(tmp_df['prev_max_move_{}'.format(col)],tmp_df['prev_max_{}_date'.format(col)],'neg')
        tmp_df['long_prev_min_move_pos_date_{}'.format(col)] = mk_long_prev_move_date(tmp_df['prev_min_move_{}'.format(col)],tmp_df['prev_min_{}_date'.format(col)],'pos')
        tmp_df['long_prev_min_move_neg_date_{}'.format(col)] = mk_long_prev_move_date(tmp_df['prev_min_move_{}'.format(col)],tmp_df['prev_min_{}_date'.format(col)],'neg')
        #WHAT IS THE GRADIENT SINCE THE FIRST (N=0) MAX/MIN
        tmp_df['long_max_grad_pos_{}'.format(col)] = tmp_df['long_prev_max_move_pos_{}'.format(col)] / tmp_df['long_prev_max_move_pos_date_{}'.format(col)]
        tmp_df['long_max_grad_pos_{}'.format(col)] = tmp_df['long_max_grad_pos_{}'.format(col)].fillna(0)
        tmp_df['long_max_grad_neg_{}'.format(col)] = tmp_df['long_prev_max_move_neg_{}'.format(col)] / tmp_df['long_prev_max_move_neg_date_{}'.format(col)]
        tmp_df['long_max_grad_neg_{}'.format(col)] = tmp_df['long_max_grad_neg_{}'.format(col)].fillna(0)
        tmp_df['long_min_grad_pos_{}'.format(col)] = tmp_df['long_prev_min_move_pos_{}'.format(col)] / tmp_df['long_prev_min_move_pos_date_{}'.format(col)]
        tmp_df['long_min_grad_pos_{}'.format(col)] = tmp_df['long_min_grad_pos_{}'.format(col)].fillna(0)
        tmp_df['long_min_grad_neg_{}'.format(col)] = tmp_df['long_prev_min_move_neg_{}'.format(col)] / tmp_df['long_prev_min_move_neg_date_{}'.format(col)]
        tmp_df['long_min_grad_neg_{}'.format(col)] = tmp_df['long_min_grad_neg_{}'.format(col)].fillna(0)
    
    #Calc the value changes and percentage changes of these movements
    def mk_cols_prev_max_min_change(tmp_df,col):
        tmp_df["max_change_{}".format(col)],tmp_df["max_per_change_{}".format(col)] = calc_changes(tmp_df[[col,"prev_max_{}".format(col)]].copy(),col,"prev_max_{}".format(col))
        tmp_df["min_change_{}".format(col)],tmp_df["min_per_change_{}".format(col)] = calc_changes(tmp_df[[col,"prev_min_{}".format(col)]].copy(),col,"prev_min_{}".format(col))
        
    #Mark date change since max and mins and convert to periods
    def mk_cols_prev_max_min_date_change(tmp_df,col,period:int=7):
        tmp_df["prev_max_{}_date_change".format(col)] = tmp_df["date"] - tmp_df["prev_max_{}_date".format(col)]
        tmp_df["prev_min_{}_date_change".format(col)] = tmp_df["date"] - tmp_df["prev_min_{}_date".format(col)]
        #Convert all to period changes
        tmp_df["prev_max_{}_date_change".format(col)] = [np.floor(x.days/period) for x in tmp_df["prev_max_{}_date_change".format(col)]]
        tmp_df["prev_min_{}_date_change".format(col)] = [np.floor(x.days/period) for x in tmp_df["prev_min_{}_date_change".format(col)]]
    
    #Convert the max min changes into pos and neg columns
    def mk_cols_prev_max_min_change_pos_neg(tmp_df,col):
        #Only do pos changes for mins and neg changes for max (as it can only go below the max and above the min)
        tmp_df["max_change_{}_pos_bool".format(col)],tmp_df["max_change_{}_pos_val".format(col)] = pos_neg_cols(tmp_df["max_change_{}".format(col)],gt_lt = "GT")
        tmp_df["max_change_{}_neg_bool".format(col)],tmp_df["max_change_{}_neg_val".format(col)] = pos_neg_cols(tmp_df["max_change_{}".format(col)],gt_lt = "LT")
        tmp_df["min_change_{}_pos_bool".format(col)],tmp_df["min_change_{}_pos_val".format(col)] = pos_neg_cols(tmp_df["min_change_{}".format(col)],gt_lt = "GT")
        tmp_df["min_change_{}_neg_bool".format(col)],tmp_df["min_change_{}_neg_val".format(col)] = pos_neg_cols(tmp_df["min_change_{}".format(col)],gt_lt = "LT")
        #Drop the old columns
        tmp_df.drop(columns=["max_change_{}".format(col),"min_change_{}".format(col)],inplace=True)

    #Run function for columns - high volatility
    for col in ['close','signal_line']:
        mk_cols_max_min(df_out,col,period_high_volatility)
        mk_cols_prev_max_min(df_out,col,period_high_volatility)
        mk_cols_prev_max_min_change(df_out,col) 
        mk_cols_prev_max_min_date_change(df_out,col,7)
        mk_cols_prev_max_min_change_pos_neg(df_out,col)
    #Run function for columns - low volatility
    for col in ['macd','ema26','macd_line']:
        mk_cols_max_min(df_out,col,period_low_volatility)
        mk_cols_prev_max_min(df_out,col,period_low_volatility)
        mk_cols_prev_max_min_change(df_out,col) 
        mk_cols_prev_max_min_date_change(df_out,col,7)
        mk_cols_prev_max_min_change_pos_neg(df_out,col)
    
    #Check for undefined
    df_out["no_signal"] = (df_out["buy"] == False) & (df_out["hold"] == False) & (df_out["sell"] == False)
    
    #Composite all singals into one column
    df_out["signal"] = None
    df_out.loc[df_out["buy"] == True,"signal"] = "buy"
    df_out.loc[df_out["hold"] == True,"signal"] = "hold"
    df_out.loc[df_out["sell"] == True,"signal"] = "sell"
    df_out.drop(columns=["buy","hold","sell"],inplace=True)
    print("SIGNAL COUNTS: \n{}".format(df_out["signal"].value_counts()))
    
    return df_out

In [32]:
#Define the columns for the output
out_cols = [
    #NON-NORMALISED COLS
    "ticker"
    ,"date"
    #NORMALISED COLS
    #Standard features
    ,"open"
    ,"close"
    ,"high"
    ,"low"
    ,"volume"
    ,"change_price"
    ,"per_change_price"
    #Shifted features
    ,"close_shift1"
    ,"change_close_shift1"
    ,"vol_shift1"
    ,"change_vol_shift1"
    ,"ema26_shift1"
    ,"change_ema26_shift1"
    #change to periodic max mins
    ,"close_13_norm"
    ,"close_26_norm"
    ,"close_52_norm"
    ,"macd_line_13_norm"
    ,"macd_line_26_norm"
    ,"macd_line_52_norm"
    #Pos neg features
    ,"macd_pos_val"
    ,"macd_neg_val"
    ,"signal_line_pos_val"
    ,"signal_line_neg_val"
    ,"change_price_pos_val"
    ,"change_price_neg_val"
    ,"per_change_price_pos_val"
    ,"per_change_price_neg_val"
]
#Append additional columns for key areas
for col in ['close','macd','ema26','signal_line','macd_line']:
    out_cols
    #Prev max/min features
    out_cols.append("prev_max_{}".format(col))
    out_cols.append("prev_min_{}".format(col))
    #date changes
    out_cols.append("prev_max_{}_date_change".format(col))
    out_cols.append("prev_min_{}_date_change".format(col))
    #Min max change features
    out_cols.append("max_change_{}_pos_val".format(col))
    out_cols.append("max_change_{}_neg_val".format(col))
    out_cols.append("min_change_{}_pos_val".format(col))
    out_cols.append("min_change_{}_neg_val".format(col))
    #prev max/mins (n-1) - compared to previous
    out_cols.append('prev_max_grad_{}'.format(col))
    out_cols.append('prev_min_grad_{}'.format(col))
    #prev max/mins (n=0) - compared to first in this run
    out_cols.append('max_move_cum_pos_{}'.format(col))
    out_cols.append('max_move_cum_neg_{}'.format(col))
    out_cols.append('min_move_cum_pos_{}'.format(col))
    out_cols.append('min_move_cum_neg_{}'.format(col))
    out_cols.append('long_prev_max_move_pos_date_{}'.format(col))
    out_cols.append('long_prev_max_move_neg_date_{}'.format(col))
    out_cols.append('long_prev_min_move_pos_date_{}'.format(col))
    out_cols.append('long_prev_min_move_neg_date_{}'.format(col))
    out_cols.append('long_max_grad_pos_{}'.format(col))
    out_cols.append('long_max_grad_neg_{}'.format(col))
    out_cols.append('long_min_grad_pos_{}'.format(col))
    out_cols.append('long_min_grad_neg_{}'.format(col))
#Append signal
out_cols.append("signal")

In [33]:
#Then loop the tickers and combine these into one large dataset
hf_store_name = path+r'\all_hist_prices_w_ft_eng2_TMP.h5'
h_store = pd.HDFStore(hf_store_name)
count = 0
errors = []
run_time = process_time()
for tick in tick_ftse["ticker"]:
    try:
        run_time.show_latest_lap_time()
        run_time.lap()
        count += 1
        print("\n{}".format(count))
        print("RUN FOR {}".format(tick))
        #Isolate this ticker
        this_tick_df = df_prices_w[df_prices_w["ticker"] == re.sub('[^a-zA-Z0-9\-]','',tick)].copy()
        print("shape before: {}".format(this_tick_df.shape))
        #Create the features
        this_tick_df = create_features(this_tick_df)
#         print(this_tick_df[out_cols])
        #Clarify col_lens with cur cols in data
        col_lens_tmp = {}
        for col in out_cols:
            if col in col_lens:
                col_lens_tmp[col] = col_lens[col]
        print("shape after: {}".format(this_tick_df.shape))
        #Append this data to the group
        try:
            this_tick_df[out_cols].to_hdf(hf_store_name,key='weekly_data',append=True,min_itemsize=col_lens_tmp)
            print('ADDED TO {}'.format(hf_store_name))
        except Exception as e:
            print('ERROR READING TO FILE {}'.format(e))
    except Exception as e:
        h_store.close()
        errors.append({"ticker":tick,"Error":e})
        print('ERROR PROCESSING DATA {}'.format(e))
h_store.close()
print('\n\n')
run_time.end()
print('\nERROR COUNT: {}'.format(len(errors)))
if len(errors) > 0:
    print('    ERRORS -> {}'.format(errors))


1
RUN FOR III
shape before: (613, 17)
SIGNAL COUNTS: 
hold    314
sell    165
buy     134
Name: signal, dtype: int64
shape after: (613, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 1 TIME -> 0:0:0

2
RUN FOR ABF
shape before: (1045, 17)
SIGNAL COUNTS: 
hold    584
sell    255
buy     206
Name: signal, dtype: int64
shape after: (1045, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 2 TIME -> 0:0:3

3
RUN FOR ADM
shape before: (353, 17)
SIGNAL COUNTS: 
hold    194
buy      99
sell     60
Name: signal, dtype: int64
shape after: (353, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 3 TIME -> 0:0:3

4
RUN FOR AAL
shape before: (1008, 17)
SIGNAL COUNTS: 
hold    455
sell    290
buy     263
Name: signal, dtype: int64
shape aft

ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 29 TIME -> 0:0:4

30
RUN FOR EXPN
shape before: (678, 17)
SIGNAL COUNTS: 
hold    376
sell    160
buy     142
Name: signal, dtype: int64
shape after: (678, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 30 TIME -> 0:0:3

31
RUN FOR FERG
shape before: (1046, 17)
SIGNAL COUNTS: 
hold    564
sell    266
buy     216
Name: signal, dtype: int64
shape after: (1046, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 31 TIME -> 0:0:3

32
RUN FOR FRES
shape before: (596, 17)
SIGNAL COUNTS: 
hold    281
sell    189
buy     126
Name: signal, dtype: int64
shape after: (596, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng

SIGNAL COUNTS: 
Series([], Name: signal, dtype: int64)
shape after: (0, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 58 TIME -> 0:0:4

59
RUN FOR NXT
shape before: (1523, 17)
SIGNAL COUNTS: 
hold    792
buy     390
sell    341
Name: signal, dtype: int64
shape after: (1523, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 59 TIME -> 0:0:1

60
RUN FOR NMC
shape before: (373, 17)
SIGNAL COUNTS: 
hold    177
buy     118
sell     78
Name: signal, dtype: int64
shape after: (373, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 60 TIME -> 0:0:4

61
RUN FOR OCDO
shape before: (462, 17)
SIGNAL COUNTS: 
hold    223
sell    120
buy     119
Name: signal, dtype: int64
shape after: (462, 253)
ADDED TO C:\Users\Robert\Documents\python_sc

ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 86 TIME -> 0:0:4

87
RUN FOR SPX
shape before: (1454, 17)
SIGNAL COUNTS: 
hold    797
buy     359
sell    298
Name: signal, dtype: int64
shape after: (1454, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 87 TIME -> 0:0:3

88
RUN FOR SSE
shape before: (1273, 17)
SIGNAL COUNTS: 
hold    778
sell    327
buy     168
Name: signal, dtype: int64
shape after: (1273, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 88 TIME -> 0:0:4

89
RUN FOR STJ
shape before: (1463, 17)
SIGNAL COUNTS: 
hold    714
sell    388
buy     361
Name: signal, dtype: int64
shape after: (1463, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_en

SIGNAL COUNTS: 
hold    50
sell    35
buy     15
Name: signal, dtype: int64
shape after: (100, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 115 TIME -> 0:0:4

116
RUN FOR BBY
shape before: (1045, 17)
SIGNAL COUNTS: 
hold    512
sell    307
buy     226
Name: signal, dtype: int64
shape after: (1045, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 116 TIME -> 0:0:4

117
RUN FOR BGEO
shape before: (621, 17)
SIGNAL COUNTS: 
hold    278
sell    172
buy     171
Name: signal, dtype: int64
shape after: (621, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 117 TIME -> 0:0:5

118
RUN FOR BNKR
shape before: (614, 17)
SIGNAL COUNTS: 
hold    442
sell    108
buy      64
Name: signal, dtype: int64
shape after: (614, 253)
ADDED TO C:\Us

SIGNAL COUNTS: 
hold    711
buy     285
sell    280
Name: signal, dtype: int64
shape after: (1276, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 143 TIME -> 0:0:5

144
RUN FOR COA
shape before: (1373, 17)
SIGNAL COUNTS: 
hold    787
sell    359
buy     227
Name: signal, dtype: int64
shape after: (1373, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 144 TIME -> 0:0:4

145
RUN FOR COB
shape before: (1256, 17)
SIGNAL COUNTS: 
hold    638
sell    340
buy     278
Name: signal, dtype: int64
shape after: (1256, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 145 TIME -> 0:0:5

146
RUN FOR CCC
shape before: (1098, 17)
SIGNAL COUNTS: 
hold    522
sell    311
buy     265
Name: signal, dtype: int64
shape after: (1098, 253)
ADDED TO

shape before: (1112, 17)
SIGNAL COUNTS: 
hold    550
sell    289
buy     273
Name: signal, dtype: int64
shape after: (1112, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 171 TIME -> 0:0:2

172
RUN FOR FCIT
shape before: (587, 17)
SIGNAL COUNTS: 
hold    434
sell     96
buy      57
Name: signal, dtype: int64
shape after: (587, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 172 TIME -> 0:0:3

173
RUN FOR FDM
shape before: (278, 17)
SIGNAL COUNTS: 
hold    138
sell     75
buy      65
Name: signal, dtype: int64
shape after: (278, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 173 TIME -> 0:0:2

174
RUN FOR FXPO
shape before: (618, 17)
SIGNAL COUNTS: 
hold    287
sell    171
buy     160
Name: signal, dtype: int64
shape after

SIGNAL COUNTS: 
hold    532
sell    308
buy     205
Name: signal, dtype: int64
shape after: (1045, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 199 TIME -> 0:0:3

200
RUN FOR HVPE
shape before: (395, 17)
SIGNAL COUNTS: 
hold    309
buy      62
sell     24
Name: signal, dtype: int64
shape after: (395, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 200 TIME -> 0:0:4

201
RUN FOR HSTG
shape before: (209, 17)
SIGNAL COUNTS: 
hold    113
sell     56
buy      40
Name: signal, dtype: int64
shape after: (209, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 201 TIME -> 0:0:3

202
RUN FOR HAS
shape before: (1026, 17)
SIGNAL COUNTS: 
hold    496
sell    294
buy     236
Name: signal, dtype: int64
shape after: (1026, 253)
ADDED TO C

SIGNAL COUNTS: 
hold    354
sell    136
buy     105
Name: signal, dtype: int64
shape after: (595, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 227 TIME -> 0:0:3

228
RUN FOR JEO
shape before: (586, 17)
SIGNAL COUNTS: 
hold    325
buy     138
sell    123
Name: signal, dtype: int64
shape after: (586, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 228 TIME -> 0:0:3

229
RUN FOR JUP
shape before: (468, 17)
SIGNAL COUNTS: 
hold    226
sell    124
buy     118
Name: signal, dtype: int64
shape after: (468, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 229 TIME -> 0:0:3

230
RUN FOR KNOS
shape before: (222, 17)
SIGNAL COUNTS: 
hold    103
buy      76
sell     43
Name: signal, dtype: int64
shape after: (222, 253)
ADDED TO C:\Us

SIGNAL COUNTS: 
hold    460
sell    236
buy     221
Name: signal, dtype: int64
shape after: (917, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 255 TIME -> 0:0:4

256
RUN FOR PIN
shape before: (585, 17)
SIGNAL COUNTS: 
hold    401
buy      95
sell     89
Name: signal, dtype: int64
shape after: (585, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 256 TIME -> 0:0:3

257
RUN FOR PAG
shape before: (1481, 17)
SIGNAL COUNTS: 
hold    744
sell    405
buy     332
Name: signal, dtype: int64
shape after: (1481, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 257 TIME -> 0:0:3

258
RUN FOR PAY
shape before: (767, 17)
SIGNAL COUNTS: 
hold    369
sell    219
buy     179
Name: signal, dtype: int64
shape after: (767, 253)
ADDED TO C:\U

SIGNAL COUNTS: 
sell    39
hold    39
buy     24
Name: signal, dtype: int64
shape after: (102, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 283 TIME -> 0:0:4

284
RUN FOR RCP
shape before: (74, 17)
SIGNAL COUNTS: 
hold    62
sell    12
Name: signal, dtype: int64
shape after: (74, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 284 TIME -> 0:0:3

285
RUN FOR RSE
shape before: (284, 17)
SIGNAL COUNTS: 
hold    150
sell     98
buy      36
Name: signal, dtype: int64
shape after: (284, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 285 TIME -> 0:0:2

286
RUN FOR ROR
shape before: (1026, 17)
SIGNAL COUNTS: 
hold    511
buy     260
sell    255
Name: signal, dtype: int64
shape after: (1026, 253)
ADDED TO C:\Users\Robert\Documen

SIGNAL COUNTS: 
hold    253
buy      43
sell     39
Name: signal, dtype: int64
shape after: (335, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 311 TIME -> 0:0:5

312
RUN FOR SYNT
shape before: (1454, 17)
SIGNAL COUNTS: 
hold    742
sell    393
buy     319
Name: signal, dtype: int64
shape after: (1454, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 312 TIME -> 0:0:4

313
RUN FOR TALK
shape before: (479, 17)
SIGNAL COUNTS: 
hold    219
sell    145
buy     115
Name: signal, dtype: int64
shape after: (479, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 313 TIME -> 0:0:6

314
RUN FOR TATE
shape before: (1260, 17)
SIGNAL COUNTS: 
hold    690
sell    307
buy     263
Name: signal, dtype: int64
shape after: (1260, 253)
ADDED TO

SIGNAL COUNTS: 
hold    102
buy      73
sell     66
Name: signal, dtype: int64
shape after: (241, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 339 TIME -> 0:0:4

340
RUN FOR WG.
shape before: (0, 17)
SIGNAL COUNTS: 
Series([], Name: signal, dtype: int64)
shape after: (0, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 340 TIME -> 0:0:4

341
RUN FOR WKP
shape before: (947, 17)
SIGNAL COUNTS: 
hold    469
buy     255
sell    223
Name: signal, dtype: int64
shape after: (947, 253)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 341 TIME -> 0:0:1

342
RUN FOR WWH
shape before: (596, 17)
SIGNAL COUNTS: 
hold    382
buy     119
sell     95
Name: signal, dtype: int64
shape after: (596, 253)
ADDED TO C:\Users\Robert\Documents\python_s

In [34]:
#close any open h5 files
tables.file._open_files.close_all()

In [35]:
tmp_df = pd.read_hdf(hf_store_name,key='weekly_data',mode='r')
print("")
print("FINAL HDFSTORE SIZE: {}".format(tmp_df.shape))
print("FINAL BUY COUNT: {}".format(len(tmp_df[tmp_df["signal"] == "buy"])))
print("FINAL SELL COUNT: {}".format(len(tmp_df[tmp_df["signal"] == "sell"])))
h_store.close()
tmp_df.head(50)
# tmp_df[(tmp_df["ticker"] == 'ADM') & (tmp_df["date"] > '2013-12-01') & (tmp_df["date"] < '2014-02-01')].head(200)


FINAL HDFSTORE SIZE: (259493, 140)
FINAL BUY COUNT: 56042
FINAL SELL COUNT: 65286


Unnamed: 0,ticker,date,open,close,high,low,volume,change_price,per_change_price,close_shift1,...,min_move_cum_neg_macd_line,long_prev_max_move_pos_date_macd_line,long_prev_max_move_neg_date_macd_line,long_prev_min_move_pos_date_macd_line,long_prev_min_move_neg_date_macd_line,long_max_grad_pos_macd_line,long_max_grad_neg_macd_line,long_min_grad_pos_macd_line,long_min_grad_neg_macd_line,signal
123991,III,2007-12-31,995.0,965.0,1023.0,964.0,4511565.0,-30.0,-0.030151,,...,1,,,,,0.0,0.0,0.0,0.0,sell
123992,III,2008-01-07,967.5,924.0,989.0,917.5,16056554.0,-43.5,-0.044961,965.0,...,2,,,,,0.0,0.0,0.0,0.0,sell
123993,III,2008-01-14,917.0,901.0,936.0,881.0,21691287.0,-16.0,-0.017448,924.0,...,3,,,,,0.0,0.0,0.0,0.0,hold
123994,III,2008-01-21,891.0,917.5,965.0,847.0,17850580.0,26.5,0.029742,901.0,...,4,,,,,0.0,0.0,0.0,0.0,hold
123995,III,2008-01-28,911.0,961.0,971.0,903.0,12079245.0,50.0,0.054885,917.5,...,5,,,,,0.0,0.0,0.0,0.0,sell
123996,III,2008-02-04,970.0,923.5,987.5,915.0,15446478.0,-46.5,-0.047938,961.0,...,6,,,,,0.0,0.0,0.0,0.0,sell
123997,III,2008-02-11,910.0,919.5,946.5,906.0,12183466.0,9.5,0.01044,923.5,...,7,,,,,0.0,0.0,0.0,0.0,sell
123998,III,2008-02-18,921.0,883.0,960.0,880.0,12692521.0,-38.0,-0.04126,919.5,...,8,,,,,0.0,0.0,0.0,0.0,sell
123999,III,2008-02-25,891.5,818.0,907.0,806.5,20641227.0,-73.5,-0.082445,883.0,...,9,,,,,0.0,0.0,0.0,0.0,hold
124000,III,2008-03-03,814.5,777.5,828.5,776.5,18670109.0,-37.0,-0.045427,818.0,...,10,,,,,0.0,0.0,0.0,0.0,hold


In [61]:
#close any open h5 files
tables.file._open_files.close_all()

Closing remaining open files:C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_d_TMP.h5...done


In [48]:
#Delete the old h5 file and rename the TMP
try:
    os.remove(path+r'\all_hist_prices_w_ft_eng2.h5')
    print('\nSUCCESSFULLY REMOVED {}'.format(path+r'\all_hist_prices_w_ft_eng2.h5'))
except Exception as e:
    print('\nERROR - REMOVING:{}'.format(e))
try:
    os.rename(path+r'\all_hist_prices_w_ft_eng2_TMP.h5',path+r'\all_hist_prices_w_ft_eng2.h5')
    print('\nSUCCESSFULLY RENAMED {} TO {}'.format(path+r'\all_hist_prices_w_ft_eng2_TMP.h5',path+r'\all_hist_prices_w_ft_eng2.h5'))
except Exception as e:
    print('\nERROR - RENAMING:{}'.format(e))


SUCCESSFULLY REMOVED C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2.h5

SUCCESSFULLY RENAMED C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5 TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2.h5


In [38]:
#Remove 'date' from out_cols
out_cols.remove('date')

In [39]:
#Export a list of the features for this model
file_object = open(path+r'\feature_engineering_feature_list.txt','w')
feature_str = ''
for i in out_cols:
    feature_str += '{},'.format(i)
feature_str = feature_str[:-1]
file_object.write(feature_str)
file_object.close()
feature_str

'ticker,open,close,high,low,volume,change_price,per_change_price,close_shift1,change_close_shift1,vol_shift1,change_vol_shift1,ema26_shift1,change_ema26_shift1,close_13_norm,close_26_norm,close_52_norm,macd_line_13_norm,macd_line_26_norm,macd_line_52_norm,macd_pos_val,macd_neg_val,signal_line_pos_val,signal_line_neg_val,change_price_pos_val,change_price_neg_val,per_change_price_pos_val,per_change_price_neg_val,prev_max_close,prev_min_close,prev_max_close_date_change,prev_min_close_date_change,max_change_close_pos_val,max_change_close_neg_val,min_change_close_pos_val,min_change_close_neg_val,prev_max_grad_close,prev_min_grad_close,max_move_cum_pos_close,max_move_cum_neg_close,min_move_cum_pos_close,min_move_cum_neg_close,long_prev_max_move_pos_date_close,long_prev_max_move_neg_date_close,long_prev_min_move_pos_date_close,long_prev_min_move_neg_date_close,long_max_grad_pos_close,long_max_grad_neg_close,long_min_grad_pos_close,long_min_grad_neg_close,prev_max_macd,prev_min_macd,prev_max_m