# Create indicators and find signals
This code is designed to calculate the below indicators:
- EMA
- MACD
- MACD mins and maxs
- MACD positive turns
- Support price
- Target price
- Prev week volume Vs 8 week average volume

We will also bring in if the target price was hit within 8 weeks for ML training and validation.

Each share has it's own trading pattern so we will train a different model for each share but use the same set of features, just the hyper paramenters will be tuned differenty.

In [1]:
#Import libraries
import pandas as pd
import numpy as np
import re
import tables
import os
import datetime as dt

In [2]:
#Clean column names
def clean_col_name(str_in):
    str_out = str_in.lower() #Change all to lower case
    str_out = re.sub(r'.*\.','',str_out) #Remove everything before a .
    str_out = re.sub(r' ','_',str_out) #Replace spaces with _
    str_out = re.sub(r'_?&_?','_and_',str_out) #Put _ around & and @ and change to 'and' and 'at'
    str_out = re.sub(r'_?@_?','_at_',str_out) #Put _ around & and @ and change to 'and' and 'at'
    str_out = re.sub(r'[^0-9a-z_]','',str_out) #Remove all non allowed characters
    str_out = re.sub(r'^_','',str_out) #Remove leading _
    return str_out

In [3]:
class process_time:
    def __init__(self,name:str = ''):
        self.st_time = dt.datetime.now()
        self.lap_li = []
        self.en_time = None
        self.name = name
    def calc_el_time(self,st_time,en_time):
        diff_time = en_time - st_time
        duration_in_s = diff_time.total_seconds()
        hours = int(divmod(duration_in_s, 3600)[0])
        duration_in_s += -(hours * 3600)
        minutes = int(divmod(duration_in_s, 60)[0])
        duration_in_s += -(minutes * 60)
        seconds = int(duration_in_s)
        return [hours,minutes,seconds]
    def lap(self):
        self.lap_li.append(dt.datetime.now())
    def end(self):
        self.en_time = dt.datetime.now()
        lap_time = self.calc_el_time(self.st_time,self.en_time)
        if self.name != '':
            print('TOTAL ELAPSED TIME OF {} -> {}:{}:{}'.format(self.name,lap_time[0],lap_time[1],lap_time[2]))
        else:
            print('TOTAL ELAPSED TIME -> {}:{}:{}'.format(lap_time[0],lap_time[1],lap_time[2]))
    def show_lap_times(self):
        tmp_count = 0
        for lap in self.lap_li:
            tmp_count += 1
            lap_time = self.calc_el_time(self.st_time,lap)
            print('LAP {} TIME -> {}:{}:{}'.format(tmp_count,lap_time[0],lap_time[1],lap_time[2]))
    def show_latest_lap_time(self):
        if len(self.lap_li) == 0:
            return
        elif len(self.lap_li) < 2:
            lap_time = self.calc_el_time(self.st_time,self.lap_li[-1])
        else:
            lap_time = self.calc_el_time(self.lap_li[-2],self.lap_li[-1])
        print('LAP {} TIME -> {}:{}:{}'.format(len(self.lap_li),lap_time[0],lap_time[1],lap_time[2]))

In [4]:
#Import the ftse list
path = "C:\\Users\\Robert\\Documents\\python_scripts\\stock_trading_ml_modelling\\historical_prices\\"
tick_ftse = pd.read_csv(path + "tick_ftse.csv")
tick_ftse = tick_ftse.iloc[:,1:]
for col in tick_ftse:
    tick_ftse.rename(columns={col:clean_col_name(col)},inplace=True)
tick_ftse.head()

Unnamed: 0,company,ticker,index
0,3I,III,FTSE100
1,ADMIRAL GROUP,ADM,FTSE100
2,ANGLO AMERICAN PLC,AAL,FTSE100
3,ANTOFAGASTA,ANTO,FTSE100
4,ASHTEAD GROUP,AHT,FTSE100


In [5]:
#Import and combine prices files
# df_prices_w = pd.read_csv(path + tick_ftse["ticker"][1] + "_hist_prices_w.csv")
df_prices_w = pd.read_hdf(path + "all_hist_prices_w.h5")
#Rename columns
for col in df_prices_w:
    df_prices_w.rename(columns={col:clean_col_name(col)},inplace=True)
#Drop unwanted columns
try:
    df_prices_w.drop(columns=["unnamed_0","index"],inplace=True)
except Exception as e:
    print(e)
#Reformat columns where neccessary
df_prices_w["date"] = df_prices_w["date"].astype("datetime64")
print(df_prices_w.shape)
print(df_prices_w.dtypes)
df_prices_w.head()

"['unnamed_0' 'index'] not found in axis"
(257341, 13)
ticker               object
date         datetime64[ns]
high                float64
low                 float64
volume              float64
open                float64
close               float64
change              float64
ema12               float64
ema26               float64
macd_line           float64
signal              float64
macd                float64
dtype: object


Unnamed: 0,ticker,date,high,low,volume,open,close,change,ema12,ema26,macd_line,signal,macd
0,III,2007-12-31,1023.0,964.0,4511565.0,995.0,965.0,-30.0,,,,,
1,III,2008-01-07,989.0,917.5,16056554.0,967.5,924.0,-43.5,,,,,
2,III,2008-01-14,936.0,881.0,21691287.0,917.0,901.0,-16.0,,,,,
3,III,2008-01-21,965.0,847.0,17850580.0,891.0,917.5,26.5,,,,,
4,III,2008-01-28,971.0,903.0,12079245.0,911.0,961.0,50.0,,,,,


In [6]:
#Get in-row price change
def calc_changes(df_in,var_col,prev_col):
    df_in["change"] = df_in[var_col] - df_in[prev_col]
    df_in["per_change"] = df_in["change"] / df_in[prev_col]
    return (df_in["change"],df_in["per_change"])

#Relabel signal to signal_line
df_prices_w.rename(columns={"signal":"signal_line"},inplace=True)
for col in df_prices_w:
    df_prices_w.rename(columns={col:col.lower()},inplace=True)
    
df_prices_w["change_price"],df_prices_w["per_change_price"] = calc_changes(df_prices_w[["close","open"]].copy(),"close","open")
df_prices_w.drop(columns=["change"],inplace=True)
df_prices_w.head()

Unnamed: 0,ticker,date,high,low,volume,open,close,ema12,ema26,macd_line,signal_line,macd,change_price,per_change_price
0,III,2007-12-31,1023.0,964.0,4511565.0,995.0,965.0,,,,,,-30.0,-0.030151
1,III,2008-01-07,989.0,917.5,16056554.0,967.5,924.0,,,,,,-43.5,-0.044961
2,III,2008-01-14,936.0,881.0,21691287.0,917.0,901.0,,,,,,-16.0,-0.017448
3,III,2008-01-21,965.0,847.0,17850580.0,891.0,917.5,,,,,,26.5,0.029742
4,III,2008-01-28,971.0,903.0,12079245.0,911.0,961.0,,,,,,50.0,0.054885


In [7]:
#Remove tickers with fewer than 34 entries as this is where the MACD can be calculated
print('START ROW COUNT -> {}'.format(df_prices_w.shape[0]))
print('START TICK COUNT -> {}'.format(tick_ftse.shape[0]))
for tick in tick_ftse.ticker:
    if df_prices_w[df_prices_w.ticker == tick].shape[0] < 34:
        #Remove from dataframe
        print('\nTOO FEW RECORDS FOR {}'.format(tick))
        df_prices_w = df_prices_w.loc[df_prices_w.ticker != tick]
        print('    NEW ROW COUNT -> {}'.format(df_prices_w.shape[0]))
        #Remove from tick_ftse
        tick_ftse = tick_ftse.loc[tick_ftse.ticker != tick]
        print('    NEW TICK COUNT -> {}'.format(tick_ftse.shape[0]))
print('\nEND ROW COUNT -> {}'.format(df_prices_w.shape[0]))
print('END TICK COUNT -> {}'.format(tick_ftse.shape[0]))

START ROW COUNT -> 257341
START TICK COUNT -> 350

TOO FEW RECORDS FOR FLTR
    NEW ROW COUNT -> 257329
    NEW TICK COUNT -> 349

TOO FEW RECORDS FOR AGT
    NEW ROW COUNT -> 257320
    NEW TICK COUNT -> 348

TOO FEW RECORDS FOR NETW
    NEW ROW COUNT -> 257300
    NEW TICK COUNT -> 347

END ROW COUNT -> 257300
END TICK COUNT -> 347


# Create additional features, buy signals, and sell signals
I am classifying a stock worth buying if it meets all the below criteria:
- The target price (previous max) is hit within x (set as variable target_price_period) periods proceeding this period
- The target price is an increase of x% (set as variable min_gain) over the proceeding day's open price
- There is a drop in the closing price of less x% (set as variable max_drop) between this period and the x periods proceeding this period

I am classifying the sell signals as:
- The close price dips below the target price (previous max) x (set as variable target_price_period) periods proceeding this period
- There is an increase in the closing price of less x% (set as variable max_drop) over the next x periods

In [8]:
#Define the variables
target_price_period = 12
period_high_volatility = 3
period_low_volatility = 1
min_gain = 0.1
max_drop = -0.05

In [9]:
#Programming note
#df.shift(1) looks 1 period into the past
#df.shift(-1) looks 1 period into the future

In [10]:
#Mark minimums and maximums
def flag_mins(s_in,period):
    s_out = 0
    for i in range(1,period):
        s_out += (s_in > s_in.shift(i)) & (s_in.shift(-i).isnull() == False) #Is this value > the previous value
        s_out += (s_in > s_in.shift(-i)) | (s_in.shift(-i).isnull()) #Is this value > the next value OR is it the last available value (IE the next value is null)
        s_out += s_in.isnull()
#     s_out[((s_in.index == 0) & (s_in.isnull() == False)) | ((s_in.shift(1).isnull()) & (s_in.isnull() == False))] = 0 #set the first non-NaN result to 0
    s_out = s_out == 0
    return s_out
def flag_maxs(s_in,period):
    s_out = 0
    for i in range(1,period):
        s_out += (s_in < s_in.shift(i)) & (s_in.shift(-i).isnull() == False) #Is this value < the previous value (if the previous value is not null)
        s_out += (s_in < s_in.shift(-i)) | (s_in.shift(-i).isnull()) #Is this value < the next value OR is it the last available value (IE the next value is null)
        s_out += s_in.isnull()
#     s_out[((s_in.index == 0) & (s_in.isnull() == False)) | ((s_in.shift(1).isnull()) & (s_in.isnull() == False))] = 0 #set the first non-NaN result to 0
    s_out = s_out == 0
    return s_out

In [11]:
#Calc vol as proportion of previous n-rows
def calc_prop_of_prev(s_in,periods:int = 4):
    s_cum = s_in.copy()
    for i in range(1,periods):
        s_cum += s_in.shift(i)
    return s_in / s_cum

In [12]:
#Mark points of macd positive entry
def pos_entry(s_in):
    return (s_in > s_in.shift(1)) & (s_in > 0) & (s_in.shift(1) < 0)
def neg_entry(s_in):
    return (s_in < s_in.shift(1)) & (s_in < 0) & (s_in.shift(1) > 0)

In [13]:
#Check if the target price is hit within the target_price_period
def target_hit_check(x,var_s,target_s,periods:int=12):
    check_s = False
    for i in range((x+1),(x+periods+1)):
        if (i >= 0) and (i < len(target_s)) and (target_s[x] <= var_s[i]):
            check_s = True
            break
    return check_s

In [14]:
def max_drop_check_b(ref_s,var_s,periods:int=12):
    lim_s = ref_s*(1+max_drop)
    check_s = lim_s == lim_s #Create an all true series
    for i in range(-(periods+1),-1):
        tmp_check_s = var_s.shift(i) >= lim_s #Treu if price is >= limit
        check_s = check_s & tmp_check_s
    return check_s

In [15]:
#Create a function for finding buy signals
def get_buys(df_in):
    tmp_df = df_in.copy()
    
    #Calc the target price
    tmp_df["target_price"] = tmp_df["open"].shift(-1) * (1+min_gain)
    
    #Check if the target price is hit within the target_price_period
    tmp_df["buy_target_check"] = [target_hit_check(x,tmp_df["close"],tmp_df["target_price"],target_price_period) for x in range(0,len(tmp_df))]
#     print("BUY_TARGET_CHECK COUNT:"+str(len(tmp_df[tmp_df["buy_target_check"]])))
    
    #min_gain check
    tmp_df["gain"] = tmp_df["prev_max_close"] - tmp_df["open"].shift(-1) #open price for the next period
    tmp_df["per_gain"] = tmp_df["gain"] / tmp_df["open"].shift(-1) #open price for the next period
    tmp_df["buy_gain_check"] = tmp_df["per_gain"] >= min_gain
    tmp_df.drop(columns=["gain","per_gain"],inplace=True)
#     tmp_df["buy_gain_check"] = True
#     print("BUY_GAIN_CHECK COUNT:"+str(len(tmp_df[tmp_df["buy_gain_check"]])))
    
    #Check if the sell price is hit within the target_price_period
#     tmp_df["buy_drop_check"] = max_drop_check_b(tmp_df["open"].shift(-1),tmp_df["close"],target_price_period)
    tmp_df["buy_drop_check"] = True
#     print("BUY_DROP_CHECK COUNT:"+str(len(tmp_df[tmp_df["buy_drop_check"]])))
    
    #Find the buy signals
    tmp_df["buy"] = tmp_df["buy_target_check"] & tmp_df["buy_gain_check"] & tmp_df["buy_drop_check"]
    print("BUY SIGNALS: "+str(len(tmp_df[tmp_df["buy"]])))
    
    #Join back into main table
    s_out = tmp_df["buy"]
    
    return s_out

In [16]:
#Perform if the target is crossed again
def target_cross_check(x,var_s,target_s,periods:int=12):
    check_s = False
    for i in range((x+1),(x+periods+1)):
        if (i >= 0) and (i < len(target_s)) and (target_s[x] >= var_s[i]):
            check_s = True
            break
    return check_s

In [17]:
def max_drop_check_s(ref_s,var_s,periods:int=12):
    lim_s = ref_s*(1+max_drop)
    check_s = lim_s != lim_s #Start as False and then if it drops below the threshold turn it to True
    for i in range(-(periods+1),-1):
        tmp_check_s = var_s.shift(i) <= lim_s #If price drops below the threshold price then turn to True
        check_s = tmp_check_s | check_s
    return check_s

In [18]:
#Function for finding sell signals
def get_sells(df_in):
    tmp_df = df_in.copy()
    
    #Perform if the target is crossed again
    tmp_df["sell_target_check"] = [target_cross_check(x,tmp_df["close"],tmp_df["prev_min_close"],target_price_period) for x in range(0,len(tmp_df))]
#     print("SELL_TARGET_CHECK COUNT:"+str(len(tmp_df[tmp_df["sell_target_check"]])))
    
    #Check if the target price is hit within the target_price_period
    tmp_df["sell_drop_check"] = max_drop_check_s(tmp_df["open"].shift(-1),tmp_df["close"],target_price_period)
#     print("SELL_DROP_CHECK COUNT:"+str(len(tmp_df[tmp_df["sell_drop_check"]])))
    
    #Find the sell signals
    tmp_df["sell"] = tmp_df["sell_target_check"] & tmp_df["sell_drop_check"]
    print("SELL SIGNALS: "+str(len(tmp_df[tmp_df["sell"]])))
    
    #Join back into main table
    s_out = tmp_df["sell"]
    
    return s_out

In [19]:
#Function to normalise current price compared to another
def norm_s(s_in,max_in,min_in):
    s_out = (s_in - min_in) / (max_in - min_in)
    s_out.fillna(1.0,inplace=True)
    return s_out

In [20]:
#Create a function which normalises a feature based only on the values which have come before it - avoids time series bias
def norm_time_s(_index,_s_in):
    _min = np.nanmin(_s_in[:_index+1].values)
    _max = np.nanmax(_s_in[:_index+1].values)
    _norm_val = (_s_in[_index] - _min) / (_max - _min)
    return _norm_val

In [21]:
#Function to find last max and mins
def prev_value(df_in,var_col,bool_col):
    df_in["prev_val"] = df_in.loc[df_in[bool_col],var_col]
    df_in["prev_val"] = df_in["prev_val"].fillna(method='ffill')
    df_in["prev_marker_date"] = df_in.loc[df_in[bool_col],"date"]
    df_in["prev_marker_date"] = df_in["prev_marker_date"].fillna(method='ffill')
    return (df_in["prev_val"],df_in["prev_marker_date"])

In [22]:
#Create separate columns for pos and neg values - allows for normalisation
def pos_neg_cols(s_in,gt_lt = "GT"):
    if gt_lt.upper() == "GT":
        bool_s = s_in >= 0
    elif gt_lt.upper() == "LT":
        bool_s = s_in <= 0
    df_out = s_in.to_frame()
    df_out["s_in"] = s_in
    df_out["val"] = abs(s_in[bool_s])
    val_s = df_out["val"].fillna(0,method=None)
    return (bool_s,val_s)

In [23]:
#Function for calculating the max and mins within a period
def max_min_period(s_in,periods:int=4):
    #Copy into max and min
    max_s = s_in.copy()
    min_s = s_in.copy()
    #Go through periods and mark where it is a new max or min
    for i in range(1,(periods+1)):
        #Calc max
        tmp_check_s = s_in.shift(i) >= max_s
        max_s[tmp_check_s] = s_in.shift(i)
        #Calc min
        tmp_check_s = s_in.shift(i) <= min_s
        min_s[tmp_check_s] = s_in.shift(i)
    #Normalise the series
    s_out = norm_s(s_in,max_s,min_s)
    return s_out

In [24]:
#Create a single function to run each stock through feature creation
def create_features(df_in):
    df_out = df_in.copy()
    
    #Calc vol as proportion of previous n-rows
    df_out["prop_vol"] = calc_prop_of_prev(df_out["volume"].copy().astype("float"),6)

    #Get period-period changes
    df_out["close_shift1"] = df_out["close"].shift(1)
    df_out["change_close_shift1"],df_out["per_change_close_shift1"] = calc_changes(df_out[["close","close_shift1"]].copy(),"close","close_shift1")

    df_out["vol_shift1"] = df_out["volume"].shift(1)
    df_out["change_vol_shift1"],df_out["per_change_vol_shift1"] = calc_changes(df_out[["volume","vol_shift1"]].copy(),"volume","vol_shift1")

    df_out["macd_shift1"] = df_out["macd"].shift(1)
    df_out["change_macd_shift1"],df_out["per_change_macd_shift1"] = calc_changes(df_out[["macd","vol_shift1"]].copy(),"macd","vol_shift1")

    df_out["ema26_shift1"] = df_out["ema26"].shift(1)
    df_out["change_ema26_shift1"],df_out["per_change_ema26_shift1"] = calc_changes(df_out[["ema26","ema26_shift1"]].copy(),"ema26","ema26_shift1")
    
    #Compare close to the max/mins within 13, 26 and 52 periods
    df_out["close_13_norm"] = max_min_period(df_out["close"],13)
    df_out["close_26_norm"] = max_min_period(df_out["close"],26)
    df_out["close_52_norm"] = max_min_period(df_out["close"],52)
    
    #Compare macd to the max/mins within 13, 26 and 52 periods
    df_out["macd_line_13_norm"] = max_min_period(df_out["macd_line"],13)
    df_out["macd_line_26_norm"] = max_min_period(df_out["macd_line"],26)
    df_out["macd_line_52_norm"] = max_min_period(df_out["macd_line"],52)
        
    #Mark points of macd positive entry
    df_out["macd_pos_ent"] = pos_entry(df_out["macd"])
    df_out["macd_neg_ent"] = neg_entry(df_out["macd"])
    
    #Create separate columns for pos and neg values - allows for normalisation
    df_out["macd_pos_bool"],df_out["macd_pos_val"] = pos_neg_cols(df_out["macd"],gt_lt = "GT")
    df_out["macd_neg_bool"],df_out["macd_neg_val"] = pos_neg_cols(df_out["macd"],gt_lt = "LT")
    df_out["signal_line_pos_bool"],df_out["signal_line_pos_val"] = pos_neg_cols(df_out["signal_line"],gt_lt = "GT")
    df_out["signal_line_neg_bool"],df_out["signal_line_neg_val"] = pos_neg_cols(df_out["signal_line"],gt_lt = "LT")
    df_out["change_price_pos_bool"],df_out["change_price_pos_val"] = pos_neg_cols(df_out["change_price"],gt_lt = "GT")
    df_out["change_price_neg_bool"],df_out["change_price_neg_val"] = pos_neg_cols(df_out["change_price"],gt_lt = "LT")
    df_out["per_change_price_pos_bool"],df_out["per_change_price_pos_val"] = pos_neg_cols(df_out["per_change_price"],gt_lt = "GT")
    df_out["per_change_price_neg_bool"],df_out["per_change_price_neg_val"] = pos_neg_cols(df_out["per_change_price"],gt_lt = "LT")

    #Create max min columns
    def mk_cols_max_min(tmp_df,col,period:int=4):
        tmp_df["{}_min".format(col)] = flag_mins(tmp_df[col],period)
        tmp_df["{}_max".format(col)] = flag_maxs(tmp_df[col],period)
        
    #Find last max and mins
    def mk_cols_prev_max_min(tmp_df,col,period:int=4):
        tmp_df["prev_max_{}".format(col)],tmp_df["prev_max_{}_date".format(col)] = prev_value(tmp_df[["date",col,"{}_max".format(col)]].copy(),col,"{}_max".format(col))
        tmp_df["prev_min_{}".format(col)],tmp_df["prev_min_{}_date".format(col)] = prev_value(tmp_df[["date",col,"{}_min".format(col)]].copy(),col,"{}_min".format(col))
        #Drop the old columns
        tmp_df.drop(columns=["{}_min".format(col),"{}_max".format(col)],inplace=True)
        #Shift the max min columns by n periods to not leak future information
        tmp_df["prev_max_{}".format(col)] = tmp_df["prev_max_{}".format(col)].shift(period)
        tmp_df["prev_min_{}".format(col)] = tmp_df["prev_min_{}".format(col)].shift(period)
        tmp_df["prev_max_{}_date".format(col)] = tmp_df["prev_max_{}_date".format(col)].shift(period)
        tmp_df["prev_min_{}_date".format(col)] = tmp_df["prev_min_{}_date".format(col)].shift(period)
    
    #Calc the value changes and percentage changes of these movements
    def mk_cols_prev_max_min_change(tmp_df,col):
        tmp_df["max_change_{}".format(col)],tmp_df["max_per_change_{}".format(col)] = calc_changes(tmp_df[[col,"prev_max_{}".format(col)]].copy(),col,"prev_max_{}".format(col))
        tmp_df["min_change_{}".format(col)],tmp_df["min_per_change_{}".format(col)] = calc_changes(tmp_df[[col,"prev_min_{}".format(col)]].copy(),col,"prev_min_{}".format(col))
     
        
    #Mark date change since max and mins and convert to periods
    def mk_cols_prev_max_min_date_change(tmp_df,col,period:int=7):
        tmp_df["prev_max_{}_date_change".format(col)] = tmp_df["date"] - tmp_df["prev_max_{}_date".format(col)]
        tmp_df["prev_min_{}_date_change".format(col)] = tmp_df["date"] - tmp_df["prev_min_{}_date".format(col)]
        #Convert all to period changes
        tmp_df["prev_max_{}_date_change".format(col)] = [np.floor(x.days/period) for x in tmp_df["prev_max_{}_date_change".format(col)]]
        tmp_df["prev_min_{}_date_change".format(col)] = [np.floor(x.days/period) for x in tmp_df["prev_min_{}_date_change".format(col)]]
    
    #Convert the max min changes into pos and neg columns
    def mk_cols_prev_max_min_change_pos_neg(tmp_df,col):
        #Only do pos changes for mins and neg changes for max (as it can only go below the max and above the min)
        tmp_df["max_change_{}_pos_bool".format(col)],tmp_df["max_change_{}_pos_val".format(col)] = pos_neg_cols(tmp_df["max_change_{}".format(col)],gt_lt = "GT")
        tmp_df["max_change_{}_neg_bool".format(col)],tmp_df["max_change_{}_neg_val".format(col)] = pos_neg_cols(tmp_df["max_change_{}".format(col)],gt_lt = "LT")
        tmp_df["min_change_{}_pos_bool".format(col)],tmp_df["min_change_{}_pos_val".format(col)] = pos_neg_cols(tmp_df["min_change_{}".format(col)],gt_lt = "GT")
        tmp_df["min_change_{}_neg_bool".format(col)],tmp_df["min_change_{}_neg_val".format(col)] = pos_neg_cols(tmp_df["min_change_{}".format(col)],gt_lt = "LT")
        #Drop the old columns
        tmp_df.drop(columns=["max_change_{}".format(col),"min_change_{}".format(col)],inplace=True)

    #Run function for columns - high volatility
    for col in ['close','signal_line']:
        mk_cols_max_min(df_out,col,period_high_volatility)
        mk_cols_prev_max_min(df_out,col,period_high_volatility)
        mk_cols_prev_max_min_change(df_out,col) 
        mk_cols_prev_max_min_date_change(df_out,col,7)
        mk_cols_prev_max_min_change_pos_neg(df_out,col)
    #Run function for columns - low volatility
    for col in ['macd','ema26','macd_line']:
        mk_cols_max_min(df_out,col,period_low_volatility)
        mk_cols_prev_max_min(df_out,col,period_low_volatility)
        mk_cols_prev_max_min_change(df_out,col) 
        mk_cols_prev_max_min_date_change(df_out,col,7)
        mk_cols_prev_max_min_change_pos_neg(df_out,col)
    
    #Get buy signals
    df_out["buy"] = get_buys(df_out[["date","open","close","prev_max_close"]])
    
    #Get buy signals
    df_out["sell"] = get_sells(df_out[["date","open","close","prev_min_close"]])
    
    #Get hold signals
    df_out["hold"] = (df_out["buy"] == False) & (df_out["sell"] == False)
    
    #Check for undefined
    df_out["no_signal"] = (df_out["buy"] == False) & (df_out["hold"] == False) & (df_out["sell"] == False)
    
    #Composite all singals into one column
    df_out["signal"] = None
    df_out.loc[df_out["buy"] == True,"signal"] = "buy"
    df_out.loc[df_out["hold"] == True,"signal"] = "hold"
    df_out.loc[df_out["sell"] == True,"signal"] = "sell"
    df_out.drop(columns=["buy","hold","sell"],inplace=True)
    print("SIGNAL COUNTS: {}".format(df_out["signal"].value_counts()))
    
    #Normalise the columns which need it
    norm_cols = [
        #Standard features
        "open"
        ,"close"
        ,"high"
        ,"low"
        ,"volume"
        ,"change_price"
        ,"per_change_price"
        #Proportional features
        ,"prop_vol"
        #Shifted features
        ,"close_shift1"
        ,"change_close_shift1"
        ,"vol_shift1"
        ,"change_vol_shift1"
        ,"ema26_shift1"
        ,"change_ema26_shift1"
        #Pos neg features
        ,"macd_pos_val"
        ,"macd_neg_val"
        ,"signal_line_pos_val"
        ,"signal_line_neg_val"
        ,"change_price_pos_val"
        ,"change_price_neg_val"
        ,"per_change_price_pos_val"
        ,"per_change_price_neg_val"
    ]
    #Append additional columns for key areas
    for col in ['close','macd','ema26','signal_line','macd_line']:
        #Prev max/min features
        norm_cols.append("prev_max_{}".format(col))
        norm_cols.append("prev_min_{}".format(col))
        #Min max change features
        norm_cols.append("max_change_{}_pos_val".format(col))
        norm_cols.append("max_change_{}_neg_val".format(col))
        norm_cols.append("min_change_{}_pos_val".format(col))
        norm_cols.append("min_change_{}_neg_val".format(col))
    #Reset the index
    df_out.reset_index(inplace=True,drop=True)
    #Normalise
    for col in norm_cols:
        tmp_s = df_out[col].copy() #Take a copy so as the values are changed this does not affect following calculations
        df_out[col] = [norm_time_s(x,tmp_s) for x in df_out.index]
    
    return df_out

In [25]:
#Define the columns for the output
out_cols = [
    #NON-NORMALISED COLS
    "ticker"
    ,"date"
    #NORMALISED COLS
    #Standard features
    ,"open"
    ,"close"
    ,"high"
    ,"low"
    ,"volume"
    ,"change_price"
    ,"per_change_price"
    #Shifted features
    ,"close_shift1"
    ,"change_close_shift1"
    ,"vol_shift1"
    ,"change_vol_shift1"
    ,"ema26_shift1"
    ,"change_ema26_shift1"
    #change to periodic max mins
    ,"close_13_norm"
    ,"close_26_norm"
    ,"close_52_norm"
    ,"macd_line_13_norm"
    ,"macd_line_26_norm"
    ,"macd_line_52_norm"
    #Pos neg features
    ,"macd_pos_val"
    ,"macd_neg_val"
    ,"signal_line_pos_val"
    ,"signal_line_neg_val"
    ,"change_price_pos_val"
    ,"change_price_neg_val"
    ,"per_change_price_pos_val"
    ,"per_change_price_neg_val"
]
#Append additional columns for key areas
for col in ['close','macd','ema26','signal_line','macd_line']:
    out_cols
    #Prev max/min features
#     out_cols.append("prev_max_{}".format(col))
#     out_cols.append("prev_min_{}".format(col))
    #date changes
    out_cols.append("prev_max_{}_date_change".format(col))
    out_cols.append("prev_min_{}_date_change".format(col))
    #Min max change features
    out_cols.append("max_change_{}_pos_val".format(col))
    out_cols.append("max_change_{}_neg_val".format(col))
    out_cols.append("min_change_{}_pos_val".format(col))
    out_cols.append("min_change_{}_neg_val".format(col))
#Append signal
out_cols.append("signal")

In [26]:
#Create a dictionary of max character lengths of fields for use later in h5 file appending
def get_col_lens(_df_in):
    _col_lens = {}
    for c in _df_in:
        _tmp_s = pd.Series([len(str(x)) for x in _df_in[c]])
        _col_lens[c] = _tmp_s.max()
    return _col_lens
col_lens = get_col_lens(df_prices_w)
col_lens

{'ticker': 4,
 'date': 19,
 'high': 7,
 'low': 7,
 'volume': 12,
 'open': 7,
 'close': 7,
 'ema12': 18,
 'ema26': 18,
 'macd_line': 23,
 'signal_line': 23,
 'macd': 23,
 'change_price': 21,
 'per_change_price': 23}

In [27]:
#Then loop the tickers and combine these into one large dataset
hf_store_name = path+'all_hist_prices_w_ft_eng2_TMP.h5'
h_store = pd.HDFStore(hf_store_name)
count = 0
errors = []
run_time = process_time()
for tick in tick_ftse["ticker"]:
    try:
        run_time.show_latest_lap_time()
        run_time.lap()
        count += 1
        print("\n{}".format(count))
        print("RUN FOR {}".format(tick))
        #Isolate this ticker
        this_tick_df = df_prices_w[df_prices_w["ticker"] == re.sub('[^a-zA-Z0-9\-]','',tick)].copy()
        print("shape before: {}".format(this_tick_df.shape))
        #Get the features and buy,sell signals
        this_tick_df = create_features(this_tick_df)
#         print(this_tick_df[out_cols])
        #Clarify col_lens with cur cols in data
        col_lens_tmp = {}
        for col in out_cols:
            if col in col_lens:
                col_lens_tmp[col] = col_lens[col]
        print("shape after: {}".format(this_tick_df.shape))
        #Append this data to the group
        try:
            this_tick_df[out_cols].to_hdf(hf_store_name,key='weekly_data',append=True,min_itemsize=col_lens_tmp)
            print('ADDED TO {}'.format(hf_store_name))
        except Exception as e:
            print('ERROR READING TO FILE {}'.format(e))
    except Exception as e:
        h_store.close()
        errors.append({"ticker":tick,"Error":e})
        print('ERROR PROCESSING DATA {}'.format(e))
# h_store.close()
print('\n\n')
run_time.end()
print('\nERROR COUNT: {}'.format(len(errors)))
if len(errors) > 0:
    print('    ERRORS -> {}'.format(errors))


1
RUN FOR III
shape before: (608, 14)
BUY SIGNALS: 75
SELL SIGNALS: 216
SIGNAL COUNTS: hold    342
sell    216
buy      50
Name: signal, dtype: int64


  """
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


shape after: (608, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 1 TIME -> 0:0:0

2
RUN FOR ADM
shape before: (365, 14)
BUY SIGNALS: 23
SELL SIGNALS: 70
SIGNAL COUNTS: hold    279
sell     70
buy      16
Name: signal, dtype: int64
shape after: (365, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 2 TIME -> 0:0:11

3
RUN FOR AAL
shape before: (1009, 14)
BUY SIGNALS: 149
SELL SIGNALS: 466
SIGNAL COUNTS: sell    466
hold    448
buy      95
Name: signal, dtype: int64
shape after: (1009, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 3 TIME -> 0:0:7

4
RUN FOR ANTO
shape before: (1047, 14)
BUY SIGNALS: 185
SELL SIGNALS: 383
SIGNAL COUNTS: hold    529
sell    383
buy     135
Name: signal, dtype: int64
shape after: (1047, 133)


ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 26 TIME -> 0:0:18

27
RUN FOR CRDA
shape before: (1039, 14)
BUY SIGNALS: 86
SELL SIGNALS: 299
SIGNAL COUNTS: hold    674
sell    299
buy      66
Name: signal, dtype: int64
shape after: (1039, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 27 TIME -> 0:0:17

28
RUN FOR DCC
shape before: (1021, 14)
BUY SIGNALS: 102
SELL SIGNALS: 332
SIGNAL COUNTS: hold    607
sell    332
buy      82
Name: signal, dtype: int64
shape after: (1021, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 28 TIME -> 0:0:18

29
RUN FOR DGE
shape before: (1047, 14)
BUY SIGNALS: 63
SELL SIGNALS: 274
SIGNAL COUNTS: hold    724
sell    274
buy      49
Name: signal, dtype: int64
shape after: (1047, 133)
ADDED TO C:\Us

BUY SIGNALS: 118
SELL SIGNALS: 403
SIGNAL COUNTS: hold    560
sell    403
buy      84
Name: signal, dtype: int64
shape after: (1047, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 52 TIME -> 0:0:21

53
RUN FOR LLOY
shape before: (1047, 14)
BUY SIGNALS: 130
SELL SIGNALS: 481
SIGNAL COUNTS: sell    481
hold    474
buy      92
Name: signal, dtype: int64
shape after: (1047, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 53 TIME -> 0:0:18

54
RUN FOR LSE
shape before: (945, 14)
BUY SIGNALS: 96
SELL SIGNALS: 275
SIGNAL COUNTS: hold    606
sell    275
buy      64
Name: signal, dtype: int64
shape after: (945, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 54 TIME -> 0:0:17

55
RUN FOR MKS
shape before: (1047, 14)
BUY SIGNALS: 10

BUY SIGNALS: 111
SELL SIGNALS: 417
SIGNAL COUNTS: hold    564
sell    417
buy      66
Name: signal, dtype: int64
shape after: (1047, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 77 TIME -> 0:0:17

78
RUN FOR SBRY
shape before: (1047, 14)
BUY SIGNALS: 101
SELL SIGNALS: 443
SIGNAL COUNTS: hold    527
sell    443
buy      77
Name: signal, dtype: int64
shape after: (1047, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 78 TIME -> 0:0:17

79
RUN FOR SDR
shape before: (1047, 14)
BUY SIGNALS: 151
SELL SIGNALS: 415
SIGNAL COUNTS: hold    519
sell    415
buy     113
Name: signal, dtype: int64
shape after: (1047, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 79 TIME -> 0:0:17

80
RUN FOR SMT
shape before: (134, 14)
BUY SIGNALS: 

BUY SIGNALS: 20
SELL SIGNALS: 168
SIGNAL COUNTS: hold    419
sell    168
buy      15
Name: signal, dtype: int64
shape after: (602, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 102 TIME -> 0:0:17

103
RUN FOR ACA
shape before: (493, 14)
BUY SIGNALS: 132
SELL SIGNALS: 260
SIGNAL COUNTS: sell    260
hold    153
buy      80
Name: signal, dtype: int64
shape after: (493, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 103 TIME -> 0:0:11

104
RUN FOR AGK
shape before: (1047, 14)
BUY SIGNALS: 161
SELL SIGNALS: 410
SIGNAL COUNTS: hold    519
sell    410
buy     118
Name: signal, dtype: int64
shape after: (1047, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 104 TIME -> 0:0:9

105
RUN FOR ATST
shape before: (608, 14)
BUY SIGNALS:

shape after: (272, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 127 TIME -> 0:0:17

128
RUN FOR BRSC
shape before: (153, 14)
BUY SIGNALS: 7
SELL SIGNALS: 17
SIGNAL COUNTS: hold    129
sell     17
buy       7
Name: signal, dtype: int64
shape after: (153, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 128 TIME -> 0:0:6

129
RUN FOR BGSC
shape before: (608, 14)
BUY SIGNALS: 27
SELL SIGNALS: 142
SIGNAL COUNTS: hold    449
sell    142
buy      17
Name: signal, dtype: int64
shape after: (608, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 129 TIME -> 0:0:4

130
RUN FOR BOY
shape before: (1047, 14)
BUY SIGNALS: 184
SELL SIGNALS: 430
SIGNAL COUNTS: hold    506
sell    430
buy     111
Name: signal, dtype: int64
shape after: (10

shape after: (1047, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 152 TIME -> 0:0:11

153
RUN FOR CRST
shape before: (341, 14)
BUY SIGNALS: 42
SELL SIGNALS: 155
SIGNAL COUNTS: sell    155
hold    153
buy      33
Name: signal, dtype: int64
shape after: (341, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 153 TIME -> 0:0:21

154
RUN FOR CYBG
shape before: (186, 14)
BUY SIGNALS: 16
SELL SIGNALS: 76
SIGNAL COUNTS: hold    98
sell    76
buy     12
Name: signal, dtype: int64
shape after: (186, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 154 TIME -> 0:0:9

155
RUN FOR DJAN
shape before: (1045, 14)
BUY SIGNALS: 70
SELL SIGNALS: 375
SIGNAL COUNTS: hold    619
sell    375
buy      51
Name: signal, dtype: int64
shape after: (10

shape after: (602, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 177 TIME -> 0:0:8

178
RUN FOR FSV
shape before: (602, 14)
BUY SIGNALS: 38
SELL SIGNALS: 171
SIGNAL COUNTS: hold    405
sell    171
buy      26
Name: signal, dtype: int64
shape after: (602, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 178 TIME -> 0:0:10

179
RUN FOR FGT
shape before: (602, 14)
BUY SIGNALS: 12
SELL SIGNALS: 133
SIGNAL COUNTS: hold    461
sell    133
buy       8
Name: signal, dtype: int64
shape after: (602, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 179 TIME -> 0:0:10

180
RUN FOR FGP
shape before: (1047, 14)
BUY SIGNALS: 159
SELL SIGNALS: 440
SIGNAL COUNTS: hold    489
sell    440
buy     118
Name: signal, dtype: int64
shape after: (1

shape after: (202, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 202 TIME -> 0:0:7

203
RUN FOR HAS
shape before: (1047, 14)
BUY SIGNALS: 129
SELL SIGNALS: 438
SIGNAL COUNTS: hold    522
sell    438
buy      87
Name: signal, dtype: int64
shape after: (1047, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 203 TIME -> 0:0:4

204
RUN FOR HRI
shape before: (602, 14)
BUY SIGNALS: 12
SELL SIGNALS: 154
SIGNAL COUNTS: hold    437
sell    154
buy      11
Name: signal, dtype: int64
shape after: (602, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 204 TIME -> 0:0:16

205
RUN FOR HGT
shape before: (603, 14)
BUY SIGNALS: 16
SELL SIGNALS: 142
SIGNAL COUNTS: hold    446
sell    142
buy      15
Name: signal, dtype: int64
shape after: (6

shape after: (608, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 227 TIME -> 0:0:5

228
RUN FOR JAM
shape before: (602, 14)
BUY SIGNALS: 26
SELL SIGNALS: 112
SIGNAL COUNTS: hold    467
sell    112
buy      23
Name: signal, dtype: int64
shape after: (602, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 228 TIME -> 0:0:10

229
RUN FOR JMG
shape before: (602, 14)
BUY SIGNALS: 31
SELL SIGNALS: 156
SIGNAL COUNTS: hold    419
sell    156
buy      27
Name: signal, dtype: int64
shape after: (602, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 229 TIME -> 0:0:11

230
RUN FOR JFJ
shape before: (608, 14)
BUY SIGNALS: 40
SELL SIGNALS: 158
SIGNAL COUNTS: hold    414
sell    158
buy      36
Name: signal, dtype: int64
shape after: (608

shape after: (602, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 252 TIME -> 0:0:21

253
RUN FOR NEX
shape before: (1047, 14)
BUY SIGNALS: 126
SELL SIGNALS: 374
SIGNAL COUNTS: hold    577
sell    374
buy      96
Name: signal, dtype: int64
shape after: (1047, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 253 TIME -> 0:0:13

254
RUN FOR NBLS
shape before: (431, 14)
BUY SIGNALS: 0
SELL SIGNALS: 37
SIGNAL COUNTS: hold    394
sell     37
Name: signal, dtype: int64
shape after: (431, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 254 TIME -> 0:0:21

255
RUN FOR NRR
shape before: (477, 14)
BUY SIGNALS: 10
SELL SIGNALS: 190
SIGNAL COUNTS: hold    277
sell    190
buy      10
Name: signal, dtype: int64
shape after: (477, 133)
AD

ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 277 TIME -> 0:0:17

278
RUN FOR PRTC
shape before: (219, 14)
BUY SIGNALS: 23
SELL SIGNALS: 82
SIGNAL COUNTS: hold    125
sell     82
buy      12
Name: signal, dtype: int64
shape after: (219, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 278 TIME -> 0:0:20

279
RUN FOR PZC
shape before: (1031, 14)
BUY SIGNALS: 84
SELL SIGNALS: 367
SIGNAL COUNTS: hold    594
sell    367
buy      70
Name: signal, dtype: int64
shape after: (1031, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 279 TIME -> 0:0:6

280
RUN FOR QQ
shape before: (707, 14)
BUY SIGNALS: 34
SELL SIGNALS: 258
SIGNAL COUNTS: hold    425
sell    258
buy      24
Name: signal, dtype: int64
shape after: (707, 133)
ADDED TO C:\User

BUY SIGNALS: 162
SELL SIGNALS: 450
SIGNAL COUNTS: hold    482
sell    450
buy     109
Name: signal, dtype: int64
shape after: (1041, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 303 TIME -> 0:0:17

304
RUN FOR SXX
shape before: (672, 14)
BUY SIGNALS: 179
SELL SIGNALS: 378
SIGNAL COUNTS: sell    378
hold    213
buy      81
Name: signal, dtype: int64
shape after: (672, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 304 TIME -> 0:0:17

305
RUN FOR SSON
shape before: (39, 14)
BUY SIGNALS: 0
SELL SIGNALS: 1
SIGNAL COUNTS: hold    38
sell     1
Name: signal, dtype: int64
shape after: (39, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 305 TIME -> 0:0:12

306
RUN FOR SCT
shape before: (198, 14)
BUY SIGNALS: 19
SELL SIGNALS: 4

SELL SIGNALS: 449
SIGNAL COUNTS: hold    452
sell    449
buy     138
Name: signal, dtype: int64
shape after: (1039, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 328 TIME -> 0:0:6

329
RUN FOR UDG
shape before: (1026, 14)
BUY SIGNALS: 99
SELL SIGNALS: 333
SIGNAL COUNTS: hold    614
sell    333
buy      79
Name: signal, dtype: int64
shape after: (1026, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 329 TIME -> 0:0:20

330
RUN FOR UKCM
shape before: (603, 14)
BUY SIGNALS: 11
SELL SIGNALS: 159
SIGNAL COUNTS: hold    435
sell    159
buy       9
Name: signal, dtype: int64
shape after: (603, 133)
ADDED TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5
LAP 330 TIME -> 0:0:20

331
RUN FOR ULE
shape before: (1043, 14)
BUY SIGNALS: 67
SELL SIGNALS

In [28]:
#close any open h5 files
tables.file._open_files.close_all()

Closing remaining open files:C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5...done


In [29]:
tmp_df = pd.read_hdf(hf_store_name,key='weekly_data',mode='r')
print("")
print("FINAL HDFSTORE SIZE: {}".format(tmp_df.shape))
print("FINAL BUY COUNT: {}".format(len(tmp_df[tmp_df["signal"] == "buy"])))
print("FINAL SELL COUNT: {}".format(len(tmp_df[tmp_df["signal"] == "sell"])))
h_store.close()
tmp_df.head(50)
# tmp_df[(tmp_df["ticker"] == 'ADM') & (tmp_df["date"] > '2013-12-01') & (tmp_df["date"] < '2014-02-01')].head(200)


FINAL HDFSTORE SIZE: (257300, 60)
FINAL BUY COUNT: 20322
FINAL SELL COUNT: 90868


Unnamed: 0,ticker,date,open,close,high,low,volume,change_price,per_change_price,close_shift1,...,max_change_signal_line_neg_val,min_change_signal_line_pos_val,min_change_signal_line_neg_val,prev_max_macd_line_date_change,prev_min_macd_line_date_change,max_change_macd_line_pos_val,max_change_macd_line_neg_val,min_change_macd_line_pos_val,min_change_macd_line_neg_val,signal
0,III,2007-12-31,,,,,,,,,...,,,,,,,,,,hold
1,III,2008-01-07,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,...,,,,1.0,1.0,,,,,hold
2,III,2008-01-14,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,,,,1.0,1.0,,,,,hold
3,III,2008-01-21,0.0,0.257812,0.333333,0.0,0.77644,1.0,1.0,0.0,...,,,,1.0,1.0,,,,,hold
4,III,2008-01-28,0.192308,0.9375,0.402299,0.478632,0.440501,1.0,1.0,0.257812,...,,,,1.0,1.0,,,,,hold
5,III,2008-02-04,0.759615,0.351562,0.591954,0.581197,0.636501,0.0,0.0,0.9375,...,,,,1.0,1.0,,,,,sell
6,III,2008-02-11,0.182692,0.289062,0.12069,0.504274,0.446567,0.580311,0.56775,0.351562,...,,,,1.0,1.0,,,,,sell
7,III,2008-02-18,0.288462,0.0,0.275862,0.282051,0.476198,0.088083,0.064953,0.289062,...,,,,1.0,1.0,,,,,sell
8,III,2008-02-25,0.004808,0.0,0.0,0.0,0.938878,0.0,0.0,0.0,...,,,,1.0,1.0,,,,,sell
9,III,2008-03-03,0.0,0.0,0.0,0.0,0.824143,0.295547,0.26956,0.0,...,,,,1.0,1.0,,,,,buy


In [30]:
#close any open h5 files
tables.file._open_files.close_all()

In [31]:
#Delete the old h5 file and rename the TMP
try:
    os.remove(path+'all_hist_prices_w_ft_eng2.h5')
    print('\nSUCCESSFULLY REMOVED {}'.format(path+'all_hist_prices_w_ft_eng2.h5'))
except Exception as e:
    print('\nERROR - REMOVING:{}'.format(e))
try:
    os.rename(path+'all_hist_prices_w_ft_eng2_TMP.h5',path+'all_hist_prices_w_ft_eng2.h5')
    print('\nSUCCESSFULLY RENAMED {} TO {}'.format(path+'all_hist_prices_w_ft_eng2_TMP.h5',path+'all_hist_prices_w_ft_eng2.h5'))
except Exception as e:
    print('\nERROR - RENAMING:{}'.format(e))


SUCCESSFULLY REMOVED C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2.h5

SUCCESSFULLY RENAMED C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2_TMP.h5 TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\all_hist_prices_w_ft_eng2.h5


In [32]:
#Export a list of the features for this model
file_object = open(path+r'random_forest_model_feature_list.txt','w')
feature_str = ''
for i in out_cols:
    feature_str += '{},'.format(i)
feature_str = feature_str[:-1]
file_object.write(feature_str)
file_object.close()
feature_str

'ticker,date,open,close,high,low,volume,change_price,per_change_price,close_shift1,change_close_shift1,vol_shift1,change_vol_shift1,ema26_shift1,change_ema26_shift1,close_13_norm,close_26_norm,close_52_norm,macd_line_13_norm,macd_line_26_norm,macd_line_52_norm,macd_pos_val,macd_neg_val,signal_line_pos_val,signal_line_neg_val,change_price_pos_val,change_price_neg_val,per_change_price_pos_val,per_change_price_neg_val,prev_max_close_date_change,prev_min_close_date_change,max_change_close_pos_val,max_change_close_neg_val,min_change_close_pos_val,min_change_close_neg_val,prev_max_macd_date_change,prev_min_macd_date_change,max_change_macd_pos_val,max_change_macd_neg_val,min_change_macd_pos_val,min_change_macd_neg_val,prev_max_ema26_date_change,prev_min_ema26_date_change,max_change_ema26_pos_val,max_change_ema26_neg_val,min_change_ema26_pos_val,min_change_ema26_neg_val,prev_max_signal_line_date_change,prev_min_signal_line_date_change,max_change_signal_line_pos_val,max_change_signal_line_neg_v