# Create indicators and find signals
This code is designed to calculate the below indicators:
- EMA
- MACD
- MACD mins and maxs
- MACD positive turns
- Support price
- Target price
- Prev week volume Vs 8 week average volume

We will also record whether the target price was hit within 8 weeks, for use in ML training and validation.

Each share has its own trading pattern, so we will train a different model for each share but use the same set of features; only the hyperparameters will be tuned differently.

In [1]:
#Import libraries
import pandas as pd
import numpy as np
import re

In [2]:
#Load the FTSE ticker list from the historical prices folder
path = "C:\\Users\\Robert\\Documents\\python_scripts\\stock_trading_ml_modelling\\historical_prices\\"
#The first column of the csv is discarded; every remaining column is kept
tick_ftse = pd.read_csv(path + "tick_ftse.csv").iloc[:,1:]
tick_ftse.head()

Unnamed: 0,Company,Ticker
0,3I,III
1,ADMIRAL GROUP,ADM
2,ANGLO AMERICAN PLC,AAL
3,ANTOFAGASTA,ANTO
4,ASHTEAD GROUP,AHT


In [3]:
#Import and combine prices files
# df_prices_w = pd.read_csv(path + tick_ftse["Ticker"][1] + "_hist_prices_w.csv")
df_prices_w = pd.read_hdf(path + "all_hist_prices_w.h5")
#Drop unwanted columns
df_prices_w.drop(columns=["Unnamed: 0","Index"],inplace=True)
#Reformat columns where necessary
#The unit is given explicitly: pandas >= 2.0 refuses the unit-less "datetime64" dtype
df_prices_w["Date"] = df_prices_w["Date"].astype("datetime64[ns]")
print(df_prices_w.shape)
print(df_prices_w.dtypes)
df_prices_w.head()

(256364, 13)
Ticker               object
Date         datetime64[ns]
High                float64
Low                 float64
Volume              float64
Open                float64
Close               float64
Change              float64
EMA12               float64
EMA26               float64
MACD_line           float64
Signal              float64
MACD                float64
dtype: object


Unnamed: 0,Ticker,Date,High,Low,Volume,Open,Close,Change,EMA12,EMA26,MACD_line,Signal,MACD
0,3IN,2007-12-31,149.89,147.07,1373801.0,149.18,147.43,-1.75,,,,,
1,3IN,2008-01-07,149.54,147.07,2345191.0,147.07,148.83,1.76,,,,,
2,3IN,2008-01-14,150.59,147.43,2150049.0,148.48,149.18,0.7,,,,,
3,3IN,2008-01-21,154.82,145.32,3070968.0,147.78,152.0,4.22,,,,,
4,3IN,2008-01-28,154.82,148.83,2510972.0,149.89,152.0,2.11,,,,,


In [4]:
#Get in-row price change
def calc_changes(df_in,var_col,prev_col):
    """Return the absolute and relative change of var_col against prev_col.

    Two columns, "change" and "per_change", are written onto df_in itself
    (so callers pass a copy when they do not want their frame modified).

    Returns a tuple (change, per_change) where
    change = var_col - prev_col and per_change = change / prev_col.
    """
    df_in["change"] = df_in[var_col].sub(df_in[prev_col])
    df_in["per_change"] = df_in["change"].div(df_in[prev_col])
    change_s = df_in["change"]
    per_change_s = df_in["per_change"]
    return (change_s,per_change_s)

#Relabel signal to signal_line
df_prices_w = df_prices_w.rename(columns={"Signal":"signal_line"})

#In-row price move: Close against Open of the same row
open_close_df = df_prices_w[["Close","Open"]].copy()
price_change_s,price_per_change_s = calc_changes(open_close_df,"Close","Open")
df_prices_w["change_price"] = price_change_s
df_prices_w["per_change_price"] = price_per_change_s
#The imported "Change" column is no longer needed
df_prices_w = df_prices_w.drop(columns=["Change"])
df_prices_w.head()

Unnamed: 0,Ticker,Date,High,Low,Volume,Open,Close,EMA12,EMA26,MACD_line,signal_line,MACD,change_price,per_change_price
0,3IN,2007-12-31,149.89,147.07,1373801.0,149.18,147.43,,,,,,-1.75,-0.011731
1,3IN,2008-01-07,149.54,147.07,2345191.0,147.07,148.83,,,,,,1.76,0.011967
2,3IN,2008-01-14,150.59,147.43,2150049.0,148.48,149.18,,,,,,0.7,0.004714
3,3IN,2008-01-21,154.82,145.32,3070968.0,147.78,152.0,,,,,,4.22,0.028556
4,3IN,2008-01-28,154.82,148.83,2510972.0,149.89,152.0,,,,,,2.11,0.014077


# Create additional features, buy signals, and sell signals
I am classifying a stock as worth buying if it meets all the criteria below:
- The target price (previous max) is hit within x (set as variable target_price_period) periods following this period
- The target price is an increase of x% (set as variable min_gain) over the following period's open price
- There is a drop in the closing price of less than x% (set as variable max_drop) between this period and the x periods following this period

I am classifying the sell signals as:
- The close price dips below the target price (previous max) within x (set as variable target_price_period) periods following this period
- There is an increase in the closing price of less than x% (set as variable max_drop) over the next x periods

In [5]:
#Define the variables
#Number of future rows scanned by the target and drop checks in get_buys / get_sells
target_price_period = 12
#Passed as "period" to flag_mins / flag_maxs in create_features
min_max_period = 4
#Not referenced by any code visible in this notebook section
major_min_max_period = 26
#Fractional gain over the next row's Open used to build the buy target price
min_gain = 0.1
#Fractional move (negative = fall) applied to the reference price in the max_drop_check_* functions
max_drop = -0.05

In [6]:
#Programming note
#df.shift(1) looks 1 period into the past
#df.shift(-1) looks 1 period into the future

In [7]:
#Mark minimums and maximums
def flag_mins(s_in,period):
    """Flag rows that are a local minimum of s_in.

    A row is flagged True when, for every offset 1..period-1 in both
    directions, the neighbouring value exists and is not smaller than the
    row's own value, and the row itself is not null.

    Because a missing neighbour disqualifies a row, the first and last
    period-1 rows are never flagged (flag_maxs does not apply this rule).
    With period <= 1 no neighbour is inspected and the scalar True is returned.
    """
    violations = 0
    own_null = s_in.isnull()
    for offset in range(1,period):
        #Same test looking back (shift(offset)) and forward (shift(-offset))
        for neighbour in (s_in.shift(offset),s_in.shift(-offset)):
            violations = violations + ((s_in > neighbour) | neighbour.isnull())
        violations = violations + own_null
    return violations == 0
def flag_maxs(s_in,period):
    """Flag rows that are a local maximum of s_in.

    A row is flagged True when no value at offsets 1..period-1 in either
    direction is larger than the row's own value, and the row itself is
    not null. A missing neighbour never disqualifies a row, so rows at the
    start and end of the series can be flagged (unlike flag_mins).
    With period <= 1 no neighbour is inspected and the scalar True is returned.
    """
    violations = 0
    own_null = s_in.isnull()
    for offset in range(1,period):
        #Same test looking back (shift(offset)) and forward (shift(-offset))
        for neighbour in (s_in.shift(offset),s_in.shift(-offset)):
            violations = violations + (s_in < neighbour)
        violations = violations + own_null
    return violations == 0

In [8]:
#Calc vol as proportion of previous n-rows
def calc_prop_of_prev(s_in,periods = 4):
    """Return each value as a share of the sum of the last `periods` rows.

    The sum covers the current row plus the periods-1 rows before it, so
    the first periods-1 results are NaN (not enough history).
    """
    window_total = s_in.copy()
    for lag in range(1,periods):
        window_total = window_total + s_in.shift(lag)
    return s_in / window_total

In [9]:
#Mark points of MACD positive entry
def pos_entry(s_in):
    """Flag rows where s_in is above zero and the previous row was below zero."""
    previous = s_in.shift(1)
    was_negative = previous < 0
    now_positive = s_in > 0
    rising = s_in > previous
    return rising & now_positive & was_negative
def neg_entry(s_in):
    """Flag rows where s_in is below zero and the previous row was above zero."""
    previous = s_in.shift(1)
    was_positive = previous > 0
    now_negative = s_in < 0
    falling = s_in < previous
    return falling & now_negative & was_positive

In [10]:
#Check if the target price is hit within the target_price_period
def target_hit_check(x,var_s,target_s,periods):
    """Return True if the target at row x is reached within the next `periods` rows.

    x        -- row position (0-based) of the reference row
    var_s    -- Series of observed values (e.g. closing prices)
    target_s -- Series of target values, same length as var_s
    periods  -- number of following rows to inspect (x+1 .. x+periods)

    All look-ups are positional (.iloc), so the result does not depend on the
    index labels of the Series. Rows past the end are ignored, and a NaN
    target is never considered hit.
    """
    target = target_s.iloc[x]
    first = max(x + 1,0)
    last = min(x + periods + 1,len(target_s))
    if first >= last:
        #No following rows available to inspect
        return False
    return bool((var_s.iloc[first:last] >= target).any())

In [11]:
def max_drop_check_b(ref_s,var_s,periods):
    """Return True where var_s stays at or above the drop limit in future rows.

    The limit is ref_s * (1 + max_drop), using the module-level max_drop.
    The rows inspected are 2 .. periods+1 steps ahead (the row immediately
    after the current one is not inspected). A row is False when its limit
    is NaN or when any inspected future value is missing.
    """
    floor_s = ref_s * (1 + max_drop)
    #Self-comparison: True everywhere except where the limit is NaN
    held_s = floor_s == floor_s
    for steps_ahead in range(2,periods + 2):
        future_s = var_s.shift(-steps_ahead)
        held_s = held_s & (future_s >= floor_s) #True if price is >= limit
    return held_s

In [12]:
#Create a function for finding buy signals
def get_buys(df_in):
    """Return a boolean Series marking the rows classed as buy signals.

    df_in must contain "Open" and "Close". The target price of a row is the
    next row's Open increased by min_gain; the row is a buy when a Close
    reaches that target within target_price_period following rows
    (see target_hit_check). The gain and drop checks are currently disabled
    (forced to True), so the target check alone decides the signal.

    The number of buy signals found is printed.
    """
    work_df = df_in.copy()

    #Target price: next period's open plus the minimum gain
    next_open_s = work_df["Open"].shift(-1)
    work_df["target_price"] = next_open_s * (1+min_gain)

    #Check row by row if the target price is hit within the target_price_period
    close_s = work_df["Close"]
    target_s = work_df["target_price"]
    hit_flags = []
    for row_pos in range(0,len(work_df)):
        hit_flags.append(target_hit_check(row_pos,close_s,target_s,target_price_period))
    work_df["buy_target_check"] = hit_flags

    #min_gain check and max_drop check are switched off: every row passes
    work_df["buy_gain_check"] = True
    work_df["buy_drop_check"] = True

    #A buy needs every check to pass
    all_checks_s = work_df["buy_target_check"] & work_df["buy_gain_check"] & work_df["buy_drop_check"]
    work_df["buy"] = all_checks_s
    print("BUY SIGNALS: "+str(len(work_df[work_df["buy"]])))

    return work_df["buy"]

In [13]:
#Perform if the target is crossed again
def target_cross_check(x,var_s,target_s,periods):
    """Return True if var_s falls to or below the target at row x within the next `periods` rows.

    x        -- row position (0-based) of the reference row
    var_s    -- Series of observed values (e.g. closing prices)
    target_s -- Series of target values, same length as var_s
    periods  -- number of following rows to inspect (x+1 .. x+periods)

    All look-ups are positional (.iloc), so the result does not depend on the
    index labels of the Series. Rows past the end are ignored, and a NaN
    target is never considered crossed.
    """
    target = target_s.iloc[x]
    first = max(x + 1,0)
    last = min(x + periods + 1,len(target_s))
    if first >= last:
        #No following rows available to inspect
        return False
    return bool((var_s.iloc[first:last] <= target).any())

In [14]:
def max_drop_check_s(ref_s,var_s,periods):
    """Return True where var_s falls to or below the drop limit in future rows.

    The limit is ref_s * (1 + max_drop), using the module-level max_drop.
    The rows inspected are 2 .. periods+1 steps ahead (shifts -(periods+1)
    to -2). Rows whose limit is NaN are False: without a reference price no
    drop can be established.
    """
    lim_s = ref_s*(1+max_drop)
    #Start as False everywhere. A self-comparison (lim_s != lim_s) would be
    #True wherever the limit is NaN, wrongly flagging those rows as dropped
    check_s = pd.Series(False,index=lim_s.index)
    for i in range(-(periods+1),-1):
        tmp_check_s = var_s.shift(i) <= lim_s #If price drops below the threshold price then turn to True
        check_s = tmp_check_s | check_s
    return check_s

In [15]:
#Function for finding sell signals
def get_sells(df_in):
    """Return a boolean Series marking the rows classed as sell signals.

    df_in must contain "Open", "Close" and "prev_min_close". A row is a sell
    when both hold:
    - a Close at or below the row's prev_min_close occurs within
      target_price_period following rows (see target_cross_check)
    - a future Close reaches the drop limit derived from the next row's Open
      (see max_drop_check_s)

    The number of sell signals found is printed.
    """
    work_df = df_in.copy()

    #Check row by row if the close falls back to the previous minimum close
    close_s = work_df["Close"]
    prev_min_s = work_df["prev_min_close"]
    cross_flags = []
    for row_pos in range(0,len(work_df)):
        cross_flags.append(target_cross_check(row_pos,close_s,prev_min_s,target_price_period))
    work_df["sell_target_check"] = cross_flags

    #Check if the close drops to the limit set from next period's open
    next_open_s = work_df["Open"].shift(-1)
    work_df["sell_drop_check"] = max_drop_check_s(next_open_s,work_df["Close"],target_price_period)

    #A sell needs both checks to pass
    work_df["sell"] = work_df["sell_target_check"] & work_df["sell_drop_check"]
    print("SELL SIGNALS: "+str(len(work_df[work_df["sell"]])))

    return work_df["sell"]

In [16]:
#Function to normalise current price compared to another
def norm_s(s_in,max_in,min_in):
    """Scale s_in to its position between min_in (0.0) and max_in (1.0).

    Any result that comes out null (for example 0/0 when max_in equals
    min_in, or a null input) is replaced with 1.0.
    """
    offset_s = s_in - min_in
    span_s = max_in - min_in
    return (offset_s / span_s).fillna(1.0)

In [17]:
#Function to find last max and mins
def prev_value(df_in,var_col,bool_col):
    """Carry forward the value and date of the most recent flagged row.

    df_in    -- DataFrame containing "Date", var_col and bool_col; two helper
                columns ("prev_val", "prev_marker_date") are written onto it,
                so callers pass a copy
    var_col  -- column whose value is carried forward
    bool_col -- boolean column marking the rows to carry forward from

    Returns a tuple (prev_val, prev_marker_date). Rows before the first
    flagged row are null in both Series.
    """
    marker_mask = df_in[bool_col]
    #Keep the value on flagged rows only, then fill forward.
    #.ffill() replaces fillna(method='ffill'), which newer pandas deprecates
    df_in["prev_val"] = df_in.loc[marker_mask,var_col]
    df_in["prev_val"] = df_in["prev_val"].ffill()
    df_in["prev_marker_date"] = df_in.loc[marker_mask,"Date"]
    df_in["prev_marker_date"] = df_in["prev_marker_date"].ffill()
    return (df_in["prev_val"],df_in["prev_marker_date"])

In [18]:
#Create separate columns for pos and neg values - allows for normalisation
def pos_neg_cols(s_in,gt_lt = "GT"):
    """Split out the positive or the negative part of a series.

    s_in  -- numeric Series
    gt_lt -- "GT" to select values above zero, "LT" to select values below
             zero (case-insensitive)

    Returns a tuple (bool_s, val_s): bool_s flags the selected rows and val_s
    holds their absolute value, with 0 on every other row (including nulls).

    Raises ValueError when gt_lt is neither "GT" nor "LT".
    """
    mode = gt_lt.upper()
    if mode == "GT":
        bool_s = s_in > 0
    elif mode == "LT":
        bool_s = s_in < 0
    else:
        raise ValueError('gt_lt must be "GT" or "LT", got {!r}'.format(gt_lt))
    #Blank out the unselected rows first so the result is always float, then zero them
    val_s = s_in.abs().where(bool_s).fillna(0).rename("val")
    return (bool_s,val_s)

In [19]:
#Function for calculating the max and mins within a period
def max_min_period(s_in,periods):
    """Position of each value within its trailing high/low range.

    For every row the maximum and minimum over the row itself and the
    `periods` rows before it are tracked (missing earlier rows are skipped),
    and the row's value is then scaled between them with norm_s.
    """
    running_max_s = s_in.copy()
    running_min_s = s_in.copy()
    for lag in range(1,(periods+1)):
        lagged_s = s_in.shift(lag)
        #Take the lagged value wherever it extends the running extreme
        running_max_s = running_max_s.mask(lagged_s >= running_max_s,lagged_s)
        running_min_s = running_min_s.mask(lagged_s <= running_min_s,lagged_s)
    #Normalise the series
    return norm_s(s_in,running_max_s,running_min_s)

In [20]:
#Create a single function to run each stock through feature creation
def create_features(df_in):
    """Build the feature set and buy/hold/sell label for one ticker.

    df_in -- price history of a single ticker with the columns "Date",
             "Open", "Close", "High", "Low", "Volume", "EMA26", "MACD",
             "signal_line", "change_price" and "per_change_price".
             It is copied, not modified.

    Relies on the module-level min_max_period and on the helper functions
    defined earlier in this notebook. The counts of buy, sell, hold and
    unclassified rows are printed.

    Returns a new DataFrame with the engineered columns added, a "signal"
    column holding "buy", "hold" or "sell", and the numeric feature columns
    min-max scaled over the whole frame.
    """
    df_out = df_in.copy()

    #(source column, prefix of its min/max flag columns, label used in derived column names)
    tracked = [
        ("Close","close","close"),
        ("MACD","MACD","MACD"),
        ("EMA26","EMA26","EMA26"),
        ("signal_line","signal_line","signal"),
    ]
    extremes = ("max","min")

    #Calc vol as proportion of previous n-rows
    df_out["prop_vol"] = calc_prop_of_prev(df_out["Volume"].copy().astype("float"),6)

    #Get period-period changes
    #Each series is compared with its OWN previous value (MACD used to be
    #compared with the previous volume by mistake)
    shifted = [
        ("Close","close_shift1"),
        ("Volume","vol_shift1"),
        ("MACD","MACD_shift1"),
        ("EMA26","EMA26_shift1"),
    ]
    for src_col,shift_col in shifted:
        df_out[shift_col] = df_out[src_col].shift(1)
        change_s,per_change_s = calc_changes(df_out[[src_col,shift_col]].copy(),src_col,shift_col)
        df_out["change_"+shift_col] = change_s
        df_out["per_change_"+shift_col] = per_change_s

    #Flag local mins and maxs of each tracked series
    for src_col,flag_prefix,_ in tracked:
        df_out[flag_prefix+"_min"] = flag_mins(df_out[src_col],min_max_period)
        df_out[flag_prefix+"_max"] = flag_maxs(df_out[src_col],min_max_period)

    #Compare Close to the max/mins within 13, 26 and 52 periods
    for window in (13,26,52):
        df_out["close_{}_norm".format(window)] = max_min_period(df_out["Close"],window)

    #Find last max and mins (value and date)
    for src_col,flag_prefix,label in tracked:
        for ext in extremes:
            flag_col = "{}_{}".format(flag_prefix,ext)
            prev_col = "prev_{}_{}".format(ext,label)
            prev_s,prev_date_s = prev_value(df_out[["Date",src_col,flag_col]].copy(),src_col,flag_col)
            df_out[prev_col] = prev_s
            df_out[prev_col+"_date"] = prev_date_s

    #Drop the columns which are no longer needed
    #(the signal_line flags are left in place, as before, so the returned columns are unchanged)
    df_out.drop(columns=["MACD_min","MACD_max","close_min","close_max","EMA26_min","EMA26_max"],inplace=True)

    #Calc the value changes and percentage changes against the last max/min
    for src_col,_,label in tracked:
        for ext in extremes:
            prev_col = "prev_{}_{}".format(ext,label)
            change_s,per_change_s = calc_changes(df_out[[src_col,prev_col]].copy(),src_col,prev_col)
            df_out["{}_change_{}".format(ext,label)] = change_s
            df_out["{}_per_change_{}".format(ext,label)] = per_change_s

    #Time since the last max/min, converted from days to whole periods
    period = 7
    for _,_,label in tracked:
        for ext in extremes:
            date_col = "prev_{}_{}_date".format(ext,label)
            elapsed_s = df_out["Date"] - df_out[date_col]
            df_out[date_col+"_change"] = np.floor(elapsed_s.dt.days / period)

    #Mark points of MACD positive and negative entry
    df_out["MACD_pos_ent"] = pos_entry(df_out["MACD"])
    df_out["MACD_neg_ent"] = neg_entry(df_out["MACD"])

    #Create separate columns for pos and neg values - allows for normalisation
    #(source column, prefix of the generated *_bool / *_val columns)
    signed_features = [
        ("MACD","MACD"),
        ("signal_line","signal"),
        ("change_price","change_price"),
        ("per_change_price","per_change_price"),
    ]
    for _,_,label in tracked:
        for ext in extremes:
            change_col = "{}_change_{}".format(ext,label)
            signed_features.append((change_col,change_col))
    for src_col,prefix in signed_features:
        for sign,mode in (("pos","GT"),("neg","LT")):
            bool_s,val_s = pos_neg_cols(df_out[src_col],gt_lt = mode)
            df_out["{}_{}_bool".format(prefix,sign)] = bool_s
            df_out["{}_{}_val".format(prefix,sign)] = val_s

    #Get buy signals
    df_out["buy"] = get_buys(df_out[["Date","Open","Close","prev_max_close"]])

    #Get sell signals
    df_out["sell"] = get_sells(df_out[["Date","Open","Close","prev_min_close"]])

    #Get hold signals
    df_out["hold"] = (df_out["buy"] == False) & (df_out["sell"] == False)
    print("HOLD COUNT:",str(len(df_out[df_out["hold"]])))

    #Check for undefined
    df_out["no_signal"] = (df_out["buy"] == False) & (df_out["hold"] == False) & (df_out["sell"] == False)
    print("NO_SIGNAL COUNT:",str(len(df_out[df_out["no_signal"]])))

    #Composite all signals into one column
    #Applied in this order, so a row flagged both buy and sell ends up as "sell"
    df_out["signal"] = None
    df_out.loc[df_out["buy"] == True,"signal"] = "buy"
    df_out.loc[df_out["hold"] == True,"signal"] = "hold"
    df_out.loc[df_out["sell"] == True,"signal"] = "sell"
    df_out.drop(columns=["buy","hold","sell"],inplace=True)

    #Normalise columns which need it
    norm_cols = [
        #Standard features
        "Open",
        "Close",
        "High",
        "Low",
        "Volume",
        "change_price",
        "per_change_price",
        #Proportional features
        "prop_vol",
        #Shifted features
        "close_shift1",
        "change_close_shift1",
        "vol_shift1",
        "change_vol_shift1",
        "EMA26_shift1",
        "change_EMA26_shift1",
        #Prev max/min features
        "prev_max_close",
        "prev_min_close",
        "prev_max_EMA26",
        "prev_min_EMA26",
    ]
    #Pos neg features
    for _,prefix in signed_features:
        for sign in ("pos","neg"):
            norm_cols.append("{}_{}_val".format(prefix,sign))
    #Min-max scaling; a column whose values are all equal comes out as all NaN (0/0)
    for col in norm_cols:
        col_min = df_out[col].min()
        col_max = df_out[col].max()
        df_out[col] = (df_out[col] - col_min) / (col_max - col_min)

    return df_out

In [21]:
#Define the columns for the output
#Columns written to the output store, in output order
out_cols = [
    #NON-NORMALISED COLS
    "Ticker",
    "Date",
    #NORMALISED COLS
    #Standard features
    "Open",
    "Close",
    "High",
    "Low",
    "Volume",
    "change_price",
    "per_change_price",
    #Shifted features
    "close_shift1",
    "change_close_shift1",
    "vol_shift1",
    "change_vol_shift1",
    "EMA26_shift1",
    "change_EMA26_shift1",
    #Prev max/min features
    "prev_max_close",
    "prev_min_close",
    "prev_max_EMA26",
    "prev_min_EMA26",
    #Date changes
    "prev_max_close_date_change",
    "prev_min_close_date_change",
    "prev_max_MACD_date_change",
    "prev_min_MACD_date_change",
    "prev_max_EMA26_date_change",
    "prev_min_EMA26_date_change",
    "prev_max_signal_date_change",
    "prev_min_signal_date_change",
    #Change to periodic max mins
    "close_13_norm",
    "close_26_norm",
    "close_52_norm",
    #Pos neg features
    "MACD_pos_val",
    "MACD_neg_val",
    "signal_pos_val",
    "signal_neg_val",
    "change_price_pos_val",
    "change_price_neg_val",
    "per_change_price_pos_val",
    "per_change_price_neg_val",
    "max_change_close_pos_val",
    "max_change_close_neg_val",
    "min_change_close_pos_val",
    "min_change_close_neg_val",
    "max_change_MACD_pos_val",
    "max_change_MACD_neg_val",
    "min_change_MACD_pos_val",
    "min_change_MACD_neg_val",
    "max_change_EMA26_pos_val",
    "max_change_EMA26_neg_val",
    "min_change_EMA26_pos_val",
    "min_change_EMA26_neg_val",
    "max_change_signal_pos_val",
    "max_change_signal_neg_val",
    "min_change_signal_pos_val",
    "min_change_signal_neg_val",
    #SIGNAL
    "signal",
]

In [22]:
#Then loop the tickers and combine these into one large dataset
#Run every ticker through feature creation and append the result to one HDF5 table.
#The store is opened once and written through its own handle; the context manager
#closes it whatever happens. Failures are collected per ticker in error_li and the
#run carries on with the next ticker.
hf_store_name = path+'all_hist_prices_w_ft_eng2.h5'
count = 0
error_li = []
with pd.HDFStore(hf_store_name) as h_store:
    for tick in tick_ftse["Ticker"]:
        print("")
        count += 1
        print("{}".format(count))
        print("RUN FOR {}".format(tick))
        try:
            #Isolate this ticker
            #NOTE(review): every non-letter is stripped from the ticker before matching,
            #so a ticker containing a digit will not match its own price rows - confirm intended
            this_tick_df = df_prices_w[df_prices_w["Ticker"] == re.sub('[^a-zA-Z]','',tick)].copy()
            print("shape: {}".format(this_tick_df.shape))
            #Get the features and buy,sell signals
            this_tick_df = create_features(this_tick_df)
        except Exception as e:
            error_li.append({"Ticker":tick,"Error":e})
            print('ERROR PROCESSING DATA {}'.format(e))
            continue
        #Append this data to the group
        try:
            h_store.append('wkly_data',this_tick_df[out_cols],min_itemsize={'Ticker':4})
            print('ADDED TO {}'.format(hf_store_name))
        except Exception as e:
            error_li.append({"Ticker":tick,"Error":e})
            print('ERROR WRITING TO FILE {}'.format(e))
error_li


1
RUN FOR III
shape: (601, 14)
BUY SIGNALS: 243
SELL SIGNALS: 214
HOLD COUNT: 180
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

2
RUN FOR ADM
shape: (361, 14)
BUY SIGNALS: 181
SELL SIGNALS: 70
HOLD COUNT: 131
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

3
RUN FOR AAL
shape: (1007, 14)
BUY SIGNALS: 498
SELL SIGNALS: 424
HOLD COUNT: 163
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

4
RUN FOR ANTO
shape: (1044, 14)
BUY SIGNALS: 568
SELL SIGNALS: 351
HOLD COUNT: 184
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

5
RUN FOR AHT
shape: (1022, 14)
BUY SIGNALS: 588
SELL SIGNALS: 348
HOLD COUNT: 176
NO

BUY SIGNALS: 171
SELL SIGNALS: 215
HOLD COUNT: 75
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

38
RUN FOR HLMA
shape: (1012, 14)
BUY SIGNALS: 382
SELL SIGNALS: 231
HOLD COUNT: 415
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

39
RUN FOR HL
shape: (628, 14)
BUY SIGNALS: 333
SELL SIGNALS: 178
HOLD COUNT: 149
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

40
RUN FOR HSX
shape: (1018, 14)
BUY SIGNALS: 399
SELL SIGNALS: 243
HOLD COUNT: 393
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

41
RUN FOR HSBA
shape: (1044, 14)
BUY SIGNALS: 234
SELL SIGNALS: 385
HOLD COUNT: 447
NO_SIGNAL COUNT: 0
ADDED TO C:

shape: (696, 14)
BUY SIGNALS: 333
SELL SIGNALS: 182
HOLD COUNT: 212
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

74
RUN FOR RR
shape: (1044, 14)
BUY SIGNALS: 424
SELL SIGNALS: 351
HOLD COUNT: 322
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

75
RUN FOR RBS
shape: (1044, 14)
BUY SIGNALS: 367
SELL SIGNALS: 475
HOLD COUNT: 271
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

76
RUN FOR RDSA
shape: (400, 14)
BUY SIGNALS: 104
SELL SIGNALS: 155
HOLD COUNT: 155
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

77
RUN FOR RSA
shape: (1044, 14)
BUY SIGNALS: 321
SELL SIGNALS: 431
HOLD COUNT: 338
NO_SIGNAL COU

BUY SIGNALS: 76
SELL SIGNALS: 30
HOLD COUNT: 73
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

110
RUN FOR ASHM
shape: (665, 14)
BUY SIGNALS: 294
SELL SIGNALS: 254
HOLD COUNT: 162
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

111
RUN FOR AGR
shape: (814, 14)
BUY SIGNALS: 232
SELL SIGNALS: 275
HOLD COUNT: 356
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

112
RUN FOR AML
shape: (65, 14)
BUY SIGNALS: 24
SELL SIGNALS: 25
HOLD COUNT: 21
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

113
RUN FOR AVST
shape: (61, 14)
BUY SIGNALS: 23
SELL SIGNALS: 0
HOLD COUNT: 38
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Ro

BUY SIGNALS: 47
SELL SIGNALS: 145
HOLD COUNT: 411
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

146
RUN FOR CKN
shape: (1025, 14)
BUY SIGNALS: 542
SELL SIGNALS: 277
HOLD COUNT: 252
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

147
RUN FOR CBG
shape: (1044, 14)
BUY SIGNALS: 389
SELL SIGNALS: 390
HOLD COUNT: 320
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

148
RUN FOR CLI
shape: (1043, 14)
BUY SIGNALS: 436
SELL SIGNALS: 305
HOLD COUNT: 342
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

149
RUN FOR COA
shape: (946, 14)
BUY SIGNALS: 365
SELL SIGNALS: 381
HOLD COUNT: 247
NO_SIGNAL COUNT: 0
ADDED TO

BUY SIGNALS: 139
SELL SIGNALS: 157
HOLD COUNT: 318
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

182
RUN FOR FGT
shape: (600, 14)
BUY SIGNALS: 95
SELL SIGNALS: 112
HOLD COUNT: 399
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

183
RUN FOR FGP
shape: (1044, 14)
BUY SIGNALS: 412
SELL SIGNALS: 451
HOLD COUNT: 253
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

184
RUN FOR FSJ
shape: (1025, 14)
BUY SIGNALS: 444
SELL SIGNALS: 316
HOLD COUNT: 319
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

185
RUN FOR FRCL
shape: (685, 14)
BUY SIGNALS: 132
SELL SIGNALS: 159
HOLD COUNT: 396
NO_SIGNAL COUNT: 0
ADDED TO

BUY SIGNALS: 76
SELL SIGNALS: 56
HOLD COUNT: 70
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

218
RUN FOR IGG
shape: (741, 14)
BUY SIGNALS: 312
SELL SIGNALS: 239
HOLD COUNT: 217
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

219
RUN FOR IMI
shape: (1044, 14)
BUY SIGNALS: 424
SELL SIGNALS: 380
HOLD COUNT: 288
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

220
RUN FOR INCH
shape: (1044, 14)
BUY SIGNALS: 495
SELL SIGNALS: 356
HOLD COUNT: 270
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

221
RUN FOR ISAT
shape: (734, 14)
BUY SIGNALS: 291
SELL SIGNALS: 287
HOLD COUNT: 201
NO_SIGNAL COUNT: 0
ADDED TO 

BUY SIGNALS: 141
SELL SIGNALS: 124
HOLD COUNT: 339
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

254
RUN FOR MGAM
shape: (1044, 14)
BUY SIGNALS: 482
SELL SIGNALS: 370
HOLD COUNT: 254
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

255
RUN FOR MYI
shape: (601, 14)
BUY SIGNALS: 92
SELL SIGNALS: 156
HOLD COUNT: 356
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

256
RUN FOR NEX
shape: (1044, 14)
BUY SIGNALS: 362
SELL SIGNALS: 395
HOLD COUNT: 338
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

257
RUN FOR NBLS
shape: (429, 14)
BUY SIGNALS: 0
SELL SIGNALS: 33
HOLD COUNT: 396
NO_SIGNAL COUNT: 0
ADDED TO C

BUY SIGNALS: 497
SELL SIGNALS: 401
HOLD COUNT: 209
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

290
RUN FOR RTN
shape: (1044, 14)
BUY SIGNALS: 457
SELL SIGNALS: 387
HOLD COUNT: 275
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

291
RUN FOR RHIM
shape: (89, 14)
BUY SIGNALS: 41
SELL SIGNALS: 28
HOLD COUNT: 30
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

292
RUN FOR RCP
shape: (60, 14)
BUY SIGNALS: 0
SELL SIGNALS: 12
HOLD COUNT: 48
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

293
RUN FOR RSE
shape: (298, 14)
BUY SIGNALS: 48
SELL SIGNALS: 113
HOLD COUNT: 138
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\

BUY SIGNALS: 172
SELL SIGNALS: 198
HOLD COUNT: 243
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

326
RUN FOR TIFS
shape: (89, 14)
BUY SIGNALS: 42
SELL SIGNALS: 34
HOLD COUNT: 23
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

327
RUN FOR TCAP
shape: (966, 14)
BUY SIGNALS: 425
SELL SIGNALS: 391
HOLD COUNT: 224
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

328
RUN FOR TRY
shape: (601, 14)
BUY SIGNALS: 151
SELL SIGNALS: 179
HOLD COUNT: 279
NO_SIGNAL COUNT: 0
ADDED TO C:\Users\Robert\Documents\Python Scripts\Stock trading - ML modelling\Historical prices\all_hist_prices_w_ft_eng2.h5

329
RUN FOR TPK
shape: (1044, 14)
BUY SIGNALS: 397
SELL SIGNALS: 371
HOLD COUNT: 310
NO_SIGNAL COUNT: 0
ADDED TO C:

[]

In [23]:
#Final sanity check: re-load everything written under 'wkly_data' and report its size and signal counts
#Close the store handle BEFORE re-opening the file read-only, so the read is not made against a file that
#is still held open by this session and the handle is released even if the read below fails
#NOTE(review): assumes h_store is the handle on the file at hf_store_name - confirm against the cell that opens it
h_store.close()
tmp_df = pd.read_hdf(hf_store_name, key='wkly_data', mode='r')
#Count each signal label in a single pass instead of filtering the frame once per label
signal_counts = tmp_df["signal"].value_counts()
print("")
print("FINAL HDFSTORE SIZE: {}".format(tmp_df.shape))
print("FINAL BUY COUNT: {}".format(signal_counts.get("buy", 0)))
print("FINAL SELL COUNT: {}".format(signal_counts.get("sell", 0)))
#Last expression in the cell so the notebook renders the preview
tmp_df.head(50)
# tmp_df[(tmp_df["Ticker"] == 'ADM') & (tmp_df["Date"] > '2013-12-01') & (tmp_df["Date"] < '2014-02-01')].head(200)


FINAL HDFSTORE SIZE: (254719, 55)
FINAL BUY COUNT: 83821
FINAL SELL COUNT: 85871


Unnamed: 0,Ticker,Date,Open,Close,High,Low,Volume,change_price,per_change_price,close_shift1,...,min_change_MACD_neg_val,max_change_EMA26_pos_val,max_change_EMA26_neg_val,min_change_EMA26_pos_val,min_change_EMA26_neg_val,max_change_signal_pos_val,max_change_signal_neg_val,min_change_signal_pos_val,min_change_signal_neg_val,signal
0,III,2007-12-31,0.866039,0.813036,0.855672,0.848469,0.053557,0.564065,0.47674,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hold
1,III,2008-01-07,0.837258,0.771148,0.821356,0.799022,0.23639,0.5243,0.457015,0.842741,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hold
2,III,2008-01-14,0.784406,0.74765,0.767864,0.760208,0.325624,0.605302,0.493657,0.799322,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hold
3,III,2008-01-21,0.757195,0.764508,0.797134,0.724054,0.264801,0.730486,0.556505,0.774966,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hold
4,III,2008-01-28,0.778127,0.80895,0.803189,0.783603,0.173403,0.799705,0.58999,0.792439,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hold
5,III,2008-02-04,0.839874,0.770638,0.819843,0.796363,0.226728,0.515464,0.453051,0.838505,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hold
6,III,2008-02-11,0.77708,0.766551,0.778462,0.786793,0.175053,0.680412,0.530798,0.798793,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hold
7,III,2008-02-18,0.788592,0.72926,0.792087,0.759145,0.183115,0.540501,0.461945,0.794557,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hold
8,III,2008-02-25,0.757718,0.662852,0.738595,0.680987,0.308995,0.435935,0.407094,0.755904,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,buy
9,III,2008-03-03,0.677132,0.621475,0.659366,0.649085,0.277779,0.543446,0.456395,0.68707,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,buy
