# Trialing the Light Gradient Boost model
The code will run the logistic regression model on the FTSE 350 stock data and see what the profit would have been from investing with the below criteria:
- Starting with £10,000
- Never investing more than 10% of the value into a single stock
- Tracking balance so once the account is empty no more can be invested until shares are liqidated
- Shares are bought at the open of the day following the first buy signal, if shares are not already held
- Shares are sold at the open of the day following the first close signal after a hold period, if shares are held

Trading variables:
- Trades cost £2.50 to execute
- Spread is 1%

In [1]:
#Import models
import numpy as np
import pandas as pd
import math
import lightgbm as lgb
from sklearn.externals import joblib as jl
import os
import tables
from rf_modules import *



In [2]:
#Import and combine prices files
path = r'C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices'
df_ft = pd.read_hdf(path +r'\all_hist_prices_w_ft_eng2.h5')
print('SHAPE: {}'.format(df_ft.shape))
print(df_ft.columns)
df_ft.head()

SHAPE: (279075, 132)
Index(['ticker', 'date', 'open', 'close', 'high', 'low', 'volume',
       'change_price', 'per_change_price', 'ema26',
       ...
       'min_change_volume', 'prev_max_grad_volume', 'prev_min_grad_volume',
       'max_move_cum_volume', 'min_move_cum_volume',
       'long_prev_max_move_date_volume', 'long_prev_min_move_date_volume',
       'long_max_grad_volume', 'long_min_grad_volume', 'signal'],
      dtype='object', length=132)


Unnamed: 0,ticker,date,open,close,high,low,volume,change_price,per_change_price,ema26,...,min_change_volume,prev_max_grad_volume,prev_min_grad_volume,max_move_cum_volume,min_move_cum_volume,long_prev_max_move_date_volume,long_prev_min_move_date_volume,long_max_grad_volume,long_min_grad_volume,signal
127630,III,2007-12-31,,,,,,,,,...,,,,0,0,0.0,0.0,0.0,0.0,hold
127631,III,2008-01-07,0.0,0.0,0.0,0.0,1.0,0.0,,,...,,,,0,0,0.0,0.0,0.0,0.0,hold
127632,III,2008-01-14,0.0,0.0,0.0,0.0,1.0,0.0,,,...,,,,0,0,0.0,0.0,0.0,0.0,buy
127633,III,2008-01-21,0.0,0.257812,0.333333,0.0,0.77644,0.257812,1.0,,...,,,,0,0,0.0,0.0,0.0,0.0,hold
127634,III,2008-01-28,0.192308,0.9375,0.402299,0.478632,0.440501,0.745192,0.794872,,...,,,,0,0,0.0,0.0,0.0,0.0,sell


In [3]:
#Import to lr model
lgb_mod = jl.load(path+r'\lgb_model.joblib')
lgb_mod

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
               importance_type='split', learning_rate=0.1, max_depth=22,
               min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
               n_estimators=100, n_jobs=-1, num_leaves=848, objective=None,
               random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
               subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [10]:
print('SHAPE BEFORE: {}'.format(df_ft.shape))
# data_df = df_ft.replace([np.inf,-np.inf],np.nan).dropna(axis=0)
data_df = df_ft
print('SHAPE AFTER: {}'.format(data_df.shape))
data_df.head()

SHAPE BEFORE: (279075, 132)
SHAPE AFTER: (279075, 132)


Unnamed: 0,ticker,date,open,close,high,low,volume,change_price,per_change_price,ema26,...,min_change_volume,prev_max_grad_volume,prev_min_grad_volume,max_move_cum_volume,min_move_cum_volume,long_prev_max_move_date_volume,long_prev_min_move_date_volume,long_max_grad_volume,long_min_grad_volume,signal
127630,III,2007-12-31,,,,,,,,,...,,,,0,0,0.0,0.0,0.0,0.0,hold
127631,III,2008-01-07,0.0,0.0,0.0,0.0,1.0,0.0,,,...,,,,0,0,0.0,0.0,0.0,0.0,hold
127632,III,2008-01-14,0.0,0.0,0.0,0.0,1.0,0.0,,,...,,,,0,0,0.0,0.0,0.0,0.0,buy
127633,III,2008-01-21,0.0,0.257812,0.333333,0.0,0.77644,0.257812,1.0,,...,,,,0,0,0.0,0.0,0.0,0.0,hold
127634,III,2008-01-28,0.192308,0.9375,0.402299,0.478632,0.440501,0.745192,0.794872,,...,,,,0,0,0.0,0.0,0.0,0.0,sell


In [11]:
#Import feature_list
f = open(path + r'\lgb_model_feature_list.txt','r')
feature_li = f.read().split(',')
feature_li

['open',
 'close',
 'high',
 'low',
 'volume',
 'change_price',
 'per_change_price',
 'ema26',
 'macd',
 'signal_line',
 'macd_line',
 'close_orig_per_change_max_4',
 'close_orig_per_change_max_13',
 'close_orig_per_change_max_26',
 'close_orig_per_change_max_52',
 'close_orig_per_change_min_4',
 'close_orig_per_change_min_13',
 'close_orig_per_change_min_26',
 'close_orig_per_change_min_52',
 'macd_line_per_change_max_4',
 'macd_line_per_change_max_13',
 'macd_line_per_change_max_26',
 'macd_line_per_change_max_52',
 'macd_line_per_change_min_4',
 'macd_line_per_change_min_13',
 'macd_line_per_change_min_26',
 'macd_line_per_change_min_52',
 'change_close_shift1',
 'close_max',
 'close_min',
 'prev_max_close',
 'prev_min_close',
 'prev_max_close_date_change',
 'prev_min_close_date_change',
 'max_change_close',
 'min_change_close',
 'prev_max_grad_close',
 'prev_min_grad_close',
 'max_move_cum_close',
 'min_move_cum_close',
 'long_prev_max_move_date_close',
 'long_prev_min_move_date_cl

In [12]:
#Run the rf_mod to get signals
data_df['signal'] = lgb_mod.predict(data_df[feature_li])
data_df['signal_prob'] = [x.max() for x in lgb_mod.predict_proba(data_df[feature_li])]

In [13]:
#Show current buy ratings
print('BUY COUNT: {:,}'.format(data_df.loc[(data_df['date'] == data_df['date'].max()) & (data_df['signal'] == 'buy')].shape[0]))
data_df.loc[(data_df['date'] == data_df['date'].max()) & (data_df['signal'] == 'buy'),['ticker','signal','signal_prob']].sort_values(['signal','signal_prob'],ascending=[True,False])

BUY COUNT: 4


Unnamed: 0,ticker,signal,signal_prob
274025,WG,buy,0.712181
129440,IMB,buy,0.589902
234467,SOPH,buy,0.485379
63084,CWK,buy,0.461984


In [14]:
#Show current sell ratings
print('SELL COUNT: {:,}'.format(data_df.loc[(data_df['date'] == data_df['date'].max()) & (data_df['signal'] == 'sell')].shape[0]))
data_df.loc[(data_df['date'] == data_df['date'].max()) & (data_df['signal'] == 'sell'),['ticker','signal','signal_prob']].sort_values(['signal','signal_prob'],ascending=[True,False])

SELL COUNT: 13


Unnamed: 0,ticker,signal,signal_prob
69375,DNLM,sell,0.710268
123216,HWDN,sell,0.678979
61196,CSP,sell,0.60154
66020,DIGS,sell,0.600643
60440,CRH,sell,0.564274
141664,JLG,sell,0.55405
72853,DPLM,sell,0.548081
186143,PNN,sell,0.531606
94975,GAW,sell,0.52741
5657,AGR,sell,0.523491


In [15]:
#Show current hold ratings
print('HOLD COUNT: {:,}'.format(data_df.loc[(data_df['date'] == data_df['date'].max()) & (data_df['signal'] == 'hold')].shape[0]))
data_df.loc[(data_df['date'] == data_df['date'].max()) & (data_df['signal'] == 'hold'),['ticker','signal','signal_prob']].sort_values(['signal','signal_prob'],ascending=[True,False])

HOLD COUNT: 316


Unnamed: 0,ticker,signal,signal_prob
201637,RHIM,hold,0.872038
253849,TEM,hold,0.847483
212664,RTO,hold,0.844428
187129,PPH,hold,0.827110
67151,DJAN,hold,0.827093
203475,RMG,hold,0.822794
26094,BGSC,hold,0.819823
179162,PFG,hold,0.818469
191763,PSON,hold,0.817279
166047,NESF,hold,0.817016


# Combine with price data and create ledger

In [16]:
#Import and combine prices files
df_prices = pd.read_hdf(path +r'\all_hist_prices_w.h5')

In [17]:
#Sort by ticker and date then add the open_shift_neg1 field
#These allow the buying and selling to be done at a realistic price
df_prices.sort_values(['ticker','date'],ascending=[True,True],inplace=True)
df_prices['open_shift_neg1'] = df_prices['open'].shift(-1)
df_prices['date'] = df_prices['date'].astype('datetime64')
print('SHAPE: {}'.format(df_prices.shape))
print(df_prices.columns)
df_prices.iloc[90:95]

SHAPE: (279267, 14)
Index(['ticker', 'date', 'high', 'low', 'volume', 'open', 'close', 'change',
       'ema12', 'ema26', 'macd_line', 'signal_line', 'macd',
       'open_shift_neg1'],
      dtype='object')


Unnamed: 0,ticker,date,high,low,volume,open,close,change,ema12,ema26,macd_line,signal_line,macd,open_shift_neg1
90,3IN,2009-09-21,142.85,137.15,7932371.0,139.26,142.57,3.31,136.97158,134.290336,2.681244,1.988463,0.692781,140.39
91,3IN,2009-09-28,145.67,139.62,5390930.0,140.39,144.96,4.57,138.200568,135.080682,3.119886,2.214748,0.905138,144.26
92,3IN,2009-10-05,146.09,142.99,3520386.0,144.26,144.68,0.42,139.197403,135.791742,3.405661,2.45293,0.952731,143.84
93,3IN,2009-10-12,146.37,142.99,2676531.0,143.84,144.82,0.98,140.062418,136.460502,3.601916,2.682727,0.919189,146.37
94,3IN,2009-10-19,147.07,142.29,3350132.0,146.37,143.56,-2.81,140.600508,136.986391,3.614117,2.869005,0.745112,144.26


In [18]:
#Join on the buy and sell signals
df_prices = pd.merge(df_prices,data_df[['ticker','date','signal','signal_prob']],left_on=['ticker','date'],right_on=['ticker','date'],how='inner')
print('SHAPE: {}'.format(df_prices.shape))
print(df_prices.columns)
print(df_prices.signal.value_counts())
df_prices.head()

SHAPE: (285051, 16)
Index(['ticker', 'date', 'high', 'low', 'volume', 'open', 'close', 'change',
       'ema12', 'ema26', 'macd_line', 'signal_line', 'macd', 'open_shift_neg1',
       'signal', 'signal_prob'],
      dtype='object')
hold    199364
sell     47220
buy      38467
Name: signal, dtype: int64


Unnamed: 0,ticker,date,high,low,volume,open,close,change,ema12,ema26,macd_line,signal_line,macd,open_shift_neg1,signal,signal_prob
0,3IN,2007-12-31,149.89,147.07,1373801.0,149.18,147.43,-1.75,,,,,,147.07,hold,0.999647
1,3IN,2008-01-07,149.54,147.07,2345191.0,147.07,148.83,1.76,,,,,,148.48,hold,0.708142
2,3IN,2008-01-14,150.59,147.43,2150049.0,148.48,149.18,0.7,,,,,,147.78,hold,0.789326
3,3IN,2008-01-21,154.82,145.32,3070968.0,147.78,152.0,4.22,,,,,,149.89,hold,0.8279
4,3IN,2008-01-28,154.82,148.83,2510972.0,149.89,152.0,2.11,,,,,,154.46,hold,0.784444


In [19]:
#Limit to a test period
df_prices = df_prices[df_prices['date'] >= '2014-01-01']
print('SHAPE: {}'.format(df_prices.shape))

SHAPE: (93366, 16)


In [20]:
#Create a dictionary of max character lengths of fields for use later in h5 file appending
def get_col_lens(_df_in):
    _col_lens = {}
    for c in _df_in:
        _tmp_s = pd.Series([len(str(x)) for x in _df_in[c]])
        _col_lens[c] = _tmp_s.max()
    return _col_lens
col_lens = get_col_lens(df_prices)
col_lens

{'ticker': 4,
 'date': 19,
 'high': 8,
 'low': 8,
 'volume': 12,
 'open': 7,
 'close': 7,
 'change': 21,
 'ema12': 18,
 'ema26': 18,
 'macd_line': 23,
 'signal_line': 23,
 'macd': 23,
 'open_shift_neg1': 8,
 'signal': 4,
 'signal_prob': 19}

In [21]:
#Write df_prices to a .h5 file
hf_store_name = path + r'\historic_lgb_bsh_signals_TMP.h5'
hf = pd.HDFStore(hf_store_name)
group_name = r'bsh_signals'
df_prices.to_hdf(hf_store_name,key=group_name,append=True,min_itemsize=col_lens)
hf.close()

In [22]:
#close any open h5 files
tables.file._open_files.close_all()

In [23]:
#Delete the old h5 file and rename the TMP
try:
    os.remove(path + r'\historic_lgb_bsh_signals.h5')
    print('\nSUCCESSFULLY REMOVED {}'.format(path + r'\historic_lgb_bsh_signals.h5'))
except Exception as e:
    print('\nERROR - REMOVING:{}'.format(e))
try:
    os.rename(path + r'\historic_lgb_bsh_signals_TMP.h5',path + r'\historic_lgb_bsh_signals.h5')
    print('\nSUCCESSFULLY RENAMED {} TO {}'.format(path + r'\historic_lgb_bsh_signals_TMP.h5',path + r'\historic_lgb_bsh_signals.h5'))
except Exception as e:
    print('\nERROR - RENAMING:{}'.format(e))


SUCCESSFULLY REMOVED C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\historic_lgb_bsh_signals.h5

SUCCESSFULLY RENAMED C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\historic_lgb_bsh_signals_TMP.h5 TO C:\Users\Robert\Documents\python_scripts\stock_trading_ml_modelling\historical_prices\historic_lgb_bsh_signals.h5


# Getting the status of a specific ticker

In [24]:
def get_signal():
    tick = input('\nEnter the ticker: ').upper()
    if tick in data_df.ticker.unique():
        print('{} CURRENT STATUS -> {} - {}'.format(tick.upper(),data_df.loc[(data_df['date'] == data_df['date'].max()) & (data_df['ticker'] == tick.upper())].signal.values[0],data_df['date'].max()))
        display(data_df.loc[data_df['ticker'] == tick.upper(),['ticker','date','signal']].sort_values(['date'],ascending=False))
        return None
    elif tick == 'EXIT':
        return None
    else:
        print('THERE IS NO SIGNAL FOR {}'.format(tick.upper()))
        get_signal()
get_signal()


Enter the ticker: sbry
SBRY CURRENT STATUS -> hold - 2019-12-02 00:00:00


Unnamed: 0,ticker,date,signal
214505,SBRY,2019-12-02,hold
214504,SBRY,2019-11-25,hold
214503,SBRY,2019-11-18,hold
214502,SBRY,2019-11-11,hold
214501,SBRY,2019-11-04,hold
214500,SBRY,2019-10-28,hold
214499,SBRY,2019-10-21,hold
214498,SBRY,2019-10-14,hold
214497,SBRY,2019-10-07,hold
214496,SBRY,2019-09-30,hold
