# Trialing the logistic regression model,
The code will run the logistic regression model on the FTSE 350 stock data and see what the profit would have been from investing with the below criteria:
- Starting with \u00c2\u00a310,000
- Never investing more than 10% of the value into a single stock
- Tracking balance so once the account is empty no more can be invested until shares are liqidated
- Shares are bought at the open of the day following the first buy signal, if shares are not already held
- Shares are sold at the open of the day following the first close signal after a hold period, if shares are held

Trading variables:
- Trades cost \u00c2\u00a32.50 to execute
- Spread is 1%

In [1]:
#Import models
import numpy as np
import pandas as pd
import math
from sklearn.linear_model import LogisticRegression as lr
from sklearn.externals import joblib as jl

In [2]:
#Import and combine prices files
path = r'C:\\Users\\Robert\\Documents\\python_scripts\\stock_trading_ml_modelling\\historical_prices'
df_ft = pd.read_hdf(path +'\\all_hist_prices_w_ft_eng2.h5')
print('SHAPE: {}'.format(df_ft.shape))
print(df_ft.columns)
df_ft.head()

SHAPE: (255320, 55)
Index(['Ticker', 'Date', 'Open', 'Close', 'High', 'Low', 'Volume',
       'change_price', 'per_change_price', 'close_shift1',
       'change_close_shift1', 'vol_shift1', 'change_vol_shift1',
       'EMA26_shift1', 'change_EMA26_shift1', 'prev_max_close',
       'prev_min_close', 'prev_max_EMA26', 'prev_min_EMA26',
       'prev_max_close_date_change', 'prev_min_close_date_change',
       'prev_max_MACD_date_change', 'prev_min_MACD_date_change',
       'prev_max_EMA26_date_change', 'prev_min_EMA26_date_change',
       'prev_max_signal_date_change', 'prev_min_signal_date_change',
       'close_13_norm', 'close_26_norm', 'close_52_norm', 'MACD_pos_val',
       'MACD_neg_val', 'signal_pos_val', 'signal_neg_val',
       'change_price_pos_val', 'change_price_neg_val',
       'per_change_price_pos_val', 'per_change_price_neg_val',
       'max_change_close_pos_val', 'max_change_close_neg_val',
       'min_change_close_pos_val', 'min_change_close_neg_val',
       'max_change_

Unnamed: 0,Ticker,Date,Open,Close,High,Low,Volume,change_price,per_change_price,close_shift1,...,min_change_MACD_neg_val,max_change_EMA26_pos_val,max_change_EMA26_neg_val,min_change_EMA26_pos_val,min_change_EMA26_neg_val,max_change_signal_pos_val,max_change_signal_neg_val,min_change_signal_pos_val,min_change_signal_neg_val,signal
0,III,2007-12-31,,,,,,,,,...,,,,,,,,,,hold
1,III,2008-01-07,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,...,,,,,,,,,,hold
2,III,2008-01-14,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,,,,,,,,,,hold
3,III,2008-01-21,0.0,0.257812,0.333333,0.0,0.77644,1.0,1.0,0.0,...,,,,,,,,,,hold
4,III,2008-01-28,0.192308,0.9375,0.402299,0.478632,0.440501,1.0,1.0,0.257812,...,,,,,,,,,,hold


In [3]:
#Import the features
file_object = open(path+r'\\log_reg_model_feature_list.txt','r')
ft_li = file_object.read().split(',')
file_object.close()
ft_li

['close_52_norm',
 'close_13_norm',
 'min_change_signal_neg_val',
 'close_26_norm',
 'min_change_close_pos_val',
 'change_EMA26_shift1',
 'max_change_close_neg_val',
 'min_change_EMA26_neg_val',
 'Close',
 'MACD_neg_val',
 'Low',
 'signal_neg_val',
 'min_change_close_neg_val',
 'per_change_price_neg_val',
 'Open',
 'close_shift1',
 'min_change_MACD_neg_val',
 'max_change_MACD_neg_val',
 'High',
 'change_price_neg_val',
 'per_change_price',
 'MACD_pos_val',
 'change_close_shift1',
 'max_change_EMA26_neg_val',
 'prev_min_signal_date_change',
 'signal_pos_val',
 'change_price',
 'prev_min_EMA26_date_change',
 'EMA26_shift1',
 'max_change_EMA26_pos_val',
 'prev_min_close',
 'prev_max_close',
 'max_change_close_pos_val',
 'prev_max_EMA26_date_change',
 'prev_min_MACD_date_change',
 'min_change_MACD_pos_val',
 'min_change_EMA26_pos_val',
 'prev_max_signal_date_change',
 'change_price_pos_val',
 'max_change_signal_pos_val',
 'min_change_signal_pos_val',
 'Volume',
 'vol_shift1',
 'max_change_

In [4]:
#Import to lr model
lr_mod = jl.load(path+r'\\log_reg_model.joblib')
lr_mod

{'buy': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
           intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
           penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
           verbose=0, warm_start=False),
 'hold': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
           intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
           penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
           verbose=0, warm_start=False),
 'sell': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
           intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
           penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
           verbose=0, warm_start=False)}

In [5]:
lr_mod_df = df_ft.replace([np.inf,-np.inf],np.nan).dropna(axis=0)
lr_mod_df.head()

Unnamed: 0,Ticker,Date,Open,Close,High,Low,Volume,change_price,per_change_price,close_shift1,...,min_change_MACD_neg_val,max_change_EMA26_pos_val,max_change_EMA26_neg_val,min_change_EMA26_pos_val,min_change_EMA26_neg_val,max_change_signal_pos_val,max_change_signal_neg_val,min_change_signal_pos_val,min_change_signal_neg_val,signal
137,III,2010-08-16,0.099655,0.106687,0.090225,0.10969,0.278294,0.65243,0.516895,0.106559,...,0.465977,0.0,0.004028,0.0,0.017067,0.0,0.003004,0.0,1.0,hold
138,III,2010-08-23,0.099778,0.102986,0.08476,0.09715,0.184318,0.643594,0.501823,0.106687,...,0.0,0.0,0.006223,0.0,0.036954,0.0,0.006125,0.0,1.0,buy
139,III,2010-08-30,0.091268,0.11243,0.086217,0.101077,0.184233,0.685714,0.57518,0.102986,...,0.0,0.0,0.007211,0.0,0.0,0.0,0.008824,0.0,1.0,hold
140,III,2010-09-06,0.108165,0.124553,0.097146,0.115009,0.168642,0.673343,0.551672,0.11243,...,0.0,0.0,0.006784,0.03478,0.0,0.0,0.00998,0.0,1.0,hold
141,III,2010-09-13,0.117785,0.125191,0.105889,0.130462,0.192537,0.651841,0.515942,0.124553,...,0.0,0.0,0.006318,0.072732,0.0,0.0,0.010024,0.0,0.0,hold


In [6]:
#Run the lr model on the data
lr_mod_df = df_ft.replace([np.inf,-np.inf],np.nan).dropna(axis=0).reset_index(drop=True) #Reset index allows joining of signal after calc_probs_df

#Function for creating a dataframe with model probabilities and the most likely outcome
def calc_probs_df(models,unique_classes,df_test,features):
    modelled_probs = pd.DataFrame(columns=unique_classes)
    for cl in unique_classes:
        x_test = df_test[features]
        modelled_probs[cl] = models[cl].predict_proba(x_test)[:,1] 
        #Column index 1 as predict_proba outputs 2 columns,
            #the first is the probability that it is a negative result
            #the second is the probability that it is a positive result
            #that's why we want column index 1, we are looking at the positive result
    #Determine the most probable result
    modelled_probs['signal'] = modelled_probs.idxmax(axis=1)
    modelled_probs['signal_prob'] = modelled_probs[unique_classes].max(axis=1)
#     print(modelled_probs)
    for cl in unique_classes:
        print('{} count -> {}'.format(cl,len(modelled_probs.loc[modelled_probs['signal'] == cl,:])))
    return (modelled_probs['signal'],modelled_probs['signal_prob'])
lr_mod_df['signal'],lr_mod_df['signal_prob'] = calc_probs_df(lr_mod,['buy','sell','hold'],lr_mod_df,ft_li)
lr_mod_df.head()

8372
51444
141046


Unnamed: 0,Ticker,Date,Open,Close,High,Low,Volume,change_price,per_change_price,close_shift1,...,max_change_EMA26_pos_val,max_change_EMA26_neg_val,min_change_EMA26_pos_val,min_change_EMA26_neg_val,max_change_signal_pos_val,max_change_signal_neg_val,min_change_signal_pos_val,min_change_signal_neg_val,signal,signal_prob
0,III,2010-08-16,0.099655,0.106687,0.090225,0.10969,0.278294,0.65243,0.516895,0.106559,...,0.0,0.004028,0.0,0.017067,0.0,0.003004,0.0,1.0,sell,0.45899
1,III,2010-08-23,0.099778,0.102986,0.08476,0.09715,0.184318,0.643594,0.501823,0.106687,...,0.0,0.006223,0.0,0.036954,0.0,0.006125,0.0,1.0,hold,0.464104
2,III,2010-08-30,0.091268,0.11243,0.086217,0.101077,0.184233,0.685714,0.57518,0.102986,...,0.0,0.007211,0.0,0.0,0.0,0.008824,0.0,1.0,hold,0.547734
3,III,2010-09-06,0.108165,0.124553,0.097146,0.115009,0.168642,0.673343,0.551672,0.11243,...,0.0,0.006784,0.03478,0.0,0.0,0.00998,0.0,1.0,hold,0.639875
4,III,2010-09-13,0.117785,0.125191,0.105889,0.130462,0.192537,0.651841,0.515942,0.124553,...,0.0,0.006318,0.072732,0.0,0.0,0.010024,0.0,0.0,hold,0.645168


In [14]:
#Show current BHS ratings
lr_mod_df.loc[(lr_mod_df['Date'] == lr_mod_df['Date'].max()) & (lr_mod_df['signal'] == 'buy'),['Ticker','signal','signal_prob']].sort_values(['signal','signal_prob'],ascending=[True,False])

Unnamed: 0,Ticker,signal,signal_prob


# Combine with price data and create ledger

In [7]:
#Import and combine prices files
df_prices = pd.read_hdf(path +'\\all_hist_prices_w.h5')

In [8]:
#Sort by ticker and date then add the open_shift_neg1 field
#These allow the buying and selling to be done at a realistic price
df_prices.sort_values(['Ticker','Date'],ascending=[True,True],inplace=True)
df_prices['open_shift_neg1'] = df_prices['Open'].shift(-1)
df_prices['Date'] = df_prices['Date'].astype('datetime64')
print('SHAPE: {}'.format(df_prices.shape))
print(df_prices.columns)
df_prices.head()

SHAPE: (256364, 16)
Index(['Unnamed: 0', 'Ticker', 'Date', 'High', 'Low', 'Volume', 'Open',
       'Close', 'Change', 'EMA12', 'EMA26', 'MACD_line', 'Signal', 'MACD',
       'Index', 'open_shift_neg1'],
      dtype='object')


Unnamed: 0.1,Unnamed: 0,Ticker,Date,High,Low,Volume,Open,Close,Change,EMA12,EMA26,MACD_line,Signal,MACD,Index,open_shift_neg1
0,0,3IN,2007-12-31,149.89,147.07,1373801.0,149.18,147.43,-1.75,,,,,,0,147.07
1,1,3IN,2008-01-07,149.54,147.07,2345191.0,147.07,148.83,1.76,,,,,,1,148.48
2,2,3IN,2008-01-14,150.59,147.43,2150049.0,148.48,149.18,0.7,,,,,,2,147.78
3,3,3IN,2008-01-21,154.82,145.32,3070968.0,147.78,152.0,4.22,,,,,,3,149.89
4,4,3IN,2008-01-28,154.82,148.83,2510972.0,149.89,152.0,2.11,,,,,,4,154.46


In [9]:
#Join on the buy and sell signals
df_prices = pd.merge(df_prices[['Ticker','Date','Open','Close','open_shift_neg1']],lr_mod_df[['Ticker','Date','signal','signal_prob']],left_on=['Ticker','Date'],right_on=['Ticker','Date'],how='inner')
print('SHAPE: {}'.format(df_prices.shape))
print(df_prices.columns)
df_prices.head()

SHAPE: (200862, 7)
Index(['Ticker', 'Date', 'Open', 'Close', 'open_shift_neg1', 'signal',
       'signal_prob'],
      dtype='object')


Unnamed: 0,Ticker,Date,Open,Close,open_shift_neg1,signal,signal_prob
0,3IN,2011-04-18,163.4,162.56,162.13,hold,0.563628
1,3IN,2011-04-25,162.13,162.98,162.27,hold,0.663394
2,3IN,2011-05-02,162.27,166.07,165.51,hold,0.740945
3,3IN,2011-05-09,165.51,170.72,171.0,hold,0.888239
4,3IN,2011-05-16,171.0,169.59,169.59,hold,0.898204


In [10]:
#Create a class for maintaining the value of the fund
class new_fund:
    def __init__(self,init_val):
        self.st_val = init_val
        self.available = init_val
        self.invested_value = 0
        self.codb = 0
        self.ledger = []
        self.cur_holdings = {}
        print('NEW FUND')
        print('st_val:{}'.format(self.st_val))
        print('available:{}'.format(self.available))
    @property
    def fund_value(self):
        return self.invested_value + self.available
    #Create a function to buy shares
    def buy(self,ticker:str,trade_date:int,price:float,spread:float,value:float,trade_cost:float,signal_prob:float,val_inc_tc:bool = True):
        #Check if already bought
        if ticker in self.cur_holdings:
            return
        #Error check
        if spread > 1 or spread < 0:
            raise ValueError('spread should be between 0 and 1, the value expressed was -> {}'.format(spread))
        if price < 0:
            raise ValueError('price cannot be a negative, the value expressed was -> {}'.format(price))
        #Calculate the trade value
        if val_inc_tc == True:
            trade_funds = value - trade_cost
        else:
            trade_funds = value
        #Calculate the ask and bid price of each share
        a_price = round(price * (1+spread),2)
        b_price = round(price * (1-spread),2)
        #Calculate the number of whole shares which can be purchased
        share_vol = int(trade_funds/a_price)
        trade_value = round(share_vol * a_price,2)
        #Calc the total_spend
        spread_cost = round(share_vol * price * spread,2)
        total_spend = (share_vol*a_price) + trade_cost
        #Check the fund has the money to cover this trade
        if total_spend > self.available:
            raise ValueError('you do not have the funds to make this trade -> this transaction will be cancelled')
        #Create a record for the ledger
        ledge_rec = {
            'trade_type':'buy'
            ,'signal_prob':signal_prob
            ,'ticker':ticker
            ,'trade_date':trade_date
            ,'spread':spread
            ,'price':price
            ,'ask_price':a_price
            ,'bid_price':b_price
            ,'share_vol':share_vol
            ,'trade_value':trade_value
            ,'trade_cost':trade_cost
            ,'spread_cost':spread_cost
            ,'holding_value':share_vol*price
            ,'total_spend':total_spend
            ,'invested_pre_trade':self.invested_value
            ,'invested_post_trade':self.invested_value + (share_vol*price)
            ,'available_pre_trade':self.available
            ,'available_post_trade':self.available - total_spend
        }
        self.ledger.append(ledge_rec)
        #Update the object
        self.available += -round(total_spend,2)
        self.codb += round(trade_cost + spread_cost,2)
        self.invested_value += round(share_vol*price,2)
        #Add to cur_holdings
        holding_rec = {
           'share_vol':share_vol
            ,'cur_price':price
            ,'value':round(share_vol*price,2)
        }
        #Check if key already in dict
        if ticker in self.cur_holdings:
            self.cur_holdings[ticker]['share_vol'] += share_vol
            self.cur_holdings[ticker]['cur_price'] = round(price,2)
            self.cur_holdings[ticker]['value'] = round(self.cur_holdings[ticker]['share_vol']*price,2)
        else:
            self.cur_holdings[ticker] = holding_rec
        
    #Create a function to sell shares
    def sell(self,ticker:str,trade_date:int,price:float,spread:float,trade_cost:float,signal_prob:float):
        #Check if already sold
        if ticker not in self.cur_holdings:
            return
        #Round down the share_vol
        share_vol = int(self.cur_holdings[ticker]['share_vol'])
        #Error check
        if spread > 1 or spread < 0:
            raise ValueError('spread should be between 0 and 1, the value expressed was -> {}'.format(spread))
        if price < 0:
            raise ValueError('price cannot be a negative, the value expressed was -> {}'.format(price))
        #Calculate the ask and bid price of each share
        a_price = round(price * (1+spread),2)
        b_price = round(price * (1-spread),2)
        #Calculate the trade value
        trade_value = round(share_vol*b_price,2)
        value = round(trade_value - trade_cost,2)
        spread_cost = round(share_vol * price * spread)
        #Calc the total_spend
        total_spend = round(trade_cost+spread_cost-(share_vol*price),2)
        #Check the fund has the money to cover this trade
        if total_spend > self.available:
            raise ValueError('you do not have the funds to make this trade -> this transaction will be cancelled')
        #Create a record for the ledger
        ledge_rec = {
           'trade_type':'sell'
            ,'signal_prob':signal_prob
            ,'ticker':ticker
            ,'trade_date':trade_date
            ,'spread':spread
            ,'price':price
            ,'ask_price':a_price
            ,'bid_price':b_price
            ,'share_vol':share_vol
            ,'trade_value':trade_value
            ,'trade_cost':trade_cost
            ,'spread_cost':spread_cost
            ,'holding_value':share_vol*price
            ,'total_spend':total_spend
            ,'invested_pre_trade':self.invested_value
            ,'invested_post_trade':self.invested_value - (share_vol*price)
            ,'available_pre_trade':self.available
            ,'available_post_trade':self.available - total_spend
        }
        self.ledger.append(ledge_rec)
        #Update the object
        self.available += -round(total_spend,2)
        self.codb += round(trade_cost + spread_cost,2)
        self.invested_value += -round((share_vol*price),2)
        #Remove from cur_holdings
        #Check if key already in dict
        if ticker in self.cur_holdings:
            if self.cur_holdings[ticker]['share_vol'] > share_vol:
                self.cur_holdings[ticker]['share_vol'] += -share_vol
                self.cur_holdings[ticker]['cur_price'] = price
                self.cur_holdings[ticker]['value'] = round(self.cur_holdings[ticker]['share_vol']*price,2)
            elif self.cur_holdings[ticker]['share_vol'] == share_vol:
                del self.cur_holdings[ticker] #Delete from the dictionary
            else:
                raise ValueError('you do not have enough share to make this trade. You want to sell {} of {} however you only have {}'.format(share_vol,ticker,self.cur_holdings[ticker]['share_vol']))
        else:
            return
        
    #Create a function to update value after a price change
    def price_change(self,ticker:str,price:float):
        #Check if key already in dict
        if ticker in self.cur_holdings:
            #Update the object
            self.invested_value += round((self.cur_holdings[ticker]['share_vol']*price) - self.cur_holdings[ticker]['value'],2)
            self.cur_holdings[ticker]['cur_price'] = price
            self.cur_holdings[ticker]['value'] = round(self.cur_holdings[ticker]['share_vol']*price,2)
        else:
            return

In [11]:
#Order the data by date (asc) and buy probability (desc)
signal_df = df_prices[['Ticker','Date','Open','Close','open_shift_neg1','signal','signal_prob']].copy()
signal_df.sort_values(['Date','signal_prob'],ascending=[True,False],inplace=True)
signal_df.reset_index(drop=True,inplace=True)
signal_df.head()

Unnamed: 0,Ticker,Date,Open,Close,open_shift_neg1,signal,signal_prob
0,SVS,2000-10-23,90.0,97.75,98.5,sell,0.453756
1,SVS,2000-10-30,98.5,98.75,98.0,hold,0.411386
2,SVS,2000-11-06,98.0,104.75,104.5,hold,0.506952
3,SVS,2000-11-13,104.5,104.0,105.88,hold,0.648606
4,SVS,2000-11-20,105.88,111.25,110.0,hold,0.76195


In [12]:
#Establish trading variables
#All figures in pence
fund_value_st = 1000000 #£10,000
trade_cost = 250
investment_limit_min_val = 100000 #£1,000
investment_limit_max_per = 0.1 #10%
spread = 0.01 #1%

In [13]:
#Run through rows and buy and sell according to signals and holdings
fund = new_fund(fund_value_st)
for index,row in signal_df.iterrows():
    #Follow signal
    if row['signal'] =='buy':
        #Check for funds
        if fund.available < investment_limit_min_val:
            continue
        #Buy shares
        val_to_invest = fund.available*investment_limit_max_per if fund.available*investment_limit_max_per > investment_limit_min_val else investment_limit_min_val
        fund.buy(row['Ticker'],row['Date'],row['open_shift_neg1'],spread,val_to_invest,trade_cost,row['signal_prob'])
    elif row['signal'] =='sell':
        #Sell shares
        fund.sell(row['Ticker'],row['Date'],row['open_shift_neg1'],spread,trade_cost,row['signal_prob'])
    elif row['signal'] == 'hold':
        fund.price_change(row['Ticker'],row['Close'])

NEW FUND
st_val:1000000
available:1000000


In [14]:
#Show summary
print('fund.st_val:£{:,.2f}'.format(fund.st_val/100))
print('fund.available:£{:,.2f}'.format(fund.available/100))
print('fund.codb:£{:,.2f}'.format(fund.codb/100))
print('fund.invested_value:£{:,.2f}'.format(fund.invested_value/100))
ledger_df = pd.DataFrame(fund.ledger,columns=[
            'trade_type'
            ,'signal_prob'
            ,'ticker'
            ,'trade_date'
            ,'spread'
            ,'price'
            ,'ask_price'
            ,'bid_price'
            ,'share_vol'
            ,'trade_value'
            ,'trade_cost'
            ,'spread_cost'
            ,'holding_value'
            ,'total_spend'
            ,'invested_pre_trade'
            ,'invested_post_trade'
            ,'available_pre_trade'
            ,'available_post_trade'])
print('TRADE COUNT:{:,}'.format(len(ledger_df)))
ledger_df.head(50)

fund.st_val:£10,000.00
fund.available:£3,333,746.88
fund.codb:£1,210,196.61
fund.invested_value:£284,526.89
TRADE COUNT:6,315


Unnamed: 0,trade_type,signal_prob,ticker,trade_date,spread,price,ask_price,bid_price,share_vol,trade_value,trade_cost,spread_cost,holding_value,total_spend,invested_pre_trade,invested_post_trade,available_pre_trade,available_post_trade
0,buy,0.543768,MSLH,2001-03-05,0.01,234.33,236.67,231.99,421,99638.07,250,986.53,98652.93,99888.07,0.0,98652.93,1000000.0,900111.93
1,sell,0.723422,MSLH,2001-03-12,0.01,233.81,236.15,231.47,421,97448.87,250,984.0,98434.01,-97200.01,98652.93,218.92,900111.93,997311.94
2,buy,0.547441,SVS,2001-04-02,0.01,117.5,118.67,116.33,840,99682.8,250,987.0,98700.0,99932.8,218.92,98918.92,997311.94,897379.14
3,buy,0.804765,SDR,2001-04-09,0.01,975.0,984.75,965.25,101,99459.75,250,984.75,98475.0,99709.75,98918.92,197393.92,897379.14,797669.39
4,buy,0.843193,FOUR,2001-04-23,0.01,268.75,271.44,266.06,367,99618.48,250,986.31,98631.25,99868.48,197393.92,296025.17,797669.39,697800.91
5,sell,0.620382,FOUR,2001-05-07,0.01,320.0,323.2,316.8,367,116265.6,250,1174.0,117440.0,-116016.0,304845.17,187405.17,697800.91,813816.91
6,buy,0.624134,AAL,2001-05-14,0.01,1329.67,1342.97,1316.37,74,99379.78,250,983.96,98395.58,99629.78,193495.17,291890.75,813816.91,714187.13
7,buy,0.608364,RDW,2001-06-04,0.01,168.19,169.87,166.51,587,99713.69,250,987.28,98727.53,99963.69,294620.75,393348.28,714187.13,614223.44
8,buy,0.825935,ELM,2001-06-11,0.01,62.23,62.85,61.61,1587,99742.95,250,987.59,98759.01,99992.95,393348.28,492107.29,614223.44,514230.49
9,buy,0.62824,JDW,2001-06-11,0.01,342.82,346.25,339.39,288,99720.0,250,987.32,98732.16,99970.0,492947.29,591679.45,514230.49,414260.49


In [17]:
#From the ledger create a dataframe of completed trades
completed_trades = {}
#Format
#     ABC:{
#         open_position:True/False #Bool showing if there is currently an open position
#         ,trades:[ #List showing all trades
#             { #Each trade has an object
#                 share_vol:12345 #Volume of shares purchased
#                 ,buy_spend:12345.67 #Total value spent in buying shares including costs
#                 ,sell_spend:12345.67 #Total value spent in selling shares including costs
#                 ,profit_loss:12345.67 #Profit/loss of ths trade
#             }
#         ]
#     }
for index,row in ledger_df.iterrows():
    #Check if there is an open trade for this ticker
    if row['ticker'] not in completed_trades:
        completed_trades[row['ticker']] = {
            'open_position':False
            ,'trades':[]
        }
    #Deal with buying
    if row['trade_type'] == 'buy':
        #Create a trade object and add to the trades list in completed_trades
        completed_trades[row['ticker']]['trades'].append({
            'ticker':row['ticker']
            ,'share_vol':row['share_vol']
            ,'buy_spend':row['total_spend']
            ,'buy_prob':row['signal_prob']
            ,'buy_date':row['trade_date']
            ,'sell_spend':None
            ,'sell_prob':None
            ,'sell_date':None
            ,'periods_held':None
            ,'profit_loss':None
        })
        #Open the trading position
        completed_trades[row['ticker']]['open_position'] = True
    #Dealing with selling
    if row['trade_type'] == 'sell':
        shares_to_sell = row['share_vol']
        #Find open positions and sell until shares al gone
        for trade in completed_trades[row['ticker']]['trades']:
            if trade['share_vol'] == row['share_vol']:
                trade['sell_spend'] = row['total_spend']
                trade['sell_prob'] = row['signal_prob']
                trade['sell_date'] = row['trade_date']
trades_li = []
for tick in completed_trades:
    for trade in completed_trades[tick]['trades']:
        trades_li.append(trade)
trades_df = pd.DataFrame(trades_li,columns=[
            'ticker'
            ,'share_vol'
            ,'buy_spend'
            ,'buy_prob'
            ,'buy_date'
            ,'sell_spend'
            ,'sell_prob'
            ,'sell_date'
            ,'periods_held'
            ,'profit_loss'])
trades_df['profit_loss'] = -trades_df['sell_spend'] - trades_df['buy_spend']
trades_df['periods_held'] = trades_df['sell_date'] - trades_df['buy_date']
trades_df['periods_held'] = [x.days/7 for x in trades_df['periods_held']]
trades_df

Unnamed: 0,ticker,share_vol,buy_spend,buy_prob,buy_date,sell_spend,sell_prob,sell_date,periods_held,profit_loss
0,MSLH,421,99888.07,0.543768,2001-03-05,-97200.01,0.723422,2001-03-12,1.0,-2688.06
1,MSLH,416,99815.44,0.495630,2001-08-06,-97343.52,0.455571,2001-08-13,1.0,-2471.92
2,MSLH,396,99871.72,0.409974,2002-02-04,-112962.88,0.630567,2002-06-03,17.0,13091.16
3,MSLH,337,99907.64,0.661124,2002-06-10,-100946.84,0.452559,2002-06-24,2.0,1039.20
4,MSLH,365,99836.60,0.523738,2003-09-22,-100942.60,0.487771,2003-09-29,1.0,1106.00
5,MSLH,393,99950.17,0.624329,2003-11-17,-88858.79,0.483198,2004-08-09,38.0,-11091.38
6,MSLH,362,99789.14,0.547657,2006-05-15,-101150.28,0.443119,2006-05-22,1.0,1361.14
7,MSLH,357,99860.14,0.557022,2006-06-05,-96208.44,0.383317,2006-06-19,2.0,-3651.70
8,MSLH,352,99968.08,0.660159,2006-06-26,-93689.64,0.524003,2007-11-12,72.0,-6278.44
9,MSLH,352,99732.24,0.615977,2007-08-20,-93689.64,0.524003,2007-11-12,12.0,-6042.60


In [18]:
#Export
trades_df.to_csv(path+r'\\trades_ledger_log_reg_mod.csv')