In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
from dateutil.parser import parse
import os
from tqdm import tqdm
import traceback

In [None]:
def loadMids(filePath):
    """Load a quotes CSV and return mid prices as a timestamp-by-id table.

    Parameters
    ----------
    filePath : str or file-like
        Passed straight to ``pd.read_csv``. The CSV must provide the columns
        ``id``, ``timestamp``, ``bid`` and ``ask``.

    Returns
    -------
    pd.DataFrame
        One row per timestamp (sorted ascending) and one column per ``id``,
        holding ``(bid + ask) / 2``. An id with no quote at a given
        timestamp is NaN in that row.
    """
    dfQuotes = pd.read_csv(filePath)
    dfQuotes['timestamp'] = dfQuotes['timestamp'].map(parse)
    dfQuotes = dfQuotes.set_index(['id', 'timestamp'])
    # Only the mid is derived: the bid/ask spread was previously stored on the
    # temporary frame as well, but nothing read it before it was discarded.
    mid = (dfQuotes['bid'] + dfQuotes['ask']) / 2
    # Pivot the (id, timestamp) series so that each id becomes a column.
    return mid.unstack('id').sort_index()

def loadMidsFromHistoricalPrices(filePath):
    """Load a historical-prices CSV and return traded prices as a table.

    The ``traded`` column is used as the price series in place of a bid/ask
    mid.

    Parameters
    ----------
    filePath : str or file-like
        Passed straight to ``pd.read_csv``. The CSV must provide the columns
        ``id``, ``timestamp`` and ``traded``.

    Returns
    -------
    pd.DataFrame
        One row per timestamp (sorted ascending) and one column per ``id``.
        Gaps are forward-filled with that id's previous value, so a column
        is NaN only before its first observation.
    """
    dfQuotes = pd.read_csv(filePath)
    dfQuotes['timestamp'] = dfQuotes['timestamp'].map(parse)
    dfQuotes = dfQuotes.set_index(['id', 'timestamp'])
    # Pivot the (id, timestamp) series so that each id becomes a column.
    mids = dfQuotes['traded'].unstack('id').sort_index()
    # .ffill() is the supported spelling of fillna(method='ffill'); the
    # `method` argument of fillna is deprecated and rejected by newer pandas.
    return mids.ffill()

def convertToPickle(loadFunc, baseDir, outputDir):
    """Load every file in ``baseDir`` with ``loadFunc`` and pickle the result.

    Conversion is best-effort: a file that fails to load or save is skipped
    and the remaining files are still processed. Failures are reported on
    stdout once the loop has finished instead of being dropped silently.

    Parameters
    ----------
    loadFunc : callable
        Called as ``loadFunc(filePath)``; must return an object that has a
        ``to_pickle(path)`` method.
    baseDir : str
        Directory whose entries are converted.
    outputDir : str
        Directory receiving ``<name>.pickle`` for each input ``<name>.<ext>``.
        It is created if it does not exist.

    Returns
    -------
    None
    """
    # Without the output directory every single to_pickle call would fail.
    os.makedirs(outputDir, exist_ok=True)
    failures = []
    for file in tqdm(os.listdir(baseDir)):
        # os.path.join keeps this working on any platform, not just Windows.
        filePath = os.path.join(baseDir, file)
        try:
            mids = loadFunc(filePath)
            # splitext strips the extension whatever its length.
            stem = os.path.splitext(file)[0]
            mids.to_pickle(os.path.join(outputDir, stem + '.pickle'))
        except Exception as exc:
            # Exception (not a bare except) so Ctrl-C still interrupts the run.
            failures.append((filePath, exc))
    if failures:
        print(f'{len(failures)} file(s) could not be converted:')
        for failedPath, exc in failures:
            print(f'  {failedPath}: {exc!r}')
        

In [None]:
# Convert every file in the historical-prices folder to a pickle, loading
# each one with loadMidsFromHistoricalPrices.
baseDir = r'D:\Data\BFTrader\historicalhorseprices'
outputDir = r'D:\data\BFTrader\historicalhorsepricespickle'
convertToPickle(
    loadFunc=loadMidsFromHistoricalPrices,
    baseDir=baseDir,
    outputDir=outputDir,
)

In [None]:
# Convert every file in the quote-CSV folder to a pickle of mid prices,
# loading each one with loadMids.
baseDir = r'D:\Data\BFTrader\horsepricesCsv'
outputDir = r'D:\Data\BFTrader\horsepricemidspickle'
convertToPickle(
    loadFunc=loadMids,
    baseDir=baseDir,
    outputDir=outputDir,
)

In [None]:
# Backtest over every pickled price table in sourceDir: take the price of the
# third-lowest-priced runner at the start of a time window and compare it with
# its price at the end of the window.
startTimeDelta = '60m'  # window opens this long before the last valid timestamp
endTimeDelta = '5m'     # window closes this long before the last valid timestamp
sourceDir = r'D:\Data\BFTrader\historicalhorsepricespickle'
winningsKept = 0.95     # applied to positive pnl only (presumably a 5% commission - confirm)
pnls = []               # (file name, pnl) for every file that was processed
errors = []             # {'file': path, 'error': traceback text} for every failure
count = 0
total_pnl = 0
pbar = tqdm(os.listdir(sourceDir))
for file in pbar:
    if not file.endswith('pickle'):
        continue
    # Read from the same directory that was listed; do not rely on a path
    # variable that happens to be left over from another cell.
    filePath = os.path.join(sourceDir, file)
    try:
        mids = pd.read_pickle(filePath).sort_index()
        endDate = mids.last_valid_index()
        startDate = endDate - pd.Timedelta(startTimeDelta)
        inWindow = (mids.index > startDate) & (mids.index < endDate - pd.Timedelta(endTimeDelta))
        mids = mids[inWindow]
        firstTime = mids.first_valid_index()
        lastTime = mids.last_valid_index()
        # Sort runners according to favourites at the beginning of the window
        # (lowest price first). NaN and non-positive prices are dropped before
        # sorting: Series.argsort() does not order NaN entries reliably, which
        # could select the wrong runner.
        openingPrices = mids.loc[firstTime].dropna()
        runners = openingPrices[openingPrices > 0].sort_values().index
        # back with 1
        fav1 = 0
        fav2 = 0
        fav3 = 0
        margin = 1.0
        #fav1 = mids.loc[firstTime, runners[0]] / mids.loc[lastTime, runners[0]] - 1
        #fav2 = mids.loc[firstTime, runners[1]] / mids.loc[lastTime, runners[1]] - 1
        if len(runners) >= 3:
            fav3 = mids.loc[firstTime, runners[2]] / mids.loc[lastTime, runners[2]] - 1
        # lay with 1 equivalent
        #fav1 = -fav1 * (1/(mids.loc[firstTime, runners[0]]-1))
        #fav2 = -fav2 * (1/(mids.loc[firstTime, runners[1]]-1))
        pnl = (fav1 + fav2 + fav3) / margin
        if pnl > 0:
            pnl = pnl * winningsKept
        total_pnl += pnl
        count += 1
        pnls.append((file, pnl))
        pbar.set_description('running avg trade : %f' % (total_pnl / count))
    except Exception:
        # Exception (not a bare except) so Ctrl-C still interrupts the loop.
        errors.append({'file': filePath, 'error': traceback.format_exc()})
print(f'{len(errors)} errors')
# Guard the empty case: with no processed files there is nothing to take a
# median of.
median_trade = np.median([tradePnl for _, tradePnl in pnls]) if pnls else float('nan')
print(f'median trade {median_trade}')
print(f'total pnl {total_pnl}')

In [None]:
# One row per processed file: the file name ('id') and the pnl recorded for it.
dfPnl = pd.DataFrame(pnls, columns=['id', 'pnl'])
# Histogram of pnl across files, labelled so the figure can be read on its own.
ax = dfPnl['pnl'].hist(bins=100, figsize=(20, 6))
ax.set(title='Distribution of pnl per file', xlabel='pnl', ylabel='number of files');

In [None]:
# Running total of pnl, in the order the files were processed.
ax = dfPnl['pnl'].cumsum().plot(title='Cumulative pnl over processed files')
ax.set(xlabel='file number (processing order)', ylabel='cumulative pnl');