In [None]:
%matplotlib inline
import pandas as pd
import hvplot.pandas
from os import listdir
import os
#from tqdm import tqdm
from tqdm import tqdm_notebook as tqdm
from dask.distributed import Client, LocalCluster

In [None]:
# Directory containing the historical horse price CSV files loaded below.
# Written as a raw string: the original literal contained the unescaped
# sequence "\W" (in "adam\Work"), which is an invalid escape sequence that
# newer Python versions warn about. The resulting path value is unchanged.
rootDir = r'C:\Users\adam\Work\BFTrader\data\historicalhorseprices'
import re

def loadHorsePricesDataSet(file):
    """Load one price CSV and pivot it into a per-id table of 'traded' values.

    The file is read with ``pd.read_csv`` and must provide ``id``,
    ``timestamp`` and ``traded`` columns. Rows are pivoted so the index is the
    parsed timestamp and there is one column per ``id``; missing values are
    forward-filled from the previous row.

    Parameters
    ----------
    file : str
        Path of the file to load. Only names containing ``.csv``
        (case-insensitive) are loaded.

    Returns
    -------
    tuple or None
        ``(name, prices)`` where ``name`` is the part of the file's base name
        before ``.csv`` and ``prices`` is the pivoted DataFrame. ``None`` is
        returned when the file name is not a CSV name or the file has no rows.
    """
    # Raw-string pattern: the original non-raw '\.' is an invalid escape sequence.
    match = re.search(r'(.*)\.csv', os.path.basename(file), re.IGNORECASE)
    if match is None:
        # Non-CSV entries in the data directory previously crashed with
        # AttributeError on `None.group`; skip them like empty files instead.
        return None
    name = match.group(1)  # avoid shadowing the builtin `id`

    df = pd.read_csv(file)
    df['timestamp'] = df['timestamp'].map(pd.Timestamp)
    df = df.set_index(['id', 'timestamp']).unstack('id')
    if len(df) == 0:
        return None

    # Selecting the top-level 'traded' field leaves one column per id.
    # ffill() replaces the deprecated fillna(method='pad').
    return (name, df['traded'].ffill())

def loadHorsePricesDataSets(rootDir):
    """Load every file in ``rootDir`` sequentially, showing a progress bar.

    Parameters
    ----------
    rootDir : str
        Directory whose entries are passed one by one to
        ``loadHorsePricesDataSet``.

    Returns
    -------
    dict
        Maps the identifier returned by ``loadHorsePricesDataSet`` to its
        DataFrame. Entries for which the loader returns ``None`` are skipped.
    """
    results = dict()
    for file in tqdm(listdir(rootDir)):
        # os.path.join instead of a hard-coded '\\' so the path is valid on
        # every platform and a trailing separator on rootDir is not doubled.
        ds = loadHorsePricesDataSet(os.path.join(rootDir, file))
        if ds is None:
            continue
        name, df = ds
        results[name] = df
    return results

In [None]:
# Start a local Dask cluster with 6 workers, one thread each, and connect a
# client to it.
cluster = LocalCluster(n_workers=6,threads_per_worker=1)
client = Client(cluster)
# Bare expression: the client's summary is shown as this cell's output.
client

In [None]:
# Load every file in rootDir in parallel on the Dask workers. horsePrices is a
# list with one entry per file: whatever loadHorsePricesDataSet returned for
# it (an (id, DataFrame) tuple, or None).
# os.path.join replaces the hard-coded '\\' separator so the paths are valid
# on any platform.
paths = [os.path.join(rootDir, file) for file in listdir(rootDir)]
try:
    futures = client.map(loadHorsePricesDataSet, paths)
    horsePrices = client.gather(futures)
finally:
    # Always shut the client and workers down, even when loading fails, so a
    # failed run does not leave worker processes behind.
    client.close()
    cluster.close()

In [None]:
def getFavourite(df,nthfavourite):
    """Return the column whose value in the first row of ``df`` has rank ``nthfavourite``.

    The first row is ranked across columns in ascending order (rank 1 is the
    smallest value). The first column whose rank equals ``nthfavourite`` is
    returned; ``None`` is returned when no column has exactly that rank
    (for example when tied values share a fractional rank).
    """
    firstRowRanks = df.head(1).rank(axis=1).values[0]
    wanted = nthfavourite * 1.0
    candidates = (
        df.columns[position]
        for position, rank in enumerate(firstRowRanks)
        if rank == wanted
    )
    return next(candidates, None)
        
def getFaviouriteReturns(horsePrices,nthfavourites,startMinutes,endMinutes):
    """Compute price returns of the n-th favourites over a window before the end of each data set.

    For every non-``None`` ``(id, DataFrame)`` entry, the frame is restricted
    to rows whose timestamp lies between ``startMinutes`` and ``endMinutes``
    before the frame's last timestamp (both ends inclusive). For each
    requested rank, the runner holding that rank in the first row of the
    window is looked up with ``getFavourite`` and its return is computed as
    ``startPrice / endPrice - 1``, using the first and last rows of the window.

    Parameters
    ----------
    horsePrices : iterable
        Entries are ``(id, DataFrame)`` tuples or ``None``; ``None`` entries
        are skipped.
    nthfavourites : iterable of int
        Ranks to evaluate. A rank is only evaluated when
        ``nthfavourite + 3 < number of columns`` of the frame.
    startMinutes, endMinutes : number
        Window bounds, in minutes before the last timestamp.

    Returns
    -------
    list of dict
        One record per (data set, rank) with keys ``fav``, ``date``,
        ``duration``, ``runner``, ``nrunners``, ``return``, ``startPrice``
        and ``endPrice``.
    """
    # The original also ran `files = listdir(rootDir)` here; the result was
    # never used and it made the function depend on a global and on the
    # filesystem, so it has been removed.
    results = []
    for ds in tqdm(horsePrices):
        if ds is None:
            continue
        _, df = ds
        nrunners = len(df.columns)
        # df.index[-1] keeps the index's own timestamp type (including any
        # timezone), unlike df.index.values[-1] which yields a raw numpy value.
        lastTimestamp = df.index[-1]
        startTimestamp = lastTimestamp - pd.Timedelta(minutes=startMinutes)
        endTimestamp = lastTimestamp - pd.Timedelta(minutes=endMinutes)
        window = df.loc[(df.index >= startTimestamp) & (df.index <= endTimestamp)]
        if len(window) == 0:
            continue
        for nthfavourite in nthfavourites:
            if nthfavourite + 3 >= nrunners:
                continue
            fav = getFavourite(window, nthfavourite)
            if fav is None:
                continue
            startPrice = window[fav].iloc[0]
            endPrice = window[fav].iloc[-1]
            ret = startPrice/endPrice-1
            results.append({
                'fav': nthfavourite,
                'date': window.index[0],
                'duration': window.index[-1]-window.index[0],
                'runner': fav,
                'nrunners': nrunners,
                'return': ret,
                'startPrice': startPrice,
                'endPrice': endPrice,
            })
    return results

# Returns for the favourites ranked 1 to 4, measured over the window from 60
# to 5 minutes before each data set's last timestamp.
returns = getFaviouriteReturns(
    horsePrices,
    nthfavourites=[1, 2, 3, 4],
    startMinutes=60,
    endMinutes=5,
)

In [None]:
# Build the returns table indexed by (fav, date) and keep only windows whose
# first and last rows are more than 50 minutes apart.
dfreturns = pd.DataFrame(returns).set_index(['fav','date'])
dfreturns = dfreturns.loc[(dfreturns['duration'] > pd.Timedelta(minutes=50))]


def _keep95PercentOfGains(multiplier):
    """Scale the part of ``multiplier`` above break-even (1.0) by 0.95; leave losses as they are.

    NOTE(review): the 0.95 factor presumably models a 5% commission on
    winnings - confirm. The original compared against 0 instead of 1, which
    is practically never true for a multiplier and therefore also shrank
    losses by 5%.
    """
    return multiplier if multiplier < 1 else (multiplier - 1) * 0.95 + 1


# Per-row multipliers: 1 -/+ 2% of the raw return, then the 0.95 factor on
# gains. NOTE(review): 0.02 looks like the fraction staked per row - confirm.
dfreturns['retscaledlay'] = (-dfreturns['return'] * 0.02 + 1).map(_keep95PercentOfGains)
dfreturns['retscaledback'] = (dfreturns['return'] * 0.02 + 1).map(_keep95PercentOfGains)

# Column evaluated for each favourite rank: the 'lay' column for rank 1 and
# the 'back' column for ranks 2-4 (order kept as in the original output).
favColumns = {4: 'retscaledback', 3: 'retscaledback', 2: 'retscaledback', 1: 'retscaledlay'}

retmeans = {
    'fav%d' % fav: (dfreturns.loc[fav][column] - 1).mean()
    for fav, column in favColumns.items()
}
print(retmeans)

# Overlay the cumulative product of the multipliers for every rank.
overlay = None
for fav, column in favColumns.items():
    curve = dfreturns.loc[fav][column].cumprod().hvplot()
    overlay = curve if overlay is None else overlay * curve
overlay

In [None]:
# Stack the rows for favourite ranks 2, 3 and 4 into one table, then plot the
# cumulative product of their 'retscaledback' multipliers in index order.
combined = pd.concat([dfreturns.loc[fav] for fav in (2, 3, 4)])
combinedBack = combined['retscaledback'].sort_index()
combinedBack.cumprod().hvplot()

In [None]:
# Mean of every column per (fav, nrunners) group; show the rows for fav == 1.
groupedMeans = dfreturns.groupby(['fav', 'nrunners']).mean()
groupedMeans.loc[1]

In [None]:
# Rows where nrunners == 5, restricted to fav == 1, ordered by raw return.
fiveRunnerRows = dfreturns[dfreturns['nrunners'] == 5]
fiveRunnerRows.loc[1].sort_values('return')

In [None]:
# Last rows of the price table (element 1 of the tuple) for the entry at
# position 3 of horsePrices.
samplePrices = horsePrices[3][1]
samplePrices.tail()

In [None]:
# Pairwise correlation between the columns of the price table for the entry
# at position 3 of horsePrices.
samplePrices = horsePrices[3][1]
samplePrices.corr()

In [None]:
# Pairwise correlation between the row-to-row changes of each column, for the
# entry at position 3 of horsePrices.
samplePriceChanges = horsePrices[3][1].diff()
samplePriceChanges.corr()