In [None]:
# Import libraries
import math
import os
from datetime import datetime
from datetime import timedelta

import betfairlightweight
import numpy as np
import pandas as pd
from betfairlightweight import filters
from dateutil import tz
from scipy.stats import zscore
from sklearn.linear_model import LogisticRegression

import fasttrack as ft

In [None]:
from fasttrack import Fasttrack as ft

In [None]:
# Truncate the current timestamp to midnight, then render it as day-monthname-year
today = pd.Timestamp(datetime.now()).floor(freq='D')
today_str = today.strftime('%d-%B-%Y')
today_str

In [None]:
# FastTrack security key. It is read from the FASTTRACK_SECKEY environment variable when
# that is set, so the real key never has to be saved inside the notebook; otherwise the
# placeholder below is used and must be replaced by hand before the client will work.
seckey = os.environ.get('FASTTRACK_SECKEY', 'security key goes here')
greys = ft.Fasttrack(seckey)

In [None]:
# Find list of greyhound tracks and fasttrack codes
# The result is used below as a DataFrame with (at least) 'state' and 'track_code' columns
track_codes = greys.listTracks()
track_codes.head()

In [None]:
# Filter to only use Qld
# Build a flat Python list of track codes. Wrapping the filtered Series in [...] would
# instead produce a one-element list that holds the whole Series.
tracks_filter = track_codes.loc[track_codes['state'] == 'QLD', 'track_code'].tolist()
tracks_filter

In [None]:
# Download results from 2015-01-01 up to today; the call returns two tables which are
# unpacked as race-level details and dog-level results.
# NOTE(review): the start date is ISO formatted while today_str is '%d-%B-%Y' — confirm
# getRaceResults accepts both. tracks_filter is not passed here, so presumably results
# for every track are returned — confirm that is intended.
race_details, dog_results = greys.getRaceResults('2015-01-01', today_str)
race_details.head()

In [None]:
# Preview the first rows of dog_results
dog_results.head()

In [None]:
# Clean data

# --- Race-level table ---
# Distance arrives as text with an 'm' suffix; drop the suffix and store an integer
race_details['Distance'] = race_details['Distance'].apply(lambda x: int(x.replace('m', '')))
race_details = race_details.rename(columns = {'@id': 'FastTrack_RaceId'})
race_details['date_dt'] = pd.to_datetime(race_details['date'], format='%d %b %y')
# Track name + distance, used later to normalise run times per track/distance combination
race_details['trackdist'] = race_details['Track'] + race_details['Distance'].astype(str)

# --- Dog-level table ---
dog_results = dog_results.rename(columns = {'@id': 'FastTrack_DogId', 'RaceId':'FastTrack_RaceId'})
# Strip the '$' and 'F' characters from the starting price; missing prices stay missing
dog_results['StartPrice'] = dog_results['StartPrice'].apply(lambda x: None if x is None else float(x.replace('$', '').replace('F','')))
# Drop rows that have no Box value
dog_results = dog_results[~dog_results['Box'].isnull()]
# Attach the race-level attributes to every dog row
dog_results = dog_results.merge(
    race_details[['FastTrack_RaceId', 'Distance', 'RaceGrade', 'Track', 'date_dt', 'trackdist']],
    how = 'left',
    on = 'FastTrack_RaceId')
# Cast in place: the column name must be spelled exactly 'RunTime', otherwise a new column
# is created and the original stays un-converted for the run-time features
dog_results['RunTime'] = dog_results['RunTime'].astype(float)
dog_results['Prizemoney'] = dog_results['Prizemoney'].astype(float)
# A Place of '1' or '1=' counts as a win (presumably '1=' marks a dead heat — confirm)
dog_results['win'] = dog_results['Place'].apply(lambda x: 1 if x in ['1', '1='] else 0)

print('Number of races in dataset: ' + str(dog_results['FastTrack_RaceId'].nunique()))

In [None]:
# Feature engineering
dog_results = dog_results.sort_values(by=['FastTrack_DogId', 'date_dt'])
dog_results = dog_results.set_index('date_dt')


def _lagged_rolling_365d(frame, column, agg):
    """Per-dog trailing 365-day aggregate of `column`, lagged by one run.

    frame  : dog-level results indexed by date_dt and sorted by date within each dog
    column : name of the column to aggregate
    agg    : name of the rolling aggregation method ('sum', 'count', 'min', 'median', ...)

    Returns a Series aligned row-for-row with `frame`. shift(1) moves every value down one
    run, so a row only carries information from that dog's earlier runs and never its own
    result. transform (rather than apply) is used because its output always keeps the
    frame's own index, so it can be assigned straight back as a column.
    """
    return frame.groupby('FastTrack_DogId')[column].transform(
        lambda runs: getattr(runs.rolling('365D'), agg)().shift(1)
    )


# Normalise runtimes for each trackdist so we can compare runs across different track distance combinations
# This assumes a dog having a good runtime on one track will have a good runtime on a different track
dog_results['RunTime_norm'] = dog_results.groupby('trackdist')['RunTime'].transform(lambda x: zscore(x, nan_policy='omit'))

# Feature 1 = total prize money won over last 365 days (0 when there is no earlier run)
dog_results['Prizemoney_365D'] = _lagged_rolling_365d(dog_results, 'Prizemoney', 'sum').fillna(0)

# Feature 2 = Number of runs over last 365D
dog_results['runs_365D'] = _lagged_rolling_365d(dog_results, 'win', 'count').fillna(0)

# Feature 3 = win % over the last 365D (NaN when the dog has no earlier runs: 0 / 0)
dog_results['wins_365D'] = _lagged_rolling_365d(dog_results, 'win', 'sum').fillna(0)
dog_results['win%_365D'] = dog_results['wins_365D'] / dog_results['runs_365D']

# Feature 4 = Best runtime over the last 365D
dog_results['RunTime_norm_best_365D'] = _lagged_rolling_365d(dog_results, 'RunTime_norm', 'min')

# Feature 5 = Median runtime over the last 365D
dog_results['RunTime_norm_median_365D'] = _lagged_rolling_365d(dog_results, 'RunTime_norm', 'median')

dog_results.head()

In [None]:
# Convert features to Z-scores within each race so they are on a relative basis
dog_results = dog_results.sort_values(by = ['date_dt', 'FastTrack_RaceId'])

for col in ['Prizemoney_365D', 'runs_365D', 'win%_365D', 'RunTime_norm_best_365D', 'RunTime_norm_median_365D']:
    # The suffix must be exactly '_Z' (no space): the fillna lines below and the model's
    # feature list look the columns up under these names
    dog_results[col + '_Z'] = dog_results.groupby('FastTrack_RaceId')[col].transform(lambda x: zscore(x, ddof = 1))

# Replace undefined Z-scores with 0 (the race mean) for these two features.
# Assign the result back instead of calling fillna(inplace=True) on a column selection,
# which is not guaranteed to write through to the frame.
dog_results['runs_365D_Z'] = dog_results['runs_365D_Z'].fillna(0)
dog_results['win%_365D_Z'] = dog_results['win%_365D_Z'].fillna(0)

In [None]:
# Train the model
dog_results = dog_results.reset_index()
dog_results = dog_results.sort_values(by = ['date_dt', 'FastTrack_RaceId'])

# Only keep data after 2019
model_df = dog_results[dog_results['date_dt'] >= '2019-01-01']
feature_cols = ['Prizemoney_365D_Z', 'runs_365D_Z', 'win%_365D_Z', 'RunTime_norm_best_365D_Z', 'RunTime_norm_median_365D_Z']
model_df = model_df[['date_dt', 'FastTrack_RaceId', 'DogName', 'win', 'StartPrice'] + feature_cols]

# Only train model off races where each dog has a value for each feature.
# A race is dropped entirely as soon as any of its rows has a null in any kept column
# (the check spans every column selected above, StartPrice included).
races_exclude = model_df[model_df.isnull().any(axis=1)]['FastTrack_RaceId'].drop_duplicates()
model_df = model_df[~model_df['FastTrack_RaceId'].isin(races_exclude)]

# check for nulls (expected output: False)
model_df.drop(columns = 'StartPrice').isnull().values.any()

In [None]:
# Chronological split: fit on races before 2021 and evaluate on races from 2021 onwards,
# so the model is never scored on rows it was trained on
train_data = model_df[model_df['date_dt'] < '2021-01-01'].reset_index(drop=True)
test_data = model_df[model_df['date_dt'] >= '2021-01-01'].reset_index(drop=True)

train_x, train_y = train_data[feature_cols], train_data['win']
test_x, test_y = test_data[feature_cols], test_data['win']

logit_model = LogisticRegression()
logit_model.fit(train_x, train_y)

# Column 1 of predict_proba is the probability of the positive class (win == 1)
test_data['prob_unscaled'] = logit_model.predict_proba(test_x)[:,1]
# Raw probabilities are produced per dog, so their per-race totals are shown for inspection
test_data.groupby('FastTrack_RaceId')['prob_unscaled'].sum()

In [None]:
# Scale raw model output so they sum to unity
# Divide each dog's probability by its race total. transform('sum') broadcasts the race
# total back onto every row with test_data's own index, so the division aligns row-for-row.
test_data['prob_scaled'] = test_data['prob_unscaled'] / test_data.groupby('FastTrack_RaceId')['prob_unscaled'].transform('sum')
test_data.groupby('FastTrack_RaceId')['prob_scaled'].sum()

In [None]:
# Create a column for highest model prediction
# And for starting price to compare
# transform('max') / transform('min') broadcast each race's extreme value onto every row,
# so the comparisons yield boolean flags aligned with test_data's own index.
test_data['model_win_prediction'] = test_data['prob_scaled'] == test_data.groupby('FastTrack_RaceId')['prob_scaled'].transform('max')
test_data['odds_win_prediction'] = test_data['StartPrice'] == test_data.groupby('FastTrack_RaceId')['StartPrice'].transform('min')

# Accuracy = flagged rows that actually won, divided by the number of distinct races
n_races = test_data['FastTrack_RaceId'].nunique()
is_winner = test_data['win'] == 1
model_hits = (test_data['model_win_prediction'] & is_winner).sum()
odds_hits = (test_data['odds_win_prediction'] & is_winner).sum()

print('Model predicts the winner in {:.2%} of races'.format(model_hits / n_races))
print('Starting Price Odds predicts winner in {:.2%} of races'.format(odds_hits / n_races))

In [None]:
# Retrieve today's races for the tracks in tracks_filter
# NOTE(review): the date is hard-coded to '2021-06-16' rather than derived from `today` —
# update it (or confirm the accepted date format and pass today's date) before running.
qld_races_today, qld_dogs_today = greys.getBasicFormat('2021-06-16', tracks_filter)
qld_races_today.head()

In [None]:
# Distinct track names that appear in today's race list
qld_tracks_today = list(pd.unique(qld_races_today['Track']))
qld_tracks_today

In [None]:
# Betfair API credentials. Each value is read from an environment variable when it is set,
# so real credentials need not be saved inside the notebook; otherwise the placeholder is
# used and must be replaced by hand before logging in.
my_username = os.environ.get('BETFAIR_USERNAME', "your_username")
my_password = os.environ.get('BETFAIR_PASSWORD', "your_password")
my_app_key = os.environ.get('BETFAIR_APP_KEY', "your_app_key")

trading = betfairlightweight.APIClient(my_username, my_password, app_key=my_app_key)
trading.login_interactive()