# Linking Dependencies

In [1]:
# imports
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from bayes_opt.bayesian_optimization import BayesianOptimization
from bayes_opt.helpers import UtilityFunction

__all__ = ["BayesianOptimization", "UtilityFunction"]

# Data Input

In [2]:
# Load every horse-in-race entry from the spreadsheet.
# The sheet name is passed positionally: the keyword was renamed from
# `sheetname` to `sheet_name` across pandas releases, while the second
# positional argument selects the sheet in both old and new versions.
raceDataFrame = pd.read_excel('data.xlsx', 'Sheet1')
raceData = np.array(raceDataFrame.values)

# Order the rows by race number (last column). Fancy indexing returns a new
# array, so the result must be assigned back. A stable sort keeps the horses
# of each race in their original spreadsheet order.
raceData = raceData[np.argsort(raceData[:, -1], kind='mergesort')]

# After sorting, the last row carries the highest race number.
numberOfRaces = int(raceData[-1][-1])
proportion = 1/3
separationPoint = int(proportion * (numberOfRaces - 1))

# init array containing all races
# (each race is a sequence of horse profile(X) and speed(Y), with speed as the first index)
races = [[] for _ in range(numberOfRaces)]

# separate data into X and Y, indexed by races (each as an array of horse-in-race entry)
# NOTE(review): assumes race numbers run from 1 to numberOfRaces - TODO confirm;
# a race number of 0 would silently land in the last race via index -1.
for currentRow in raceData:
    raceNumber = int(currentRow[-1])
    # drop the trailing race-number column, keep the rest of the entry
    races[raceNumber - 1].append(currentRow[:-1])

# convert races into np.arrays
races = [np.array(raceEntries) for raceEntries in races]

# split data into different sets
# NOTE(review): no random seed is set, so the historical/future split differs
# on every run.
np.random.shuffle(races)
historicalRaces = races[:separationPoint]
futureRaces = races[separationPoint:]

# all races ready as an array: races (randomised order)
# historical races are used to initialise the GP model within the Bayesian Optimiser
# future races are used to simulate the selection performed by the Bayesian Optimiser

# Use Modified Bayesian Optimisation

In [3]:
# (lower, upper) limits for every parameter handed to the optimiser,
# listed as (name, range) pairs and collected into a dict.
parameter_bounds = dict([
    ('win', (0, 1000)),
    ('horse_rating', (0, 300)),
    ('horse_weight', (300, 800)),
    ('handicapped_weight', (0, 100)),
    ('carried_weight', (0, 100)),
    ('lane_number', (1, 18)),
    ('running_one', (1, 18)),
    ('running_two', (1, 18)),
    ('placing', (1, 18)),
    ('length_behind_winner', (0, 100)),
])

# build the Bayesian optimiser from the limits defined above
bayesianOptimiser = BayesianOptimization(parameter_bounds)

## Define Kernel, Kappa, Acquisition Function, Batch Size, etc.

In [4]:
# Acquisition function selector.
# Options: poi (probability of improvement) | ucb (upper confidence bound) | ei (expected improvement)
acquisitionFunctionFlag = 'poi'

# Values forwarded to setup() together with the flag.
kappa, xi = 2.576, 0.0

# Defined here but not passed to setup() in this cell.
theta0, nugget = 0.02, 0.01

bayesianOptimiser.setup(acquisitionFunctionFlag, kappa, xi)

## Initialise GP Model with Historical Data

In [5]:
# Flatten the historical races into a single list of horse-in-race entries.
historicalHorses = [horse for race in historicalRaces for horse in race]

# Stack the entries row-wise and pass them to the optimiser's initialize().
bayesianOptimiser.initialize(np.vstack(historicalHorses))

## Iterate for n Rounds (Races Allowed to Observe)

In [6]:
# can only select from n sets of races, each time picking 1 race to watch
# after watching n races, predict for a race and place a bet ... see if successful ...
splitRatio = 0.5
splitPoint = int(splitRatio * (len(futureRaces) - 1))
np.random.shuffle(futureRaces)

# Slice the shuffled futureRaces, not the full `races` list: splitPoint is
# computed from len(futureRaces), and slicing `races` would put the historical
# races (already used to initialise the GP model) into the train/test sets.
trainRaces = futureRaces[:splitPoint]
testRaces = np.array(futureRaces[splitPoint:])

# group training races into sets/batches of batchSize races
# (the final batch may hold fewer races)
# NOTE(review): np.array / np.split on races with differing numbers of horses
# relies on NumPy building an object array - confirm against the installed
# NumPy version.
batchSize = 3
split_points = range(batchSize, len(trainRaces), batchSize)
batchedTrainRaces = np.split(trainRaces, split_points)
# print(len(batchedTrainRaces))

# sums =0
# for i in range(len(batchedTrainRaces)) :
#     for ii in range(len(batchedTrainRaces[i])) :
#         sums += len(batchedTrainRaces[i][ii])
# print(sums)


In [8]:
# call minimise to optimise
# The batched training races are wrapped in a NumPy array before being handed
# to the optimiser.
# NOTE(review): the output recorded for this cell shows the call failing with
# "ValueError: length of x0 != length of bounds", raised inside scipy's
# _minimize_lbfgsb. The cause lies in how the optimiser builds x0/bounds and
# cannot be determined from this notebook alone - TODO investigate.
bayesianOptimiser.minimize(np.array(batchedTrainRaces))

# optimal input to be used to update GP model
print(bayesianOptimiser.res['max'])

[31mBayesian Optimization[0m
[94m----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |   carried_weight |   handicapped_weight |   horse_rating |   horse_weight |   lane_number |   length_behind_winner |   placing |   running_one |   running_two |       win | 


ValueError: length of x0 != length of bounds

In [None]:
# Number of batches the training races were split into.
print(len(batchedTrainRaces))

In [None]:
%debug

> [0;32m/Users/Larry/anaconda3/lib/python3.5/site-packages/scipy/optimize/lbfgsb.py[0m(260)[0;36m_minimize_lbfgsb[0;34m()[0m
[0;32m    258 [0;31m        [0mbounds[0m [0;34m=[0m [0;34m[[0m[0;34m([0m[0;32mNone[0m[0;34m,[0m [0;32mNone[0m[0;34m)[0m[0;34m][0m [0;34m*[0m [0mn[0m[0;34m[0m[0m
[0m[0;32m    259 [0;31m    [0;32mif[0m [0mlen[0m[0;34m([0m[0mbounds[0m[0;34m)[0m [0;34m!=[0m [0mn[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m--> 260 [0;31m        [0;32mraise[0m [0mValueError[0m[0;34m([0m[0;34m'length of x0 != length of bounds'[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    261 [0;31m    [0;31m# unbounded variables must use None, not +-inf, for optimizer to work properly[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    262 [0;31m    [0mbounds[0m [0;34m=[0m [0;34m[[0m[0;34m([0m[0;32mNone[0m [0;32mif[0m [0ml[0m [0;34m==[0m [0;34m-[0m[0mnp[0m[0;34m.[0m[0minf[0m [0;32melse[0m [0ml[0m[0;34m,[0m [0;32mNone[0m 

### Iterate through Race Profiles (of scheduled / upcoming races)

In [None]:
# Show the 'max' entry of the optimiser's result structure.
print(bayesianOptimiser.res['max'])

In [None]:
# Show the optimiser's complete result structure.
print(bayesianOptimiser.res)

### Calculate Race Learning Index

In [None]:
# using trained model...
gp = bayesianOptimiser.gp

# Predict a timing for every horse of every held-out race.
# NOTE(review): assumes `gp` follows the scikit-learn GaussianProcessRegressor
# API (suggested by the return_std=True argument) - TODO confirm.
for race in testRaces:
    print("RACE: ", race)
    for horse in race:
        # The first value of an entry is the speed (target); the rest is the
        # horse profile. predict() expects a 2-D (n_samples, n_features)
        # input, so the single profile is reshaped into one row.
        horse_timing, sigma = gp.predict(np.reshape(horse[1:], (1, -1)), return_std=True)
        print("HORSE: ", horse[1:])
        print("HORSE TIMING PREDICTED: ", horse_timing)

# predict test set


### Output Race with Max. Index

## Predict a Race and Use Decision Function to Determine Whether to Bet

## Plot Results with Different Params