In [3]:

import os
import requests
import argparse

import numpy as np
import pandas as pd

from tqdm import tqdm
from bs4 import BeautifulSoup
from datetime import datetime

# Race card to scrape: both values are interpolated into the request URL below.
track = 'churchill-downs'
date = '2023-05-03'

# Get the entries
url = f"https://entries.horseracingnation.com/entries-results/{track}/{date}"
# A timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(url, timeout=30)
# Fail here on a 4xx/5xx response instead of parsing an error page further down.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')



In [17]:

# Today's date formatted as YYYY-MM-DD.
# NOTE(review): this rebinds `date`; a `url` that was already built from the
# earlier value is not refreshed by this assignment.
date = datetime.today().strftime('%Y-%m-%d')

'2023-05-03'

In [8]:

### parsing functions ###
def parse_meta(info, restriction, purse):
    """Build a one-row DataFrame describing a race's conditions.

    Parameters
    ----------
    info : object exposing ``.text``
        Its text must split, on a comma followed by a newline, into exactly
        three fields: distance, surface and race type (in that order).
    restriction : object exposing ``.text``
        Its stripped text becomes the ``Restriction`` value.
    purse : object exposing ``.text``
        Its text with the ``'Purse: '`` prefix removed and whitespace
        stripped becomes the ``Purse`` value.

    Returns
    -------
    pandas.DataFrame
        A single row with columns ``Distance``, ``Surface``, ``Type``,
        ``Restriction`` and ``Purse``.

    Raises
    ------
    ValueError
        If ``info.text`` does not split into exactly three fields.
    """
    distance, surface, kind = (field.strip() for field in info.text.split(',\n'))
    restriction_text = restriction.text.strip()
    purse_text = purse.text.replace('Purse: ', '').strip()

    column_names = ['Distance', 'Surface', 'Type', 'Restriction', 'Purse']
    values = [distance, surface, kind, restriction_text, purse_text]
    return pd.DataFrame([values], columns=column_names)


def parse_entries(table):
    """Parse one race's entries table into a DataFrame, one row per entry.

    Parameters
    ----------
    table : bs4 Tag (or any object with the same ``find_all`` interface)
        The entries ``<table>`` element. Each ``<tr>`` with ``<td>`` cells is
        read as one entry; cell 0 holds the program number, cell 3 the horse
        (``h4``) and sire (``p``), cell 4 the trainer (first ``p``) and jockey
        (last ``p``), and cell 6 the morning-line odds (``p``).

    Returns
    -------
    pandas.DataFrame
        Columns ``running`` (1 = running, 0 = scratched), ``horse`` (the
        integer program number), ``horse_name``, ``sire``, ``trainer``,
        ``jockey`` and ``ml_odds``.

    Raises
    ------
    ValueError
        If a row carries a class other than ``scratched``, or a program
        number cannot be converted to ``int``.
    """
    table_data = []
    for row in table.find_all('tr'):
        cols = row.find_all('td')
        if not cols:
            # Rows without data cells (e.g. a header row) hold no entry.
            continue

        # Indexing row['class'] raises KeyError when the attribute is absent;
        # .get() lets a missing attribute behave like an empty class list.
        row_classes = row.get('class') or []
        if row_classes:
            if row_classes[0] != 'scratched':
                raise ValueError(f"Unexpected entry row class: {row_classes!r}")
            running = 0
            # Scratched rows: the program number is the last word of the
            # first cell's data-label attribute.
            program_number = int(cols[0]['data-label'].split()[-1])
        else:
            running = 1
            # Running rows: the program number is the alt text of the image
            # in the first cell.
            program_number = int(cols[0].find('img')['alt'])

        horse = cols[3].find('h4').text.strip()
        sire = cols[3].find('p').text.strip()
        trainer = cols[4].find('p').text.strip()
        jockey = cols[4].find_all('p')[-1].text.strip()
        morning_line_odds = cols[6].find('p').text.strip()
        table_data.append([running, program_number, horse, sire, trainer, jockey, morning_line_odds])

    return pd.DataFrame(table_data, columns=['running','horse','horse_name','sire','trainer','jockey','ml_odds'])


In [10]:


# Collect the per-race page elements; they are paired by position below.
entry_tables = soup.find_all('table', {'class': 'table-entries'})
race_info = soup.find_all('div', {'class': 'race-distance'})
race_restrictions = soup.find_all('div', {'class': 'race-restrictions'})
race_purses = soup.find_all('div', {'class': 'race-purse'})

# An empty card would otherwise surface as an opaque pd.concat error further down.
assert len(entry_tables) > 0, f"no entry tables found for {track} on {date}"
# Positional pairing is only valid when every list has one element per race.
assert len(entry_tables) == len(race_info), "race-distance count does not match entry tables"
assert len(entry_tables) == len(race_restrictions), "race-restrictions count does not match entry tables"
assert len(entry_tables) == len(race_purses), "race-purse count does not match entry tables"

entry_frames = []
meta_frames = []

races = zip(entry_tables, race_info, race_restrictions, race_purses)
# Races are numbered from 1 in page order.
for race_no, (entry_table, info, restrictions, purse) in enumerate(tqdm(races, total=len(entry_tables)), start=1):
    entry_df = parse_entries(entry_table)
    entry_df['track'] = track
    entry_df['date'] = date
    entry_df['race'] = race_no
    entry_frames.append(entry_df)

    meta_row = parse_meta(info, restrictions, purse)
    meta_row['track'] = track
    meta_row['date'] = date
    meta_row['race'] = race_no
    meta_frames.append(meta_row)

entries = pd.concat(entry_frames, axis=0).reset_index(drop=True)
meta = pd.concat(meta_frames, axis=0).reset_index(drop=True)

# NOTE(review): the file names contain only the track, so a run for another
# date overwrites the previous output.
entries_path = f'../data/upc_entries/{track}.csv'
meta_path = f'../data/upc_meta/{track}.csv'
# to_csv does not create missing parent directories, so make sure they exist.
for out_path in (entries_path, meta_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
entries.to_csv(entries_path, index=False)
meta.to_csv(meta_path, index=False)

print("Entries: ", entries.head())
print("\nMeta: ", meta.head())


100%|█████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 666.73it/s]

Entries:     running  horse     horse_name             sire                  trainer  \
0        1      1        Current           Curlin  Brittany A. Vanden Berg   
1        1      2  Locally Owned  Distorted Humor           Thomas  Morley   
2        1      3    Code Runner       Honor Code           Caio  Caramori   
3        1      4         Allege         Uncle Mo       Steven M. Asmussen   
4        1      5    Silver Dust            Tapit          Thomas D. Vance   

                 jockey ml_odds            track        date  race  
0  Christopher A. Emigh    10/1  churchill-downs  2023-05-03     1  
1    Julien R. Leparoux     7/5  churchill-downs  2023-05-03     1  
2         Luan  Machado    15/1  churchill-downs  2023-05-03     1  
3     Tyler  Gaffalione     3/1  churchill-downs  2023-05-03     1  
4      Corey J. Lanerie     7/2  churchill-downs  2023-05-03     1  

Meta:    Distance Surface                               Type  \
0   1 1/4M    Dirt  $40,000 Starter Option




In [12]:
# Display the combined entries DataFrame as the cell's output.
entries

Unnamed: 0,running,horse,horse_name,sire,trainer,jockey,ml_odds,track,date,race
0,1,1,Current,Curlin,Brittany A. Vanden Berg,Christopher A. Emigh,10/1,churchill-downs,2023-05-03,1
1,1,2,Locally Owned,Distorted Humor,Thomas Morley,Julien R. Leparoux,7/5,churchill-downs,2023-05-03,1
2,1,3,Code Runner,Honor Code,Caio Caramori,Luan Machado,15/1,churchill-downs,2023-05-03,1
3,1,4,Allege,Uncle Mo,Steven M. Asmussen,Tyler Gaffalione,3/1,churchill-downs,2023-05-03,1
4,1,5,Silver Dust,Tapit,Thomas D. Vance,Corey J. Lanerie,7/2,churchill-downs,2023-05-03,1
...,...,...,...,...,...,...,...,...,...,...
93,1,8,Officer Quigley,Vancouver (AUS),"Albert M. Stall, Jr.",Corey J. Lanerie,15/1,churchill-downs,2023-05-03,10
94,1,9,Air Kenney,Goldencents,Anna M. Meah,Rafael Bejarano,12/1,churchill-downs,2023-05-03,10
95,1,10,Eddie M,Speightstown,Brendan P. Walsh,"Irad Ortiz, Jr.",8/1,churchill-downs,2023-05-03,10
96,1,11,Papa D L,Lord Nelson,Grant T. Forster,Colby J. Hernandez,20/1,churchill-downs,2023-05-03,10


In [None]:

# Hand-entered 4-column numeric array.
# NOTE(review): the columns presumably are [race number, program number,
# running flag (1/0), odds as a decimal] — TODO confirm against the data source.
ml_array = np.array([
    [5, 3, 1, 0.8],
    [5, 4, 1, 4.5],
    [5, 2, 1, 3.5],
    [5, 5, 1, 6],
    [5, 1, 1, 15],
    [ 6,  8, 1, 2.5],
    [ 6, 12, 1, 3.5],
    [ 6,  5, 1, 20],
    [ 6,  6, 1, 6],
    [ 6,  9, 1, 6],
    [ 6,  2, 1, 10],
    [ 6,  1, 1, 20],
    [ 6,  4, 1, 15],
    [ 6, 10, 1, 8],
    [ 7,  3, 1, 2.5],
    [ 7,  5, 1, 4],
    [ 7,  2, 1, 6],
    [ 7,  6, 1, 8],
    [ 7, 10, 0, 6],
    [ 7,  8, 1, 15],
    [ 7,  7, 1, 15],
    [ 7,  4, 1, 8],
    [ 7,  9, 1, 20],
    [ 7, 11, 1, 30],
    [ 7,  1, 1, 15],
    [8, 4, 1, 0.6],
    [8, 5, 1, 3],
    [8, 2, 1, 4.5],
    [8, 6, 0, 8],
    [9, 1, 1, 0.8],
    [9, 2, 1, 6],
    [9, 6, 1, 4],
    [9, 4, 1, 3.5],
    [9, 5, 1, 15],
    [10,  3, 1, 2],
    [10,  7, 1, 5],
    [10, 11, 1, 3.5],
    [10,  2, 1, 10],
    [10,  6, 1, 12],
    [10,  8, 0, 8],
    [10,  5, 1, 10],
    [10, 12, 1, 15],
    [10,  9, 1, 20]
])
