In [13]:
import pandas as pd
import numpy as np
import requests
import pickle
import random
from itertools import combinations
from datetime import datetime
import json
import os
from tqdm import tqdm

In [2]:
start_date = '2022-01-01T00:00:00'
end_date = '2023-08-30T00:00:00'
start_object = datetime.strptime(start_date, '%Y-%m-%dT%H:%M:%S')
end_object = datetime.strptime(end_date, '%Y-%m-%dT%H:%M:%S')

In [3]:
tournamentsendpoint = 'https://api.sportsdata.io/golf/v2/json/Tournaments?key=d656297eea034d51bdad731dcae66c84'
response = requests.get(tournamentsendpoint)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # The response content is typically in JSON format
    tournaments = response.json()
    print(tournaments)
else:
    print(f"Request failed with status code: {response.status_code}")

[{'TournamentID': 576, 'Name': 'The Presidents Cup', 'StartDate': '2024-09-26T00:00:00', 'EndDate': '2024-09-29T00:00:00', 'IsOver': False, 'IsInProgress': False, 'Venue': 'The Royal Montreal Golf Club', 'Location': 'Montreal', 'Par': None, 'Yards': None, 'Purse': None, 'StartDateTime': None, 'Canceled': False, 'Covered': False, 'City': 'Quebec', 'State': None, 'ZipCode': None, 'Country': 'Canada', 'TimeZone': None, 'Format': 'TeamMatch', 'SportRadarTournamentID': '', 'OddsCoverage': 'Limited', 'Rounds': []}, {'TournamentID': 614, 'Name': 'TOUR Championship', 'StartDate': '2024-08-29T00:00:00', 'EndDate': '2024-09-01T00:00:00', 'IsOver': False, 'IsInProgress': False, 'Venue': 'East Lake Golf Club', 'Location': 'Atlanta, GA', 'Par': None, 'Yards': None, 'Purse': None, 'StartDateTime': None, 'Canceled': None, 'Covered': True, 'City': 'Atlanta', 'State': 'GA', 'ZipCode': None, 'Country': 'USA', 'TimeZone': 'America/New York', 'Format': 'Stroke', 'SportRadarTournamentID': '', 'OddsCoverage

In [4]:
idtoname = {}
nametoid = {}
for t in tournaments:
    t_start_obj = datetime.strptime(t['StartDate'], '%Y-%m-%dT%H:%M:%S')
    t_end_obj = datetime.strptime(t['EndDate'], '%Y-%m-%dT%H:%M:%S')
    if start_object < t_start_obj and end_object > t_end_obj:
        tid = t['TournamentID']
        name = t['Name']
        idtoname[tid] = name
        nametoid[name] = tid
        print(name)
        
with open('metadata/touridtoname.json', 'w') as json_file:
    json.dump(idtoname, json_file)
with open('metadata/tournametoid.json', 'w') as json_file:
    json.dump(nametoid, json_file)
                

TOUR Championship
BMW Championship
FedEx St. Jude Championship
Wyndham Championship
3M Open
Barracuda Championship
The Open
Barbasol Championship
Genesis Scottish Open
John Deere Classic
Rocket Mortgage Classic
Travelers Championship
U.S. Open
RBC Canadian Open
The Memorial Tournament pres. by Workday
Charles Schwab Challenge
PGA Championship
AT&T Byron Nelson
Wells Fargo Championship
Mexico Open
Zurich Classic of New Orleans
RBC Heritage
Masters Tournament
Valero Texas Open
Corales Puntacana Championship
WGC-Dell Technologies Match Play
Valspar Championship
THE PLAYERS Championship
Puerto Rico Open
Arnold Palmer Invitational pres. by Mastercard
The Honda Classic
The Genesis Invitational
WM Phoenix Open
AT&T Pebble Beach Pro-Am
Farmers Insurance Open
The American Express
Sony Open in Hawaii
Sentry Tournament of Champions
QBE Shootout
Hero World Challenge
The RSM Classic
Cadence Bank Houston Open
World Wide Technology Championship at Mayakoba
Butterfield Bermuda Championship
THE CJ CUP 

In [14]:
playeridtoname = {}
nametoplayerid = {}

with open('metadata/touridtoname.json', 'r') as json_file:
    tour_dict = json.load(json_file)
tournaments = list(tour_dict.keys())

for t in tqdm(tournaments):
    #teetimes = {}
    if not os.path.exists(f'tournaments/{t}'):
        os.makedirs(f'tournaments/{t}')
    leaderboardurl = f'https://api.sportsdata.io/golf/v2/json/Leaderboard/{t}?key=d656297eea034d51bdad731dcae66c84'
    response = requests.get(leaderboardurl)
    if response.status_code == 200:
        data = response.json()
    else:
        print(f"Request failed with status code: {response.status_code}")
    for player in data['Players']:
        scores = []
        name = player['Name']
        playerid = player['PlayerID']
        if not os.path.exists(f'tournaments/{t}/{playerid}'):
            os.makedirs(f'tournaments/{t}/{playerid}')
        #teetimes[playerid] = {}
        teetimes = {}
        if playerid not in (playeridtoname.keys()):
            playeridtoname[playerid] = name
            nametoplayerid[name] = playerid
        scorecardpath = f'tournaments/{t}/{playerid}/scorecard.pkl'
        ttpath = f'tournaments/{t}/{playerid}/teetime.json'
        for r in player['Rounds']:
            teetimes[r['Number']] = [r['TeeTime'], r['BackNineStart']]
            roundscores = []
            for hole in r['Holes']:
                roundscores.append(hole['ToPar'])
            scores.append(roundscores)
        #print(scores)
        with open(scorecardpath, 'wb') as file:
            pickle.dump(scores, file)
        with open(ttpath, 'w') as json_file:
            json.dump(teetimes, json_file)

with open('metadata/playeridtoname.json', 'w') as json_file:
    json.dump(playeridtoname, json_file)
with open('metadata/nametoplayerid.json', 'w') as json_file:
    json.dump(nametoplayerid, json_file)
                        

100%|████████████████████████████████████████████████████████████████████████| 88/88 [01:25<00:00,  1.03it/s]


In [15]:
nametoplayerid

{'Viktor Hovland': 40003218,
 'Xander Schauffele': 40001638,
 'Wyndham Clark': 40002651,
 'Rory McIlroy': 40000965,
 'Patrick Cantlay': 40000216,
 'Tommy Fleetwood': 40000432,
 'Collin Morikawa': 40002471,
 'Scottie Scheffler': 40001274,
 'Keegan Bradley': 40000152,
 'Sam Burns': 40000194,
 'Max Homa': 40000638,
 'Adam Schenk': 40001686,
 'Matt Fitzpatrick': 40000430,
 'Russell Henley': 40000615,
 'Sepp Straka': 40002785,
 'Rickie Fowler': 40000003,
 'Tyrrell Hatton': 40000596,
 'Lucas Glover': 40000509,
 'Jon Rahm': 40001199,
 'Tony Finau': 40000423,
 'Si Woo Kim': 40000764,
 'Joo-Hyung Kim': 40004287,
 'Brian Harman': 40000585,
 'Sung-Jae Im': 40002605,
 'Nick Taylor': 40001424,
 'Corey Conners': 40000270,
 'Jordan Spieth': 40000004,
 'Jason Day': 40000314,
 'Emiliano Grillo': 40000540,
 'Taylor Moore': 40002348,
 'Chris Kirk': 40000011,
 'Harris English': 40000387,
 'Denny McCarthy': 40000946,
 'Andrew Putnam': 40001189,
 'Adam Svensson': 40001414,
 'Sahith Theegala': 40002486,
 'Ca