# DFS Golf Analysis
This program is meant to read from various sources and explore possibilities of modelling golfer performance at various PGA courses to make money on DraftKings or FanDuel.

First import the necessary libraries:

In [1]:
import pandas as pd
import numpy as np

# Input necessary setup variables
engine = 'FanDuel'   # Enter FanDuel or DraftKings

Prep for metadata by creating dictionaries for all the location codes that are encountered in the data.  These will be used later to create cleaner location information about each tournament which will likely be used as a feature for estimating player performance.

In [62]:
USabbrevs = {'al': 'Alabama','ak': 'Alaska', 'az': 'Arizona', 'ar': 'Arkansas', 'ca': 'California', 'co': 'Colorado',
 'ct': 'Connecticut', 'de': 'Delaware', 'fl': 'Florida', 'ga': 'Georgia', 'hi': 'Hawaii', 'id': 'Idaho', 'il': 'Illinois',
 'in': 'Indiana', 'ia': 'Iowa', 'ks': 'Kansas', 'ky': 'Kentucky', 'la': 'Louisiana', 'me': 'Maine', 'md': 'Maryland',
 'ma': 'Massachusetts', 'mi': 'Michigan', 'mn': 'Minnesota', 'ms': 'Mississippi', 'mo': 'Missouri', 'mt': 'Montana',
 'ne': 'Nebraska', 'nv': 'Nevada', 'nh': 'New Hampshire', 'nj': 'New Jersey', 'nm': 'New Mexico', 'ny': 'New York',
 'nc': 'North Carolina', 'nd': 'North Dakota', 'oh': 'Ohio', 'ok': 'Oklahoma', 'or': 'Oregon', 'pa': 'Pennsylvania',
 'ri': 'Rhode Island', 'sc': 'South Carolina', 'sd': 'South Dakota', 'tn': 'Tennessee', 'tx': 'Texas', 'ut': 'Utah',
 'vt': 'Vermont', 'va': 'Virginia', 'wa': 'Washington', 'wv': 'West Virginia', 'wi': 'Wisconsin', 'wy': 'Wyoming'}
CANabbrevs = {'on': 'Ontario'}
UKabbrevs = {'england': 'England', 'nir': 'Northern Ireland', 'eng': 'England'}
trans = {'jpn':'Japan','mex':'Mexico', 'pur':'Puerto Rico','aus':'Australia','bah':'Bahamas','ber':'Bermuda',
         'chn':'China','kor':'South Korea','can':'Canada','dom':'Dominican Republic','mas':'Malaysia'}

# Combine all dicts
all_abbrevs = {}
_ = [all_abbrevs.update(d) for d in (USabbrevs, CANabbrevs, UKabbrevs, trans)]

# Prep Location Breakdown for tournament data
def loc_breakdown(row):  
    # Look for a match with the dictionaries listed at the top of this notebook
    if row['end_loc'] in USabbrevs.values():  # located in USA
        row['City'] = row['beg_loc']
        row['State'] = row['end_loc']
        row['Country'] = 'United States'
    elif row['end_loc'] in CANabbrevs.values():  # located in Canada
        row['City'] = row['beg_loc']
        row['State'] = row['end_loc']
        row['Country'] = 'Canada'
    elif row['end_loc'] in UKabbrevs.values():  # located in United Kingdom
        row['City'] = row['beg_loc']
        row['State'] = row['end_loc']
        row['Country'] = 'United Kingdom'
    elif row['end_loc'] in trans.values():    # located elsewhere
        row['City'] = row['beg_loc']
        row['Country'] = row['end_loc']
    else:
        pass
    return row

Create functions for easy extraction of data from various internet sources which include:
    [sportsdata.io](https://sportsdata.io/developers/api-documentation/golf#)

The data can be retreived in the following formats:
    - entire season data
    - specific player data
    - data from all players
    
Add new functions here as new data sources are discovered

In [63]:
# Setup calls to data
api = 'de4dc63e16ee485b9df3bb79146bdcc1'

# Individual seasons
def season_data(season):
    return pd.read_json('https://api.sportsdata.io/golf/v2/json/Tournaments/{}?key={}'.format(str(season),api))

# Individual players
def player_data(player_id):
    return pd.read_json('https://api.sportsdata.io/golf/v2/json/Player/{}?key={}'.format(str(player_id),api))

# All players
def all_players_data():
    players = pd.read_json('https://api.sportsdata.io/golf/v2/json/Players?key={}'.format(api)).replace({None: np.nan})
    players = players[players['DraftKingsName'].notnull() & players['FanDuelName'].notnull()]  # strip out the nulls
    for col in players.select_dtypes(include=np.float).columns:
        players[col] = players[col].astype(pd.Int32Dtype())  # convert float columns to ints

    cols_to_drop = ['FantasyAlarmPlayerID','FantasyDraftName','FantasyDraftPlayerID','PhotoUrl',
                    'RotoWirePlayerID', 'RotoworldPlayerID', 'SportRadarPlayerID', 'YahooPlayerID']
    if engine.lower() == 'draftkings':
        cols_to_drop.extend(['FanDuelName','FanDuelPlayerID'])
        players = players.drop(cols_to_drop,axis=1).set_index('DraftKingsName')
    elif engine.lower() == 'fanduel':
        cols_to_drop.extend(['DraftKingsName','DraftKingsPlayerID'])
        players = players.drop(cols_to_drop,axis=1).set_index('FanDuelName')
    
    return players

# All tournaments from current season (same as season_data(2020))
def tournament_data():
    col_order=['StartDate', 'StartDateTime', 'EndDate', 'City', 'State', 'Country', 'Location', 'ZipCode', 'TimeZone', 
               'Covered', 'Format', 'IsInProgress', 'IsOver', 'Name', 'Par', 'Purse', 'Rounds', 'TournamentID', 'Venue', 
               'Yards', 'Canceled']  # modify the order that the data is shown
    tourn = (pd.read_json('https://api.sportsdata.io/golf/v2/json/Tournaments?key={}'.format(api))
               .replace({None: np.nan})  # replace Nones with NaNs
               .dropna(subset=['Location'])  # drop row with NaN in column Location
               .loc[:,col_order])        # only use columns listed above
    
    # Convert the dates to datetime
    tourn.EndDate = pd.to_datetime(tourn.EndDate)
    
    # Add columns for location breakdown
    tourn['beg_loc'] = tourn['Location'].str.extract('^([A-Za-z0-9 ]+),')   # Extract before comma
    tourn['end_loc'] = tourn['Location'].str.extract(', ([A-Za-z0-9 ]+)$')  # Extract after last comma
    tourn['end_loc'] = tourn['end_loc'].str.lower().replace(all_abbrevs)  # Replace with adjustments
    
    # Convert data to proper columns and drop unnecessary columns
    tourn = tourn.apply(loc_breakdown, axis=1).drop(['Location','beg_loc','end_loc'],axis=1)
    return tourn
    

### Test tournament data function here

In [64]:
tournament_data()

Unnamed: 0,StartDate,StartDateTime,EndDate,City,State,Country,ZipCode,TimeZone,Covered,Format,IsInProgress,IsOver,Name,Par,Purse,Rounds,TournamentID,Venue,Yards,Canceled
1,2021-09-02T00:00:00,,2021-09-05,Atlanta,Georgia,United States,,America/New York,1.0,Stroke,False,False,Tour Championship,70.0,,"[{'TournamentID': 453, 'RoundID': 11736, 'Numb...",453,East Lake GC,7346.0,0.0
2,2021-08-26T00:00:00,,2021-08-29,Baltimore,Maryland,United States,,America/New York,1.0,Stroke,False,False,BMW Championship,,,"[{'TournamentID': 452, 'RoundID': 11732, 'Numb...",452,Caves Valley Golf Club,,0.0
3,2021-08-19T00:00:00,,2021-08-22,Jersey City,New Jersey,United States,,America/New York,1.0,Stroke,False,False,THE NORTHERN TRUST,71.0,,"[{'TournamentID': 451, 'RoundID': 11728, 'Numb...",451,Liberty National GC,7370.0,0.0
4,2021-08-12T00:00:00,,2021-08-15,Greensboro,North Carolina,United States,,America/New York,1.0,Stroke,False,False,Wyndham Championship,70.0,,"[{'TournamentID': 450, 'RoundID': 11724, 'Numb...",450,Sedgefield CC,7127.0,0.0
5,2021-08-05T00:00:00,,2021-08-08,Truckee,California,United States,,America/Los Angeles,0.0,Stableford,False,False,Barracuda Championship,,,"[{'TournamentID': 448, 'RoundID': 11716, 'Numb...",448,Tahoe Mt. Club (Old Greenwood),,0.0
6,2021-08-05T00:00:00,,2021-08-08,Memphis,Tennessee,United States,,America/Chicago,1.0,Stroke,False,False,WGC-FedEx St. Jude Invitational,70.0,,"[{'TournamentID': 449, 'RoundID': 11720, 'Numb...",449,TPC Southwind,7237.0,0.0
7,2021-07-29T00:00:00,,2021-08-01,Saitama,,Japan,,Asia/Tokyo,1.0,Stroke,False,False,Olympic Men's Golf Competition,,,[],447,Kasumigaseki Country Club,,
8,2021-07-22T00:00:00,,2021-07-25,Blaine,Minnesota,United States,,America/Chicago,1.0,Stroke,False,False,3M Open,71.0,,"[{'TournamentID': 446, 'RoundID': 11712, 'Numb...",446,TPC Twin Cities,7468.0,0.0
9,2021-07-15T00:00:00,,2021-07-18,Nicholasville,Kentucky,United States,,America/New York,1.0,Stroke,False,False,The Open Championship,70.0,,"[{'TournamentID': 445, 'RoundID': 11708, 'Numb...",445,Keene Trace Golf Club,7211.0,0.0
10,2021-07-11T00:00:00,,2021-07-18,Sandwich,England,United Kingdom,,Europe/London,0.0,Stroke,False,False,The Open Championship,,,[],404,Royal St. George's GC,,


In [None]:
pd.show_versions()