# DFS Golf Analysis
This notebook reads data from various sources and explores ways of modelling golfer performance at various PGA courses, with the aim of making money on DraftKings or FanDuel.

First import the necessary libraries:

In [1]:
import os
import warnings

import numpy as np
import pandas as pd

# Input necessary setup variables
# `engine` selects the fantasy site. all_players_data() compares it case-insensitively
# against 'draftkings' / 'fanduel' to decide which site's name column becomes the index
# and which site's columns are dropped.
engine = 'FanDuel'   # Enter FanDuel or DraftKings

Pull data from [sportsdata.io](https://sportsdata.io/developers/api-documentation/golf#) and explore it.

In [2]:
# Setup calls to data
# The sportsdata.io API key is read from the SPORTSDATA_API_KEY environment variable so
# that the secret never lives in the notebook or its version history.
# NOTE: a key was previously committed in this cell; it should be treated as leaked
# and rotated with the provider.
api = os.environ.get('SPORTSDATA_API_KEY', '')
if not api:
    # Warn immediately rather than leaving the reader to decode a failed HTTP request later.
    warnings.warn(
        'SPORTSDATA_API_KEY is not set; calls to the sportsdata.io API are expected to fail.'
    )

# Tournaments for one season
def season_data(season):
    """Read the sportsdata.io ``Tournaments/<season>`` JSON endpoint with ``pd.read_json``.

    ``season`` is converted with ``str()`` before being placed in the URL; the request is
    authenticated with the module-level ``api`` key.
    """
    url = f'https://api.sportsdata.io/golf/v2/json/Tournaments/{season!s}?key={api}'
    return pd.read_json(url)

# A single player
def player_data(player_id):
    """Read the sportsdata.io ``Player/<player_id>`` JSON endpoint with ``pd.read_json``.

    ``player_id`` is converted with ``str()`` before being placed in the URL; the request
    is authenticated with the module-level ``api`` key.
    """
    url = f'https://api.sportsdata.io/golf/v2/json/Player/{player_id!s}?key={api}'
    return pd.read_json(url)

# All players
def all_players_data():
    """Load every player known to both fantasy sites, indexed by the active site's name.

    Reads the sportsdata.io ``Players`` endpoint, keeps only rows where both
    ``DraftKingsName`` and ``FanDuelName`` are present, converts whole-number float
    columns to the nullable ``Int32`` dtype, drops the third-party ID / photo columns
    plus the columns of the site that is *not* selected, and indexes the result by the
    selected site's player name.

    The site is taken from the module-level ``engine`` variable (case-insensitive
    ``'FanDuel'`` or ``'DraftKings'``); the request uses the module-level ``api`` key.

    Returns:
        pandas.DataFrame indexed by ``FanDuelName`` or ``DraftKingsName``.

    Raises:
        ValueError: if ``engine`` is neither ``'FanDuel'`` nor ``'DraftKings'``.
    """
    # (name column, id column) belonging to each supported site
    site_columns = {
        'draftkings': ('DraftKingsName', 'DraftKingsPlayerID'),
        'fanduel': ('FanDuelName', 'FanDuelPlayerID'),
    }
    site = engine.lower()
    if site not in site_columns:
        # Fail before the network call instead of silently returning an un-indexed frame.
        raise ValueError(
            "engine must be 'FanDuel' or 'DraftKings', got {!r}".format(engine)
        )

    raw = pd.read_json(
        'https://api.sportsdata.io/golf/v2/json/Players?key={}'.format(api)
    ).replace({None: np.nan})

    # Strip out the nulls; .copy() so the column assignments below act on an
    # independent frame rather than a slice of `raw` (avoids SettingWithCopyWarning).
    has_both_names = raw['DraftKingsName'].notnull() & raw['FanDuelName'].notnull()
    players = raw[has_both_names].copy()

    # 'float' replaces the removed `np.float` alias (it was the builtin float -> float64).
    for col in players.select_dtypes(include='float').columns:
        present = players[col].dropna()
        # Only whole-number columns can be cast safely to a nullable integer dtype;
        # genuinely fractional columns are left as floats instead of raising.
        if (present % 1 == 0).all():
            players[col] = players[col].astype(pd.Int32Dtype())

    cols_to_drop = ['FantasyAlarmPlayerID', 'FantasyDraftName', 'FantasyDraftPlayerID', 'PhotoUrl',
                    'RotoWirePlayerID', 'RotoworldPlayerID', 'SportRadarPlayerID', 'YahooPlayerID']
    for other_site, other_cols in site_columns.items():
        if other_site != site:
            cols_to_drop.extend(other_cols)

    # errors='ignore': an auxiliary column missing from the payload is not a failure.
    name_col = site_columns[site][0]
    return players.drop(columns=cols_to_drop, errors='ignore').set_index(name_col)
    

In [3]:
# Read and clean tournament list
# Preferred column order for showing the tournament table (used when the table is displayed).
col_order = ['StartDate', 'StartDateTime', 'EndDate', 'City', 'State', 'Country', 'Location', 'ZipCode', 'TimeZone', 'Covered', 'Format',
             'IsInProgress', 'IsOver', 'Name', 'Par', 'Purse', 'Rounds', 'TournamentID', 'Venue', 'Yards', 'Canceled']

# Fetch the full tournament list and normalise missing values to NaN.
tourn = pd.read_json('https://api.sportsdata.io/golf/v2/json/Tournaments?key={}'.format(api)).replace({None: np.nan})

# Parse EndDate into datetimes. Bracket assignment is used because attribute-style
# assignment (tourn.EndDate = ...) only updates a column that already exists and would
# otherwise silently set a plain attribute on the DataFrame object.
tourn['EndDate'] = pd.to_datetime(tourn['EndDate'])

In [23]:
# DataFrames from the tournament-loading cell (`tourn` and `col_order` are defined there)
# Tournament table with its columns in the preferred order.
tourn_ordered = tourn[col_order]

# Distinct tournament locations. `unique` is a Series method; the `.str` accessor
# has no `unique`, so `tourn['Location'].str.unique()` raises AttributeError.
tourn['Location'].unique()

array([nan, 'Quintana Roo, Mexico', 'New Providence, Bahamas',
       'Saint Simons Island, GA', 'Augusta, GA', 'Houston, TX',
       'Shanghai, China', 'Southampton Parish, Bermuda', 'Chiba, Japan',
       'Jeju Island, South Korea', 'Las Vegas, NV', 'Jackson, MS',
       'Kohler, WI', 'Punta Cana, Dominican Republic', 'Mamaroneck, NY',
       'Napa, CA', 'Atlanta, GA', 'Olympia Fields, IL', 'Norton, MA',
       'Greensboro, NC', 'San Francisco, CA', 'Memphis, TN',
       'Truckee, CA', 'Saitama, Jpn', 'Blaine, MN', 'Dublin, OH',
       'Sandwich, Kent, Eng', 'Nicholasville, KY', 'Silvis, IL',
       'Detroit, MI', 'Cromwell, CT', 'Hilton Head, SC', 'Fort Worth, TX',
       'Etobicoke, ON', 'Dallas, TX', 'Charlotte, NC', 'Avondale, LA',
       'San Antonio, TX', 'Austin, TX', 'Palm Harbor, FL',
       'Ponte Vedra Beach, FL', 'Orlando, FL', 'Palm Beach Gardens, FL',
       'Mexico City, Mex', 'Rio Grande, Pur', 'Pacific Palisades, CA',
       'Pebble Beach, CA', 'Scottsdale, AZ', 'San

In [5]:
# Print pandas' report of the Python, OS and dependency versions installed in this environment.
pd.show_versions()


INSTALLED VERSIONS
------------------
commit: None
python: 3.7.3.final.0
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 78 Stepping 3, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None

pandas: 0.24.2
pytest: 4.3.1
pip: 19.0.3
setuptools: 40.8.0
Cython: 0.29.14
numpy: 1.16.2
scipy: 1.2.1
pyarrow: None
xarray: None
IPython: 7.4.0
sphinx: 1.8.5
patsy: 0.5.1
dateutil: 2.8.0
pytz: 2018.9
blosc: None
bottleneck: 1.2.1
tables: 3.5.1
numexpr: 2.6.9
feather: None
matplotlib: 3.0.3
openpyxl: 2.6.1
xlrd: 1.2.0
xlwt: 1.3.0
xlsxwriter: 1.1.5
lxml.etree: 4.3.2
bs4: 4.7.1
html5lib: 1.0.1
sqlalchemy: 1.3.1
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None
gcsfs: None
