In [1]:
import json
import os
import time
from difflib import get_close_matches

import numpy as np
import pandas as pd
import requests

In [66]:
def get_pga_data(periodId, timeout=30):
    """Fetch the raw LineStar salary/ownership payload for one period.

    Issues a GET request to the LineStar ``GetSalariesV5`` endpoint with the
    query parameters ``periodId``, ``site=1`` and ``sport=5`` and returns the
    decoded JSON body.

    Parameters
    ----------
    periodId : int or str
        Period identifier, forwarded as the ``periodId`` query parameter.
    timeout : float, optional
        Seconds to wait on the server before giving up (default 30). Without
        a timeout a stalled connection would block the caller indefinitely.

    Returns
    -------
    dict
        The decoded JSON response; ``["Ownership"]["Salaries"]`` is
        guaranteed to be non-empty.

    Raises
    ------
    ValueError
        If the server answers with an HTTP error status, the body is not
        valid JSON, or the payload holds no salary records for ``periodId``.
    requests.exceptions.RequestException
        On connection failures or timeouts.
    """
    # NOTE(review): these look like browser-session cookies captured at one
    # point in time; they may expire and arguably belong in configuration
    # rather than in the notebook -- confirm before sharing this file.
    cookies = {
        '_fbp': 'fb.1.1652982905489.473636789',
        '.ASPXANONYMOUS': '7FPJC4O72AEkAAAANzJhYzQyZTAtMjBlMC00Y2U3LTg0NDgtNGNlYmQ5NzI2Y2Vj0',
        '.DOTNETNUKE': '84C02EC808A4608F4C931C9E1E01C1C0C38494594A271ACEEB55875FB4A4D61227F4E401C11511B9E0A67F47F89591F231BCD694CA19BC48DDDF0B0DD616909A4D9689F9BEA949EDD97289D031B87B90E4744D6F139FA38FEFA53B893E68D08B9F9EFE5224E87FE2DA6A5EA6517FA7DE979BD61500AF9482F427A0A15B5BC187C8B545AB',
        '_gid': 'GA1.2.1831468145.1658287386',
        'dnn_IsMobile': 'False',
        'language': 'en-US',
        '_ga': 'GA1.2.1013963682.1652982905',
        '_ga_EXD94TY7GX': 'GS1.1.1658331839.119.1.1658331995.0',
    }

    # Cookies are sent through the ``cookies=`` argument below, so no explicit
    # 'Cookie' header is set here.
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
        'DNT': '1',
        'Referer': 'https://www.linestarapp.com/Projections',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
        'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    params = {
        'periodId': periodId,
        'site': '1',
        'sport': '5',
    }

    response = requests.get(
        'https://www.linestarapp.com/DesktopModules/DailyFantasyApi/API/Fantasy/GetSalariesV5',
        params=params,
        cookies=cookies,
        headers=headers,
        timeout=timeout,
    )
    # Report HTTP failures as ValueError so callers that skip bad periods by
    # catching ValueError keep working, but with a clearer message than a
    # JSON decoding error.
    if not response.ok:
        raise ValueError(f"HTTP {response.status_code} for periodId {periodId}")

    payload = response.json()

    # Treat a missing/empty "Ownership" -> "Salaries" section the same way as
    # an empty list: there is simply no data for this period.
    ownership = payload.get("Ownership") if isinstance(payload, dict) else None
    salaries = ownership.get("Salaries") if isinstance(ownership, dict) else None
    if not salaries:
        raise ValueError(f"No data for periodId {periodId}")
    return payload
    

def get_pga_realized_slate(periodId):
    """Build the projected-vs-realized table for a period's "Main" slate.

    Downloads the period payload with ``get_pga_data``, picks the single
    slate whose ``SlateName`` is ``"Main"``, keeps players with a strictly
    positive projection (``PP``) and attaches projected and realized
    ownership, both converted from percent to a fraction rounded to two
    decimals. Players without an ownership entry get 0.

    Parameters
    ----------
    periodId : int or str
        Period identifier passed through to ``get_pga_data``.

    Returns
    -------
    tuple
        ``(date, frame)`` where ``date`` is the first ten characters of the
        slate's ``SlateStart`` value and ``frame`` is a ``pandas.DataFrame``
        with columns ``Name``, ``Salary``, ``Projection``, ``Scored``,
        ``pOwn`` and ``actOwn``.

    Raises
    ------
    ValueError
        If no slate or more than one slate is named "Main", or if the
        payload holds no contest results of ContestType 4 for that slate.
        Errors raised by ``get_pga_data`` propagate unchanged.
    """
    data = get_pga_data(periodId)
    ownership = data["Ownership"]

    # Exactly one slate must be called "Main"; anything else is ambiguous.
    main_slates = [s for s in ownership["Slates"] if s["SlateName"] == "Main"]
    if len(main_slates) == 0:
        raise ValueError("No Main slate found")
    if len(main_slates) > 1:
        raise ValueError("Multiple Main slates found")
    main_slate = main_slates[0]

    # Date of slate games (leading YYYY-MM-DD part of the start timestamp)
    date = main_slate["SlateStart"][0:10]
    # SlateId is the key into the projected and realized ownership data
    slate_id = main_slate["Id"]

    # Keep only players with a strictly positive projection
    slate_players = [p for p in ownership["Salaries"] if p["PP"] > 0]
    # A set gives constant-time membership checks in the filters below
    player_ids = {p["PID"] for p in slate_players}

    # Player ID -> projected ownership as a fraction
    proj_owned = {
        row["PlayerId"]: round(row["Owned"] / 100, 2)
        for row in ownership["Projected"][str(slate_id)]
        if row["PlayerId"] in player_ids
    }

    # Realized ownership comes from the first contest on this slate with
    # contest type 4 (the GPP tournaments on LineStar, per the original note).
    gpp_results = [
        contest["OwnershipData"]
        for contest in ownership["ContestResults"]
        if contest["Contest"]["SlateId"] == slate_id
        and contest["Contest"]["ContestType"] == 4
    ]
    if not gpp_results:
        # Raise ValueError (not IndexError) so batch callers that skip bad
        # periods by catching ValueError are not aborted by this case.
        raise ValueError("No GPP contest results found for Main slate")
    actual_owned = {
        row["PlayerId"]: round(row["Owned"] / 100, 2)
        for row in gpp_results[0]
        if row["PlayerId"] in player_ids
    }

    # One record per player; missing ownership defaults to 0
    records = [
        {
            "Name": p["Name"],
            "Salary": p["SAL"],
            "Projection": p["PP"],
            "Scored": p["PS"],
            "pOwn": proj_owned.get(p["PID"], 0),
            "actOwn": actual_owned.get(p["PID"], 0),
        }
        for p in slate_players
    ]

    frame = pd.DataFrame(records)
    return (date, frame)


def get_pga_proj_slate(periodId):
    """Build the projection table for a period's "Main" slate.

    Downloads the period payload with ``get_pga_data``, picks the single
    slate whose ``SlateName`` is ``"Main"``, keeps players whose game
    (``GID``) belongs to that slate and whose projection (``PP``) is
    strictly positive, and attaches projected ownership converted from
    percent to a fraction rounded to two decimals. Players without a
    projected-ownership entry get 0.

    Parameters
    ----------
    periodId : int or str
        Period identifier passed through to ``get_pga_data``.

    Returns
    -------
    tuple
        ``(date, frame)`` where ``date`` is the first ten characters of the
        slate's ``SlateStart`` value and ``frame`` is a ``pandas.DataFrame``
        with columns ``Name``, ``Salary``, ``Projection`` and ``pOwn``.

    Raises
    ------
    ValueError
        If no slate or more than one slate is named "Main", or if the Main
        slate lists no games. Errors raised by ``get_pga_data`` propagate
        unchanged.
    """
    data = get_pga_data(periodId)
    ownership = data["Ownership"]

    # Exactly one slate must be called "Main"; anything else is ambiguous.
    main_slates = [s for s in ownership["Slates"] if s["SlateName"] == "Main"]
    if len(main_slates) == 0:
        raise ValueError("No Main slate found")
    if len(main_slates) > 1:
        raise ValueError("Multiple Main slates found")
    main_slate = main_slates[0]

    # Date of slate games (leading YYYY-MM-DD part of the start timestamp)
    date = main_slate["SlateStart"][0:10]

    slate_games = main_slate["SlateGames"]
    if not slate_games:
        # Fail with the same exception type as the other slate-selection
        # problems instead of an IndexError on the lookup below.
        raise ValueError("Main slate has no games")
    # SlateId (taken from the first game) keys the projected ownership data
    slate_id = slate_games[0]["SlateId"]
    # A set gives constant-time membership checks in the filter below
    main_slate_game_ids = {game["GameId"] for game in slate_games}

    # Players in a Main-slate game with a strictly positive projection
    slate_players = [
        p for p in ownership["Salaries"]
        if p["GID"] in main_slate_game_ids and p["PP"] > 0
    ]
    player_ids = {p["PID"] for p in slate_players}

    # Player ID -> projected ownership as a fraction
    proj_owned = {
        row["PlayerId"]: round(row["Owned"] / 100, 2)
        for row in ownership["Projected"][str(slate_id)]
        if row["PlayerId"] in player_ids
    }

    # One record per player; missing projected ownership defaults to 0
    records = [
        {
            "Name": p["Name"],
            "Salary": p["SAL"],
            "Projection": p["PP"],
            "pOwn": proj_owned.get(p["PID"], 0),
        }
        for p in slate_players
    ]

    frame = pd.DataFrame(records)
    return (date, frame)

In [68]:
# Given a list of periodId's, write realized slates to file
periods = range(255, 342)
realized_dir = "./data/pga_realized_slates"

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before the first write.
os.makedirs(realized_dir, exist_ok=True)

for period in periods:
    try:
        print(f"Getting period {period}")
        date, slate = get_pga_realized_slate(period)
        print(f"Writing {date} data to file")
        slate.to_csv(f"{realized_dir}/{date}.csv", index=False)
    except ValueError as e:
        # Raised for periods without usable data; report it and move on
        print(e)
    # Pause between requests whether or not the period produced a file
    print("Sleeping for 3 seconds")
    time.sleep(3)

Getting period 255
Writing 2020-08-20 data to file
Sleeping for 3 seconds
Getting period 256
Writing 2020-08-27 data to file
Sleeping for 3 seconds
Getting period 257
No Main slate found
Sleeping for 3 seconds
Getting period 258
Writing 2020-09-10 data to file
Sleeping for 3 seconds
Getting period 259
Writing 2020-09-17 data to file
Sleeping for 3 seconds
Getting period 260
Writing 2020-09-24 data to file
Sleeping for 3 seconds
Getting period 261
Writing 2020-10-01 data to file
Sleeping for 3 seconds
Getting period 262
Writing 2020-10-08 data to file
Sleeping for 3 seconds
Getting period 263
Writing 2020-10-15 data to file
Sleeping for 3 seconds
Getting period 264
Writing 2020-10-22 data to file
Sleeping for 3 seconds
Getting period 265
Writing 2020-10-29 data to file
Sleeping for 3 seconds
Getting period 266
No Main slate found
Sleeping for 3 seconds
Getting period 267
Writing 2020-11-12 data to file
Sleeping for 3 seconds
Getting period 268
Writing 2020-11-19 data to file
Sleeping fo

In [91]:
# Gathering all historical slates into one file
slate_dir = "./data/pga_realized_slates"

frames = []
# os.listdir returns entries in arbitrary order, so sort for a reproducible
# row order, and skip anything that is not a slate CSV (e.g. checkpoint
# folders or OS metadata files) instead of failing in read_csv.
for file in sorted(os.listdir(slate_dir)):
    if not file.endswith(".csv"):
        continue
    data = pd.read_csv(os.path.join(slate_dir, file))
    # File names start with the slate date (YYYY-MM-DD)
    data["Date"] = file[0:10]
    frames.append(data)

# ignore_index gives the combined frame one continuous row index instead of
# repeating each file's 0..n-1 index
frame = pd.concat(frames, ignore_index=True)

In [92]:
# Persist the combined historical slates as one CSV, without the row index
frame.to_csv(path_or_buf="./data/pga_hist.csv", index=False)