In [1]:
import pandas as pd
import numpy as np
import os
from difflib import get_close_matches

In [2]:
# Folder holding the exported CSV files. Defaults to the original local path;
# set the DFS_DOWNLOADS_DIR environment variable to point the notebook at a
# different folder without editing the code. An empty value falls back to the
# default.
downloads = os.environ.get("DFS_DOWNLOADS_DIR") or "/mnt/c/Users/Erik/Downloads"

def opp_pitcher(x):
    """Return the name of the pitcher on the team that row ``x`` is facing.

    The lookup is done against the module-level ``slate`` DataFrame as it
    exists at call time: rows whose ``Team`` equals ``x["Opponent"]`` and whose
    ``Position`` is ``"P"``.

    Parameters
    ----------
    x : mapping or pandas.Series
        A slate row providing ``"Position"`` and, for non-pitchers,
        ``"Opponent"``.

    Returns
    -------
    The ``Name`` of the single matching pitcher, or ``np.nan`` when ``x`` is
    itself a pitcher.

    Raises
    ------
    ValueError
        If zero or more than one opposing pitcher is found in ``slate``.
    """
    # Pitchers do not get an opposing pitcher.
    if x["Position"] == "P":
        return np.nan

    is_opposing_pitcher = (slate["Team"] == x["Opponent"]) & (slate["Position"] == "P")
    candidates = slate.loc[is_opposing_pitcher, "Name"]
    n_found = len(candidates)

    # Exactly one match is required; anything else is treated as bad data.
    if n_found == 0:
        raise ValueError(f"{x} doesn't have an opposing pitcher")
    if n_found > 1:
        raise ValueError(f"{x} Multiple Opposing Pitchers identified. Data Issues.")
    return candidates.iloc[0]


def close_matches(x, possible):
    """Return the best fuzzy match for ``x`` among ``possible``.

    Uses ``difflib.get_close_matches`` with a similarity cutoff of 0.80 and
    returns the top-ranked candidate, or ``np.nan`` when nothing clears the
    cutoff.
    """
    candidates = get_close_matches(x, possible, cutoff=0.80)
    return candidates[0] if candidates else np.nan


def make_game(row):
    """Build the ``"AWAY@HOME"`` game label for a slate row.

    ``row["HomeOrAway"]`` decides which of ``row["Team"]`` and
    ``row["Opponent"]`` goes on each side of the ``@``. Returns ``None`` when
    ``HomeOrAway`` is neither ``"HOME"`` nor ``"AWAY"``.
    """
    venue = row["HomeOrAway"]
    if venue == "HOME":
        away, home = row["Opponent"], row["Team"]
    elif venue == "AWAY":
        away, home = row["Team"], row["Opponent"]
    else:
        # Unknown venue flag: no label can be built.
        return None
    return f"{away}@{home}"

In [17]:
# --- Input files -----------------------------------------------------------
# The projections and leaders exports each arrive as two downloads that are
# stacked into a single frame.
slate_file = f"{downloads}/draftkings.csv"
proj_files = [
    f"{downloads}/fantasy-baseball-projections.csv",
    f"{downloads}/fantasy-baseball-projections (1).csv",
]
stats_files = [
    f"{downloads}/fantasy-baseball-leaders.csv",
    f"{downloads}/fantasy-baseball-leaders (1).csv",
]

# --- Load ------------------------------------------------------------------
# NOTE: the frame must stay bound to the name `slate`, because opp_pitcher()
# reads it as a module-level global.
slate = pd.read_csv(slate_file)
slate = slate.drop_duplicates(subset=["Name", "Team"])

proj = pd.concat([pd.read_csv(path) for path in proj_files])
# Doubleheaders and other oddities produce multiple rows per player;
# just drop them. I expect this introduces some error.
proj = proj.drop_duplicates(subset=["Name", "Team"])

stats = pd.concat([pd.read_csv(path) for path in stats_files])
stats = stats.drop_duplicates(subset=["Name", "Team"])

# --- Combine slate with projections and clean up ---------------------------
slate = slate.merge(proj, on=["Name", "Team"], how="left", suffixes=(None, "_r"))
# Make game string
slate["Game"] = slate.apply(make_game, axis=1)
# Assume players with multiple positions listed only play the first one listed
slate["Position"] = slate["Position"].str.split("/", expand=True)[0]
# Replace SP and RP with P
slate["Position"] = slate["Position"].replace({"SP": "P", "RP": "P"})
# Drop batters with no batting order
slate = slate.drop(slate[(slate["BattingOrder"].isna()) & (slate["Position"] != "P")].index)
# Set all pitchers to have 0 batting order (only pitchers can still be missing one)
slate["BattingOrder"] = slate["BattingOrder"].fillna(0).astype(int)
# Ignore players with 0 projections. The explicit copy keeps the column
# assignment below from writing into a slice of the previous frame
# (SettingWithCopyWarning).
slate = slate[slate["FantasyPointsDraftKings"] > 0].copy()
# opp_pitcher looks each hitter's opponent up in the filtered `slate` above.
slate["Opp_Pitcher"] = slate.apply(opp_pitcher, axis=1)

# --- Attach realized stats and shape the output ----------------------------
# Columns of `stats` that clash with existing ones get the "_realized" suffix.
slate = slate.merge(stats, on=["Name", "Team"], how="left", suffixes=(None, "_realized"))
# Source column -> output column, in output order.
output_columns = {
    "Name": "Name",
    "Position": "Position",
    "OperatorSalary": "Salary",
    "Game": "Game",
    "Team": "Team",
    "Opponent": "Opponent",
    "BattingOrder": "Order",
    "Opp_Pitcher": "Opp_Pitcher",
    "FantasyPointsDraftKings": "Projection",
    "FantasyPointsDraftKings_realized": "Scored",
}
slate = slate[list(output_columns)].rename(columns=output_columns)

# --- Save ------------------------------------------------------------------
output_dir = "./data/realized_slates"
# Make sure the destination exists so the write cannot fail on a fresh checkout.
os.makedirs(output_dir, exist_ok=True)

date = input("Date: ").strip()
if not date:
    # Refuse to write a nameless ".csv" and then delete the source downloads.
    raise ValueError("A date is required to name the output file")
slate.to_csv(f"{output_dir}/{date}.csv", index=False)

# --- Clean up --------------------------------------------------------------
# Only reached once the combined slate has been written successfully.
for path in [slate_file, *proj_files, *stats_files]:
    os.remove(path)

Date: 2022-07-05
