In [1]:
import pandas as pd
import numpy as np
from difflib import get_close_matches
from datetime import datetime

In [3]:
test = pd.read_csv("./data/slate_2022-06-02.csv")

# Slate Data

In [29]:
def opp_pitcher(x):
    # If player is pitcher, return nothing
    if x["Position"] == "P":
        return np.nan

    series = slate.loc[
        (slate["Team"] == x["Opponent"]) & (slate["Position"] == "P"), "Player"
    ]
    if len(series) == 0:
        return np.nan
    if len(series) > 1:
        raise ValueError("Multiple Opposing Pitchers identified. Data Issues.")
    else:
        return series.iloc[0]

In [30]:
def close_matches(x, possible):
    matches = get_close_matches(x, possible)
    if matches:
        return matches[0]
    else:
        return np.nan

In [42]:
slate = pd.read_csv("./data/slate.csv")

proj = pd.concat([pd.read_csv("./data/proj_1.csv"), pd.read_csv("./data/proj_2.csv")])
# Find closest name matches from slate
proj["Player"] = proj["Player"].apply(lambda x: close_matches(x, slate["Nickname"]))
# Sometimes salary contains commas
proj["Salary"] = proj["Salary"].str.replace(",", "")
proj["Salary"] = proj["Salary"].astype(int)
# Sometimes data is duplicated
proj = proj.drop_duplicates(subset=["Player", "Pos", "Salary"])
proj = proj.dropna()

# Merge projections with slated players
slate = slate.merge(
    proj,
    left_on=["Nickname", "Position", "Salary"],
    right_on=["Player", "Pos", "Salary"],
    how="left",
)

# Drop all pitchers that are not starting
slate = slate.drop(slate.loc[(slate["Position"] == "P") & (slate["Probable Pitcher"].isna()), :].index)
slate = slate.drop(slate[slate["Injury Details"] == "Postponed"].index)
# BIG ASSUMPTION: assume player fills only first position listed.
# Because of the UTIL slot, I assume this has only minimal impact
# upon optimality
slate["Position"] = slate["Position"].str.split("/", expand=True)[0]
# C and 1B players can fill the C/1B slot
slate["Position"] = slate["Position"].replace({"C": "C/1B", "1B": "C/1B"})
# Pitchers have batting order 0
# Non-starting players also have batting order 0
slate["Batting Order"] = slate["Batting Order"].replace(np.nan, 0)
slate["Batting Order"] = slate["Batting Order"].astype(int)
# Opposing Pitcher for each player
slate["Opp_Pitcher"] = slate.apply(opp_pitcher, axis=1)
# Drop players with 0 fantasy points projected
slate = slate[slate["Proj"] > 0]

# Select relevant columns and rename
slate = slate[["Player", "Position", "Salary", "Game", "Team", "Opponent", "Batting Order", "Opp_Pitcher", "Proj"]]
slate.columns = ["Name", "Position", "Salary", "Game", "Team", "Opponent", "Order", "Opp_Pitcher", "Projection"]

# Write to csv with todays date
slate.to_csv(f"./data/slate_{datetime.today().strftime('%Y-%m-%d')}.csv", index=False)

# Covariance

# Batting Stacks

In [49]:
slate.columns

Index(['Id', 'Position', 'First Name', 'Nickname', 'Last Name', 'FPPG',
       'Played', 'Salary', 'Game', 'Team', 'Opponent', 'Injury Indicator',
       'Injury Details', 'Tier', 'Probable Pitcher', 'Batting Order',
       'Roster Position', 'Player', 'Pos', 'Value', 'Proj', 'Opp_Pitcher'],
      dtype='object')

In [57]:
slate[slate["Position"] == "P"].sum()

Name           Nestor CortesTarik SkubalRobbie RayMichael Kop...
Position                                             PPPPPPPPPPP
Salary                                                     92500
Game           LAA@NYYMIN@DETSEA@BALCWS@TORCIN@BOSMIN@DETTB@T...
Team                            NYYDETSEACWSCINMINTBTEXTORBOSLAD
Opponent                        LAAMINBALTORBOSDETTEXTBCWSCINPIT
Order                                                          0
Opp_Pitcher                                                    0
Projection                                                299.08
Ownership                                                    1.0
dtype: object

In [10]:
data = pd.read_csv("./data/linestar_data.csv")

In [117]:
order = np.array(range(1, 10))

stacks = []
for x in range(9):
    stacks.append(np.roll(order, x)[:4])

In [118]:
stack_scores = {str(stack): [] for stack in stacks}

In [129]:
def add_stack_scores(frame):
    for stack in stacks:
        try:
            stack_score = 0
            for order in stack:
                stack_score += frame.loc[frame["Order"] == order, "Scored"].iloc[0]
        except:
            continue
        stack_scores[str(stack)].append(stack_score)
    return "DONE"

In [130]:
data.groupby(["Date", "Team"]).apply(add_stack_scores)

Date        Team
2021-06-01  ARI     DONE
            ATL     DONE
            BAL     DONE
            BOS     DONE
            CHC     DONE
                    ... 
2022-05-24  SF      DONE
            STL     DONE
            TEX     DONE
            TOR     DONE
            WSH     DONE
Length: 3012, dtype: object

In [131]:
{stack: np.mean(scores) for stack, scores in stack_scores.items()}

{'[1 2 3 4]': 42.23353892315386,
 '[9 1 2 3]': 37.05796505652621,
 '[8 9 1 2]': 33.807536650836255,
 '[7 8 9 1]': 31.471151465125875,
 '[6 7 8 9]': 29.412154354106477,
 '[5 6 7 8]': 33.28679973163367,
 '[4 5 6 7]': 35.88484140233722,
 '[3 4 5 6]': 38.70013324450366,
 '[2 3 4 5]': 40.5610657785179}

In [132]:
{stack: np.std(scores) for stack, scores in stack_scores.items()}

{'[1 2 3 4]': 24.97278155575016,
 '[9 1 2 3]': 22.953305269307837,
 '[8 9 1 2]': 21.729417981269467,
 '[7 8 9 1]': 21.031638151550823,
 '[6 7 8 9]': 20.122761981252374,
 '[5 6 7 8]': 22.06418069248909,
 '[4 5 6 7]': 22.884343977167685,
 '[3 4 5 6]': 23.757624323710544,
 '[2 3 4 5]': 24.642707650125658}

In [133]:
{stack: len(scores) for stack, scores in stack_scores.items()}

{'[1 2 3 4]': 5999,
 '[9 1 2 3]': 4865,
 '[8 9 1 2]': 4843,
 '[7 8 9 1]': 4846,
 '[6 7 8 9]': 4846,
 '[5 6 7 8]': 5962,
 '[4 5 6 7]': 5990,
 '[3 4 5 6]': 6004,
 '[2 3 4 5]': 6005}