In [60]:
import numpy as np
import os
import pandas as pd

In [67]:
# Roster configuration.
# Each POS entry is a pair (position code(s), count). The first element is
# either a single one-letter code or a tuple of codes; the preprocessing code
# collects these codes to decide which rows of the cheat sheet to keep.
# NOTE(review): the count is presumably the number of roster slots — confirm.
POS = (
    ("Q", 1),
    ("R", 2),
    ("W", 2),
    ("T", 1),
    (("R", "W", "T"), 2),  # one entry shared by several position codes
)
# NOTE(review): presumably the number of bench slots — confirm against later cells.
BENCH = 7

In [54]:
# Get folder for most recent season.
# Candidates are the directories in the current working directory whose names
# do not start with "." or "_".
folders = [
    f
    for f in os.listdir()
    if os.path.isdir(f) and not f.startswith(".") and not f.startswith("_")
]
# Fail with a readable message instead of an opaque IndexError when the
# working directory contains no candidate folder.
assert folders, "No season folders found"

# The lexicographically greatest name is treated as the newest season.
# NOTE(review): presumably folders are named so that string order matches
# season order (e.g. by year) — confirm.
newest_folder = max(folders)

# Build the path once so the existence check and the load agree.
cheat_sheet_path = os.path.join(newest_folder, "cheat_sheet.csv")
assert os.path.isfile(cheat_sheet_path), "No cheat sheet found"

# Load cheat sheet. header=5: row 5 (0-indexed) supplies the column names and
# the rows above it are not read as data.
cheat_sheet = pd.read_csv(cheat_sheet_path, header=5)

### Preprocess/clean up cheat sheet

In [73]:
# Drop every column whose name starts with "Unnamed"
is_unnamed = cheat_sheet.columns.str.contains("^Unnamed")
cheat_sheet = cheat_sheet.loc[:, ~is_unnamed]

# Keep only the text before the first period in each column name
cheat_sheet.columns = cheat_sheet.columns.str.split(".").str.get(0)

# Stack the repeated column groups on top of each other: cut the frame into
# equal-width, contiguous chunks of columns (as many chunks as the distinct
# column names repeat) and concatenate those chunks row-wise.
n_columns = cheat_sheet.shape[1]
cols = cheat_sheet.columns.drop_duplicates()
n_splits = n_columns // len(cols)
# np.split raises if the columns cannot be divided into equal chunks
splits = np.split(np.arange(n_columns), n_splits)
dfs = [cheat_sheet.iloc[:, chunk] for chunk in splits]
cheat_sheet = pd.concat(dfs, axis=0, ignore_index=True)

# Reduce each POS value to its first character
cheat_sheet["POS"] = cheat_sheet["POS"].str.get(0)

# Collect every position code named in POS; the first element of an entry is
# either a single code or a tuple of codes, so flatten both shapes into a set
pos_needed = {
    code
    for entry in POS
    for code in ((entry[0],) if isinstance(entry[0], str) else entry[0])
}

# Keep only the rows whose position is needed
cheat_sheet = cheat_sheet.loc[cheat_sheet["POS"].isin(pos_needed)]