# 000. Optimizer Pipeline
This notebook creates player sheets and runs the optimizer.
- Type: Data
- Run Frequency: Pre-contest
- Sources:
    - Contest Guide
    - DraftKings API (Draftables)
    - Player Sims
    - RotoWire 
- Dates:
    - Created: 3/8/2025
    - Updated: 3/8/2025

In [1]:
%run "C:\Users\james\Documents\MLB\Code\U1. Imports.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U2. Utilities.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U3. Classes.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U4. Datasets.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U5. Models.ipynb"

In [8]:
def concat_player_sims(folder_path: str, position: str, n_jobs: int = -1) -> pd.DataFrame:
    """Read every sim CSV for one position in a game folder and stack them.

    The folder's own name is expected to look like ``"AWAY@HOME <game_id> <time>"``
    (e.g. ``"ATL@WSN 744849 1335"``). The away team, home team and game id are
    parsed from that name and attached to every row.

    Args:
        folder_path: Path to a single game's player-sim folder. Either ``\\`` or
            ``/`` separators are accepted.
        position: File-name prefix selecting which CSVs to read. ``"batter"``
            reads ``batting_order`` and the ``imp_b_*`` columns; any other
            value reads the pitcher columns (``imp_p_*``).
        n_jobs: Passed to ``Parallel`` as ``n_jobs`` for the CSV reads.

    Returns:
        One DataFrame holding the rows of all matching CSVs plus ``away_team``,
        ``home_team``, ``TeamAbbrev`` and ``game_id`` columns.

    Raises:
        ValueError: If the folder contains no ``.csv`` file whose name starts
            with ``position``.
    """
    ### Read in data
    # Select columns to read
    if position == "batter":
        columns = ['id', 'fullName', 'batting_order', 'imp_b_l', 'imp_b_r', 'confirmed', 'FP', 'team']
    else:
        columns = ['id', 'fullName', 'imp_p_l', 'imp_p_r', 'confirmed', 'FP', 'team']

    # Specify files
    # Sorted so the stacked row order does not depend on directory listing order
    folder = Path(folder_path)
    file_paths = sorted(
        file for file in folder.iterdir()
        if file.is_file() and file.suffix == '.csv' and file.name.startswith(position)
    )

    # Fail with a clear message instead of pandas' "No objects to concatenate"
    if not file_paths:
        raise ValueError(f"No '{position}' CSV files found in {folder_path}")

    # Read in CSVs, but only the specified columns
    dfs = Parallel(n_jobs=n_jobs)(delayed(pd.read_csv)(file, usecols=columns) for file in file_paths)

    # Concatenate dataframes together
    df = pd.concat(dfs, ignore_index=True)

    ### Create new columns
    # Isolate the final path component regardless of separator style or a
    # trailing separator, then parse everything from that name only
    folder_name = str(folder_path).replace("\\", "/").rstrip("/").split("/")[-1]

    # Identify home and away teams
    away_team = folder_name.split("@")[0]
    home_team = folder_name.split("@")[1].split(" ")[0]
    # Identify game_id (second-to-last space-separated token of the folder name)
    game_id = folder_name.split(" ")[-2]

    # Create team columns
    df['away_team'] = away_team
    df['home_team'] = home_team
    # The sim files tag each row's side in `team`; "away" maps to the away
    # abbreviation and anything else to the home abbreviation
    df['TeamAbbrev'] = np.where(df['team'] == "away", df['away_team'], df['home_team'])
    df['game_id'] = game_id

    return df

In [106]:
def create_player_file(contestKey, guide, draftGroupId, roto_slate, max_exposure_pitchers, max_exposure_batters, 
                       projections='roto', rostership='roto', ownership_spread=0.25):
    """Build the optimizer's player sheet from draftables, player sims and RotoWire.

    Args:
        contestKey: Not used inside this function; kept for the existing call signature.
        guide: Contest guide. Its ``date`` column (first row) selects the
            ``Matchups <date>`` sims folder and its ``game_id`` column decides
            which game folders are included.
        draftGroupId: Identifies the ``Draftables <draftGroupId>.csv`` file to read.
        roto_slate: Identifies the ``RotoWire Projections <roto_slate>.csv`` file to read.
        max_exposure_pitchers: Upper cap on ``Max Exposure`` for non-batters.
        max_exposure_batters: Upper cap on ``Max Exposure`` for batters.
        projections: ``'roto'`` sets ``AvgPointsPerGame`` to the RotoWire
            projection (missing values become 0); ``'robot'`` sets it to the
            row-wise mean of the simulated ``FP_*`` columns.
        rostership: ``'roto'`` derives the exposure range from RotoWire
            ``rostership`` scaled by ``1 -/+ ownership_spread`` and divided by
            100; any other value uses 0 as the minimum and the caps as the maximum.
        ownership_spread: Fractional widening applied around ``rostership``.

    Returns:
        DataFrame restricted to the player-sheet columns, sorted by
        ``AvgPointsPerGame`` descending.

    Raises:
        ValueError: If ``projections`` is not ``'roto'`` or ``'robot'``, or if
            no sim folder matches any ``game_id`` in ``guide``.
    """
    # Fail fast: an unknown mode would otherwise only surface after all the
    # file reads, as a KeyError on the missing 'AvgPointsPerGame' column
    if projections not in ('roto', 'robot'):
        raise ValueError(f"projections must be 'roto' or 'robot', got {projections!r}")

    ### Step 1) Read in Draftables
    draftable_df = pd.read_csv(os.path.join(baseball_path, "A01. DraftKings", "2. Draftables", f"Draftables {draftGroupId}.csv"), dtype='str', encoding='iso-8859-1')

    # Create clean TEAM column
    draftable_df['TEAM'] = draftable_df['TeamAbbrev'].map(team_dict)


    ### Step 2) Read in Sims
    # Positional access so a guide with a non-zero-based index still works
    sims_dir = os.path.join(baseball_path, "B02. Simulations", "2. Player Sims", f"Matchups {guide['date'].iloc[0]}")
    game_ids = list(guide['game_id'].astype(str))

    sim_dfs = []
    for folder in os.listdir(sims_dir):
        # Check if folder name contains any game_id
        if not any(game in folder for game in game_ids):
            print(f"Excluding: {folder}")
            continue

        folder_path = os.path.join(sims_dir, folder)
        print(f"Folder: {folder}")
        # Batters
        position = 'batter'
        batter_df = concat_player_sims(folder_path, position, n_jobs=-1)
        batter_df['Position'] = position
        batter_df.rename(columns={'imp_b_l': 'imp_l', 'imp_b_r': 'imp_r'}, inplace=True)
        # Pitchers
        position = 'pitcher'
        pitcher_df = concat_player_sims(folder_path, position, n_jobs=-1)
        pitcher_df['Position'] = position
        pitcher_df.rename(columns={'imp_p_l': 'imp_l', 'imp_p_r': 'imp_r'}, inplace=True)
        # Placeholder so pitchers share the batter schema
        pitcher_df['batting_order'] = -99
        # Assign back rather than fillna(inplace=True) on the column selection:
        # the chained in-place form is deprecated and does nothing under Copy-on-Write
        pitcher_df['confirmed'] = pitcher_df['confirmed'].fillna("Y")

        df = pd.concat([batter_df, pitcher_df], ignore_index=True, axis=0)

        sim_dfs.append(df)

    if not sim_dfs:
        raise ValueError(f"No player sim folders in {sims_dir} match the guide's game_ids")

    # Concatenate all player sims together
    sim_df = pd.concat(sim_dfs, ignore_index=True, axis=0)

    # Pivot
    # Create a new index for each FP instance within each `id`
    sim_df['FP_index'] = sim_df.groupby('id').cumcount()

    # Pivot the DataFrame, using the `FP_index` to spread `FP` values into columns
    # NOTE(review): rows with a missing value in any index column appear to be
    # dropped by the grouping here - presumably why pitchers get placeholder
    # batting_order/confirmed values above; confirm this is intended for batters
    wide_df = sim_df.pivot_table(index=[col for col in sim_df.columns if col != 'FP' and col != 'FP_index'],
                             columns='FP_index', 
                             values='FP', 
                             aggfunc='first')

    # Rename the columns to FP_0, FP_1, etc.
    wide_df.columns = [f"FP_{col}" for col in wide_df.columns]

    # Reset index to get a flat DataFrame
    wide_df.reset_index(inplace=True)

    # Create clean TEAM variable
    wide_df['TEAM'] = wide_df['TeamAbbrev'].map(team_dict)

    print(wide_df.query('Position == "pitcher"').head())

    ### Step 3) Read in RotoWire Projections
    roto_df = pd.read_csv(os.path.join(baseball_path, "A07. Projections", "2. RotoWire", "2. Projections", f"RotoWire Projections {roto_slate}.csv"))

    # Create clean columns
    roto_df['fullName'] = roto_df['firstName'] + " " + roto_df['lastName']
    roto_df['roto_projection'] = roto_df['points']
    roto_df['TEAM'] = roto_df['teamAbbr'].map(team_dict)

    # Keep relevant columns
    roto_df = roto_df[['fullName', 'TEAM', 'roto_projection', 'rostership']]


    ### Step 4) Merge
    # Merge draftables, sims (wide), and RotoWire dataframes
    # Both frames carry a `Position` column; the sim side's batter/pitcher
    # label comes out of this merge as `Position2`
    player_df = pd.merge(draftable_df, wide_df, left_on=['Name', 'TEAM'], right_on=['fullName', 'TEAM'], how='inner', suffixes=("", "2"))
    player_df = pd.merge(player_df, roto_df, on=['fullName', 'TEAM'], how='left')


    ### Step 5) Create New Fields
    ## Projections
    # Identify FP columns
    fp_columns = [col for col in player_df.columns if "FP_" in col]
    # RotoWire
    if projections == 'roto':
        player_df['AvgPointsPerGame'] = player_df['roto_projection'].fillna(0)
    # My projections
    else:
        player_df['AvgPointsPerGame'] = player_df[fp_columns].mean(axis=1)

    ## Exposure
    # Set exposure range
    # RotoWire ownership projections
    # NOTE(review): players with no RotoWire match have NaN rostership after the
    # left merge, so their exposure bounds are NaN too - confirm that is acceptable
    if rostership == "roto":
        player_df['Min Exposure'] = np.maximum(player_df['rostership'] * (1 - ownership_spread) / 100, 0)
        player_df['Max Exposure'] = np.where(player_df['Position2'] == "batter",
                                             np.minimum(player_df['rostership'] * (1 + ownership_spread) / 100, max_exposure_batters),
                                             np.minimum(player_df['rostership'] * (1 + ownership_spread) / 100, max_exposure_pitchers))
    # No ownership projections
    else:
        player_df['Min Exposure'] = 0
        player_df['Max Exposure'] = np.where(player_df['Position2'] == "batter", max_exposure_batters, max_exposure_pitchers)

    ## Roster information
    player_df['Confirmed Starter'] = (player_df['confirmed'].isin(["Y",1])).astype(int)
    player_df['Roster Order'] = player_df['batting_order'].copy()

    # Relevant columns
    player_columns = ['Position', 'Name + ID', 'Name', 'ID', 'Roster Position', 'Salary', 'Game Info', 'TeamAbbrev', 'AvgPointsPerGame', 'playerId', 'draftGroupId', 'game_id', 'Position2', 'imp_l', 'imp_r', 'confirmed', 'batting_order'] + fp_columns + ['rostership', 'roto_projection', 'Roster Order', 'Confirmed Starter', 'Min Exposure', 'Max Exposure']


    return player_df[player_columns].sort_values(['AvgPointsPerGame'], ascending=False)

In [107]:
%%time
draftables_with_sims = create_player_file(contestKey, guide, draftGroupId, roto_slate, max_exposure_pitchers=0.75, max_exposure_batters=0.5, projections='robot', rostership='roto', ownership_spread=0.25)

Excluding: ARI@SDP 745407 1610
Folder: ATL@WSN 744849 1335
Folder: BAL@TBR 745085 1340
Folder: BOS@CHW 746787 1410
Folder: CHC@CIN 746705 1340
Folder: CLE@MIA 746054 1340
Folder: COL@STL 745167 1415
Excluding: HOU@LAA 746216 1607
Excluding: LAD@NYY 745736 1910
Folder: MIL@DET 746463 1340
Folder: MIN@PIT 745491 1335
Folder: SEA@KCR 746297 1410
Folder: SFG@TEX 745003 1305
Excluding: TOR@OAK 745651 1607
       id         fullName  batting_order  imp_l  imp_r confirmed  team  \
0  445276    Kenley Jansen          -99.0    0.0    0.0         Y  away   
1  445926     Jesse Chavez          -99.0    0.0    0.0         Y  away   
5  458677    Justin Wilson          -99.0    0.0    0.0         Y  home   
7  471911  Carlos Carrasco          -99.0    0.0    0.0         Y  away   
8  489446      Kirby Yates          -99.0    0.0    0.0         Y  home   

  away_team home_team TeamAbbrev game_id Position  FP_0 TEAM  
0       BOS       CHW        BOS  746787  pitcher  0.00  BOS  
1       ATL       W

In [108]:
# Preview the first rows of the player sheet that are not batters
(
    draftables_with_sims
    .query('Position2 != "batter"')
    .head(n=5)
)

Unnamed: 0,Position,Name + ID,Name,ID,Roster Position,Salary,Game Info,TeamAbbrev,AvgPointsPerGame,playerId,draftGroupId,game_id,Position2,imp_l,imp_r,confirmed,batting_order,FP_0,rostership,roto_projection,Roster Order,Confirmed Starter,Min Exposure,Max Exposure
3,SP,Grayson Rodriguez (34554562),Grayson Rodriguez,34554562,P,9500,BAL@TB 06/09/2024 01:40PM ET,BAL,25.9,1118760,108160,745085,pitcher,0.0,0.0,Y,-99.0,25.9,23.3,18.03,-99.0,1,0.17475,0.29125
0,SP,Tarik Skubal (34554559),Tarik Skubal,34554559,P,10500,MIL@DET 06/09/2024 01:40PM ET,DET,25.55,1055775,108160,746463,pitcher,0.0,0.0,Y,-99.0,25.55,21.96,20.25,-99.0,1,0.1647,0.2745
6,SP,Nathan Eovaldi (34554566),Nathan Eovaldi,34554566,P,8600,SF@TEX 06/09/2024 01:05PM ET,TEX,22.5,455169,108160,745003,pitcher,0.0,0.0,Y,-99.0,22.5,12.7,16.3,-99.0,1,0.09525,0.15875
4,SP,Jared Jones (34554563),Jared Jones,34554563,P,9300,MIN@PIT 06/09/2024 01:35PM ET,PIT,21.05,1217250,108160,745491,pitcher,0.0,0.0,Y,-99.0,21.05,22.56,19.25,-99.0,1,0.1692,0.282
62,SP,Chris Flexen (34554576),Chris Flexen,34554576,P,5500,BOS@CWS 06/09/2024 02:10PM ET,CWS,20.85,658306,108160,746787,pitcher,0.0,0.0,Y,-99.0,20.85,1.83,11.42,-99.0,1,0.013725,0.022875
