# A9. Optimizer
Sources: <br>
A7. Matchups - 1. Salaries <br>
A8. Sims - 1. Players <br>

Description: This optimizes lineups based on player projections and provided constraints.

### Imports

In [1]:
import os
import pandas as pd
import numpy as np
import time
import datetime
from datetime import date

from pydfs_lineup_optimizer import get_optimizer, Site, Sport, Player, TeamStack, PlayerFilter, RandomFantasyPointsStrategy

import warnings
import unidecode
import re
import glob

import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
import ssl

from joblib import Parallel, delayed

# Silence every warning category for the rest of the session. Note that this
# also hides pandas deprecation / FutureWarning messages raised by the cells below.
warnings.simplefilter(action="ignore")
# Root data folder; every input and output path in this notebook is joined onto it.
baseball_path = r"C:\Users\james\Documents\MLB\Data"
# Local downloads folder (not referenced by the cells visible in this notebook).
download_path = r"C:\Users\james\Downloads"

In [2]:
# Today's date, rendered once in each format the notebook needs
# datetime.date object
todaysdate_dt = datetime.date.today()

# YYYY-MM-DD (string)
todaysdate_dash = todaysdate_dt.isoformat()

# MM/DD/YYYY
todaysdate_slash = todaysdate_dt.strftime("%m/%d/%Y")

# YYYYMMDD (used in file names)
todaysdate = todaysdate_dt.strftime("%Y%m%d")

### Create Lineups

In [3]:
def create_lineups(lineups=200, sims=1000, date=todaysdate):
    """Generate optimized DraftKings MLB lineups for one slate and export them.

    Reads "Player_Sims_<date>.csv" from the "A8. Sims - 1. Players" folder,
    configures the pydfs optimizer, generates up to `lineups` lineups, and
    writes them to "A9. Optimizer/Lineup_Sims_<date>.csv".

    Parameters
    ----------
    lineups : int
        Number of lineups requested from the optimizer.
    sims : int
        Accepted for call compatibility; not used inside this function.
    date : str
        Slate date as YYYYMMDD, used to build the input and output file names.
    """
    # Positions that count as hitters for the stacking rules
    hitter_positions = ['C', '1B', '2B', '3B', 'SS', 'OF']
    # Iteration counts at which a progress number is printed
    progress_marks = (1, 25, 50, 75, 99, 100, 125, 150, 175, 200, 250, 300, 400, 500, 600, 700, 800, 900, 1000)

    # Input: player projections for this date
    projections_path = os.path.join(baseball_path, "A8. Sims - 1. Players", "Player_Sims_" + date + ".csv")
    # Output: the generated lineups
    export_path = os.path.join(baseball_path, "A9. Optimizer", "Lineup_Sims_" + date + ".csv")

    # DraftKings baseball optimizer loaded with the projected player pool
    optimizer = get_optimizer(Site.DRAFTKINGS, Sport.BASEBALL)
    optimizer.load_players_from_csv(projections_path)

    # Require lineups to spend at least this much salary
    optimizer.set_min_salary_cap(49500)

    # Stacking rules: a 5-hitter and a 2-hitter team stack
    # (NOTE(review): see the pydfs TeamStack docs for the exact meaning of `spacing`)
    optimizer.add_stack(TeamStack(5, spacing=6, for_positions=list(hitter_positions)))
    optimizer.add_stack(TeamStack(2, spacing=3, for_positions=list(hitter_positions)))
    # Pitchers vs. hitters of the opposing team
    optimizer.restrict_positions_for_opposing_team(['SP', 'RP'], ['C', 'SS', 'OF', '1B', '2B', '3B'])

    # Optional settings, currently disabled:
    # optimizer.player_pool.exclude_teams(['PIT', 'WAS', 'WSN', 'TB', 'TBR', 'CHW', 'CWS'])
    # optimizer.set_fantasy_points_strategy(RandomFantasyPointsStrategy(max_deviation=0.12)) ### Create some deviation
    # optimizer.set_projected_ownership(max_projected_ownership=0.25)

    # Player-pool filter (presumably a minimum projection of 5 - confirm against pydfs PlayerFilter)
    # PlayerFilter(from_value=0.0015, filter_by='efficiency') was slower unless it excluded too much
    optimizer.player_pool.add_filters(PlayerFilter(from_value=5))

    # Run the optimizer; echo progress marks and the first four lineups
    for index, lineup in enumerate(optimizer.optimize(lineups)):
        if index in progress_marks:
            print(index)
        if index < 4:
            print(lineup)

    # Export lineups to csv
    optimizer.export(export_path)

In [4]:
# Lineup-file columns that each hold one rostered player as "Name(DraftKingsID)".
_LINEUP_SLOTS = ['P', 'P.1', 'C', '1B', '2B', '3B', 'SS', 'OF', 'OF.1', 'OF.2']


def _attach_ownership(lineup_sims, crosswalk, ownership, missing_ownership=5):
    """Return a copy of `lineup_sims` with one '<slot>_ownership' column per roster slot.

    `crosswalk` maps player 'name' -> 'rotowire_id' (names must be unique) and
    `ownership` maps 'id' -> 'ownership'. Players that cannot be matched get
    `missing_ownership`. The input frame is never modified, so a failure here
    cannot leave it half-updated.
    """
    # Cast ids to str BEFORE any join. Casting after a left merge (as the code
    # did previously) turns integer ids into "123.0" as soon as one name is
    # unmatched, which then matches nothing in the ownership file.
    id_by_name = crosswalk.set_index('name')['rotowire_id'].astype('str')

    ownership = ownership[['id', 'ownership']].copy()
    ownership['id'] = ownership['id'].astype('str')
    # One ownership value per id, so the lookup can never duplicate lineup rows.
    ownership_by_id = ownership.drop_duplicates(subset=['id'], keep='first').set_index('id')['ownership']

    ownership_by_name = id_by_name.map(ownership_by_id)

    result = lineup_sims.copy()
    for pos in _LINEUP_SLOTS:
        # Remove "(DraftKingsID)". regex=True is explicit because pandas >= 2.0
        # treats the pattern literally by default.
        names = result[pos].str.replace(r'\(\d+\)', '', regex=True).str.strip()
        # May be missing. This is common for Luis Garcia if we keep the wrong one.
        # Only the ownership column is filled; other NaNs in the frame are left alone.
        result[f'{pos}_ownership'] = names.map(ownership_by_name).fillna(missing_ownership)
    return result


def choose_lineups(sims=200, date=todaysdate):
    """Score each optimized lineup across the player simulations and save the result.

    Reads "A9. Optimizer/Lineup_Sims_<date>.csv" and
    "A8. Sims - 1. Players/Player_Sims_<date>.csv", sums the ten rostered
    players' simulated points into "Sim <i>" columns, optionally attaches
    projected ownership, adds summary statistics (P50-P100, Tail, Sim STD,
    Plus2, Plus3) and writes
    "A9. Optimizer - 2. Decisions/Lineups Ranked <date>.csv".

    Parameters
    ----------
    sims : int
        Number of simulations to total; columns FP0 .. FP<sims-1> must exist
        in the player sims file (a missing column raises KeyError).
    date : str
        Slate date as YYYYMMDD, used to build all file names.

    Notes
    -----
    Ownership is best-effort: if it cannot be loaded or merged, a message is
    printed and the output simply has no ownership columns.
    """
    # Read in daily lineups
    lineup_sims = pd.read_csv(os.path.join(baseball_path, "A9. Optimizer", "Lineup_Sims_" + date + ".csv"))

    ### Merge on Sims
    # Read in player sims
    salaries = pd.read_csv(os.path.join(baseball_path, "A8. Sims - 1. Players", "Player_Sims_" + date + ".csv"))
    # Keep relevant variables
    salaries = salaries.drop(columns=["Unnamed: 0", "Position", "Name", "ID", "Roster Position", "Salary", "Game Info", "TeamAbbrev", "AvgPointsPerGame"])
    # Clean Name + ID variable to remove space (this is for consistency for merging).
    # Literal replacement: the old pattern " \(" only worked under the pre-2.0 regex default.
    salaries['Name + ID'] = salaries['Name + ID'].str.replace(" (", "(", regex=False)

    ### Give Ohtani his own code
    salaries.loc[salaries['Name + ID'].str.contains('Shohei'), 'Name + ID'] = 'Shohei Ohtani(134045)'

    # Merge stats onto lineups, one roster slot at a time.
    # May need m:m with Ohtani, but ideally, we would not
    for slot in _LINEUP_SLOTS:
        # The first slot keeps the unsuffixed column names; later slots get "_<slot>".
        suffix_kwargs = {} if slot == 'P' else {"suffixes": (None, "_" + slot)}
        lineup_sims = lineup_sims.merge(salaries, left_on=slot, right_on="Name + ID", how='left', validate="m:1", **suffix_kwargs)

    # Add up player performances for each simulation.
    # Totals are collected first and attached in one concat to avoid
    # fragmenting the frame with one insert per simulation.
    column_suffixes = [""] + ["_" + slot for slot in _LINEUP_SLOTS[1:]]
    sim_totals = {}
    for i in range(sims):
        total = lineup_sims["FP" + str(i)]
        for suffix in column_suffixes[1:]:
            total = total + lineup_sims["FP" + str(i) + suffix]
        sim_totals["Sim " + str(i)] = total
    lineup_sims = pd.concat([lineup_sims, pd.DataFrame(sim_totals, index=lineup_sims.index)], axis=1)

    # Delete excess variables
    lineup_sims = lineup_sims.rename(columns={'FPPG': 'AvgPointsPerGame'})
    excess = lineup_sims.columns.str.contains('FP|Name|Order|Exposure', case=False)
    lineup_sims = lineup_sims.loc[:, ~excess]

    ### Merge on Ownership
    # Read in crosswalk
    crosswalk = pd.read_csv(os.path.join(baseball_path, "Utilities", "RotoWire Crosswalk.csv"))
    # So many Luises Garcia.
    crosswalk = crosswalk[['name', 'rotowire_id']].drop_duplicates(subset=['name'], keep='first')

    has_ownership = False
    try:
        # Read in ownership
        ownership = pd.read_csv(os.path.join(baseball_path, "Projections - RotoWire", "Ownership_" + date + ".csv"))
        lineup_sims = _attach_ownership(lineup_sims, crosswalk, ownership)
        has_ownership = True
    except Exception as exc:
        # Best effort: carry on without ownership, but say why.
        print("No ownership for today")
        print(f"  ({type(exc).__name__}: {exc})")

    ### Calculate summary statistics
    column_list = [col for col in lineup_sims if col.startswith("Sim")]
    sim_points = lineup_sims[column_list]

    ### Points
    lineup_sims['P50'] = sim_points.median(axis=1)
    lineup_sims['P75'] = sim_points.quantile(.75, axis=1)
    lineup_sims['P90'] = sim_points.quantile(.90, axis=1)
    lineup_sims['P95'] = sim_points.quantile(.95, axis=1)
    lineup_sims['P99'] = sim_points.quantile(.99, axis=1)
    lineup_sims['P100'] = sim_points.max(axis=1)

    # Tail fatness: total points over the simulations at or above the lineup's P95.
    # Vectorized replacement for the per-cell loop with chained assignment.
    in_tail = sim_points.ge(lineup_sims['P95'], axis=0)
    lineup_sims['Tail'] = sim_points.where(in_tail, 0).sum(axis=1)

    lineup_sims['Sim STD'] = sim_points.std(axis=1)

    # Standard deviations from mean
    lineup_sims['Plus2'] = lineup_sims['AvgPointsPerGame'] + 2 * lineup_sims['Sim STD']
    lineup_sims['Plus3'] = lineup_sims['AvgPointsPerGame'] + 3 * lineup_sims['Sim STD']

    ### Ownership
    # Only when the per-slot columns exist. This avoids writing an all-zero
    # 'ownership' column when no ownership data was available.
    if has_ownership:
        pitcher_cols = ['P_ownership', 'P.1_ownership']
        batter_cols = ['C_ownership', '1B_ownership', '2B_ownership', '3B_ownership', 'SS_ownership', 'OF_ownership', 'OF.1_ownership', 'OF.2_ownership']
        # Total
        lineup_sims['ownership'] = lineup_sims.filter(like='_ownership').sum(axis=1)
        # Pitcher ownership
        lineup_sims['pitcher ownership'] = lineup_sims[pitcher_cols].sum(axis=1)
        # Batter ownership
        lineup_sims['batter ownership'] = lineup_sims[batter_cols].sum(axis=1)

    # Save lineups with points
    lineup_sims.to_csv(os.path.join(baseball_path, "A9. Optimizer - 2. Decisions", "Lineups Ranked " + date + ".csv"))
    

### Uploads

In [5]:
def create_upload_file(date=todaysdate):
    """Write the DraftKings upload CSV for one slate.

    Reads "A7. Matchups - 1. Salaries/DKSalaries_<date>.csv" and
    "A9. Optimizer - 2. Decisions/Lineups Ranked <date>.csv", orders the
    lineups by Plus3, keeps only the ten player columns under the headers
    P, P, C, 1B, 2B, 3B, SS, OF, OF, OF, and writes
    "Uploads/Upload <date>.csv" without an index.

    Parameters
    ----------
    date : str
        Slate date as YYYYMMDD, used to build all file names.
    """
    # Read in DKSalaries for today
    dksalaries = pd.read_csv(os.path.join(baseball_path, "A7. Matchups - 1. Salaries", "DKSalaries_" + date + ".csv"))

    # Find Ohtani's real "Name + ID" (first match). na=False skips blank rows.
    shohei_ids = dksalaries.loc[dksalaries['Name + ID'].str.contains('Shohei', na=False), 'Name + ID']
    if shohei_ids.empty:
        real_shohei_id = None
    else:
        # Remove the space before the parenthesis (there's a slight disconnect here between the salaries and the lineups)
        real_shohei_id = shohei_ids.iloc[0].replace(" (", "(")

    # Read in lineup sims
    lineup_sims = pd.read_csv(os.path.join(baseball_path, "A9. Optimizer - 2. Decisions", "Lineups Ranked " + date + ".csv"))
    # Sort by Plus3 (ascending because DK will put the bottom lineups at the top)
    lineup_sims = lineup_sims.sort_values(by=['Plus3'], ascending=True)

    # Keep just the players and rename variables to appease DK's upload.
    # rename() returns a new frame, so nothing is modified in place on a slice.
    upload = lineup_sims[['P', 'P.1', 'C', '1B', '2B', '3B', 'SS', 'OF', 'OF.1', 'OF.2']].rename(
        columns={'P.1': 'P', 'OF.1': 'OF', 'OF.2': 'OF'}
    )

    # Replace any instance of Shohei Ohtani with his actual Name + ID (instead of the modified one I use).
    # Skipped when he is not in today's salaries, so cells are never overwritten with a placeholder.
    if real_shohei_id is not None:
        upload = upload.replace(to_replace='.*Shohei.*', value=real_shohei_id, regex=True)

    upload.to_csv(os.path.join(baseball_path, "Uploads", "Upload " + date + ".csv"), index=False)

In [6]:
def email_upload_file(date=todaysdate):
    """Email the upload CSV for `date` as an attachment via Gmail over SSL.

    Attaches "Uploads/Upload <date>.csv" to a message with subject
    "Lineups: <date>" and sends it from and to the same address.

    Parameters
    ----------
    date : str
        Slate date as YYYYMMDD; selects the file to attach.

    Raises
    ------
    RuntimeError
        If the MLB_GMAIL_APP_PASSWORD environment variable is not set.
    """
    message = """\
    Here are today's lineups."""

    sender_email = 'jamesgiles1993@gmail.com'
    receiver_email = 'jamesgiles1993@gmail.com'
    smtp_server = 'smtp.gmail.com'
    port = 465

    # The app password is read from the environment so it never lives in the
    # notebook. The value that used to be hardcoded here should be revoked.
    password = os.environ.get("MLB_GMAIL_APP_PASSWORD")
    if not password:
        raise RuntimeError("Set the MLB_GMAIL_APP_PASSWORD environment variable to send the lineup email.")

    # Create a multipart message object
    msg = MIMEMultipart()
    msg['Subject'] = f'Lineups: {date}'
    msg['From'] = sender_email
    msg['To'] = receiver_email

    # Attach the message to the email
    msg.attach(MIMEText(message, 'plain'))

    # Add the CSV file attachment
    attachment_path = os.path.join(baseball_path, "Uploads", "Upload " + date + ".csv")
    # Only the file name goes in the header, not the full local path
    attachment_name = os.path.basename(attachment_path)

    with open(attachment_path, 'rb') as attachment:
        part = MIMEBase('application', 'octet-stream')
        part.set_payload(attachment.read())
    encoders.encode_base64(part)
    part.add_header('Content-Disposition', f'attachment; filename="{attachment_name}"')
    msg.attach(part)

    # Create a secure SSL context
    context = ssl.create_default_context()

    # Send the email
    with smtplib.SMTP_SSL(smtp_server, port, context=context) as server:
        server.login(sender_email, password)
        server.sendmail(sender_email, receiver_email, msg.as_string())

### Run One

In [7]:
# Build, score, and ship today's lineups
create_lineups(lineups=200, sims=1000, date=todaysdate)
choose_lineups(sims=1000, date=todaysdate)

# Write the DraftKings upload file, show where to upload it, then email it
create_upload_file(date=todaysdate)
print("https://www.draftkings.com/lineup/upload")
email_upload_file(date=todaysdate)

 1. P       Justin Verlander              SP    NYM            NYY@NYM  23.131         8000.0$   
 2. P       Luis Castillo                 SP    SEA            MIA@SEA  25.248         9800.0$   
 3. C       Keibert Ruiz(5)               C     WAS            WAS@HOU  8.575          3500.0$   
 4. 1B      Joey Meneses(3)               1B    WAS            WAS@HOU  8.793          3400.0$   
 5. 2B      Marcus Semien(1)              2B    TEX            LAA@TEX  9.595          6100.0$   
 6. 3B      Jeimer Candelario(2)          3B    WAS            WAS@HOU  9.086          3900.0$   
 7. SS      Ezequiel Duran(3)             OF/SS TEX            LAA@TEX  8.653          3400.0$   
 8. OF      George Springer(1)            OF    TOR            TOR@BAL  9.418          4900.0$   
 9. OF      Lane Thomas(1)                OF    WAS            WAS@HOU  8.847          4200.0$   
10. OF      Stone Garrett(4)              OF    WAS            WAS@HOU  8.541          2800.0$   

Fantasy Points 119.

### Run All

In [8]:
# # Create a second function that won't break
# def create_lineups2(lineups=200, sims=1000, date=todaysdate):
#     try:
#         create_lineups(lineups, sims, date)
#         choose_lineups(sims, date)
#     except:
#         pass
    
# # Identify all days for which there are player sims
# days = []
# for filename in os.listdir(r"C:\Users\james\Documents\MLB\Data\A8. Sims - 1. Players"): 
#     # 2023 
#     if filename.endswith(".csv") and filename.startswith("Player_Sims_2022"):
#         # Pull out date
#         date = filename[12:20]
#         days.append(date)


# # Run all in parallel
# Parallel(n_jobs=-2, verbose=5)(delayed(create_lineups2)(200, 1000, day) for day in days)

In [9]:
# Record when the notebook was last executed
run_day = datetime.date.today()
run_clock = datetime.datetime.now().strftime("%H:%M:%S")
print("Code was last run on: {} at {}.".format(run_day, run_clock))

Code was last run on: 2023-06-14 at 18:43:34.
