# 7. Sims

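This notebook simulates every matchup for a single date `num_sims` times, merges the simulated results with that date's DraftKings salaries, and exports one file per game plus a combined CSV. <br>
Currently it can only process one date at a time. <br>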

# Settings

In [36]:
# Number of simulations to run for each matchup
num_sims = 1000
# Run just the late list: when True, matchup_list is replaced by late_list
# instead of using every file found in the matchup folder
late = False

# Imports

In [37]:
import random
import numpy as np
import pandas as pd
import os
import xlrd
import math
import glob
import datetime
from datetime import date
import time 
import pickle

from functools import partial
from joblib import Parallel, delayed

import import_ipynb
from Classes import *
from simulation_functions_py import *

# import warnings
# warnings.simplefilter(action="ignore")

In [38]:
# Date and Files
# Today's date as a compact YYYYMMDD string
todaysdate = date.today().strftime("%Y%m%d")

# 20220917 is the first date for which current NN model works, 20220920 is weird because two slates
# Hard-coded override: this is the date that is actually used below
todaysdate = "20220408"

matchup_folder = f"Matchups{todaysdate}"

# Root folders for the saved models, the baseball data and browser downloads
model_path = r"C:\Users\james\Documents\MLB\Code\Models"
baseball_path = r"C:\Users\james\Documents\MLB\Data"
download_path = r"C:\Users\james\Downloads"

# Folder holding the matchup files for the selected date
matchup_path = os.path.join(baseball_path, "New_Matchups", matchup_folder)
print(matchup_path)

C:\Users\james\Documents\MLB\Data\New_Matchups\Matchups20220408


In [39]:
# Lists
# Matchup file names to run when `late` is True (used in place of the full folder listing)
late_list = ['ATL_PHI 10142022 0437PM ET.xlsx']

# List of openers: pitcher names handed to the simulation as `opener_list`
# NOTE(review): how the simulation treats these pitchers is defined in sim_matchup — confirm there
opener_list = ['Yonny Chirinos', 'Mauricio Llovera', 'Jesse Chavez', 'Matt Bush', 'Gabe Speier', 'Bryan Baker',
               'Dillon Peters', 'Wil Crowe', 'Jalen Beeks', 'Davis Martin', 'Chi Chi Gonzalez', 'Tommy Henry', 
               'Trevor Richards', 'Bryse Wilson', 'Steven Wilson', 'Brusdar Graterol']
# TODO: maybe identify openers by salary instead (e.g. under $5000)

In [40]:
# Load in neural network stat model
# NOTE: unpickling can execute arbitrary code - only load model files you created or trust.
filename = os.path.join(model_path, "nn_pa_200200200r.sav")
# The context manager closes the file handle as soon as the model has been read
with open(filename, 'rb') as model_file:
    pred = pickle.load(model_file)

In [41]:
# Team Map
# Keep only the three team/venue columns and index the table by the BBREFTEAM column
team_map_file = os.path.join(baseball_path, "Utilities", "Team Map.csv")
team_map_cols = ['BBREFTEAM', 'FANGRAPHSTEAM', 'VENUE_ID']
team_map = pd.read_csv(team_map_file)[team_map_cols].set_index('BBREFTEAM')

In [42]:
# Read in daily weather
filename = f"Daily_Weather_{todaysdate}.xlsx"
# Read from the "Daily Weather2" folder (just switched back to original)
weather_df = (
    pd.read_excel(os.path.join(baseball_path, "Daily Weather2", filename))
    .set_index('venue_id')  # Set venue id as index
)

In [43]:
# Both double play tables are indexed by the same three runner columns
runner_cols = ["runner_1b", "runner_2b", "runner_3b"]

# Read in double play odds
dp_rates = pd.read_csv(
    os.path.join(baseball_path, "Statcast Data", "double_play_rates.csv")
).set_index(runner_cols)

# Read in double play base of outs odds
dp_base_rates = pd.read_csv(
    os.path.join(baseball_path, "Statcast Data", "double_play_base_rates.csv")
).set_index(runner_cols)

In [44]:
# Read in base path advances
advances_file = os.path.join(baseball_path, "Statcast Data", "advance_rates.csv")
advances_table = pd.read_csv(advances_file).set_index("dp")
# Convert to a nested dict: {column name: {"dp" value: cell value}}
advances = advances_table.to_dict()

In [45]:
# Pulls
filename = os.path.join(model_path, "nn_pull.sav")

# Load in model
# NOTE: unpickling can execute arbitrary code - only load model files you created or trust.
# The context manager closes the file handle as soon as the model has been read
with open(filename, 'rb') as pull_file:
    pull = pickle.load(pull_file)

In [46]:
# Should move this out of this code and keep it as only functions
# Load in today's DK salaries
dk_name = "DKSalaries_" + todaysdate + ".csv"

# Clean DK salaries for merge
DKSalaries = pd.read_csv(os.path.join(baseball_path, "Salaries Scraped", dk_name))

# Change Ohtani's code. He won't match otherwise.
DKSalaries['ID'] = np.where(DKSalaries['Name'] == "Shohei Ohtani", 134045, DKSalaries['ID'])

# This creates the folder where game sims will end up
directory = "Matchups" + todaysdate

# Every file in the matchup folder is treated as a matchup to simulate
matchup_list = os.listdir(matchup_path)

# exist_ok=True makes re-runs a no-op when the folder is already there, while real
# failures (e.g. permissions) still raise instead of being silently swallowed.
# makedirs also creates the parent "Player Sims" folder if it is missing.
os.makedirs(os.path.join(baseball_path, "Player Sims", directory), exist_ok=True)

print(directory)

Matchups20220408


In [47]:
# Choose late list if selected: only the matchups named in late_list are run
if late:
    matchup_list = late_list

# Simulations

In [48]:
# Simulates one matchup num_sims times and merges the results with DK Salaries
def create_matchup_df(matchup, DKSalaries=DKSalaries, matchup_path=matchup_path,
            team_map=team_map, weather_df=weather_df, pull=pull, pred=pred, dp_rates=dp_rates, dp_base_rates=dp_base_rates,
            advances=advances, opener_list=opener_list, num_sims=num_sims):
    """Run ``num_sims`` parallel simulations of ``matchup`` and join them to the salary table.

    Parameters
    ----------
    matchup : name of the matchup file; forwarded to ``create_game`` and ``sim_matchup``.
    DKSalaries : DataFrame of salaries; must contain an ``ID`` column to merge on.
    matchup_path, team_map, weather_df : forwarded to ``create_game`` and ``sim_matchup``.
    pull, pred, dp_rates, dp_base_rates, advances, opener_list : forwarded
        unchanged to ``sim_matchup``.
    num_sims : number of simulations to run.

    Returns
    -------
    DataFrame
        ``DKSalaries`` inner-merged on ``ID`` with the simulation output, in which
        each column named ``FP`` has been renamed ``FP0``, ``FP1``, ... in order of
        appearance and any remaining repeated column labels are kept only once.

    Notes
    -----
    Each ``sim_matchup`` result is assumed to be a pandas object that can be
    concatenated column-wise and that carries ``ID`` and ``FP`` columns - confirm
    against ``sim_matchup``.
    """
    all_scores_list = []
    game_template = create_game(matchup, matchup_path, team_map, weather_df)

    # Every simulation receives exactly the same keyword arguments
    sim_kwargs = dict(
        matchup=matchup, matchup_path=matchup_path, team_map=team_map, weather_df=weather_df,
        pull=pull, pred=pred, dp_rates=dp_rates, dp_base_rates=dp_base_rates,
        advances=advances, opener_list=opener_list,
        all_scores_list=all_scores_list, game_template=game_template,
    )
    run_one_sim = delayed(sim_matchup)
    # n_jobs=-1 uses all available workers; verbose=5 prints progress
    sim_results = Parallel(n_jobs=-1, verbose=5)(run_one_sim(**sim_kwargs) for _ in range(num_sims))

    # Place the simulations side by side, one set of columns per simulation
    player_sims = pd.concat(sim_results, axis=1)

    # Number the FP columns so each simulation keeps its own score column
    fp_seen = 0
    relabelled = []
    for label in player_sims.columns:
        if label == "FP":
            relabelled.append(f'FP{fp_seen}')
            fp_seen += 1
        else:
            relabelled.append(label)
    player_sims.columns = relabelled

    # Whatever labels still repeat after the renaming are kept only once (first occurrence)
    first_occurrence = ~player_sims.columns.duplicated()
    player_sims = player_sims.loc[:, first_occurrence].copy()

    # Merge all scores onto players
    return DKSalaries.merge(player_sims, on='ID', how='inner')

In [49]:
%%time
# Substrings of matchup file names to skip, e.g.:
# broken = ['HOU', 'CHW', 'MIN', "COL"]
broken = []
matchup_list = [x for x in matchup_list if not any(bad in x for bad in broken)]
print(matchup_list)
# This loops over matchups, simulates each one and then exports the results to excel
for matchup in matchup_list:
    print(matchup)

    player_sims = create_matchup_df(matchup, DKSalaries=DKSalaries, matchup_path=matchup_path,
            team_map=team_map, weather_df=weather_df, pull=pull, pred=pred, dp_rates=dp_rates, dp_base_rates=dp_base_rates,
            advances=advances, opener_list=opener_list, num_sims=num_sims)

    # Fill missings with 0s, then tidy the column names left over from the merge
    player_sims = (
        player_sims
        .fillna(0)
        .rename(columns={'Name_x': 'Name', 'order': 'Roster Order'})
        # errors='ignore' drops whichever of these columns exist; this replaces a bare
        # try/except that would also have hidden unrelated errors
        .drop(columns=['Unnamed: 0', 'Name_y'], errors='ignore')
    )
    player_sims['Roster Order'] = player_sims['Roster Order'].astype(int)

    # Set baseline export constraints
    player_sims['Min Exposure'] = 0.0
    player_sims['Max Exposure'] = 0.5

    # Average the simulated score columns (every column whose name contains 'FP') per player
    points_cols = [col for col in player_sims.columns if 'FP' in col]
    player_sims['AvgPointsPerGame'] = player_sims[points_cols].mean(axis=1)

    # One workbook per game, named after the matchup file
    player_sims.to_excel(os.path.join(baseball_path, "Player Sims", directory, matchup))

['CIN_ATL.xlsx', 'HOU_LAA.xlsx', 'NYM_WSN.xlsx', 'SDP_ARI.xlsx', 'TEX_TOR.xlsx']
CIN_ATL.xlsx


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done 130 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done 256 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 418 tasks      | elapsed:    8.6s
[Parallel(n_jobs=-1)]: Done 616 tasks      | elapsed:   10.9s
[Parallel(n_jobs=-1)]: Done 850 tasks      | elapsed:   13.8s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   15.7s finished


HOU_LAA.xlsx


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 228 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done 480 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done 804 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:    8.6s finished


NYM_WSN.xlsx


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 228 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 480 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done 804 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:    8.3s finished


SDP_ARI.xlsx


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.


StopIteration: 

In [50]:
# This appends all games together
# Create path to Player Sims folder
path = os.path.join(baseball_path, "Player Sims", directory)
# Find all Excel files
player_sim_files = glob.glob(os.path.join(path, "*.xlsx"))

# Read every per-game workbook, then stack them into a single frame
matchup_sim_list = [
    pd.read_excel(sim_path, index_col=None, header=0) for sim_path in player_sim_files
]
all_matchup_sims = pd.concat(matchup_sim_list, axis=0, ignore_index=True)

# Set file name
sim_file = f"Player_Sims_{todaysdate}.csv"

# Sort so the highest average scores come first
all_matchup_sims = all_matchup_sims.sort_values(['AvgPointsPerGame'], ascending=False)
# Clean name: drop the columns whose label starts with 'Unnamed'
is_unnamed = all_matchup_sims.columns.str.startswith('Unnamed')
all_matchup_sims = all_matchup_sims.loc[:, ~is_unnamed]

# Export to CSV
all_matchup_sims.to_csv(os.path.join(baseball_path, "Player Sims", sim_file))

In [51]:
# Record the date of the most recent run in the notebook output
print(f"Code was last run on: {datetime.date.today()}")

Code was last run on: 2023-03-03


**Weather:** Need to fit weather somehow and come up with a good system for dealing with historic dates. Can probably just switch folders:
- `Daily Weather` (new, going forward)
- `Daily Weather2` (past weather)

The two should be perfectly comparable.

To do:
- Add past FanGraphs projections (needs an account).
- Fix the DK merge (can't merge on anything other than name, but there are some issues, like with Kike). Maybe just use last name, but there could be issues with duplicates.
- Clean DraftKings names thoroughly, with manual adjustments?

Issue:
    Players making their debut will have a MiLB FanGraphs playerid in the day-of FanGraphs projections and an MLB FanGraphs playerid from Chadwick. 
    This causes the inner merge of rosters with FanGraphs projections to fail for them, leaving them out entirely. 
    This can be fixed by manually adjusting the FanGraphs playerids in the projections. It is just kind of annoying. 

In [52]:
### TODO: maybe automate filling in missing batting order positions