# A8. Sims
Source: A6. Weather, A7. Matchups, Statcast Data for base rates, Models<br>

Description: This simulates games. Output includes player fantasy points and game scores. <br>

To do: <br>
Output more stats for players <br>
Create better models - maybe don't impute stats, just use projected stats from FG

# Settings

In [1]:
# # Number of Simulations
# num_sims = 1000
# # Run just the late list
# late = False

# Imports

In [2]:
import random
import numpy as np
import pandas as pd
import os
import xlrd
import math
import glob
import datetime
from datetime import date
import time 
import pickle

from functools import partial
from joblib import Parallel, delayed

import import_ipynb
from Classes import *
from simulation_functions_py import *

# Project folders. These are machine-specific absolute Windows paths; edit them to run elsewhere.
# Folder holding the pickled models (.sav files) loaded later in this notebook
model_path = r"C:\Users\james\Documents\MLB\Code\Models"
# Root data folder: inputs are read from, and sim outputs written to, its subfolders
baseball_path = r"C:\Users\james\Documents\MLB\Data"
# Local downloads folder (not referenced by the cells shown in this notebook)
download_path = r"C:\Users\james\Downloads"

# import warnings
# warnings.simplefilter(action="ignore")

importing Jupyter notebook from Classes.ipynb
Code was last run on: 2023-04-03


In [3]:
# Date and Files
# Today's date as a compact YYYYMMDD string; used below as the date stamp for the run
todaysdate = date.today().strftime("%Y%m%d")
# todaysdate = "20230330"

In [4]:
# Load in neural network stat model
filename = os.path.join(model_path, "nn_pa_200200200r.sav")
# NOTE: unpickling can execute arbitrary code - only load model files you trust.
# The context manager closes the file handle as soon as the model has been read.
with open(filename, 'rb') as model_file:
    pred = pickle.load(model_file)

In [5]:
# Team Map
# Keep only the three team/venue columns and index the table by the BBREFTEAM column
team_map = (
    pd.read_csv(os.path.join(baseball_path, "Utilities", "Team Map.csv"))
    .loc[:, ['BBREFTEAM', 'FANGRAPHSTEAM', 'VENUE_ID']]
    .set_index('BBREFTEAM')
)

In [6]:
# Lists
# Matchup workbooks to run when only a subset of games should be simulated
late_list = [
    'SFG_NYY 03302023 0105PM ET.xlsx',
]


# List of games that don't work
broken_list = list()

In [7]:
# Double play odds, indexed by the runner_1b / runner_2b / runner_3b columns
dp_rates = pd.read_csv(
    os.path.join(baseball_path, "Statcast Data", "double_play_rates.csv")
).set_index(["runner_1b", "runner_2b", "runner_3b"])

# Double play base rates, indexed by the same three runner columns
dp_base_rates = pd.read_csv(
    os.path.join(baseball_path, "Statcast Data", "double_play_base_rates.csv")
).set_index(["runner_1b", "runner_2b", "runner_3b"])

In [8]:
# Base path advance rates, converted to a nested dict of the form
# {column name: {"dp" value: rate}} (the default DataFrame.to_dict() layout)
advances = (
    pd.read_csv(os.path.join(baseball_path, "Statcast Data", "advance_rates.csv"))
    .set_index("dp")
    .to_dict()
)

In [9]:
# Pulls
filename = os.path.join(model_path, "nn_pull.sav")

# Load in model
# NOTE: unpickling can execute arbitrary code - only load model files you trust.
# The context manager closes the file handle as soon as the model has been read.
with open(filename, 'rb') as model_file:
    pull = pickle.load(model_file)

In [10]:
# Read in daily weather
def create_weather_df(date=date):
    """Load the daily weather workbook for one date, indexed by venue id.

    Parameters
    ----------
    date : value whose str() is the date stamp used in the file name
        (the run cell in this notebook passes a YYYYMMDD string).
        NOTE(review): the default appears to be the `date` class imported
        from datetime rather than a date string, so callers should always
        pass this explicitly - confirm.

    Returns
    -------
    pandas.DataFrame
        Contents of "Daily_Weather_<date>.xlsx" from the "A6. Weather"
        folder, with the 'venue_id' column set as the index.
    """
    workbook_name = "Daily_Weather_{}.xlsx".format(str(date))
    workbook_path = os.path.join(baseball_path, "A6. Weather", workbook_name)
    return pd.read_excel(workbook_path).set_index('venue_id')

In [11]:
# weather_df = create_weather_df("20220521")

In [12]:
def create_matchup_list(date=date, late_list=late_list, late=False):
    """Load the day's DraftKings salaries and work out which matchups to simulate.

    Parameters
    ----------
    date : value whose str() is the date stamp used in file and folder names
        (the run cell in this notebook passes a YYYYMMDD string).
    late_list : list of str
        Matchup file names to use instead of the folder listing when `late` is true.
    late : bool
        False (default): use every file in the day's matchup folder and make sure
        the two output folders for the day exist.
        True: use `late_list` and do not create any folders.

    Returns
    -------
    tuple
        (DKSalaries, matchup_list, matchup_path, opener_list) where
        DKSalaries is the salaries DataFrame, matchup_list the file names to
        simulate, matchup_path the folder holding the matchup files, and
        opener_list the unique names of players whose 'Roster Position' is "SP"
        and whose 'Salary' is below 5000.

    Side effects
    ------------
    Prints `opener_list`; creates the output folders when `late` is false.
    """
    # Should move this out of this code and keep it as only functions
    date_stamp = str(date)

    # Load in today's DK salaries
    dk_name = "DKSalaries_" + date_stamp + ".csv"
    DKSalaries = pd.read_csv(os.path.join(baseball_path, "A7. Matchups - 1. Salaries", dk_name))

    # Change Ohtani's code. He won't match otherwise.
    DKSalaries['ID'] = np.where(DKSalaries['Name'] == "Shohei Ohtani", 134045, DKSalaries['ID'])

    # The same "Matchups<date>" folder name is used for the inputs and for both output trees
    directory = "Matchups" + date_stamp
    matchup_path = os.path.join(baseball_path, "A7. Matchups - 2. Matchups", directory)

    # Listed even in late mode, so a missing matchup folder is reported either way
    matchup_list = os.listdir(matchup_path)

    if late:
        matchup_list = late_list
    else:
        # Full run: make sure the output folders exist. exist_ok tolerates a re-run
        # on the same date while still surfacing real failures (e.g. permissions).
        for output_root in ("A8. Sims - 1. Players", "A8. Sims - 2. Scores"):
            os.makedirs(os.path.join(baseball_path, output_root, directory), exist_ok=True)

    is_cheap_sp = (DKSalaries['Roster Position'] == "SP") & (DKSalaries['Salary'] < 5000)
    opener_list = list(DKSalaries[is_cheap_sp]['Name'].unique())
    print(opener_list)

    return DKSalaries, matchup_list, matchup_path, opener_list

# Simulations

In [13]:
# Simulates one matchup num_sims times and merges the player results onto the DK salaries
def create_matchup_df(matchup, DKSalaries, weather_df, date, opener_list,
            team_map=team_map, pull=pull, pred=pred, dp_rates=dp_rates, dp_base_rates=dp_base_rates,
            advances=advances, num_sims=num_sims):
    """Run `num_sims` parallel simulations of one matchup and collect the results.

    Parameters
    ----------
    matchup : str
        File name of the matchup workbook inside the day's matchup folder.
    DKSalaries : pandas.DataFrame
        Salary table; must contain an 'ID' column to merge on.
    weather_df : pandas.DataFrame
        Weather table handed to `create_game` and `sim_matchup`.
    date : value whose str() is the date stamp of the matchup folder.
    opener_list : list
        Passed through to `sim_matchup`.
    team_map, pull, pred, dp_rates, dp_base_rates, advances :
        Lookup tables and models passed through to `create_game` / `sim_matchup`.
    num_sims : int
        Number of simulations to run.

    Returns
    -------
    (player_sims, score_df)
        player_sims: DKSalaries inner-merged on 'ID' with the simulated player
        columns, where the i-th simulation's "FP" column is named "FP<i>".
        score_df: one row per simulation with columns 'Away' and 'Home'.
    """
    matchup_path = os.path.join(baseball_path, "A7. Matchups - 2. Matchups", "Matchups" + str(date))

    # Build the game once; every simulation receives it as its template
    game_template = create_game(matchup, matchup_path, team_map, weather_df)

    # Identical keyword arguments for every simulation (one shared all_scores_list object)
    sim_kwargs = dict(matchup=matchup, matchup_path=matchup_path, team_map=team_map, weather_df=weather_df,
                      pull=pull, pred=pred, dp_rates=dp_rates, dp_base_rates=dp_base_rates,
                      advances=advances, opener_list=opener_list,
                      all_scores_list=[], game_template=game_template)
    run_one_sim = delayed(sim_matchup)
    output = Parallel(n_jobs=-1, verbose=0)(run_one_sim(**sim_kwargs) for _ in range(num_sims))

    # Each result is indexed as (player frame, away score, home score)
    player_output = [game_sim[0] for game_sim in output]
    away_score = [game_sim[1] for game_sim in output]
    home_score = [game_sim[2] for game_sim in output]

    player_sims = pd.concat(player_output, axis=1)
    score_df = pd.DataFrame(list(zip(away_score, home_score)), columns=['Away', 'Home'])

    # Number the repeated "FP" columns in order (FP0, FP1, ...); all other labels are kept as-is
    fp_labels = (f'FP{i}' for i in range(len(player_sims.columns)))
    player_sims.columns = [next(fp_labels) if label == "FP" else label for label in player_sims.columns]
    # The remaining columns repeat in every simulation's frame; keep the first copy of each
    player_sims = player_sims.loc[:, ~player_sims.columns.duplicated()].copy()

    # Merge all scores onto players
    player_sims = DKSalaries.merge(player_sims, on='ID', how='inner')

    return player_sims, score_df

In [14]:
def run_all(team_map=team_map, pull=pull, pred=pred, dp_rates=dp_rates, dp_base_rates=dp_base_rates,
            advances=advances, num_sims=num_sims, broken_list=broken_list, date=date, late_list=late_list, late=False):
    """Simulate every selected matchup for `date` and export the results to Excel.

    For each matchup file ending in ".xlsx" this writes one workbook of player
    sims to "A8. Sims - 1. Players/Matchups<date>" and one workbook of game
    scores to "A8. Sims - 2. Scores/Matchups<date>", then prints the home win
    rate and the median away, home and total scores.

    Parameters
    ----------
    team_map, pull, pred, dp_rates, dp_base_rates, advances, num_sims :
        Passed through to `create_matchup_df`.
    broken_list : list
        Accepted for backward compatibility; not used by this function.
    date : value whose str() is the date stamp used in file and folder names.
    late_list : list of str
        Matchup file names to run when `late` is true.
    late : bool
        False (default): delete the existing player sim workbooks for the date
        and run every matchup. True: run only `late_list`, overwriting just
        those workbooks.

    Returns
    -------
    None
    """
    directory = "Matchups" + str(date)
    players_dir = os.path.join(baseball_path, "A8. Sims - 1. Players", directory)
    scores_dir = os.path.join(baseball_path, "A8. Sims - 2. Scores", directory)

    # Delete all player sims if we're running the whole thing from scratch.
    # With a late_list we just write over the ones we want to replace.
    if not late:
        for stale_file in glob.glob(os.path.join(players_dir, "*.xlsx")):
            os.remove(stale_file)

    weather_df = create_weather_df(date)
    DKSalaries, matchup_list, matchup_path, opener_list = create_matchup_list(date, late_list, late)
    print(matchup_list)

    # This loops over matchups, simulates each one and exports it to Excel
    for matchup in matchup_list:
        if not matchup.endswith(".xlsx"):
            continue
        print(matchup)

        player_sims, score_df = create_matchup_df(matchup, DKSalaries, weather_df, date, opener_list,
                team_map=team_map, pull=pull, pred=pred, dp_rates=dp_rates, dp_base_rates=dp_base_rates,
                advances=advances, num_sims=num_sims)

        # Fill missings with 0s
        player_sims = player_sims.fillna(0)
        player_sims = player_sims.rename(columns={'Name_x': 'Name', 'order': 'Roster Order'})

        # 'Name_y' must be present; 'Unnamed: 0' is only dropped when it exists.
        # An explicit check avoids hiding unrelated errors behind a bare except.
        drop_cols = ['Name_y']
        if 'Unnamed: 0' in player_sims.columns:
            drop_cols.insert(0, 'Unnamed: 0')
        player_sims = player_sims.drop(columns=drop_cols)
        player_sims['Roster Order'] = player_sims['Roster Order'].astype(int)

        # Set baseline export constraints
        player_sims['Min Exposure'] = 0.0
        player_sims['Max Exposure'] = 0.5

        # Average over every column whose name contains 'FP' (the per-simulation points columns)
        points_cols = [col for col in player_sims.columns if 'FP' in col]
        player_sims['AvgPointsPerGame'] = player_sims[points_cols].mean(axis=1)

        player_sims.to_excel(os.path.join(players_dir, matchup))

        # Game Scores (exported before the summary columns below are added)
        score_df.to_excel(os.path.join(scores_dir, matchup))

        score_df['home_win'] = np.where(score_df['Away'] < score_df['Home'], 1, 0)
        score_df['total'] = score_df['Away'] + score_df['Home']

        print("Home Win Rate: " + str(score_df['home_win'].mean()))
        print("Median Away Score: " + str(score_df['Away'].median()))
        print("Median Home Score: " + str(score_df['Home'].median()))
        print("Median Total: " + str(score_df['total'].median()) + "\n")

        # Export later

In [15]:
# This appends all games together
def append_all(date=date):
    """Combine the day's per-matchup player sim workbooks into a single CSV.

    Reads every "*.xlsx" file in "A8. Sims - 1. Players/Matchups<date>", stacks
    them, sorts by 'AvgPointsPerGame' (highest first), drops columns whose names
    start with "Unnamed", and writes "Player_Sims_<date>.csv" to the
    "A8. Sims - 1. Players" folder.

    Parameters
    ----------
    date : value whose str() is the date stamp used in file and folder names.

    Raises
    ------
    ValueError
        If the day's folder contains no player sim workbooks.
    """
    # Convert once so non-string dates work for the folder and the file name alike
    date_stamp = str(date)
    directory = "Matchups" + date_stamp

    # Create path to Player Sims folder
    path = os.path.join(baseball_path, "A8. Sims - 1. Players", directory)
    # Find all Excel files
    player_sim_files = glob.glob(os.path.join(path, "*.xlsx"))
    if not player_sim_files:
        # Fail with a message that names the folder instead of pandas' generic concat error
        raise ValueError("No player sim workbooks found in " + path)

    # Append all together
    matchup_sim_list = [pd.read_excel(sim_path, index_col=None, header=0) for sim_path in player_sim_files]
    all_matchup_sims = pd.concat(matchup_sim_list, axis=0, ignore_index=True)

    # Set file name
    sim_file = "Player_Sims_" + date_stamp + ".csv"

    # Sort
    all_matchup_sims = all_matchup_sims.sort_values(['AvgPointsPerGame'], ascending=False)
    # Clean name
    all_matchup_sims = all_matchup_sims.loc[:, ~all_matchup_sims.columns.str.startswith('Unnamed')]

    # Export to CSV
    all_matchup_sims.to_csv(os.path.join(baseball_path, "A8. Sims - 1. Players", sim_file))

In [16]:
# Simulate today's slate. With late=False every matchup in the day's folder is run.
datestr = todaysdate
run_all(
    date=datestr,
    late_list=['MIL_CHC 04022023 0220PM ET.xlsx'],
    late=False,
)


[]
['ARI_SDP 04032023 0940PM ET.xlsx', 'ATL_STL 04032023 0745PM ET.xlsx', 'BAL_TEX 04032023 0805PM ET.xlsx', 'CLE_OAK 04032023 0940PM ET.xlsx', 'COL_LAD 04032023 1010PM ET.xlsx', 'DET_HOU 04032023 0810PM ET.xlsx', 'LAA_SEA 04032023 0940PM ET.xlsx', 'PHI_NYY 04032023 0705PM ET.xlsx', 'PIT_BOS 04032023 0710PM ET.xlsx', 'TBR_WSN 04032023 0705PM ET.xlsx', 'TOR_KCR 04032023 0740PM ET.xlsx']
ARI_SDP 04032023 0940PM ET.xlsx
Home Win Rate: 0.502
Median Away Score: 4.0
Median Home Score: 4.0
Median Total: 8.0

ATL_STL 04032023 0745PM ET.xlsx
Home Win Rate: 0.459
Median Away Score: 4.0
Median Home Score: 3.0
Median Total: 8.0

BAL_TEX 04032023 0805PM ET.xlsx
Home Win Rate: 0.573
Median Away Score: 3.0
Median Home Score: 4.0
Median Total: 8.0

CLE_OAK 04032023 0940PM ET.xlsx
Home Win Rate: 0.581
Median Away Score: 3.0
Median Home Score: 4.0
Median Total: 7.0

COL_LAD 04032023 1010PM ET.xlsx
Home Win Rate: 0.493
Median Away Score: 3.0
Median Home Score: 3.0
Median Total: 7.0

DET_HOU 04032023 0810

In [17]:
# Combine the per-matchup player sim workbooks for this date into one CSV
append_all(date=datestr)

In [18]:
# Stamp the run. A single clock read keeps the date and the time consistent
# even when the cell executes right at midnight.
last_run_at = datetime.datetime.now()
print("Code was last run on: {} at {}.".format(last_run_at.date(), last_run_at.strftime("%H:%M:%S")))

Code was last run on: 2023-04-03 at 09:08:21.


In [19]:
# %time
# for filename in os.listdir(r"C:\Users\james\Documents\MLB\Data\A7. Matchups - 1. Salaries"):
#     datestr = filename[11:19]
#     print(datestr)
#     try:
#         DKSalaries, matchup_list, matchup_path, opener_list = create_matchup_list(datestr)
#         print(matchup_list)
#         run_all(date=datestr)
#         append_all(datestr)
#     except:
#         print("Didn't work")

In [20]:
# Manual changes to matchups to ensure pitcher at each leverage level.
# 7/10: TOR_SEA
# 7/31: MIN_SDP
# 8/7: BOS_KCR
# 8/11: CHC_CIN - at Field of Dreams, changed to Cincinnati
# 8/18: Empty list2

In [21]:
# for datestr in ["20220819"]:
#     DKSalaries, matchup_list, matchup_path = create_matchup_list(datestr)
#     print(matchup_list)
#     run_all(date=datestr)
#     append_all(datestr)