# 10. Sims
Source: <br>
1. "8. Weather"
2. "9. Matchups"
3. Statcast Data for base rates
4. Models<br>

Description: This simulates games. Output includes player fantasy points and game scores <br>

### Imports 

In [1]:
# Read in models, double play rates, double play base rates (where the outs are), advance rates (who advances)
def sim_imports(binary_model, out_model, safe_model, pull_model):
    """Load the pickled PA models and the Statcast rate tables used by the sims.

    Parameters
    ----------
    binary_model, out_model, safe_model, pull_model : str
        File names (relative to the notebook-level ``model_path``) of the
        pickled binary (out vs. safe), out, safe and pull models.

    Returns
    -------
    tuple
        ``(model_binary, model_outs, model_safe, pull, dp_rates,
        dp_base_rates, advances, late_list, broken_list)`` where

        * ``dp_rates`` / ``dp_base_rates`` are DataFrames indexed by
          ``(runner_1b, runner_2b, runner_3b)``,
        * ``advances`` is the advance-rate table converted to a nested dict
          keyed by column name, then by its ``dp`` value,
        * ``late_list`` (matchups to include) and ``broken_list`` (matchups
          to exclude) are fresh empty lists for the caller to fill in.
    """
    def _load_pickle(file_name):
        # SECURITY NOTE: unpickling executes arbitrary code embedded in the
        # file, so only model files produced by this project should be loaded.
        # The context manager guarantees the handle is closed after loading.
        with open(os.path.join(model_path, file_name), 'rb') as handle:
            return pickle.load(handle)

    def _read_statcast(csv_name):
        return pd.read_csv(os.path.join(baseball_path, "Statcast Data", csv_name))

    # PA models (neural network stat models)
    model_binary = _load_pickle(binary_model)   # Binary (out vs. safe) model
    model_outs = _load_pickle(out_model)        # Out model
    model_safe = _load_pickle(safe_model)       # Safe model
    pull = _load_pickle(pull_model)             # Pull model

    # Both double play tables are looked up by base state
    base_state = ["runner_1b", "runner_2b", "runner_3b"]

    # Double play rates
    dp_rates = _read_statcast("double_play_rates.csv")
    dp_rates.set_index(base_state, inplace=True)

    # Double play base rates
    dp_base_rates = _read_statcast("double_play_base_rates.csv")
    dp_base_rates.set_index(base_state, inplace=True)

    # Advances
    advances = _read_statcast("advance_rates.csv").set_index("dp").to_dict()

    # Late list (include)
    late_list = []

    # Broken list (exclude)
    broken_list = []

    return model_binary, model_outs, model_safe, pull, dp_rates, dp_base_rates, advances, late_list, broken_list

In [8]:
# Read in daily weather
def create_weather_df(date):
    """Return the daily weather sheet for ``date`` indexed by ``venue_id``.

    The workbook is read from the "8. Weather/A. Swish Analytics" folder under
    the notebook-level ``baseball_path``.
    """
    # Locate the workbook for this date
    weather_file = os.path.join(
        baseball_path, "8. Weather", "A. Swish Analytics", f'Daily_Weather_{date}.xlsx'
    )

    # Read it and key the rows by venue id in one step
    return pd.read_excel(weather_file).set_index('venue_id')

In [9]:
# Read in salaries and create list of matchups
def create_matchup_list(contestKey, date, late_list, late=False):
    """Read the contest salaries and work out which matchups to simulate.

    Parameters
    ----------
    contestKey : identifier used in the salary file name and in the
        ``Matchups{contestKey}`` folder names.
    date : unused here; kept so existing callers keep working.
    late_list : list of matchup file names to use instead of the folder
        listing when ``late`` is true.
    late : bool, default False
        When false, every file in the matchup folder is returned and the
        player/score output folders for this contest are created. When true,
        ``late_list`` is returned as the matchup list and no folders are
        created.

    Returns
    -------
    tuple
        ``(DKSalaries, matchup_list, matchup_path, opener_list)``.
    """
    # Clean DK salaries for merge
    salary_file = os.path.join(baseball_path, "2. Contests", "C. Salaries", f'Salaries {contestKey}.csv')
    DKSalaries = pd.read_csv(salary_file, encoding='iso-8859-1')

    # Change Ohtani's code. He won't match otherwise.
    DKSalaries['ID'] = np.where(DKSalaries['Name'] == "Shohei Ohtani", 134045, DKSalaries['ID'])

    # Name of the per-contest folder where game sims will end up
    directory = "Matchups" + str(contestKey)

    # Matchups
    matchup_path = os.path.join(baseball_path, "9. Matchups", "B. Matchups", f'Matchups{contestKey}')

    if late:
        # Only the late matchups are re-run; output folders already exist
        matchup_list = late_list
    else:
        matchup_list = os.listdir(matchup_path)
        # Full run: make sure both output folders exist. exist_ok makes a
        # re-run a no-op while real failures (e.g. permissions) surface
        # instead of being silently swallowed.
        for output_folder in ("A. Players", "B. Scores"):
            os.makedirs(os.path.join(baseball_path, "10. Sims", output_folder, directory), exist_ok=True)

    # Identify openers by low salary
    is_pitcher = DKSalaries['Roster Position'].isin(["SP", "RP"])
    is_cheap = DKSalaries['Salary'] < 5000
    opener_list = list(DKSalaries.loc[is_pitcher & is_cheap, 'Name'].unique())

    return DKSalaries, matchup_list, matchup_path, opener_list

# Simulations

In [10]:
# This runs the sims for one matchup, averages the stats, and merges the results with DK Salaries
def create_matchup_df(matchup, DKSalaries, weather_df, date, contestKey, opener_list,
            team_map, pull, model_binary, model_outs, model_safe, dp_rates, dp_base_rates,
            advances, num_sims):
    """Simulate one matchup ``num_sims`` times and summarise the results.

    Each simulation is run through ``sim_matchup`` in parallel. Its output is
    read positionally: element 0 is the per-player frame, element 1 the away
    score and element 2 the home score.

    Parameters
    ----------
    matchup : matchup file name inside the contest's matchup folder.
    DKSalaries : DataFrame of salaries; merged onto the sims on ``'ID'``.
    weather_df, team_map, pull, model_binary, model_outs, model_safe,
    dp_rates, dp_base_rates, advances, opener_list :
        passed through unchanged to ``create_game`` / ``sim_matchup``.
    date : unused here; kept for interface compatibility.
    contestKey : identifies the ``Matchups{contestKey}`` folder.
    num_sims : int, number of simulations to run.

    Returns
    -------
    tuple
        ``(player_sims, score_df)``: the salary rows inner-joined with one
        ``FP{i}`` column per simulation plus ``{stat}_avg`` columns, and a
        DataFrame with one ``Away``/``Home`` score row per simulation.
    """
    # Matchup folder
    matchup_path = os.path.join(baseball_path, "9. Matchups", "B. Matchups", f'Matchups{contestKey}')

    # List of game scores (handed to every sim call)
    all_scores_list = []

    # Create game template
    game_template = create_game(matchup, matchup_path, team_map, weather_df)

    # Run game simulations
    output = Parallel(n_jobs=-1, verbose=0)(
        delayed(sim_matchup)(matchup=matchup, matchup_path=matchup_path, team_map=team_map, weather_df=weather_df, pull=pull,
                             model_binary=model_binary, model_outs=model_outs, model_safe=model_safe,
                             dp_rates=dp_rates, dp_base_rates=dp_base_rates, advances=advances, opener_list=opener_list,
                             all_scores_list=all_scores_list, game_template=game_template)
        for _ in range(num_sims)
    )

    # Split each sim result into player frame, away score and home score
    player_output = [game_sim[0] for game_sim in output]
    away_score = [game_sim[1] for game_sim in output]
    home_score = [game_sim[2] for game_sim in output]

    # Create score dataframe
    score_df = pd.DataFrame(list(zip(away_score, home_score)), columns=['Away', 'Home'])

    # Append all player sims together (side by side, so stat names repeat once per sim)
    player_sims = pd.concat(player_output, axis=1)

    # Average statistics
    column_names = ['B1', 'B2', 'B3', 'HR', 'BB', 'HBP', 'H', 'SO', 'CS', 'SB', 'R', 'RBI',
                    'ER', 'winning', 'W', 'CG', 'CGSO', 'NH', 'FP', 'PA', 'faced', 'starter', 'OUT']
    available = set(player_sims.columns)
    for col in column_names:
        if col in available:
            # Select by mask so the result is always a DataFrame, even when
            # only one sim was run (a plain [col] lookup would then give a
            # Series, which has no axis=1 to average over).
            per_sim_values = player_sims.loc[:, player_sims.columns == col]
            player_sims[col + '_avg'] = per_sim_values.mean(axis=1)

    # Drop everything but FP. Stats that never appeared are skipped, matching
    # the existence check used when averaging.
    raw_stats = [col for col in column_names if col != 'FP']
    player_sims.drop(columns=raw_stats, inplace=True, errors='ignore')

    # Give every sim's FP column its own name: FP0, FP1, ...
    renamed = []
    fp_seen = 0
    for column in player_sims.columns:
        if column == "FP":
            renamed.append(f'FP{fp_seen}')
            fp_seen += 1
        else:
            renamed.append(column)
    player_sims.columns = renamed

    # Remaining repeated columns are kept once (first occurrence)
    player_sims = player_sims.loc[:, ~player_sims.columns.duplicated()].copy()

    # Merge all scores onto players
    player_sims = DKSalaries.merge(player_sims, on='ID', how='inner')

    return player_sims, score_df

In [11]:
# Run all simulations in a slate
def run_all(team_map, pull, model_binary, model_outs, model_safe, dp_rates, dp_base_rates,
            advances, num_sims, broken_list, date, contestKey, late_list, late):
    """Simulate every matchup in a slate and export the results to Excel.

    For each ``.xlsx`` matchup this writes a player-sim workbook under
    "10. Sims/A. Players/Matchups{contestKey}" and a score workbook under
    "10. Sims/B. Scores/Matchups{contestKey}", then prints summary score
    statistics.

    Parameters
    ----------
    team_map, pull, model_binary, model_outs, model_safe, dp_rates,
    dp_base_rates, advances, num_sims :
        passed through to ``create_matchup_df``.
    broken_list : unused here; kept for interface compatibility.
    date : used to locate the daily weather file.
    contestKey : identifies the salary file and matchup/output folders.
    late_list : matchup file names to run when ``late`` is true.
    late : bool. When false, existing player-sim workbooks for the contest
        are deleted first; when true, only ``late_list`` is re-run and its
        files overwrite the old ones.
    """
    directory = "Matchups" + str(contestKey)

    # Output folders for this contest
    player_dir = os.path.join(baseball_path, "10. Sims", "A. Players", directory)
    score_dir = os.path.join(baseball_path, "10. Sims", "B. Scores", directory)

    # Delete all player sims if we're running the whole thing from scratch.
    # We'll just write over the ones we want to replace if we're using a late_list
    if not late:
        for stale_file in glob.glob(os.path.join(player_dir, "*.xlsx")):
            os.remove(stale_file)

    # Create weather data
    weather_df = create_weather_df(date)
    # Create matchup information
    DKSalaries, matchup_list, _, opener_list = create_matchup_list(contestKey, date, late_list, late)

    # Define a custom sorting key function that extracts the numbers from characters 17-20 in each string (the game time)
    # Note: AM games can mess this up a bit
    def sort_key(string):
        return int(string[17:21])

    # Attempt to sort matchups on game time. sorted() leaves the caller's
    # late_list untouched; names that do not carry a numeric time in that
    # position simply leave the original order in place.
    try:
        matchup_list = sorted(matchup_list, key=sort_key)
    except (ValueError, TypeError):
        pass

    print(matchup_list)
    # This loops over matchups and creates matchups and then exports them to excel
    for matchup in matchup_list:
        if not matchup.endswith(".xlsx"):
            continue
        print(matchup)

        # Create matchup data (sims)
        player_sims, score_df = create_matchup_df(matchup, DKSalaries, weather_df, date, contestKey, opener_list,
                team_map=team_map, pull=pull, model_binary=model_binary, model_outs=model_outs, model_safe=model_safe, dp_rates=dp_rates, dp_base_rates=dp_base_rates,
                advances=advances, num_sims=num_sims)

        # Fill missings with 0s
        player_sims.fillna(0, inplace=True)

        # Rename for compatibility with optimizer
        player_sims.rename(columns={'Name_x': 'Name', 'batting_order':'Roster Order'}, inplace=True)
        # Drop merge leftovers; either column may be absent
        player_sims.drop(columns=['Unnamed: 0', 'Name_y'], inplace=True, errors='ignore')
        # Convert to numeric
        player_sims['Roster Order'] = player_sims['Roster Order'].astype(int)

        # Set baseline export constraints
        player_sims['Min Exposure'] = 0.0
        # Both branches are 0.5 right now; the alternative pitcher cap is kept below for reference
        # player_sims['Max Exposure'] = np.where(player_sims['Roster Position'] == "P", 1, 0.5)
        player_sims['Max Exposure'] = np.where(player_sims['Roster Position'] == "P", 0.5, 0.5)

        # Average score across every column whose name contains "FP"
        points_cols = [col for col in player_sims.columns if 'FP' in col]
        player_sims['AvgPointsPerGame'] = player_sims[points_cols].mean(axis=1)

        # Sims
        player_sims.to_excel(os.path.join(player_dir, matchup))

        # Game Scores (exported before the summary columns below are added)
        score_df.to_excel(os.path.join(score_dir, matchup))

        # Home wins
        score_df['home_win'] = np.where(score_df['Away'] < score_df['Home'], 1, 0)
        # Total runs
        score_df['total'] = score_df['Away'] + score_df['Home']

        # Print game score information
        print("Home Win Rate: " + str(score_df['home_win'].mean()))
        print("Mean Away Score: " + str(score_df['Away'].mean()))
        print("Mean Home Score: " + str(score_df['Home'].mean()))
        print("Mean Total: " + str(score_df['total'].mean()))
        print("Median Total: " + str(score_df['total'].median()) + "\n")

In [12]:
# This appends all games together
def append_all(date, contestKey):
    """Combine every per-matchup player-sim workbook of a contest into one CSV.

    Reads all ``*.xlsx`` files in "10. Sims/A. Players/Matchups{contestKey}",
    stacks them, sorts by ``AvgPointsPerGame`` (highest first), removes
    columns whose name starts with ``Unnamed`` and writes
    "Player Sims {contestKey}.csv" into "10. Sims/A. Players".

    Parameters
    ----------
    date : unused here; kept for interface compatibility.
    contestKey : identifies the input folder and the output file name.

    Raises
    ------
    ValueError
        If the contest folder contains no player-sim workbooks.
    """
    players_dir = os.path.join(baseball_path, "10. Sims", "A. Players")

    # Find all Excel files (sorted so the combined output is reproducible)
    player_sim_files = sorted(glob.glob(os.path.join(players_dir, "Matchups" + str(contestKey), "*.xlsx")))
    if not player_sim_files:
        # pd.concat would fail with an unhelpful message on an empty list
        raise ValueError(f"No player sim files found for contest {contestKey}")

    # Append all together
    matchup_sim_list = [pd.read_excel(filename, index_col=None, header=0) for filename in player_sim_files]
    all_matchup_sims = pd.concat(matchup_sim_list, axis=0, ignore_index=True)

    # Set file name
    sim_file = "Player Sims " + str(contestKey) + ".csv"

    # Sort
    all_matchup_sims.sort_values(['AvgPointsPerGame'], ascending=False, inplace=True)
    # Clean name
    all_matchup_sims = all_matchup_sims.loc[:, ~all_matchup_sims.columns.str.startswith('Unnamed')]

    # Export to CSV
    all_matchup_sims.to_csv(os.path.join(players_dir, sim_file), encoding='iso-8859-1')

### Run

In [14]:
# Run simulations in the dashboard
def run_sims(team_map, pull, model_binary, model_outs, model_safe, dp_rates, dp_base_rates, advances, broken_list, 
             date, contestKey, last_list, num_sims, late, historic):
    """Run simulations from the dashboard, for one contest or for the history.

    Parameters
    ----------
    team_map : DataFrame with a ``BBREFTEAM`` column. It is indexed by that
        column for the duration of the run and restored afterwards.
    pull, model_binary, model_outs, model_safe, dp_rates, dp_base_rates,
    advances, broken_list, num_sims, late :
        passed through to ``run_all``.
    date, contestKey : contest to simulate when ``historic`` is false.
    last_list : matchup file names handed to ``run_all`` as its
        ``late_list`` for a non-historic run.
    historic : bool. When true, every contest in the notebook-level
        ``history`` table (columns ``contestKey`` and ``date``) is simulated
        in turn; a failing contest is reported and skipped.
    """
    # Set index
    team_map.set_index('BBREFTEAM', inplace=True)

    try:
        # Non-historic (day-to-day) run simulations
        if not historic:
            # Run all sims (the late list comes from this function's own
            # ``last_list`` argument rather than a same-named global)
            run_all(team_map=team_map, pull=pull, model_binary=model_binary, model_outs=model_outs, model_safe=model_safe, 
                    dp_rates=dp_rates, dp_base_rates=dp_base_rates, advances=advances, broken_list=broken_list, 
                    date=date, contestKey=contestKey, late_list=last_list, num_sims=num_sims, late=late)
            # Append all player sims together
            append_all(date, contestKey)

        # Historic simulations
        else:
            # Loop over contest history (pairing the columns avoids relying
            # on the table having a 0..n-1 index)
            for past_key, past_date in zip(history['contestKey'], history['date']):
                print("Contest: {}, Date: {}".format(past_key, past_date))
                try:
                    # Run all sims
                    run_all(team_map=team_map, pull=pull, model_binary=model_binary, model_outs=model_outs, model_safe=model_safe, 
                            dp_rates=dp_rates, dp_base_rates=dp_base_rates, advances=advances, broken_list=broken_list, 
                            date=past_date, contestKey=past_key, late_list=[], num_sims=num_sims, late=late)
                    # Append all player sims together
                    append_all(date=past_date, contestKey=past_key)
                except Exception as error:
                    # Skip the broken contest but say why; Ctrl-C still
                    # interrupts the loop because only Exception is caught.
                    print("Didn't work")
                    print("  {}: {}".format(type(error).__name__, error))
    finally:
        # Reset index (gets team map back to usable form for use in dashboard),
        # even when a run fails part-way through
        team_map.reset_index(inplace=True)