In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
# Load the league roster once; this table is passed to simulate_a_game for
# every simulated game further down.
path_to_players = "../simulated-data/league_players.csv"
players = pd.read_csv(path_to_players)

In [3]:
def game_day_performance(players_df, stat):
    """Draw one random game-day value of ``stat`` for every player.

    Each player's value is sampled from a normal distribution whose mean and
    standard deviation are read from the ``f"{stat}_mean"`` and
    ``f"{stat}_std"`` columns of ``players_df``. Negative draws are clamped
    to 0.

    Parameters
    ----------
    players_df : pandas.DataFrame
        One row per player; must contain the two columns named above.
    stat : str
        Statistic prefix used to build the column names (e.g. ``"Goals"``).

    Returns
    -------
    numpy.ndarray
        1-D float array with one non-negative value per row, in row order.
        Empty when ``players_df`` has no rows.

    Raises
    ------
    KeyError
        If either of the two columns is missing.
    ValueError
        Propagated from ``np.random.normal`` if a standard deviation is
        negative.
    """
    # Read the parameters positionally and sample every player in a single
    # vectorized call. This avoids one label lookup per row and, unlike
    # ``players_df.loc[i]``, does not misbehave when index labels repeat.
    means = players_df[f"{stat}_mean"].to_numpy(dtype=float)
    stds = players_df[f"{stat}_std"].to_numpy(dtype=float)
    gdp = np.random.normal(loc=means, scale=stds)

    # A negative performance has no meaning here, so floor it at zero.
    gdp[gdp < 0] = 0
    return gdp

def normalize_performance(gdp):
    """Scale performance values so they can be used as probabilities.

    Values are divided by their total. When the total is exactly zero, a
    uniform distribution over the entries is returned instead.
    """
    total = np.sum(gdp)
    if total != 0:
        return gdp / total
    # Nothing to weigh by: give every entry the same share.
    uniform_weights = np.ones_like(gdp)
    return uniform_weights / len(gdp)

def simulate_a_game(date, teams, players_df):
    """Simulate a single game between two teams and build its player report.

    The simulation picks one starting goalie per team, picks which starter
    wins, draws a final score in which the winner has strictly more goals,
    credits each goal (and up to two assists per goal) to skaters, and draws
    hits, blocks and saves. A play-by-play log is printed along the way.

    Players are matched between ``players_df`` and the report by ``name``,
    so names need to be unique across both rosters for the per-player
    counts to be attributed correctly.

    Parameters
    ----------
    date
        Game date; it is only interpolated into the log line and stored in
        the report's ``date`` column.
    teams : sequence
        The two team names, matched against ``players_df["team"]``.
    players_df : pandas.DataFrame
        Roster table with ``name``, ``team`` and ``position`` columns
        (goalies have position ``"G"``) plus ``<stat>_mean`` / ``<stat>_std``
        columns for ``Game_Starts``, ``Wins``, ``Goals``, ``Assists``,
        ``Hits``, ``Blocks`` and ``Saves``.

    Returns
    -------
    final_score : numpy.ndarray
        Two integers, in the same order as ``teams``.
    game_report_df : pandas.DataFrame
        One row per rostered player of either team with the columns
        ``name``, ``team``, ``position``, ``Goals``, ``Assists``, ``Hits``,
        ``Blocks``, ``Game_Starts``, ``Wins``, ``Saves`` and ``date``.
    """
    print(f"Simulating a game on {date} between the {teams[0]} and {teams[1]}")
    players_by_team = [
        players_df.loc[players_df["team"] == team] for team in teams
    ]

    # Start the report with one row per rostered player (team 0 first, then
    # team 1) and every counting stat at zero.
    game_report_df = pd.DataFrame(
        {
            column: [
                value
                for team_players in players_by_team
                for value in team_players[column].to_list()
            ]
            for column in ("name", "team", "position")
        }
    )
    for stat_column in (
        "Goals", "Assists", "Hits", "Blocks", "Game_Starts", "Wins", "Saves"
    ):
        game_report_df[stat_column] = 0

    # Choose a starter per team, weighted by a sampled "Game_Starts" performance
    goalies_by_team = [
        team_players.loc[team_players["position"] == "G"]
        for team_players in players_by_team
    ]
    starters_by_team = []
    for goalies in goalies_by_team:
        starting_proba = normalize_performance(
            game_day_performance(goalies, "Game_Starts")
        )
        starters_by_team.append(
            np.random.choice(goalies.index.to_list(), p=starting_proba)
        )

    starter_names_by_team = players_df.loc[starters_by_team, "name"].to_list()
    print(f"\tStarting goaltender in tonight's game: {starter_names_by_team[0]} and {starter_names_by_team[1]}")
    game_report_df.loc[
        game_report_df["name"].isin(starter_names_by_team), "Game_Starts"
    ] = 1

    # Flip a weighted coin to choose the winning goalie
    win_performances = np.array(
        [
            max(
                np.random.normal(
                    loc=players_df.loc[starter_index, "Wins_mean"],
                    scale=players_df.loc[starter_index, "Wins_std"],
                ),
                0,
            )
            for starter_index in starters_by_team
        ]
    )
    winning_probabilities = normalize_performance(win_performances)
    # Draw the winning *position* rather than the name, so the team index
    # never has to be recovered through a name lookup.
    winning_index = int(
        np.random.choice(len(starter_names_by_team), p=winning_probabilities)
    )
    losing_index = 1 - winning_index
    winner = starter_names_by_team[winning_index]
    game_report_df.loc[game_report_df["name"] == winner, "Wins"] = 1

    # Randomly generate a score; redraw until the winner is strictly ahead
    while True:
        final_score = np.abs(np.random.normal(loc=0, scale=4, size=2)).astype(int)
        if final_score[winning_index] > final_score[losing_index]:
            break
    print(f"\tFinal Score {final_score[0]} - {final_score[1]}")

    # For every goal, draw how many assists (0, 1 or 2) it gets
    final_assists = [
        np.random.randint(low=0, high=3, size=goals) for goals in final_score
    ]

    # Sample a game-day "performance" per skater to weight goals and assists
    skaters_by_team = [
        team_players.loc[team_players["position"] != "G"]
        for team_players in players_by_team
    ]
    goals_perf = [
        normalize_performance(game_day_performance(team_players, "Goals"))
        for team_players in skaters_by_team
    ]
    assists_perf = [
        game_day_performance(team_players, "Assists")
        for team_players in skaters_by_team
    ]

    for i, score in enumerate(final_score):
        print(f"\tGoal Scoring for {teams[i]}:")
        skaters = skaters_by_team[i]
        skater_names = skaters["name"].to_list()
        team_assists_perf = assists_perf[i]

        # Choose the goal scorers (a skater may score more than once)
        goals_scorers = np.random.choice(
            skater_names,
            size=score,
            replace=True,
            p=goals_perf[i],
        )

        # Choose the assisting players for each goal
        for goal_scorer, requested_assists in zip(goals_scorers, final_assists[i]):
            print(f"\t\tScored by:{goal_scorer}")
            game_report_df.loc[game_report_df["name"] == goal_scorer, "Goals"] += 1

            # The scorer cannot assist on their own goal: drop them from
            # both the candidate list and the weight vector.
            scorer_position = skater_names.index(goal_scorer)
            candidate_names = (
                skater_names[:scorer_position] + skater_names[scorer_position + 1:]
            )
            candidate_proba = normalize_performance(
                np.array(
                    team_assists_perf[:scorer_position].tolist()
                    + team_assists_perf[scorer_position + 1:].tolist()
                )
            )

            # Sampling without replacement needs at least as many candidates
            # with non-zero probability as assists requested; otherwise
            # np.random.choice raises ValueError. Cap the count instead.
            n_assists = min(
                int(requested_assists), int(np.count_nonzero(candidate_proba))
            )
            if n_assists > 0:
                assist_scorers = np.random.choice(
                    candidate_names,
                    size=n_assists,
                    replace=False,
                    p=candidate_proba,
                )
            else:
                assist_scorers = []

            for assisting_player in assist_scorers:
                print(f"\t\t\tassisted by {assisting_player}")
                game_report_df.loc[
                    game_report_df["name"] == assisting_player, "Assists"
                ] += 1

        # Hits and blocks for every skater of this team (row order in the
        # report matches the order of ``skaters``).
        is_team_skater = game_report_df["name"].isin(skater_names)
        game_report_df.loc[is_team_skater, "Hits"] = (
            game_day_performance(skaters, "Hits").astype(int)
        )
        game_report_df.loc[is_team_skater, "Blocks"] = (
            game_day_performance(skaters, "Blocks").astype(int)
        )

        # Simulate saves for this team's starting goalie
        starter_index = starters_by_team[i]
        saves_by_team = max(
            int(
                np.random.normal(
                    loc=players_df.loc[starter_index, "Saves_mean"],
                    scale=players_df.loc[starter_index, "Saves_std"],
                )
            ),
            0,
        )
        game_report_df.loc[
            game_report_df["name"] == starter_names_by_team[i], "Saves"
        ] = saves_by_team
        print(f"\tSaves made by {starter_names_by_team[i]}: {saves_by_team}")

    # Return game report
    game_report_df["date"] = date
    return final_score, game_report_df

In [4]:
# Simulate every scheduled game of five seasons (2015-16 onward) and write one
# CSV game report per game.
for season in range(5):
    print("Simulating season", season)
    year = 2015 + season
    path_to_schedule = f"../simulated-data/schedules/season_{season}_{year}-{year+1}.csv"
    schedule = pd.read_csv(path_to_schedule)

    # Make sure this season's output folder exists; to_csv does not create
    # missing directories and would fail on a fresh checkout.
    report_dir = f"../simulated-data/game_reports/season_{season}"
    os.makedirs(report_dir, exist_ok=True)

    # NOTE(review): ``scores`` is rebuilt for every season, so after the loop
    # it only holds the last season's results -- confirm that is intended.
    scores = []
    for index, row in schedule.iterrows():
        d = row["date"]
        t = row[["team_1","team_2"]].to_list()
        score, report = simulate_a_game(d, t, players)
        report.to_csv(f"{report_dir}/{t[0]}_{t[1]}_{d}_game_report.csv")
        scores.append((d, t, score))

Simulating season 0
Simulating a game on 2015-12-12 between the Hat-Trick Heroes and No Woman, No Krejci
	Starting goaltender in tonight's game: Diane Anderson and Anthony Salazar
	Final Score 11 - 0
	Goal Scoring for Hat-Trick Heroes:
		Scored by:Dustin Medina
			assisted by David Lee
		Scored by:David Lee
		Scored by:Kelly Edwards
			assisted by Daniel Jackson
		Scored by:Daniel Montoya
			assisted by Dustin Medina
		Scored by:Nicole Rodriguez
			assisted by Misty Richards
			assisted by Dustin Medina
		Scored by:David Lee
			assisted by Nicole Rodriguez
			assisted by Daniel Jackson
		Scored by:Misty Richards
			assisted by Lisa Long
		Scored by:Daniel Montoya
		Scored by:Daniel Montoya
			assisted by Kelly Edwards
			assisted by Ronald Hill
		Scored by:Dustin Medina
			assisted by Kelly Edwards
			assisted by Lisa Long
		Scored by:Lisa Long
	Saves made by Diane Anderson: 27
	Goal Scoring for No Woman, No Krejci:
	Saves made by Anthony Salazar: 43
Simulating a game on 2016-01-18 bet

In [9]:
# Sanity check: np.random.choice with size=0 returns an empty array rather
# than raising. simulate_a_game relies on this when a team scores 0 goals.
np.random.choice([1, 2, 3], size=0)

array([], dtype=int64)