In [66]:
import pandas as pd
import numpy as np
import matplotlib as plt

In [24]:
# Root folder that holds the CSV inputs. Change this single constant to run the
# notebook against a different checkout or machine.
DATA_DIR = "/Users/hajowolfram/Documents/SURG_notebooks/data"

# Three input tables, each read from its own sub-folder of DATA_DIR.
# NOTE(review): judging by file names only, these are the lineups, the time
# recorded per lineup and the lineup-to-lineup substitution table — confirm.
lineups_df = pd.read_csv(f"{DATA_DIR}/lineups/gs_lineups.csv")
lineup_times_df = pd.read_csv(f"{DATA_DIR}/lineup_times/gs_lineup_times.csv")
mc_df = pd.read_csv(f"{DATA_DIR}/sub_mcs/gs_sub_mc.csv")

In [63]:
# MLE (maximum likelihood estimate) of the transition rates.
#
# transition_matrix is N x N, where N is the number of rows of mc_df:
#     transition_matrix[j, k] = mc_df.iloc[j, k + 1] / lineup_times_df.loc[j, "x"]
# i.e. entry (j, k) of mc_df (its first column is skipped) divided by the "x"
# value recorded for row j.
# NOTE(review): presumably mc_df holds substitution counts and "x" is the time
# spent in each lineup — confirm against the CSVs.
number_of_lineups = mc_df.shape[1] - 1  # first column excluded

n_states = mc_df.shape[0]

# Square block of the table, without the leading first column.
transition_counts = mc_df.iloc[:n_states, 1:n_states + 1].to_numpy(dtype=float)
if transition_counts.shape != (n_states, n_states):
    raise ValueError(
        f"mc_df must provide {n_states} value columns after the first one, "
        f"got {transition_counts.shape[1]}"
    )

# Label-based lookup of rows 0..N-1, matching the row labels used per entry before.
time_per_lineup = lineup_times_df.loc[list(range(n_states)), "x"].to_numpy(dtype=float)

# Divide every row j by its own "x" value in one vectorised step instead of
# N*N scalar DataFrame look-ups.
transition_matrix = transition_counts / time_per_lineup[:, np.newaxis]

In [91]:
# Goal noted for this section: simulate 8200 games.
# Scale factor number_of_lineups / 100, intended for estimating the total
# number of substitutions in an entire season.
monte_carlo_estimate = number_of_lineups / 100

# Simulation plan:
#   * assume the chain is in state j;
#   * draw exponentially distributed random variables with the rates given by
#     row j of transition_matrix;
#   * the minimum of these samples yields the time spent in state j and
#     identifies the next state k;
#   * initialise with the most common lineup (largest "x" in lineup_times_df)
#     and terminate when the clock reaches 2880 seconds.
busiest_row = lineup_times_df["x"].idxmax()

# NOTE(review): the state id is read from the first column of lineup_times_df;
# confirm that this column is a 0-based row position into transition_matrix.
starting_state = lineup_times_df.iloc[busiest_row, 0]
def simulate_game(starting_state, transition_matrix, game_time=2880, rng=None):
    """Simulate one game of lineup changes as a continuous-time Markov chain.

    While the chain sits in a state, one exponential waiting time is drawn for
    every state that has a positive rate in the current row of
    ``transition_matrix``. The smallest draw is the time spent in the current
    state, and the state that produced it becomes the next state. The
    simulation stops as soon as ``game_time`` has elapsed; the last, cut-off
    interval is credited to the state occupied at that moment.

    Parameters
    ----------
    starting_state : int
        Row index of ``transition_matrix`` occupied at time 0.
    transition_matrix : array-like, shape (N, N)
        ``transition_matrix[j, k]`` is the rate of moving from state ``j`` to
        state ``k``. Entries that are not strictly positive are ignored.
    game_time : float, default 2880
        Total simulated time, in the same unit as ``1 / rate``.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used, so ``np.random.seed`` keeps working for existing callers.

    Returns
    -------
    numpy.ndarray, shape (N, N)
        Every entry of row ``j`` holds the total time spent in state ``j``
        (the value is repeated across the row, which is the layout callers
        already receive). For a positive ``game_time`` any single column
        therefore sums to ``game_time``.
    """
    transition_matrix = np.asarray(transition_matrix)
    sampler = np.random if rng is None else rng

    n_states = transition_matrix.shape[0]
    LU_sim = np.zeros((n_states, n_states))
    current_state = starting_state
    elapsed = 0.0

    while elapsed < game_time:
        rates = transition_matrix[current_state]
        # Remember WHICH states have a positive rate: the arg-min below is a
        # position inside this filtered list, not a state index.
        reachable = np.flatnonzero(rates > 0)

        if reachable.size == 0:
            # No way out of this state: it keeps the floor until the end.
            LU_sim[current_state] += game_time - elapsed
            break

        waits = sampler.exponential(1 / rates[reachable])
        winner = np.argmin(waits)
        min_time = float(waits[winner])

        # Check whether the next change would fall after the final whistle.
        if elapsed + min_time > game_time:
            LU_sim[current_state] += game_time - elapsed
            break

        LU_sim[current_state] += min_time
        elapsed += min_time
        # Map the winning position back to the real state index.
        current_state = reachable[winner]

    return LU_sim

# Run one simulated game from the chosen starting state and wrap the returned
# array in a DataFrame so the first 30 rows can be inspected.
single_sim = simulate_game(starting_state, transition_matrix)
single_sim_df = pd.DataFrame(data=single_sim)
single_sim_df.head(n=30)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,436,437,438,439,440,441,442,443,444,445
0,270.450945,270.450945,270.450945,270.450945,270.450945,270.450945,270.450945,270.450945,270.450945,270.450945,...,270.450945,270.450945,270.450945,270.450945,270.450945,270.450945,270.450945,270.450945,270.450945,270.450945
1,1147.937221,1147.937221,1147.937221,1147.937221,1147.937221,1147.937221,1147.937221,1147.937221,1147.937221,1147.937221,...,1147.937221,1147.937221,1147.937221,1147.937221,1147.937221,1147.937221,1147.937221,1147.937221,1147.937221,1147.937221
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,92.449961,92.449961,92.449961,92.449961,92.449961,92.449961,92.449961,92.449961,92.449961,92.449961,...,92.449961,92.449961,92.449961,92.449961,92.449961,92.449961,92.449961,92.449961,92.449961,92.449961
5,483.623134,483.623134,483.623134,483.623134,483.623134,483.623134,483.623134,483.623134,483.623134,483.623134,...,483.623134,483.623134,483.623134,483.623134,483.623134,483.623134,483.623134,483.623134,483.623134,483.623134
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [92]:
# Sum of every column entry in the first row of the simulated matrix.
single_sim_df.iloc[0, :].sum()

120621.12146211755

In [60]:
# DataFrame view of the estimated rate matrix; the cell displays its
# (rows, columns) shape.
transition_df = pd.DataFrame(data=transition_matrix)
transition_df.shape

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,436,437,438,439,440,441,442,443,444,445
0,0.0,0.000349,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.000897,0.0,0.000179,0.0,0.0,0.0,0.0,0.0,6e-05,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.003268,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.000969,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
# Show the lineup count, i.e. the number of mc_df columns minus the excluded first column.
print(number_of_lineups)

446
