<h1 style="text-align: center;">Schedule Optimization Effort</h1>

## Necessary Libraries

In [106]:
import pandas as pd
import numpy as np 

## Loading in the Data

In [107]:
runs_master = pd.read_excel('files/2025 Big West Conference Runs.xlsx', sheet_name='Conference RunsIP')
runs_master

Unnamed: 0,Team,Conf R/GP,Conf RA/GP,Conf R/IP,Conf RA/IP,Conf ATT,Conf DEF
0,Cal Bap,6.17849,6.1373,0.686499,0.681922,1.000392,1.011012
1,Cal Poly,7.214724,5.042945,0.801636,0.560327,1.168174,0.830736
2,Bakersfield,4.27051,6.086475,0.474501,0.676275,0.691461,1.00264
3,Fullerton,6.0,5.823529,0.666667,0.647059,0.971492,0.959324
4,CSUN,6.236364,7.690909,0.692929,0.854545,1.009763,1.266942
5,LBSU,5.521472,6.478528,0.613497,0.719836,0.894011,1.067223
6,Sacramento State,5.2,5.04,0.577778,0.56,0.84196,0.830251
7,UCI,8.688391,5.389002,0.965377,0.598778,1.406784,0.887743
8,UCR,4.115086,8.020218,0.457232,0.891135,0.666295,1.32119
9,UCSD,7.302632,6.039474,0.811404,0.671053,1.182408,0.994897


### Set Home Field Advantage

In [108]:
hfa = {
    'Cal Bap': 0.23,
    'Cal Poly': 0.18,
    'UCI': 0.1, 
    'UCSB': 0.22,
    'UCR': 0.26,
    'LBSU': 0.22,
    'CSUN': 0.2,
    'UCSD': 0.24,
    'Fullerton': 0.19,
    'Utah Valley': 0.19,
    'Sacramento State': 0.17,
    'Bakersfield': 0.21
}

### Add Expected Runs

In [109]:
# Find the average runs scored by any team in the conference
average_runs = runs_master[runs_master['Team'] == 'Average']['Conf R/GP'].iloc[0]

def find_expected_runs(opponent):
    
    if opponent == 'Average':
        return np.nan 
    if opponent == 'UCSD': 
        return np.nan
    
    #Find our offensive rating
    att = runs_master[runs_master['Team'] == 'UCSD']['Conf ATT'].iloc[0]
    
    #Find opponents defensive rating
    defense = runs_master[runs_master['Team'] == opponent]['Conf DEF'].iloc[0]
    
    #Calculate expected runs
    xruns = (att * defense) * average_runs * np.sqrt(1 + hfa['UCSD'])
    
    return round(xruns, 2)


def find_opp_expected_runs(opponent):
    
    if opponent == 'Average':
        return np.nan
    
    if opponent == 'UCSD':
        return np.nan
    
    att = runs_master[runs_master['Team'] == opponent]['Conf ATT'].iloc[0]
    
    defense = runs_master[runs_master['Team'] == 'UCSD']['Conf DEF'].iloc[0]
    
    xruns = (att * defense) * average_runs * np.sqrt(1 + hfa[opponent])
    
    return round(xruns, 2)
    

runs_master['ucsd_xruns'] = runs_master['Team'].apply(find_expected_runs) 
runs_master['x_runs'] = runs_master['Team'].apply(find_opp_expected_runs)

runs_master
    

Unnamed: 0,Team,Conf R/GP,Conf RA/GP,Conf R/IP,Conf RA/IP,Conf ATT,Conf DEF,ucsd_xruns,x_runs
0,Cal Bap,6.17849,6.1373,0.686499,0.681922,1.000392,1.011012,8.22,6.82
1,Cal Poly,7.214724,5.042945,0.801636,0.560327,1.168174,0.830736,6.76,7.8
2,Bakersfield,4.27051,6.086475,0.474501,0.676275,0.691461,1.00264,8.15,4.67
3,Fullerton,6.0,5.823529,0.666667,0.647059,0.971492,0.959324,7.8,6.51
4,CSUN,6.236364,7.690909,0.692929,0.854545,1.009763,1.266942,10.3,6.8
5,LBSU,5.521472,6.478528,0.613497,0.719836,0.894011,1.067223,8.68,6.07
6,Sacramento State,5.2,5.04,0.577778,0.56,0.84196,0.830251,6.75,5.6
7,UCI,8.688391,5.389002,0.965377,0.598778,1.406784,0.887743,7.22,9.07
8,UCR,4.115086,8.020218,0.457232,0.891135,0.666295,1.32119,10.74,4.6
9,UCSD,7.302632,6.039474,0.811404,0.671053,1.182408,0.994897,,


### Developing Probability Distribution

In [122]:
# Setting constants
m = -0.01219 
n = -1.1813
p = -0.3865
b = -1.042

# Setting 12 as the maximum runs/inning to consider
R_max = 12

In [134]:
def compute_C(A):
    numerator = A  # Expected total runs in game
    denominator = 9 * sum(
        (r * np.exp(m * r + p * r + n * (r / A) + b))
        for r in range(1, R_max + 1)
    ) 
    return numerator / denominator

def probability(A, R):
    C = compute_C(A)
    if R > 0:
        return C * np.exp(m * A + p * R + n * (R / A) + b)
    else:  # R == 0
        sum_term = sum(
            np.exp(m * A + p * r + n * (r / A) + b)
            for r in range(1, R_max + 1)
        )
        return 1 - C * sum_term

### Simulate single games using runs/inning probability distribution & season simulations

In [135]:
# Develop static inning probability distributions for each team we're playing

def inning_probabilities(A):
    probs = [probability(A, r) for r in range(R_max + 1)]
    probs = np.array(probs)
    probs = probs / probs.sum()  # Normalize to exactly sum to 1
    return probs

In [136]:
# Simulate a single game
def simulate_game_custom(ucsd_xruns, opp_xruns, innings=9):
    ucsd_probs = inning_probabilities(ucsd_xruns)
    opp_probs = inning_probabilities(opp_xruns)
    
    ucsd_score = np.sum(np.random.choice(range(R_max+1), p=ucsd_probs, size=innings))
    opp_score = np.sum(np.random.choice(range(R_max+1), p=opp_probs, size=innings))
    
    return ucsd_score, opp_score

In [142]:
## Takes about 2.5 minutes to run for 50,000 games per team

# Simulating 50,000 games per team and calculating a win probability
def simulate_season_custom(df, sims_per_game=50000):
    np.random.seed(42)
    results = []
    
    # Loop through the dataframe and simulate games
    for _, row in df.iterrows():
        team = row['Team']
        if team not in ['UCSD', 'Average']:
            ucsd_xruns = row['ucsd_xruns']
            opp_xruns = row['x_runs']
            
            # Gather game results and win pcts
            game_results = [simulate_game_custom(ucsd_xruns, opp_xruns) for _ in range(sims_per_game)]
            win_pct = sum(1 for u, o in game_results if u > o) / sims_per_game
            results.append((team, win_pct, game_results))
    
    return pd.DataFrame(results, columns=['Team', 'Win Percentage', 'Game Results'])

season_results = simulate_season_custom(runs_master)
season_results


Unnamed: 0,Team,Win Percentage,Game Results
0,Cal Bap,0.54604,"[(7, 8), (1, 9), (4, 11), (5, 8), (7, 3), (3, ..."
1,Cal Poly,0.40742,"[(2, 1), (1, 4), (1, 6), (22, 4), (6, 12), (1,..."
2,Bakersfield,0.67614,"[(0, 9), (0, 3), (31, 1), (7, 13), (2, 1), (1,..."
3,Fullerton,0.53666,"[(11, 13), (0, 3), (3, 2), (7, 14), (7, 2), (5..."
4,CSUN,0.64544,"[(13, 13), (7, 8), (11, 6), (15, 3), (3, 7), (..."
5,LBSU,0.61326,"[(5, 5), (12, 12), (4, 2), (6, 9), (4, 4), (10..."
6,Sacramento State,0.53472,"[(3, 1), (2, 7), (9, 13), (0, 5), (5, 5), (7, ..."
7,UCI,0.3692,"[(1, 5), (10, 9), (6, 5), (8, 14), (16, 11), (..."
8,UCR,0.78772,"[(12, 3), (7, 0), (12, 4), (17, 3), (7, 3), (1..."
9,UCSB,0.40906,"[(1, 5), (7, 3), (3, 11), (5, 6), (8, 0), (17,..."


In [143]:
season_results.to_csv('simulated_results.csv')

In [None]:
def gather_game_results(df, sims_per_game=50000):
    
    np.random.seed(42)
    all_game_results = {}
    
    # Loop through the dataframe and simulate games
    for _, row in df.iterrows():
        team = row['Team']
        if team not in ['UCSD', 'Average']:
            ucsd_xruns = row['ucsd_xruns']
            opp_xruns = row['x_runs']
            
            # Gather game results and win pcts
            game_results = [simulate_game_custom(ucsd_xruns, opp_xruns) for _ in range(sims_per_game)]
            all_game_results[team] = game_results
    
    return all_game_results