<h1 style="text-align: center;">Schedule Optimization Effort</h1>

## Necessary Libraries

In [1]:
import pandas as pd
import numpy as np 
import itertools


## Loading in the Data

In [9]:
# Read the per-team conference run rates from the 'Conference RunsIP' sheet
# and display the resulting frame.
runs_master = pd.read_excel(
    'files/2025 Big West Conference Runs.xlsx',
    sheet_name='Conference RunsIP',
)
runs_master

Unnamed: 0,Team,Conf R/GP,Conf RA/GP,Conf R/IP,Conf RA/IP,Conf ATT,Conf DEF
0,Cal Bap,6.17849,6.1373,0.686499,0.681922,1.000392,1.011012
1,Cal Poly,7.214724,5.042945,0.801636,0.560327,1.168174,0.830736
2,Bakersfield,4.27051,6.086475,0.474501,0.676275,0.691461,1.00264
3,Fullerton,6.0,5.823529,0.666667,0.647059,0.971492,0.959324
4,CSUN,6.236364,7.690909,0.692929,0.854545,1.009763,1.266942
5,LBSU,5.521472,6.478528,0.613497,0.719836,0.894011,1.067223
6,Sacramento State,5.2,5.04,0.577778,0.56,0.84196,0.830251
7,UCI,8.688391,5.389002,0.965377,0.598778,1.406784,0.887743
8,UCR,4.115086,8.020218,0.457232,0.891135,0.666295,1.32119
9,UCSD,7.302632,6.039474,0.811404,0.671053,1.182408,0.994897


## Simulation

### Set Home Field Advantage

In [23]:
# Home field advantage per team, keyed by the team name used in runs_master['Team'].
# The expected-runs functions multiply a team's expected runs by sqrt(1 + hfa[team]).
# NOTE(review): 'UCSB' and 'Utah Valley' have entries here but do not appear in the
# runs_master table displayed above — confirm the two sources use the same team list.
hfa = {
    'Cal Bap': 0.23,
    'Cal Poly': 0.18,
    'UCI': 0.1, 
    'UCSB': 0.22,
    'UCR': 0.26,
    'LBSU': 0.22,
    'CSUN': 0.2,
    'UCSD': 0.24,
    'Fullerton': 0.19,
    'Utah Valley': 0.19,
    'Sacramento State': 0.17,
    'Bakersfield': 0.21
}

### Add Expected Runs

In [24]:
# Find the average runs scored by any team in the conference.
# The value is read from the 'Average' summary row of runs_master ('Conf R/GP').
# If that row is absent, fail with a clear message instead of the opaque
# "single positional indexer is out-of-bounds" IndexError raised by .iloc[0].
_average_rows = runs_master.loc[runs_master['Team'] == 'Average', 'Conf R/GP']
if _average_rows.empty:
    raise ValueError(
        "runs_master has no 'Average' row; cannot determine the conference average runs per game"
    )
average_runs = _average_rows.iloc[0]

def find_expected_runs(opponent):
    """Return UCSD's expected runs against ``opponent``, rounded to 2 decimals.

    The estimate is UCSD's 'Conf ATT' times the opponent's 'Conf DEF', scaled by
    ``average_runs`` and by ``sqrt(1 + hfa['UCSD'])``. The 'Average' and 'UCSD'
    rows are not matchups, so they yield NaN.

    NOTE(review): UCSD's home-field factor is always applied here, and
    find_opp_expected_runs applies the opponent's own factor as well — confirm
    that boosting both sides of the same matchup is intended.
    """
    if opponent in ('Average', 'UCSD'):
        return np.nan

    team_names = runs_master['Team']

    # UCSD's offensive rating
    ucsd_attack = runs_master.loc[team_names == 'UCSD', 'Conf ATT'].iloc[0]

    # Opponent's defensive rating
    opp_defense = runs_master.loc[team_names == opponent, 'Conf DEF'].iloc[0]

    # Expected runs, boosted by UCSD's home field advantage
    return round(
        ucsd_attack * opp_defense * average_runs * np.sqrt(1 + hfa['UCSD']),
        2,
    )


def find_opp_expected_runs(opponent):
    """Return ``opponent``'s expected runs against UCSD, rounded to 2 decimals.

    The estimate is the opponent's 'Conf ATT' times UCSD's 'Conf DEF', scaled by
    ``average_runs`` and by ``sqrt(1 + hfa[opponent])``. The 'Average' and 'UCSD'
    rows are not matchups, so they yield NaN.
    """
    if opponent in ('Average', 'UCSD'):
        return np.nan

    team_names = runs_master['Team']

    # Opponent's offensive rating
    opp_attack = runs_master.loc[team_names == opponent, 'Conf ATT'].iloc[0]

    # UCSD's defensive rating
    ucsd_defense = runs_master.loc[team_names == 'UCSD', 'Conf DEF'].iloc[0]

    # Expected runs, boosted by the opponent's own home field advantage
    return round(
        opp_attack * ucsd_defense * average_runs * np.sqrt(1 + hfa[opponent]),
        2,
    )
    

# Add two columns to runs_master in place, one value per row's 'Team':
#   ucsd_xruns - UCSD's expected runs against that team
#   x_runs     - that team's expected runs against UCSD
# Rows for 'UCSD' and 'Average' receive NaN in both columns.
runs_master['ucsd_xruns'] = runs_master['Team'].apply(find_expected_runs) 
runs_master['x_runs'] = runs_master['Team'].apply(find_opp_expected_runs)

runs_master
    

Unnamed: 0,Team,Conf R/GP,Conf RA/GP,Conf R/IP,Conf RA/IP,Conf ATT,Conf DEF,ucsd_xruns,x_runs
0,Cal Bap,6.17849,6.1373,0.686499,0.681922,1.000392,1.011012,8.22,6.82
1,Cal Poly,7.214724,5.042945,0.801636,0.560327,1.168174,0.830736,6.76,7.8
2,Bakersfield,4.27051,6.086475,0.474501,0.676275,0.691461,1.00264,8.15,4.67
3,Fullerton,6.0,5.823529,0.666667,0.647059,0.971492,0.959324,7.8,6.51
4,CSUN,6.236364,7.690909,0.692929,0.854545,1.009763,1.266942,10.3,6.8
5,LBSU,5.521472,6.478528,0.613497,0.719836,0.894011,1.067223,8.68,6.07
6,Sacramento State,5.2,5.04,0.577778,0.56,0.84196,0.830251,6.75,5.6
7,UCI,8.688391,5.389002,0.965377,0.598778,1.406784,0.887743,7.22,9.07
8,UCR,4.115086,8.020218,0.457232,0.891135,0.666295,1.32119,10.74,4.6
9,UCSD,7.302632,6.039474,0.811404,0.671053,1.182408,0.994897,,


### Developing Probability Distribution

In [25]:
# Setting constants: coefficients of the exponent
#     m * A + p * R + n * (R / A) + b
# used by probability(A, R), where A is the expected total runs in a game and
# R is the number of runs scored in one inning.
# NOTE(review): the origin of these values is not shown in this notebook — confirm the source.
m = -0.01219 
n = -1.1813
p = -0.3865
b = -1.042

# Setting 12 as the maximum runs/inning to consider
R_max = 12

In [26]:
def compute_C(A):
    """Return the scaling constant C of the runs-per-inning distribution.

    C is chosen so that the mean runs per inning implied by ``probability(A, R)``
    over R = 1..R_max equals ``A / 9``:

        sum_r  r * C * exp(m*A + p*r + n*(r/A) + b)  =  A / 9

    Parameters
    ----------
    A : float
        Expected total runs in a game. Must be non-zero (it is used as a divisor).

    Returns
    -------
    float
        The constant multiplying the exponential term in ``probability``.
    """
    numerator = A  # Expected total runs in game
    # The exponent has to be the same one probability() uses, i.e. the m
    # coefficient multiplies A (not r). With m * r the simulated mean runs per
    # game would drift away from A.
    denominator = 9 * sum(
        r * np.exp(m * A + p * r + n * (r / A) + b)
        for r in range(1, R_max + 1)
    )
    return numerator / denominator

def probability(A, R):
    """Probability of scoring ``R`` runs in one inning given expected game runs ``A``.

    For R > 0 the value is ``C * exp(m*A + p*R + n*(R/A) + b)`` with C taken
    from ``compute_C(A)``. Any other R (i.e. R == 0) receives the remaining
    mass, one minus the summed probabilities of 1..R_max runs.
    """
    scale = compute_C(A)

    def weight(runs):
        # Unscaled exponential term for a given number of runs in the inning.
        return np.exp(m * A + p * runs + n * (runs / A) + b)

    if R > 0:
        return scale * weight(R)

    # R == 0: whatever probability is not assigned to 1..R_max runs
    scoring_mass = sum(weight(runs) for runs in range(1, R_max + 1))
    return 1 - scale * scoring_mass

### Simulate single games using runs/inning probability distribution & season simulations

In [27]:
# Develop static inning probability distributions for each team we're playing

def inning_probabilities(A):
    """Return the runs-per-inning distribution for expected game runs ``A``.

    Entry ``r`` of the returned array is the probability of scoring ``r`` runs
    in an inning, for r = 0..R_max, rescaled so the entries sum to 1.
    """
    raw = np.array([probability(A, runs) for runs in range(R_max + 1)])
    # Normalize to exactly sum to 1
    return raw / raw.sum()

In [28]:
# Simulate a single game
def simulate_game_custom(ucsd_xruns, opp_xruns, innings=9):
    """Simulate one game inning by inning and return ``(ucsd_score, opp_score)``.

    Each side's runs per inning are drawn from the distribution that
    ``inning_probabilities`` builds from its expected runs; a score is the sum
    of ``innings`` draws. Draws come from NumPy's global random state, UCSD's
    innings first, then the opponent's.
    """
    ucsd_probs = inning_probabilities(ucsd_xruns)
    opp_probs = inning_probabilities(opp_xruns)
    outcomes = range(R_max + 1)  # possible runs in a single inning

    ucsd_innings = np.random.choice(outcomes, p=ucsd_probs, size=innings)
    opp_innings = np.random.choice(outcomes, p=opp_probs, size=innings)

    return ucsd_innings.sum(), opp_innings.sum()

In [146]:
# Slow for large sims_per_game (about 5 mins was measured at 50,000 sims per game
# back when every matchup was simulated twice).
## THIS FUNCTION WRITES TO AN EXCEL SHEET, THE EXCEL SHEET MAY ALREADY BE IN THE REPO ##

def simulate_season_custom_to_excel(df, filename, sims_per_game=100):
    """Simulate every matchup in ``df`` and write the results to an Excel workbook.

    For each row whose 'Team' is not 'UCSD' or 'Average', ``sims_per_game`` games
    are simulated with ``simulate_game_custom`` and written to a sheet named
    after the team (truncated to 31 characters). A final 'Win Percentages' sheet
    holds the share of those same simulated games in which UCSD outscored the
    opponent.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'Team', 'ucsd_xruns' and 'x_runs'.
    filename : str
        Path of the workbook to create (an existing file is overwritten).
    sims_per_game : int, default 100
        Number of simulated games per opponent; must be positive.

    Raises
    ------
    ValueError
        If ``sims_per_game`` is not positive (checked before the file is opened).

    Notes
    -----
    Resets NumPy's global random state with ``np.random.seed(42)``.
    """
    if sims_per_game <= 0:
        raise ValueError("sims_per_game must be a positive integer")

    np.random.seed(42)
    results = []

    with pd.ExcelWriter(filename, engine="openpyxl") as writer:
        for _, row in df.iterrows():
            team = row['Team']
            if team in ('UCSD', 'Average'):
                continue

            ucsd_xruns = row['ucsd_xruns']
            opp_xruns = row['x_runs']

            # Simulate all games once; the sheet and the win rate both use them.
            game_results = [
                simulate_game_custom(ucsd_xruns, opp_xruns)
                for _ in range(sims_per_game)
            ]

            # Write to Excel, each team gets its own sheet
            games_df = pd.DataFrame(game_results, columns=["UCSD Runs", f"{team} Runs"])
            sheet_name = team[:31]  # Excel sheet names max length = 31
            games_df.to_excel(writer, sheet_name=sheet_name, index=False)

            # NOTE(review): games level after the simulated innings count as
            # non-wins here — confirm that is the intended treatment of ties.
            win_pct = sum(1 for u, o in game_results if u > o) / sims_per_game
            results.append((team, win_pct))

        results_df = pd.DataFrame(results, columns=['Team', 'Win Percentage'])

        # Write the summary results to a separate sheet
        results_df.to_excel(writer, sheet_name='Win Percentages', index=False)

    print(f"Simulation results written to {filename}")

# Run the season simulation with 50,000 simulated games per opponent and write
# the results to 'simulated_season_results.xlsx' (relative to the working directory).
simulate_season_custom_to_excel(runs_master, 'simulated_season_results.xlsx', sims_per_game=50000)

Simulation results written to simulated_season_results.xlsx


## Conference Expected Wins

In [13]:
# Load the per-opponent win probabilities and convert each one into expected
# wins over 3 games against that opponent.
# NOTE(review): this CSV lives at a different path than the workbook written by
# the simulation above — confirm how it was produced from those results.
expected_win_rates = pd.read_csv('../Other/files/conf_win_probs.csv')
expected_win_rates['expected_wins'] = 3 * expected_win_rates['Win Percentage']
expected_win_rates

Unnamed: 0,Team,Win Percentage,expected_wins
0,Cal Bap,0.54152,1.62456
1,Cal Poly,0.40564,1.21692
2,Bakersfield,0.67456,2.02368
3,Fullerton,0.53444,1.60332
4,CSUN,0.6471,1.9413
5,LBSU,0.61104,1.83312
6,Sacramento State,0.53492,1.60476
7,UCI,0.37194,1.11582
8,UCR,0.78134,2.34402
9,UCSB,0.40882,1.22646


In [95]:
def expected_wins_choose_k_w_combo(df, k):
    """Rank every k-row subset of ``df`` by its average per-game win rate.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs an 'expected_wins' column; each row's value is treated as the
        expected wins over 3 games (hence the division by ``k * 3``).
    k : int
        Number of rows (opponents) in each subset.

    Returns
    -------
    list of (tuple of int, float)
        ``(row_positions, win_rate)`` pairs, sorted from the highest win rate to
        the lowest. ``row_positions`` are 0-based positions in ``df``, not index
        labels. The list is empty when ``k`` exceeds ``len(df)``.
    """
    wins = df['expected_wins']
    results = []
    for combo in itertools.combinations(range(len(df)), k):
        # combo holds row positions, so select positionally with .iloc;
        # label-based .loc fails (or picks other rows) on a non-default index.
        win_rate = wins.iloc[list(combo)].sum() / (k * 3)
        results.append((combo, win_rate))
    return sorted(results, key=lambda x: x[1], reverse=True)

In [97]:
# Display the win-rate table again for reference; its row positions are what the combinations below refer to.
expected_win_rates

Unnamed: 0,Team,Win Percentage,expected_wins
0,Cal Bap,0.54152,1.62456
1,Cal Poly,0.40564,1.21692
2,Bakersfield,0.67456,2.02368
3,Fullerton,0.53444,1.60332
4,CSUN,0.6471,1.9413
5,LBSU,0.61104,1.83312
6,Sacramento State,0.53492,1.60476
7,UCI,0.37194,1.11582
8,UCR,0.78134,2.34402
9,UCSB,0.40882,1.22646


In [98]:
# The ranking is sorted from highest to lowest win rate, so [-1] is the
# 8-opponent subset with the lowest win rate.
expected_wins_choose_k_w_combo(expected_win_rates, 8)[-1]

((0, 1, 3, 5, 6, 7, 9, 10), 0.4930999999999999)

In [25]:
def expected_wins_choose_k(df, k):
    """Return the per-game win rate of every k-row subset of ``df``, ascending.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs an 'expected_wins' column; each row's value is treated as the
        expected wins over 3 games (hence the division by ``k * 3``).
    k : int
        Number of rows (opponents) in each subset.

    Returns
    -------
    list of float
        One win rate per combination, sorted from lowest to highest. The list is
        empty when ``k`` exceeds ``len(df)``.
    """
    wins = df['expected_wins']
    results = []
    for combo in itertools.combinations(range(len(df)), k):
        # combo holds row positions, so select positionally with .iloc;
        # label-based .loc fails (or picks other rows) on a non-default index.
        results.append(wins.iloc[list(combo)].sum() / (k * 3))
    return sorted(results)

In [79]:
def _season_record(win_rate, n_games):
    """Turn a per-game win rate into (wins, losses, 'W - L' label) over n_games."""
    wins = win_rate * n_games
    losses = n_games - wins
    return wins, losses, f'{round(wins, 1)} - {round(losses, 1)}'


## 33 games
# All 11 opponents at once: [0] takes the lowest rate of the sorted list, which
# is the only entry when the table has exactly 11 rows.
# NOTE(review): this raises IndexError if expected_win_rates has fewer than 11 rows.
wrate_33 = expected_wins_choose_k(expected_win_rates, 11)[0]
w_33, l_33, wl_33 = _season_record(wrate_33, 33)

## 30 games
# Enumerate the 10-opponent subsets once and reuse the list for both bounds.
_rates_30 = expected_wins_choose_k(expected_win_rates, 10)
wrate_30_min = np.min(_rates_30)
w_30_min, l_30_min, wl_30_min = _season_record(wrate_30_min, 30)

wrate_30_max = np.max(_rates_30)
w_30_max, l_30_max, wl_30_max = _season_record(wrate_30_max, 30)

## 27 games
_rates_27 = expected_wins_choose_k(expected_win_rates, 9)
wrate_27_min = np.min(_rates_27)
w_27_min, l_27_min, wl_27_min = _season_record(wrate_27_min, 27)

wrate_27_max = np.max(_rates_27)
w_27_max, l_27_max, wl_27_max = _season_record(wrate_27_max, 27)

## 24 games
_rates_24 = expected_wins_choose_k(expected_win_rates, 8)
wrate_24_min = np.min(_rates_24)
w_24_min, l_24_min, wl_24_min = _season_record(wrate_24_min, 24)

wrate_24_max = np.max(_rates_24)
w_24_max, l_24_max, wl_24_max = _season_record(wrate_24_max, 24)

In [80]:
def _summary(wins, losses, win_rate, label):
    """Bundle one projection into the record layout used by the table below."""
    return {'W': wins, 'L': losses, 'Win%': win_rate, 'W-L': label}


# Keyed by number of conference games. The 33-game entry is a single record;
# the shorter schedules each carry a 'Min' and a 'Max' record.
projected_winning_pct = {
    33: _summary(w_33, l_33, wrate_33, wl_33),
    30: {
        'Min': _summary(w_30_min, l_30_min, wrate_30_min, wl_30_min),
        'Max': _summary(w_30_max, l_30_max, wrate_30_max, wl_30_max),
    },
    27: {
        'Min': _summary(w_27_min, l_27_min, wrate_27_min, wl_27_min),
        'Max': _summary(w_27_max, l_27_max, wrate_27_max, wl_27_max),
    },
    24: {
        'Min': _summary(w_24_min, l_24_min, wrate_24_min, wl_24_min),
        'Max': _summary(w_24_max, l_24_max, wrate_24_max, wl_24_max),
    },
}
projected_winning_pct

{33: {'W': 18.1434, 'L': 14.8566, 'Win%': 0.5498, 'W-L': '18.1 - 14.9'},
 30: {'Min': {'W': 15.79938,
   'L': 14.20062,
   'Win%': 0.526646,
   'W-L': '15.8 - 14.2'},
  'Max': {'W': 17.02758,
   'L': 12.97242,
   'Win%': 0.567586,
   'W-L': '17.0 - 13.0'}},
 27: {'Min': {'W': 13.775700000000002,
   'L': 13.224299999999998,
   'Win%': 0.5102111111111112,
   'W-L': '13.8 - 13.2'},
  'Max': {'W': 15.81066,
   'L': 11.18934,
   'Win%': 0.58558,
   'W-L': '15.8 - 11.2'}},
 24: {'Min': {'W': 11.834399999999999,
   'L': 12.165600000000001,
   'Win%': 0.4930999999999999,
   'W-L': '11.8 - 12.2'},
  'Max': {'W': 14.5842, 'L': 9.4158, 'Win%': 0.607675, 'W-L': '14.6 - 9.4'}}}

In [87]:
# Flatten projected_winning_pct into one row per (schedule length, bound) and
# save it. A schedule whose entry is itself a record becomes a single 'Point'
# row; otherwise each nested bound ('Min' / 'Max') becomes its own row.
games = []
for n_games, entry in projected_winning_pct.items():
    if entry.keys() >= {'W', 'L', 'Win%', 'W-L'}:
        games.append({'Games': n_games, 'Type': 'Point', **entry})
        continue
    games.extend(
        {'Games': n_games, 'Type': bound, **record}
        for bound, record in entry.items()
    )

df = pd.DataFrame(games)
df.to_csv('../Other/files/expected_wins_results.csv')

In [99]:
# Plain-text view of the flattened projections that were written to the CSV.
print(df)

   Games   Type         W         L      Win%          W-L
0     33  Point  18.14340  14.85660  0.549800  18.1 - 14.9
1     30    Min  15.79938  14.20062  0.526646  15.8 - 14.2
2     30    Max  17.02758  12.97242  0.567586  17.0 - 13.0
3     27    Min  13.77570  13.22430  0.510211  13.8 - 13.2
4     27    Max  15.81066  11.18934  0.585580  15.8 - 11.2
5     24    Min  11.83440  12.16560  0.493100  11.8 - 12.2
6     24    Max  14.58420   9.41580  0.607675   14.6 - 9.4
