<h1 style="text-align: center;">Schedule Optimization Effort</h1>

## Necessary Libraries

In [1]:
import pandas as pd
import numpy as np 
import itertools


## Loading in the Data

In [4]:
# Conference ratings table: one row per team, read from the 'Conference RunsIP' sheet.
runs_master = pd.read_excel(
    io='files/2025 Big West Conference Runs.xlsx',
    sheet_name='Conference RunsIP',
)
runs_master

Unnamed: 0,Team,Conf R/GP,Conf RA/GP,Conf R/IP,Conf RA/IP,Conf ATT,Conf DEF
0,Cal Bap,6.17849,6.1373,0.686499,0.681922,1.000392,1.011012
1,Cal Poly,7.214724,5.042945,0.801636,0.560327,1.168174,0.830736
2,Bakersfield,4.27051,6.086475,0.474501,0.676275,0.691461,1.00264
3,Fullerton,6.0,5.823529,0.666667,0.647059,0.971492,0.959324
4,CSUN,6.236364,7.690909,0.692929,0.854545,1.009763,1.266942
5,LBSU,5.521472,6.478528,0.613497,0.719836,0.894011,1.067223
6,Sacramento State,5.2,5.04,0.577778,0.56,0.84196,0.830251
7,UCI,8.688391,5.389002,0.965377,0.598778,1.406784,0.887743
8,UCR,4.115086,8.020218,0.457232,0.891135,0.666295,1.32119
9,UCSD,7.302632,6.039474,0.811404,0.671053,1.182408,0.994897


## Simulation

### Set Home Field Advantage

In [None]:
# Home-field-advantage factor per team. The expected-run helpers below scale
# a team's expected runs by sqrt(1 + hfa[team]).
hfa = dict([
    ('Cal Bap', 0.23),
    ('Cal Poly', 0.18),
    ('UCI', 0.1),
    ('UCSB', 0.22),
    ('UCR', 0.26),
    ('LBSU', 0.22),
    ('CSUN', 0.2),
    ('UCSD', 0.24),
    ('Fullerton', 0.19),
    ('Utah Valley', 0.19),
    ('Sacramento State', 0.17),
    ('Bakersfield', 0.21),
])

### Add Expected Runs

In [None]:
# Conference-wide runs per game, taken from the 'Average' row of the ratings table
# (raises IndexError if that row is absent).
average_runs = runs_master.loc[runs_master['Team'] == 'Average', 'Conf R/GP'].iloc[0]

def find_expected_runs(opponent):
    """Expected runs UCSD scores in one game against ``opponent``.

    Multiplies UCSD's 'Conf ATT' rating by the opponent's 'Conf DEF' rating and
    the conference-average runs per game, then scales by sqrt(1 + hfa['UCSD']).

    Returns NaN for the 'Average' row and for UCSD itself; otherwise the
    estimate rounded to two decimals. Raises IndexError if ``opponent`` has no
    row in ``runs_master``.
    """
    if opponent in ('Average', 'UCSD'):
        return np.nan

    teams = runs_master['Team']
    ucsd_attack = runs_master.loc[teams == 'UCSD', 'Conf ATT'].iloc[0]
    opp_defense = runs_master.loc[teams == opponent, 'Conf DEF'].iloc[0]
    home_boost = np.sqrt(1 + hfa['UCSD'])

    return round((ucsd_attack * opp_defense) * average_runs * home_boost, 2)


def find_opp_expected_runs(opponent):
    """Expected runs ``opponent`` scores in one game against UCSD.

    Multiplies the opponent's 'Conf ATT' rating by UCSD's 'Conf DEF' rating and
    the conference-average runs per game, then scales by
    sqrt(1 + hfa[opponent]).

    Returns NaN for the 'Average' row and for UCSD itself; otherwise the
    estimate rounded to two decimals. Raises KeyError if ``opponent`` has no
    entry in ``hfa`` and IndexError if it has no row in ``runs_master``.
    """
    if opponent in ('Average', 'UCSD'):
        return np.nan

    teams = runs_master['Team']
    opp_attack = runs_master.loc[teams == opponent, 'Conf ATT'].iloc[0]
    ucsd_defense = runs_master.loc[teams == 'UCSD', 'Conf DEF'].iloc[0]
    home_boost = np.sqrt(1 + hfa[opponent])

    return round((opp_attack * ucsd_defense) * average_runs * home_boost, 2)
    

# Add both sides' expected runs for every row (NaN for the UCSD and 'Average' rows).
runs_master['ucsd_xruns'] = runs_master['Team'].map(find_expected_runs)
runs_master['x_runs'] = runs_master['Team'].map(find_opp_expected_runs)

runs_master
    

Unnamed: 0,Team,Conf R/GP,Conf RA/GP,Conf R/IP,Conf RA/IP,Conf ATT,Conf DEF,ucsd_xruns,x_runs
0,Cal Bap,6.17849,6.1373,0.686499,0.681922,1.000392,1.011012,8.22,6.82
1,Cal Poly,7.214724,5.042945,0.801636,0.560327,1.168174,0.830736,6.76,7.8
2,Bakersfield,4.27051,6.086475,0.474501,0.676275,0.691461,1.00264,8.15,4.67
3,Fullerton,6.0,5.823529,0.666667,0.647059,0.971492,0.959324,7.8,6.51
4,CSUN,6.236364,7.690909,0.692929,0.854545,1.009763,1.266942,10.3,6.8
5,LBSU,5.521472,6.478528,0.613497,0.719836,0.894011,1.067223,8.68,6.07
6,Sacramento State,5.2,5.04,0.577778,0.56,0.84196,0.830251,6.75,5.6
7,UCI,8.688391,5.389002,0.965377,0.598778,1.406784,0.887743,7.22,9.07
8,UCR,4.115086,8.020218,0.457232,0.891135,0.666295,1.32119,10.74,4.6
9,UCSD,7.302632,6.039474,0.811404,0.671053,1.182408,0.994897,,


### Developing Probability Distribution

In [None]:
# Coefficients of the exponent in the runs-per-inning model:
#   P(R) = C * exp(m*A + p*R + n*(R/A) + b)
m, n, p, b = -0.01219, -1.1813, -0.3865, -1.042

# Largest runs-in-a-single-inning value the distribution covers
R_max = 12

In [None]:
def compute_C(A):
    """Return the scale factor C that makes the inning model average A / 9 runs.

    ``probability`` assigns P(R) = C * exp(m*A + p*R + n*(R/A) + b) for
    R = 1..R_max. Requiring 9 * sum(R * P(R)) == A gives

        C = A / (9 * sum(R * exp(m*A + p*R + n*(R/A) + b)))

    The exponent here must be the one ``probability`` uses, so the ``m`` term
    multiplies the game total ``A`` (not the per-inning run count); otherwise
    the resulting distribution does not have the intended mean.

    Parameters
    ----------
    A : expected total runs over a nine-inning game; must be non-zero because
        the exponent divides by it.
    """
    weighted_sum = sum(
        r * np.exp(m * A + p * r + n * (r / A) + b)
        for r in range(1, R_max + 1)
    )
    return A / (9 * weighted_sum)

def probability(A, R):
    """Probability of scoring exactly ``R`` runs in one inning.

    ``A`` is the expected total runs for the game. For R > 0 the value is
    C * exp(m*A + p*R + n*(R/A) + b) with C from ``compute_C``. Any other R
    (intended for R == 0) gets the remainder: one minus the combined mass of
    R = 1..R_max.
    """
    C = compute_C(A)

    def weight(runs):
        # Unnormalised model weight for a given runs-in-inning count
        return np.exp(m * A + p * runs + n * (runs / A) + b)

    if R > 0:
        return C * weight(R)

    # R == 0: whatever probability is not claimed by the positive outcomes
    positive_mass = sum(weight(r) for r in range(1, R_max + 1))
    return 1 - C * positive_mass

### Simulate single games using runs/inning probability distribution & season simulations

In [None]:
# Develop static inning probability distributions for each team we're playing

def inning_probabilities(A):
    """Return an array of P(0), P(1), ..., P(R_max) runs in an inning.

    ``A`` is the expected total runs for the game. The values from
    ``probability`` are divided by their sum so the array adds up to 1.
    """
    raw = np.array([probability(A, runs) for runs in range(0, R_max + 1)])
    return raw / raw.sum()

In [None]:
# Simulate a single game
def simulate_game_custom(ucsd_xruns, opp_xruns, innings=9):
    """Simulate one game inning by inning and return (ucsd_score, opp_score).

    Each side's runs per inning are drawn independently from that side's
    ``inning_probabilities`` distribution (UCSD's innings are drawn first)
    and summed over ``innings`` innings. Uses NumPy's global random state.
    """
    outcomes = range(R_max + 1)

    ucsd_innings = np.random.choice(outcomes, p=inning_probabilities(ucsd_xruns), size=innings)
    # The opponent's distribution is built before its draw and after UCSD's draw;
    # building it consumes no random numbers, so the draw order is unchanged.
    opp_innings = np.random.choice(outcomes, p=inning_probabilities(opp_xruns), size=innings)

    return np.sum(ucsd_innings), np.sum(opp_innings)

In [None]:
# Long-running at 50,000 sims per game; runtime scales linearly with sims_per_game.
## THIS FUNCTION WRITES TO AN EXCEL SHEET, THE EXCEL SHEET MAY ALREADY BE IN THE REPO ##

def simulate_season_custom_to_excel(df, filename, sims_per_game=100):
    """Simulate UCSD against every opponent in ``df`` and write the results to Excel.

    Parameters
    ----------
    df : DataFrame with 'Team', 'ucsd_xruns' and 'x_runs' columns. Rows whose
        'Team' is 'UCSD' or 'Average' are skipped.
    filename : path of the workbook to create (written with the openpyxl engine).
    sims_per_game : number of simulated games per opponent.

    Output
    ------
    One sheet per opponent (name truncated to Excel's 31-character limit) with
    the simulated scores, plus a 'Win Percentages' sheet. The win percentage is
    computed from the same simulated games that are written to the opponent's
    sheet, so the summary always matches the per-game data. A game counts as a
    win only when UCSD's score is strictly higher; tied scores are not wins.

    The global NumPy seed is reset to 42 on every call for reproducibility.
    """
    np.random.seed(42)
    results = []

    with pd.ExcelWriter(filename, engine="openpyxl") as writer:
        for _, row in df.iterrows():
            team = row['Team']
            if team in ('UCSD', 'Average'):
                continue

            ucsd_xruns = row['ucsd_xruns']
            opp_xruns = row['x_runs']

            # Simulate the matchup once; this single sample feeds both the
            # per-team sheet and the win-percentage summary.
            game_results = [
                simulate_game_custom(ucsd_xruns, opp_xruns)
                for _ in range(sims_per_game)
            ]

            games_df = pd.DataFrame(game_results, columns=["UCSD Runs", f"{team} Runs"])

            # Each team gets its own sheet; Excel sheet names max length = 31
            games_df.to_excel(writer, sheet_name=team[:31], index=False)

            win_pct = sum(1 for u, o in game_results if u > o) / sims_per_game
            results.append((team, win_pct))

        results_df = pd.DataFrame(results, columns=['Team', 'Win Percentage'])

        # Write the summary results to a separate sheet
        results_df.to_excel(writer, sheet_name='Win Percentages', index=False)

    print(f"Simulation results written to {filename}")

simulate_season_custom_to_excel(runs_master, 'simulated_season_results.xlsx', sims_per_game=50000)

Simulation results written to simulated_season_results.xlsx


## Conference Expected Wins

In [None]:
# Per-opponent win probabilities, plus expected wins = win probability x 3
# (presumably three games per series; confirm).
# NOTE(review): no cell shown here writes this CSV; confirm it was exported from
# the 'Win Percentages' sheet of the simulation workbook.
expected_win_rates = pd.read_csv('../Other/files/conf_win_probs.csv')
expected_win_rates = expected_win_rates.assign(
    expected_wins=expected_win_rates['Win Percentage'] * 3
)
expected_win_rates

Unnamed: 0,Team,Win Percentage,expected_wins
0,Cal Bap,0.54152,1.62456
1,Cal Poly,0.40564,1.21692
2,Bakersfield,0.67456,2.02368
3,Fullerton,0.53444,1.60332
4,CSUN,0.6471,1.9413
5,LBSU,0.61104,1.83312
6,Sacramento State,0.53492,1.60476
7,UCI,0.37194,1.11582
8,UCR,0.78134,2.34402
9,UCSB,0.40882,1.22646


In [None]:
def expected_wins_choose_k(df, k):
    """Win rate of every possible k-opponent schedule, keeping the row combination.

    NOTE: a later cell defines another function with this same name that
    returns only the rates; once that cell runs, it replaces this version.

    Parameters
    ----------
    df : DataFrame with an 'expected_wins' column (expected wins per opponent
        over 3 games, per the ``k * 3`` divisor).
    k : number of opponents on the schedule.

    Returns
    -------
    List of ``(combo, win_rate)`` tuples, where ``combo`` holds row positions
    in ``df`` and ``win_rate`` is the summed expected wins divided by the
    ``k * 3`` games played. Sorted ascending by win rate (ties broken by combo).
    """
    expected = df['expected_wins']
    results = [
        # combos are row positions, so select with .iloc; this also works when
        # df does not carry the default 0..n-1 index.
        (combo, expected.iloc[list(combo)].sum() / (k * 3))
        for combo in itertools.combinations(range(len(df)), k)
    ]
    # Plain tuple sorting would order by combo, which is already the generation
    # order; sort on the win rate instead.
    return sorted(results, key=lambda item: (item[1], item[0]))

In [None]:
def expected_wins_choose_k(df, k):
    """Win rates of every possible k-opponent schedule, sorted ascending.

    Parameters
    ----------
    df : DataFrame with an 'expected_wins' column (expected wins per opponent
        over 3 games, per the ``k * 3`` divisor).
    k : number of opponents on the schedule.

    Returns
    -------
    Sorted list with one win rate per k-row combination of ``df``: the summed
    expected wins divided by the ``k * 3`` games played. Empty when
    ``k > len(df)``.
    """
    expected = df['expected_wins']
    rates = [
        # combos are row positions, so select with .iloc; this also works when
        # df does not carry the default 0..n-1 index.
        expected.iloc[list(combo)].sum() / (k * 3)
        for combo in itertools.combinations(range(len(df)), k)
    ]
    return sorted(rates)

In [None]:
def _season_record(win_rate, n_games):
    """Scale a win rate to (wins, losses, 'W - L' label) over ``n_games`` games."""
    wins = win_rate * n_games
    losses = n_games - wins
    return wins, losses, f'{round(wins, 1)} - {round(losses, 1)}'


# Each schedule length is enumerated once; min and max are read from the same list.

## 33 games
# Takes the lowest rate; with 11 opponents in the table there is only one combination.
wrate_33 = expected_wins_choose_k(expected_win_rates, 11)[0]
w_33, l_33, wl_33 = _season_record(wrate_33, 33)

## 30 games
rates_30 = expected_wins_choose_k(expected_win_rates, 10)
wrate_30_min = np.min(rates_30)
w_30_min, l_30_min, wl_30_min = _season_record(wrate_30_min, 30)

wrate_30_max = np.max(rates_30)
w_30_max, l_30_max, wl_30_max = _season_record(wrate_30_max, 30)

## 27 games
rates_27 = expected_wins_choose_k(expected_win_rates, 9)
wrate_27_min = np.min(rates_27)
w_27_min, l_27_min, wl_27_min = _season_record(wrate_27_min, 27)

wrate_27_max = np.max(rates_27)
w_27_max, l_27_max, wl_27_max = _season_record(wrate_27_max, 27)

## 24 games
rates_24 = expected_wins_choose_k(expected_win_rates, 8)
wrate_24_min = np.min(rates_24)
w_24_min, l_24_min, wl_24_min = _season_record(wrate_24_min, 24)

wrate_24_max = np.max(rates_24)
w_24_max, l_24_max, wl_24_max = _season_record(wrate_24_max, 24)

In [None]:
def _record_summary(wins, losses, rate, label):
    """Bundle one projected record into the W / L / Win% / W-L layout."""
    return {'W': wins, 'L': losses, 'Win%': rate, 'W-L': label}


# 33 games has a single point estimate; shorter schedules carry a Min/Max pair.
projected_winning_pct = {
    33: _record_summary(w_33, l_33, wrate_33, wl_33),
    30: {
        'Min': _record_summary(w_30_min, l_30_min, wrate_30_min, wl_30_min),
        'Max': _record_summary(w_30_max, l_30_max, wrate_30_max, wl_30_max),
    },
    27: {
        'Min': _record_summary(w_27_min, l_27_min, wrate_27_min, wl_27_min),
        'Max': _record_summary(w_27_max, l_27_max, wrate_27_max, wl_27_max),
    },
    24: {
        'Min': _record_summary(w_24_min, l_24_min, wrate_24_min, wl_24_min),
        'Max': _record_summary(w_24_max, l_24_max, wrate_24_max, wl_24_max),
    },
}
projected_winning_pct

{33: {'W': 18.1434, 'L': 14.8566, 'Win%': 0.5498, 'W-L': '18.1 - 14.9'},
 30: {'Min': {'W': 15.79938,
   'L': 14.20062,
   'Win%': 0.526646,
   'W-L': '15.8 - 14.2'},
  'Max': {'W': 17.02758,
   'L': 12.97242,
   'Win%': 0.567586,
   'W-L': '17.0 - 13.0'}},
 27: {'Min': {'W': 13.775700000000002,
   'L': 13.224299999999998,
   'Win%': 0.5102111111111112,
   'W-L': '13.8 - 13.2'},
  'Max': {'W': 15.81066,
   'L': 11.18934,
   'Win%': 0.58558,
   'W-L': '15.8 - 11.2'}},
 24: {'Min': {'W': 11.834399999999999,
   'L': 12.165600000000001,
   'Win%': 0.4930999999999999,
   'W-L': '11.8 - 12.2'},
  'Max': {'W': 14.5842, 'L': 9.4158, 'Win%': 0.607675, 'W-L': '14.6 - 9.4'}}}

In [None]:
# Flatten the nested projections into one row per (schedule length, bound).
point_keys = ('W', 'L', 'Win%', 'W-L')

games = []
for n_games, summary in projected_winning_pct.items():
    # A summary holding the record keys directly is a single point estimate;
    # otherwise it maps bound names ('Min'/'Max') to records.
    if set(point_keys).issubset(summary):
        records = {'Point': summary}
    else:
        records = summary
    games.extend(
        {'Games': n_games, 'Type': kind, **stats}
        for kind, stats in records.items()
    )

df = pd.DataFrame(games)
df.to_csv('../Other/files/expected_wins_results.csv')

## Non-Conference Simulations

In [2]:
# Raw non-conference game log, read from the 'Non-Conference Game Log' sheet.
nonconf = pd.read_excel(
    io='files/2025 Big West Conference Runs.xlsx',
    sheet_name='Non-Conference Game Log',
)
nonconf

Unnamed: 0,Date,away_team,away_score,home_team,home_score,innings,Unnamed: 6,total_teams,unique_teams,num_appearances,Notes,valid_nonconf_teams
0,2025-02-14 00:00:00,Gonzaga\nBulldogs,6.0,UC Davis\nAggies,1.0,9.0,,UC Davis\nAggies,UC Davis\nAggies,141.0,CONFERENCE,Pacific
1,2025-02-14 00:00:00,Utah Valley\nWolverines,2.0,UC Riverside\nHighlanders,3.0,9.0,,UC Riverside\nHighlanders,UC Riverside\nHighlanders,125.0,CONFERENCE,Santa Clara
2,2025-02-14 00:00:00,Cal Poly\nMustangs,2.0,UCLA\nBruins,3.0,9.0,,UCLA\nBruins,UCLA\nBruins,21.0,NOTED,LMU
3,2025-02-14 00:00:00,Lamar\nCardinals,17.0,CS Bakersfield\nRoadrunners,1.0,9.0,,CS Bakersfield\nRoadrunners,CS Bakersfield\nRoadrunners,135.0,CONFERENCE,USC
4,2025-02-14 00:00:00,Campbell\nCamels,1.0,UC Santa Barbara\nGauchos,6.0,9.0,,UC Santa Barbara\nGauchos,UC Santa Barbara\nGauchos,145.0,CONFERENCE,Fresno St
...,...,...,...,...,...,...,...,...,...,...,...,...
2151,,,,,,,,UC Irvine\nAnteaters,,,,
2152,,,,,,,,Long Beach St\n49ers,,,,
2153,,,,,,,,Texas A&M\nAggies,,,,
2154,,,,,,,,San Jose St\nSpartans,,,,


In [3]:
# Identify potential non-conference teams with a large number of games played against Big West opponents.
# The column is mostly empty, so drop missing values explicitly instead of assuming
# NaN is the last unique value.
potential_nonconf_teams = nonconf['valid_nonconf_teams'].dropna().unique().tolist()
potential_nonconf_teams


['Pacific',
 'Santa Clara',
 'LMU',
 'USC',
 'Fresno St',
 'Utah Tech',
 'Nevada',
 'Pepperdine',
 'SDSU',
 'USD',
 'SJSU',
 'ASU',
 'San Francisco',
 'Oregon State']

In [4]:
# Candidate non-conference opponents, spelled as they appear in the game log's
# away_team / home_team columns ("School\nMascot"). Each team is listed once.
potential_nonconf_teams = [
    'Pacific\nTigers',
    'Santa Clara\nBroncos',
    'San Francisco\nDons',
    'San Jose St\nSpartans',
    'Loy Marymount\nLions',
    'USC\nTrojans',
    'Fresno St\nBulldogs',
    'Utah Tech\nRebels',
    'Nevada\nWolf Pack',
    'Pepperdine\nWaves',
    'San Diego St\nAztecs',
    'San Diego\nToreros',
    'Arizona St\nSun Devils',
    'Oregon St\nBeavers',
    'UCLA\nBruins'
]
    

In [5]:
# Keep only games where at least one side is a candidate non-conference team,
# restricted to the date / teams / scores / innings columns.
gamelog_columns = ['Date', 'away_team', 'away_score', 'home_team', 'home_score', 'innings']
involves_candidate = (
    nonconf['away_team'].isin(potential_nonconf_teams)
    | nonconf['home_team'].isin(potential_nonconf_teams)
)
nonconf_gamelog = nonconf.loc[involves_candidate, gamelog_columns].reset_index(drop=True)
nonconf_gamelog.sample(5)

Unnamed: 0,Date,away_team,away_score,home_team,home_score,innings
325,2025-04-10 00:00:00,San Diego St\nAztecs,4.0,UC San Diego\nTritons,5.0,9.0
231,2025-04-23 00:00:00,USC\nTrojans,6.0,CS Fullerton\nTitans,4.0,9.0
42,2025-03-02 00:00:00,Fresno St\nBulldogs,2.0,UC Santa Barbara\nGauchos,1.0,9.0
119,2025-05-05 00:00:00,Oregon St\nBeavers,7.0,Hawaii\nRainbow Warriors,3.0,9.0
324,2025-04-04 00:00:00,San Diego\nToreros,5.0,UC San Diego\nTritons,2.0,9.0


In [7]:
# Map the game log's "School\nMascot" names to the short names used elsewhere.
team_short_names = {
    'UC San Diego\nTritons': 'UCSD',
    'CS Northridge\nMatadors': 'CSUN',
    'Pacific\nTigers': 'Pacific',
    'Santa Clara\nBroncos': 'Santa Clara',
    'San Diego St\nAztecs': 'SDSU',
    'UC Santa Barbara\nGauchos': 'UCSB',
    'Oregon St\nBeavers': 'OSU',
    'UC Irvine\nAnteaters': 'UCI',
    'Fresno St\nBulldogs': 'Fresno St',
    'San Diego\nToreros': 'USD',
    'Arizona St\nSun Devils': 'ASU',
    'San Francisco\nDons': 'San Francisco',
    'San Jose St\nSpartans': 'SJSU',
    'Loy Marymount\nLions': 'LMU',
    'USC\nTrojans': 'USC',
    'Utah Tech\nRebels': 'Utah Tech',
    'Nevada\nWolf Pack': 'Nevada',
    'Pepperdine\nWaves': 'Pepperdine',
    'UC Riverside\nHighlanders': 'UCR',
    'Cal Baptist\nLancers': 'Cal Bap',
    'Cal Poly\nMustangs': 'Cal Poly',
    'Long Beach St\n49ers': 'LBSU',
    'CS Sacramento\nHornets': 'Sacramento State',
    'UCLA\nBruins': 'UCLA',
    'CS Bakersfield\nRoadrunners': 'Bakersfield',
    'CS Fullerton\nTitans': 'Fullerton',
    'Utah Valley\nWolverines': 'Utah Valley',
}
nonconf_gamelog = nonconf_gamelog.replace(team_short_names, regex=False)

# Drop every game involving UC Davis or Hawaii (neither name is shortened above,
# so they are matched in their original "School\nMascot" form).
excluded_teams = ['UC Davis\nAggies', 'Hawaii\nRainbow Warriors']
involves_excluded = (
    nonconf_gamelog['away_team'].isin(excluded_teams)
    | nonconf_gamelog['home_team'].isin(excluded_teams)
)
new_nonconf = nonconf_gamelog[~involves_excluded].reset_index(drop=True)
new_nonconf

Unnamed: 0,Date,away_team,away_score,home_team,home_score,innings
0,2025-02-14 00:00:00,Cal Poly,2.0,UCLA,3.0,9.0
1,2025-02-14 00:00:00,UCSD,1.0,Pacific,2.0,9.0
2,2025-02-14 00:00:00,CSUN,4.0,Santa Clara,5.0,9.0
3,2025-02-15 00:00:00,Cal Poly,2.0,UCLA,18.0,9.0
4,2025-02-15 00:00:00,UCSD,11.0,Pacific,4.0,9.0
...,...,...,...,...,...,...
319,2025-05-09 00:00:00,UCI,10.0,USD,2.0,9.0
320,2025-05-09 00:00:00,Fullerton,2.0,UCLA,10.0,9.0
321,2025-05-16 00:00:00,UCSD,5.0,SDSU,3.0,9.0
322,2025-05-16 00:00:00,Fresno St,6.0,Cal Poly,11.0,9.0


In [None]:
# Save the cleaned non-conference game log without the row index.
new_nonconf.to_csv(
    path_or_buf='files/2025 Non-Conference Game Log.csv',
    index=False,
)

In [8]:
# Hard-coded ratings table for the 15 candidate non-conference teams. The five
# lists are parallel: position i in each list belongs to the same team.
# NOTE(review): NCATT / NCDEF look like the non-conference counterparts of
# 'Conf ATT' / 'Conf DEF'; how they were derived is not shown here, so confirm
# the source before reusing these values.
nonconf_runs = pd.DataFrame({
    'Team': ['Pacific','Santa Clara', 'San Francisco', 'UCLA', 'USC', 'LMU', 'OSU', 'ASU', 'SJSU', 'SDSU', 'USD', 'Pepperdine', 'Nevada', 'Utah Tech', 'Fresno St'],
    'R/GP': [4.133333333, 6.294117647, 6.333333333, 6.75, 6.730769231, 5.933333333, 8.333333333, 9.8, 4.416666667, 5.5, 5.153846154, 4.657142857, 7.076923077, 5.625, 6.125],
    'RA/GP': [6.266666667, 4.882352941, 5.166666667, 5.583333333, 4.692307692, 6.766666667, 4.888888889, 7.2, 9.416666667, 6.533333333, 6.730769231, 8.485714286, 8, 5.9375, 7.46875],
    'NCATT': [0.66765164, 1.016680153, 1.023014609, 1.090318202, 1.087211882, 0.95840316, 1.346071854, 1.582980501, 0.713418083, 0.888407424, 0.83249367, 0.752261871, 1.143125636, 0.908598502, 0.989362813],
    'NCDEF': [0.958991715, 0.747149365, 0.790658063, 0.85442081, 0.718066628, 1.035507011, 0.748149565, 1.101820268, 1.441038082, 0.999799873, 1.030013605, 1.298573888, 1.224244742, 0.908619145, 1.14294724]
})

nonconf_runs

Unnamed: 0,Team,R/GP,RA/GP,NCATT,NCDEF
0,Pacific,4.133333,6.266667,0.667652,0.958992
1,Santa Clara,6.294118,4.882353,1.01668,0.747149
2,San Francisco,6.333333,5.166667,1.023015,0.790658
3,UCLA,6.75,5.583333,1.090318,0.854421
4,USC,6.730769,4.692308,1.087212,0.718067
5,LMU,5.933333,6.766667,0.958403,1.035507
6,OSU,8.333333,4.888889,1.346072,0.74815
7,ASU,9.8,7.2,1.582981,1.10182
8,SJSU,4.416667,9.416667,0.713418,1.441038
9,SDSU,5.5,6.533333,0.888407,0.9998
