In [None]:
import sys
!{sys.executable} -m pip install numpy pandas

In [2]:
import pandas as pd
import numpy as np
from numpy import random

In [3]:
# 2022 season data from baseball-reference.com

lad_avg_rs = 5.23
lad_avg_ra = 3.17

sdp_avg_rs = 4.35
sdp_avg_ra = 4.07

lg_avg_rs = 4.28
lg_avg_ra = 4.28

In [4]:
# pythagorean win expectancy
# retrospectively estimates season record based on runs scored and runs allowed
def pythagorean_win_expectancy(avg_rs, avg_ra):
    win_pct = (avg_rs ** 2) / (avg_rs ** 2 + avg_ra ** 2)
    wins = int(win_pct * 162 + 0.5)
    losses = 162 - wins

    return (win_pct, wins, losses)

In [5]:
print('Pythagorean win expectancy')
print('-' * 20)

lad_win_pct, lad_wins, lad_losses = pythagorean_win_expectancy(lad_avg_rs, lad_avg_ra)
print(f'LAD: {lad_wins}-{lad_losses} ({round(lad_win_pct, 3)})')

sdp_win_pct, sdp_wins, sdp_losses = pythagorean_win_expectancy(sdp_avg_rs, sdp_avg_ra)
print(f'SDP: {sdp_wins}-{sdp_losses} ({round(sdp_win_pct, 3)})')

Pythagorean win expectancy
--------------------
LAD: 118-44 (0.731)
SDP: 86-76 (0.533)


In [6]:
# poisson distribution
# predicts season record using poisson distribution of runs scored and runs allowed
# assuming every game is vs. a league average team
def poisson_distribution(avg_rs, avg_ra, opp_avg_rs, opp_avg_ra):
    opp_aggr_rs = random.poisson(lam=opp_avg_rs, size=162)
    opp_aggr_ra = random.poisson(lam=opp_avg_ra, size=162)
    
    
    aggr_rs = random.poisson(lam=avg_rs, size=162)
    aggr_ra = random.poisson(lam=avg_ra, size=162)
    
    wins = 0
    losses = 0
    
    for i in range(0, 162):
        runs_scored = (aggr_rs[i] + opp_aggr_ra[i]) / 2
        runs_allowed = (aggr_ra[i] + opp_aggr_rs[i]) / 2
    
        if runs_scored >= runs_allowed: # what to do with tied scores?
            wins += 1
        else:
            losses += 1
    
    win_pct = wins / 162

    return (win_pct, wins, losses)

In [9]:
print('Poisson distribution')
print('-' * 20)

lad_win_pct, lad_wins, lad_losses = poisson_distribution(lad_avg_rs, lad_avg_ra, lg_avg_rs, lg_avg_ra)
print(f'LAD: {lad_wins}-{lad_losses} ({round(lad_win_pct, 3)})')

sdp_win_pct, sdp_wins, sdp_losses = poisson_distribution(sdp_avg_rs, sdp_avg_ra, lg_avg_rs, lg_avg_ra)
print(f'SDP: {sdp_wins}-{sdp_losses} ({round(sdp_win_pct, 3)})')

Poisson distribution
--------------------
LAD: 115-47 (0.71)
SDP: 85-77 (0.525)
