In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Lookup of named HTTP status codes (e.g. status_codes['ok']), taken from
# the alias that requests exposes at package level.
status_codes = requests.codes

### The Pythagorean Theorem of Baseball

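> "The Pythagorean Theorem of Baseball is a creation of Bill James which relates the number of runs a team has scored and surrendered to its actual winning percentage, based on the idea that runs scored compared to runs allowed is a better indicator of a team's (future) performance than a team's actual winning percentage." - [Pythagorean Theorem of Baseball - Baseball Reference](https://www.baseball-reference.com/bullpen/Pythagorean_Theorem_of_Baseball)

The expected winning percentage is computed from runs scored ($RS$) and runs allowed ($RA$) as

$$\text{Win\%} = \frac{RS^{x}}{RS^{x} + RA^{x}} = \frac{(RS/RA)^{x}}{(RS/RA)^{x} + 1}$$

This notebook compares three choices of the exponent $x$: the original value of 2 and the refinements 1.81 and 1.83.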

In [2]:
def pythagorean(rs, ra, exp):
    """Return the Pythagorean expected winning percentage.

    Computes ``(rs / ra) ** exp / ((rs / ra) ** exp + 1)``, rounded to three
    decimal places.

    Args:
        rs: Runs scored.
        ra: Runs allowed.
        exp: Exponent applied to the runs ratio (e.g. 2, 1.81 or 1.83).

    Returns:
        The expected winning percentage as a float rounded to 3 decimals.
        When ``ra`` is 0 the ratio is unbounded, so the limit value 1.0 is
        returned if ``rs`` is positive; if ``rs`` is not positive either the
        expectation is undefined and ``nan`` is returned.
    """
    if ra == 0:
        # Avoid ZeroDivisionError for a team that has not allowed a run yet
        # (e.g. before the first game or after an opening shutout).
        return 1.0 if rs > 0 else float('nan')

    rate = (rs / ra) ** exp
    return round(rate / (rate + 1), 3)

def parse_standings_table(content):
    """Parse standings HTML into a DataFrame of actual vs. Pythagorean records.

    Every element matching ``.standings__table`` is expected to contain two
    ``tbody.Table__TBODY`` sections: the first holds one row per team name,
    the second holds the matching statistics rows in the same order. Rows
    whose class list contains ``subgroup-headers`` are skipped in both.

    Args:
        content: The HTML document (str or bytes) of the standings page.

    Returns:
        A ``pandas.DataFrame`` indexed by team name with the columns
        ``rs``, ``ra``, ``wpct``, ``py_2``, ``py_181``, ``py_183``,
        ``diff_py_2``, ``diff_py_181`` and ``diff_py_183``. Each ``diff_*``
        column is the actual winning percentage minus the corresponding
        Pythagorean estimate. ``rs`` and ``ra`` keep their integer dtype.

    Raises:
        ValueError: If the number of team names and statistics rows differ,
            which would otherwise pair teams with the wrong numbers.
    """
    # Positions of the cells read from each statistics row.
    pct_col, rs_col, ra_col = 2, 6, 7
    # Column-name suffix -> exponent used for the Pythagorean estimate.
    exponents = {'2': 2, '181': 1.81, '183': 1.83}
    header_class = 'subgroup-headers'

    teams = []
    stats = []

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's
    # GuessedAtParserWarning).
    soup = BeautifulSoup(content, 'html.parser')
    for standing in soup.select('.standings__table'):
        rows = standing.select('tbody.Table__TBODY')

        for row in rows[0].select('tr'):
            # .get() tolerates <tr> elements that carry no class attribute.
            if header_class in row.get('class', []):
                continue

            teams.append(row.select('td .AnchorLink abbr')[0].attrs['title'])

        for row in rows[1].select('tr'):
            if header_class in row.get('class', []):
                continue

            columns = row.select('td')

            pct = float(columns[pct_col].text)
            rs = int(columns[rs_col].text)
            ra = int(columns[ra_col].text)

            record = {'rs': rs, 'ra': ra, 'wpct': pct}
            estimates = {
                suffix: pythagorean(rs, ra, exponent)
                for suffix, exponent in exponents.items()
            }
            # Keep the original column order: all estimates, then all diffs.
            for suffix, estimate in estimates.items():
                record['py_' + suffix] = estimate
            for suffix, estimate in estimates.items():
                record['diff_py_' + suffix] = round(pct - estimate, 3)

            stats.append(record)

    if len(teams) != len(stats):
        raise ValueError(
            'Parsed {} team names but {} statistics rows; the page layout '
            'may have changed.'.format(len(teams), len(stats))
        )

    # Build the frame from records in one step; this keeps per-column dtypes
    # instead of upcasting everything to float via a transpose.
    return pd.DataFrame(stats, index=teams)

In [3]:
# Fetch the standings page. The timeout (in seconds) keeps a stalled
# connection from blocking the kernel indefinitely.
response = requests.get('https://www.espn.com/mlb/standings', timeout=30)

# Anything other than 200 OK means there is no standings page to parse;
# fail with enough detail to diagnose the problem.
if response.status_code != status_codes['ok']:
    raise RuntimeError(
        'Failed to fetch standings from {}: HTTP {} {}'.format(
            response.url, response.status_code, response.reason
        )
    )

df = parse_standings_table(response.content)

In [4]:
# Show the parsed standings: runs, actual winning percentage, the three
# Pythagorean estimates, and the actual-minus-estimate differences.
df

Unnamed: 0,rs,ra,wpct,py_2,py_181,py_183,diff_py_2,diff_py_181,diff_py_183
New York Yankees,807.0,567.0,0.611,0.67,0.654,0.656,-0.059,-0.043,-0.045
Toronto Blue Jays,775.0,679.0,0.568,0.566,0.56,0.56,0.002,0.008,0.008
Tampa Bay Rays,666.0,614.0,0.531,0.541,0.537,0.537,-0.01,-0.006,-0.006
Baltimore Orioles,674.0,688.0,0.512,0.49,0.491,0.491,0.022,0.021,0.021
Boston Red Sox,735.0,787.0,0.481,0.466,0.469,0.469,0.015,0.012,0.012
Cleveland Guardians,698.0,634.0,0.568,0.548,0.543,0.544,0.02,0.025,0.024
Chicago White Sox,686.0,717.0,0.5,0.478,0.48,0.48,0.022,0.02,0.02
Minnesota Twins,696.0,684.0,0.481,0.509,0.508,0.508,-0.028,-0.027,-0.027
Detroit Tigers,557.0,713.0,0.407,0.379,0.39,0.389,0.028,0.017,0.018
Kansas City Royals,640.0,810.0,0.401,0.384,0.395,0.394,0.017,0.006,0.007
