In [1]:
import numpy as np
import pandas as pd
import sys
from scipy.special import comb

# Years passed to generate_collapse_scores for the "nba" league:
# 1950 through 2020 inclusive (range() excludes its stop value, hence the + 1).
NBA_YEARS = range(1950, 2020 + 1)

In [12]:
def generate_collapse_scores_by_year(league, year, flat=True):
    """Compute a late-season "collapse score" for every team in one season.

    Reads ``data/<league>/<year>.csv`` and builds each team's win/loss
    sequence in file order.  Every sequence is then truncated to the smallest
    number of games any team played, which is how playoff games are dropped.

    For each team the score is ``1 / P(X <= w)``, where ``w`` is the number of
    wins in the team's last ``window`` games and ``X ~ Binomial(window, p)``
    with ``p`` the team's win percentage over the truncated season.  Larger
    scores mean the finish was less likely given the team's overall win rate.

    Parameters
    ----------
    league : str
        Sub-directory of ``data/`` that holds the season files.
    year : int or str
        Season identifier; ``str(year) + ".csv"`` is the file name.
    flat : bool, default True
        If True the late-season window is 10 games; otherwise it is half of
        the regular-season length (rounded).  In both cases the window is
        capped to the number of regular-season games available (and is at
        least 1), so short seasons are not evaluated against a binomial with
        more trials than games actually played.

    Returns
    -------
    dict
        Maps team name to its collapse score.  Empty if the file has no rows.

    Notes
    -----
    * Assumes rows are in chronological order with playoff games last --
      TODO confirm against the data files.
    * Only ``home_team_score > away_team_score`` is tested, so a tied game is
      credited to the away team.
    """
    df = pd.read_csv("data/" + league + "/" + str(year) + ".csv")

    # One vectorised comparison instead of a per-row comparison.
    home_wins = (df["home_team_score"] > df["away_team_score"]).to_numpy()

    # Accumulate in plain lists: np.append copies the whole array on every
    # call, which made the original per-row accumulation quadratic.
    results = {}
    for home_team, away_team, home_win in zip(df["home_team"], df["away_team"], home_wins):
        results.setdefault(home_team, []).append(home_win)
        results.setdefault(away_team, []).append(not home_win)

    if not results:
        return {}

    # remove playoffs by finding minimum games played and only retaining those
    regular_season_length = min(len(team_games) for team_games in results.values())
    season = {
        team: np.asarray(team_games[:regular_season_length], dtype=float)
        for team, team_games in results.items()
    }

    window = 10 if flat else round(regular_season_length / 2)
    # Cap the window to the games that exist.  Without the upper cap a short
    # season is scored with n=window trials but fewer observed games; without
    # the lower cap a window of 0 would make games[-0:] select every game.
    window = max(1, min(window, regular_season_length))

    # calculate collapse scores
    collapse_scores = {}

    for team, games in season.items():
        win_pct = np.sum(games) / len(games)
        late_wins = int(np.sum(games[-window:]))

        # Binomial CDF: probability of winning at most `late_wins` of the
        # last `window` games at the team's season-long win rate.
        probability = sum(
            comb(window, wins) * win_pct ** wins * (1 - win_pct) ** (window - wins)
            for wins in range(late_wins + 1)
        )

        collapse_scores[team] = 1 / probability

    return collapse_scores

def generate_collapse_scores(league, years, return_years=False):
    """Collect per-season collapse scores for every team across several years.

    Calls ``generate_collapse_scores_by_year(league, year)`` for each entry of
    ``years`` (in iteration order) and groups the resulting scores by team.

    Parameters
    ----------
    league : str
        Forwarded unchanged to ``generate_collapse_scores_by_year``.
    years : iterable
        Season identifiers to process, one call per element.
    return_years : bool, default False
        If True, also return the years that each team's scores belong to.

    Returns
    -------
    dict or (dict, dict)
        By default, a dict mapping team name to a float array of that team's
        scores, one per season in which the team appeared.  With
        ``return_years=True``, a tuple ``(collapse_scores, team_years)`` where
        ``team_years[team][i]`` is the year of ``collapse_scores[team][i]``.
    """
    # Build plain lists and convert once at the end; np.append would copy the
    # whole array on every call.
    scores_by_team = {}
    years_by_team = {}

    for year in years:
        year_collapse_scores = generate_collapse_scores_by_year(league, year)

        for team, score in year_collapse_scores.items():
            scores_by_team.setdefault(team, []).append(score)
            # Record the season itself (the original stored the score here).
            years_by_team.setdefault(team, []).append(year)

    collapse_scores = {
        team: np.array(team_scores, dtype=float)
        for team, team_scores in scores_by_team.items()
    }

    if not return_years:
        return collapse_scores

    team_years = {team: np.array(seasons) for team, seasons in years_by_team.items()}
    return collapse_scores, team_years

In [13]:
# Score every NBA season in NBA_YEARS; the returned dict is displayed as the cell output.
# NOTE(review): the saved output below interleaves score and year values, which
# generate_collapse_scores as defined above does not produce -- re-run this cell
# on a fresh kernel to refresh it.
generate_collapse_scores(league="nba", years=NBA_YEARS)

{'TRI-CITIES BLACKHAWKS': array([1.14161241e+00, 1.95000000e+03, 1.12425107e+00, 1.95100000e+03]),
 'DENVER NUGGETS': array([7.05042235e+00, 1.95000000e+03, 1.68644852e+00, 1.97700000e+03,
        1.52971419e+00, 1.97800000e+03, 1.03340435e+00, 1.97900000e+03,
        2.12288125e+00, 1.98000000e+03, 1.02869198e+00, 1.98100000e+03,
        4.18911933e+00, 1.98200000e+03, 1.35770084e+00, 1.98300000e+03,
        2.12944174e+00, 1.98400000e+03, 1.89056612e+00, 1.98500000e+03,
        4.66088440e+00, 1.98600000e+03, 1.11534569e+00, 1.98700000e+03,
        1.10481977e+00, 1.98800000e+03, 3.43858484e+00, 1.98900000e+03,
        1.27357489e+00, 1.99000000e+03, 3.87535977e+00, 1.99100000e+03,
        6.20969569e+00, 1.99200000e+03, 1.31356619e+00, 1.99300000e+03,
        1.23856498e+00, 1.99400000e+03, 1.20754717e+00, 1.99500000e+03,
        1.77116093e+00, 1.99600000e+03, 4.33711280e+00, 1.99700000e+03,
        1.16336390e+00, 1.99800000e+03, 2.28399653e+00, 1.99900000e+03,
        1.08401214e