In [1]:
from collections import defaultdict
import itertools
import math
import random
import numpy as np
import pandas as pd
import multiprocessing

In [2]:
# Load the season schedule: one row per fixture, with the score left empty
# for games that have not been played yet.
df = pd.read_csv(filepath_or_buffer="schedule.csv")
df

Unnamed: 0,date,home,score,away
0,2022-04-29,LA,2-1,NC
1,2022-04-30,POR,3-0,KC
2,2022-05-01,WAS,2-1,RGN
3,2022-05-01,HOU,0-1,SD
4,2022-05-01,ORL,0-3,NJNY
...,...,...,...,...
127,2022-10-01,WAS,,HOU
128,2022-10-01,LOU,,KC
129,2022-09-30,SD,,NC
130,2022-10-02,CHI,,LA


In [3]:
# Convert scores from "H-A" text into (home_goals, away_goals) integer tuples.
# Only string entries are parsed: unplayed games (NaN) and values that are
# already tuples pass through untouched, so re-running this cell is harmless.
def _parse_score(value):
    """Return ``(home, away)`` as ints for an "H-A" string; pass anything else through."""
    if isinstance(value, str):
        parts = value.split("-")
        return (int(parts[0]), int(parts[1]))
    return value


# Cast to object first so the column can hold tuples whatever dtype read_csv inferred.
df["score"] = df["score"].astype(object).map(_parse_score)
df

Unnamed: 0,date,home,score,away
0,2022-04-29,LA,"(2, 1)",NC
1,2022-04-30,POR,"(3, 0)",KC
2,2022-05-01,WAS,"(2, 1)",RGN
3,2022-05-01,HOU,"(0, 1)",SD
4,2022-05-01,ORL,"(0, 3)",NJNY
...,...,...,...,...
127,2022-10-01,WAS,,HOU
128,2022-10-01,LOU,,KC
129,2022-09-30,SD,,NC
130,2022-10-02,CHI,,LA


In [4]:
def calc_table(matches):
    """Build the league table from a frame of matches.

    Parameters
    ----------
    matches : pandas.DataFrame
        Must contain the columns ``home``, ``away`` and ``score``. ``score``
        holds ``(home_goals, away_goals)`` tuples; a missing score marks an
        unplayed match, which contributes no points, wins or goals.

    Returns
    -------
    pandas.DataFrame
        One row per team with the columns ``team``, ``points``, ``goals_for``,
        ``goals_against``, ``wins``, ``goals_diff``, ``tie_points`` and
        ``tie_goals``, sorted best to worst and indexed from 1.

    Notes
    -----
    Ranking order: points, goal differential, wins, goals scored, head-to-head
    points, head-to-head goals. Head-to-head values are only computed when
    exactly two teams are level on *all four* preceding criteria. When three
    or more teams are level, each gets a random ``tie_points`` value from
    ``random.randint(0, 10)``, so the result is not deterministic in that
    case (and two teams may still draw the same number).
    """
    m = matches.copy()

    # Goals per side; unplayed matches stay NaN so every comparison below is False.
    m["home_goals"] = pd.to_numeric(m["score"].str[0])
    m["away_goals"] = pd.to_numeric(m["score"].str[1])

    home_win = m["home_goals"] > m["away_goals"]
    away_win = m["home_goals"] < m["away_goals"]
    # Explicit "played" guard: a draw needs a real score on the board.
    draw = m["home_goals"].notna() & (m["home_goals"] == m["away_goals"])

    # 3 points for a win, 1 for a draw
    m["home_points"] = 3 * home_win.astype(int) + draw.astype(int)
    m["away_points"] = 3 * away_win.astype(int) + draw.astype(int)
    m["home_wins"] = home_win.astype(int)
    m["away_wins"] = away_win.astype(int)

    # Stack one row per (team, match) from both the home and the away perspective.
    results = pd.concat(
        [
            pd.DataFrame(
                {
                    "team": m[side],
                    "points": m[f"{side}_points"],
                    "goals_for": m[f"{side}_goals"].fillna(0).astype(int),
                    "goals_against": m[f"{other}_goals"].fillna(0).astype(int),
                    "wins": m[f"{side}_wins"],
                }
            )
            for side, other in (("home", "away"), ("away", "home"))
        ]
    )

    # Calculate the table
    table = results.groupby(["team"]).agg(
        {"points": "sum", "goals_for": "sum", "goals_against": "sum", "wins": "sum"}
    )
    table["goals_diff"] = table["goals_for"] - table["goals_against"]
    table = table.reset_index()

    # Head-to-head tiebreakers. They rank *after* goals scored in the sort
    # below, so a team only counts as tied when it is level on points, goal
    # differential, wins AND goals scored.
    table["tie_points"] = 0
    table["tie_goals"] = 0
    for index, row in table.iterrows():
        team = row["team"]
        tied = table[
            (table["team"] != team)
            & (table["points"] == row["points"])
            & (table["goals_diff"] == row["goals_diff"])
            & (table["wins"] == row["wins"])
            & (table["goals_for"] == row["goals_for"])
        ]
        if len(tied.index) > 1:
            # Three or more teams level: fall back to a random draw.
            table.at[index, "tie_points"] = random.randint(0, 10)
        elif len(tied.index) == 1:
            opp = tied.iloc[0]["team"]
            pair = [team, opp]
            h2h = m[m["home"].isin(pair) & m["away"].isin(pair)]
            at_home = h2h["home"] == team
            # Sums skip NaN, so unplayed head-to-head games add nothing.
            h2h_points = (
                h2h.loc[at_home, "home_points"].sum()
                + h2h.loc[~at_home, "away_points"].sum()
            )
            h2h_goals = (
                h2h.loc[at_home, "home_goals"].sum()
                + h2h.loc[~at_home, "away_goals"].sum()
            )
            table.at[index, "tie_points"] = int(h2h_points)
            table.at[index, "tie_goals"] = int(h2h_goals)

    # Sort final table and number positions from 1
    table = table.sort_values(
        by=["points", "goals_diff", "wins", "goals_for", "tie_points", "tie_goals"],
        ascending=False,
    ).reset_index(drop=True)
    table.index = table.index + 1

    return table

In [5]:
# Current table, from played games only.
# Computed once: calc_table breaks multi-way ties randomly, so calling it
# twice could write one ordering to disk and display another.
current_table = calc_table(df.dropna())
current_table.to_csv('table.csv')
current_table

Unnamed: 0,team,points,goals_for,goals_against,wins,goals_diff,tie_points,tie_goals
1,RGN,37,29,19,10,10,0,0
2,POR,35,43,21,9,22,0,0
3,SD,34,30,19,10,11,0,0
4,HOU,33,33,26,9,7,0,0
5,KC,33,26,28,9,-2,0,0
6,NC,31,46,33,9,13,0,0
7,CHI,30,32,25,8,7,0,0
8,LA,29,22,22,8,0,0,0
9,ORL,21,20,40,5,-20,0,0
10,WAS,19,25,28,3,-3,0,0


In [6]:
# Candidate (home_goals, away_goals) results tried for every unplayed game:
# a draw, a home win and an away win.
# Wider scorelines such as (3, 3), (3, 0) and (0, 3) are currently left out.
scores = [
    (0, 0),
    (1, 0),
    (0, 1),
]

In [7]:
# Number of fixtures that still have no recorded score
remaining = df["score"].isna().sum()
remaining

10

In [8]:
# Enumerate every combination of results for the unplayed games and count,
# per team, how often it finishes in each table position.
teams = df["home"].unique()
# Integer counter built directly: rows are teams, columns are positions 1..N.
# (`empty` is kept as an alias of the same frame, as in the original cell.)
ranks = empty = pd.DataFrame(0, index=teams, columns=range(1, len(teams) + 1))


def process_combination(comb):
    """Fill the unplayed games with the scores in `comb` and return the ranked teams.

    `comb` is a sequence of (home_goals, away_goals) tuples, one per row of
    `df` whose score is missing, in row order. Returns the ``team`` column of
    the table produced by `calc_table`, ordered from first to last place.
    """
    df2 = df.copy()
    unplayed = df2["score"].isnull()
    # Assign through an index-aligned object Series so each tuple lands in one
    # cell instead of being interpreted as a 2-D block of values.
    df2.loc[unplayed, "score"] = pd.Series(
        list(comb), index=df2.index[unplayed], dtype=object
    )
    rank = calc_table(df2)
    return rank["team"]


# The context manager shuts the worker processes down once the tally is done
# (or if a worker raises); chunking keeps inter-process overhead low across
# the len(scores) ** remaining combinations.
with multiprocessing.Pool(16) as p:
    results = p.imap_unordered(
        process_combination,
        itertools.product(scores, repeat=remaining),
        chunksize=256,
    )
    for result in results:
        for i, team in enumerate(result):
            ranks.at[team, i + 1] += 1

ranks

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
LA,0,0,216,2106,3807,8370,11178,33372,0,0,0,0
POR,22545,17766,11799,5238,1431,270,0,0,0,0,0,0
WAS,0,0,0,0,0,0,0,0,16200,33210,9639,0
HOU,0,324,4536,12933,18009,17172,5859,216,0,0,0,0
ORL,0,0,0,0,0,0,0,0,40419,16119,2511,0
CHI,0,486,2187,5130,9693,12069,12285,17199,0,0,0,0
RGN,23328,22356,12150,1215,0,0,0,0,0,0,0,0
KC,2592,5076,10692,16524,11772,8910,3240,243,0,0,0,0
SD,10584,13041,16335,10476,6075,2214,324,0,0,0,0,0
LOU,0,0,0,0,0,0,0,0,2430,9720,46899,0


In [9]:
# Persist the finishing-position counts to disk
ranks.to_csv(path_or_buf='possibilities.csv')