In [118]:
from collections import defaultdict
import itertools
import math
import random
import numpy as np
import pandas as pd
import multiprocessing

In [119]:
# Read the season schedule (columns: date, home, score, away)
df = pd.read_csv(filepath_or_buffer="schedule.csv")
df

Unnamed: 0,date,home,score,away
0,2022-04-29,LA,2-1,NC
1,2022-04-30,POR,3-0,KC
2,2022-05-01,WAS,2-1,RGN
3,2022-05-01,HOU,0-1,SD
4,2022-05-01,ORL,0-3,NJNY
...,...,...,...,...
127,2022-10-01,WAS,,HOU
128,2022-10-01,LOU,,KC
129,2022-09-30,SD,,NC
130,2022-10-02,CHI,,LA


In [120]:
# Convert "H-A" score strings to (home_goals, away_goals) integer tuples.
# Only entries that are still strings are converted, so games without a score
# keep their null value and re-running this cell is a no-op rather than a
# TypeError on the already-converted tuples.
df["score"] = df["score"].astype(object)  # object dtype so the column can hold tuples
is_text = df["score"].map(lambda s: isinstance(s, str))
if is_text.any():
    df.loc[is_text, "score"] = (
        df.loc[is_text, "score"]
        .str.split("-")
        .apply(lambda x: (int(x[0]), int(x[1])))
    )
df

Unnamed: 0,date,home,score,away
0,2022-04-29,LA,"(2, 1)",NC
1,2022-04-30,POR,"(3, 0)",KC
2,2022-05-01,WAS,"(2, 1)",RGN
3,2022-05-01,HOU,"(0, 1)",SD
4,2022-05-01,ORL,"(0, 3)",NJNY
...,...,...,...,...
127,2022-10-01,WAS,,HOU
128,2022-10-01,LOU,,KC
129,2022-09-30,SD,,NC
130,2022-10-02,CHI,,LA


In [121]:
def calc_table(matches):
    """Build the league table from a set of matches.

    Parameters
    ----------
    matches : pandas.DataFrame
        One row per game with columns ``home``, ``away`` and ``score``.
        ``score`` holds a ``(home_goals, away_goals)`` pair for played games
        and is null for games without a result; null-score games contribute
        no points, wins or goals.

    Returns
    -------
    pandas.DataFrame
        One row per team found in ``matches`` with columns ``team``,
        ``points``, ``goals_for``, ``goals_against``, ``wins``,
        ``goals_diff``, ``tie_points`` and ``tie_goals``, sorted best-first
        and indexed from 1.

    Notes
    -----
    Teams are ordered by points, goal difference, wins, goals scored,
    head-to-head points and head-to-head goals. When three or more teams are
    level on the first four criteria, a random ``tie_points`` value is drawn
    for each of them, so the order among those teams is not deterministic
    unless the ``random`` module has been seeded.
    """
    m = matches.copy()

    # Goals per side; games without a score count as 0-0 but are excluded
    # from the draw mask below so they award no points.
    played = m["score"].notna()
    m["home_goals"] = [s[0] if ok else 0 for s, ok in zip(m["score"], played)]
    m["away_goals"] = [s[1] if ok else 0 for s, ok in zip(m["score"], played)]

    home_win = m["home_goals"] > m["away_goals"]
    away_win = m["home_goals"] < m["away_goals"]
    draw = played & (m["home_goals"] == m["away_goals"])

    # 3 points for a win, 1 for a draw
    m["home_points"] = np.where(home_win, 3, 0) + np.where(draw, 1, 0)
    m["away_points"] = np.where(away_win, 3, 0) + np.where(draw, 1, 0)
    m["home_wins"] = np.where(home_win, 1, 0)
    m["away_wins"] = np.where(away_win, 1, 0)

    # Stack the home and away views of every game into one row per team/game
    home_view = m[
        ["home", "home_points", "home_goals", "away_goals", "home_wins"]
    ].rename(
        columns={
            "home": "team",
            "home_points": "points",
            "home_goals": "goals_for",
            "away_goals": "goals_against",
            "home_wins": "wins",
        }
    )
    away_view = m[
        ["away", "away_points", "away_goals", "home_goals", "away_wins"]
    ].rename(
        columns={
            "away": "team",
            "away_points": "points",
            "away_goals": "goals_for",
            "home_goals": "goals_against",
            "away_wins": "wins",
        }
    )
    results = pd.concat([home_view, away_view])

    # Calculate the table
    table = results.groupby("team").agg(
        {"points": "sum", "goals_for": "sum", "goals_against": "sum", "wins": "sum"}
    )
    table["goals_diff"] = table["goals_for"] - table["goals_against"]
    table = table.reset_index()

    # Head-to-head tiebreakers. They only matter for teams that are level on
    # every criterion sorted before them, so the tie group is defined on all
    # four: points, goal differential, wins and goals scored.
    level_cols = ["points", "goals_diff", "wins", "goals_for"]
    table["tie_points"] = 0
    table["tie_goals"] = 0
    for index, row in table.iterrows():
        team = row["team"]
        level = table["team"] != team
        for col in level_cols:
            level &= table[col] == row[col]
        rivals = table.loc[level, "team"].tolist()

        if len(rivals) > 1:
            # Three or more teams level: fall back to a random draw
            table.at[index, "tie_points"] = random.randint(0, 10)
        elif len(rivals) == 1:
            pair = [team, rivals[0]]
            h2h = m[m["home"].isin(pair) & m["away"].isin(pair)]
            at_home = h2h["home"] == team
            table.at[index, "tie_points"] = (
                h2h.loc[at_home, "home_points"].sum()
                + h2h.loc[~at_home, "away_points"].sum()
            )
            table.at[index, "tie_goals"] = (
                h2h.loc[at_home, "home_goals"].sum()
                + h2h.loc[~at_home, "away_goals"].sum()
            )

    # Sort final table and index it by finishing position (1 = top)
    table = table.sort_values(
        by=["points", "goals_diff", "wins", "goals_for", "tie_points", "tie_goals"],
        ascending=False,
    ).reset_index(drop=True)
    table.index = table.index + 1

    return table

In [122]:
# Current table
# Build the table once: calc_table can draw random tiebreak values, so calling
# it separately for the CSV and for the display could yield different orders.
current_table = calc_table(df.dropna())
current_table.to_csv('table.csv')
current_table

Unnamed: 0,team,points,goals_for,goals_against,wins,goals_diff,tie_points,tie_goals
1,KC,33,25,27,9,-2,0,0
2,POR,32,39,20,8,19,0,0
3,SD,32,29,19,9,10,0,0
4,HOU,31,32,24,8,8,4,4
5,CHI,31,32,24,8,8,1,1
6,RGN,29,24,18,7,6,0,0
7,LA,27,20,20,7,0,0,0
8,NC,26,39,31,7,8,0,0
9,ORL,22,20,35,5,-15,0,0
10,WAS,17,22,26,2,-4,0,0


In [123]:
# Enumerate every (home_goals, away_goals) pair in which each side scores
# fewer than `lim` goals
lim = 2
scores = [(home, away) for home in range(lim) for away in range(lim)]
scores

[(0, 0), (0, 1), (1, 0), (1, 1)]

In [124]:
# Number of games that do not have a score yet
remaining = df["score"].isna().sum()
remaining

16

In [8]:
# Calculate score permutations
# Zero-filled tally with one row per home team and one column per finishing
# position (1..number of teams). Built directly as integers instead of
# filling an all-NaN object frame, which relies on fillna downcasting.
teams = df["home"].unique()
empty = pd.DataFrame(0, index=teams, columns=range(1, len(teams) + 1))
ranks = empty.copy()

def process_combination(comb):
    """Rank the teams for one hypothetical set of results.

    Parameters
    ----------
    comb : sequence of (home_goals, away_goals) tuples
        One score for each row of ``df`` whose ``score`` is null, in row order.

    Returns
    -------
    pandas.DataFrame
        A copy of ``empty`` with 1 added at (team, finishing position) for
        every team in the resulting table.
    """
    season = df.copy()
    unplayed = season["score"].isnull()
    # Wrap the scores in an object Series aligned on the unplayed rows so each
    # row receives exactly one tuple; a bare tuple of tuples is 2-D array-like
    # and leaves it to pandas to guess how to spread it over the rows.
    season.loc[unplayed, "score"] = pd.Series(
        list(comb), index=season.index[unplayed], dtype=object
    )
    standings = calc_table(season)

    tally = empty.copy()
    # calc_table indexes its result by finishing position (starting at 1)
    for position, team in standings["team"].items():
        tally.at[team, position] += 1
    return tally

# Score every scenario in parallel and add up the per-position counts.
# NOTE(review): combinations_with_replacement yields each multiset of scores
# once, in sorted order, so it does not enumerate every per-game assignment.
# itertools.product(scores, repeat=remaining) would, at the cost of
# len(scores) ** remaining scenarios. Confirm this reduction is intended
# before reading the counts as relative likelihoods.
scenarios = itertools.combinations_with_replacement(scores, remaining)

# The context manager shuts the worker pool down once the loop has consumed
# every result, instead of leaving the worker processes alive.
with multiprocessing.Pool(16) as p:
    results = p.imap(process_combination, scenarios)
    for result in results:
        ranks = ranks.add(result)

ranks

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
LA,0,0,0,40,10,113,456,350,0,0,0,0
POR,336,262,112,119,116,24,0,0,0,0,0,0
WAS,0,0,0,0,0,0,0,0,221,671,77,0
HOU,21,66,221,254,166,226,15,0,0,0,0,0
ORL,0,0,0,0,0,0,0,0,736,200,33,0
CHI,65,221,120,86,317,115,35,10,0,0,0,0
RGN,6,40,130,139,75,405,132,42,0,0,0,0
KC,217,290,95,159,159,49,0,0,0,0,0,0
SD,324,90,288,146,100,21,0,0,0,0,0,0
LOU,0,0,0,0,0,0,0,0,12,95,727,135


In [126]:
# Save the finishing-position counts; the file name carries the goal limit
output_path = 'possibilities-' + str(lim) + '.csv'
ranks.to_csv(output_path)