In [3]:
from collections import defaultdict
import itertools
import math
import random
import numpy as np
import pandas as pd
import multiprocessing

In [12]:
# Read the season schedule (played and still-unplayed fixtures) and display it.
df = pd.read_csv(filepath_or_buffer="schedule.csv")
df

Unnamed: 0,date,home,score,away
0,2022-04-29,LA,2-1,NC
1,2022-04-30,POR,3-0,KC
2,2022-05-01,WAS,2-1,RGN
3,2022-05-01,HOU,0-1,SD
4,2022-05-01,ORL,0-3,NJNY
...,...,...,...,...
127,2022-10-01,NJNY,,POR
128,2022-10-01,WAS,,HOU
129,2022-10-01,LOU,,KC
130,2022-10-01,RGN,,ORL


In [13]:
# Convert "H-A" score strings to (home_goals, away_goals) integer tuples.
# Only entries that are still strings are parsed, so re-running this cell after
# the conversion has already happened is a no-op instead of a TypeError.
unparsed = df["score"].map(lambda value: isinstance(value, str)).astype(bool)
df.loc[unparsed, "score"] = (
    df.loc[unparsed, "score"]
    .str.split("-")
    .apply(lambda parts: (int(parts[0]), int(parts[1])))
)
# Matches without a score keep a missing value, so their goal columns stay NaN.
df["home_goals"] = df["score"].str[0]
df["away_goals"] = df["score"].str[1]
df

Unnamed: 0,date,home,score,away,home_goals,away_goals
0,2022-04-29,LA,"(2, 1)",NC,2.0,1.0
1,2022-04-30,POR,"(3, 0)",KC,3.0,0.0
2,2022-05-01,WAS,"(2, 1)",RGN,2.0,1.0
3,2022-05-01,HOU,"(0, 1)",SD,0.0,1.0
4,2022-05-01,ORL,"(0, 3)",NJNY,0.0,3.0
...,...,...,...,...,...,...
127,2022-10-01,NJNY,,POR,,
128,2022-10-01,WAS,,HOU,,
129,2022-10-01,LOU,,KC,,
130,2022-10-01,RGN,,ORL,,


In [70]:
full_tiebreak = False


def calc_table(matches, use_full_tiebreak=None):
    """Build the league table from the played matches in ``matches``.

    A win is worth 3 points and a draw 1 point. Teams are ordered by points,
    goal difference, wins and goals scored (all descending).

    Parameters
    ----------
    matches : pandas.DataFrame
        One row per match with columns ``home``, ``away``, ``home_goals`` and
        ``away_goals``. Rows missing any of these four values are ignored;
        other columns are not used.
    use_full_tiebreak : bool, optional
        When true, teams that are level on all four ranking keys are further
        separated by head-to-head points and head-to-head goals. ``None``
        (the default) defers to the module-level ``full_tiebreak`` flag.

    Returns
    -------
    pandas.DataFrame
        Columns ``team``, ``points``, ``goals_for``, ``goals_against``,
        ``wins``, ``goals_diff`` (plus ``tie_points`` and ``tie_goals`` with
        the full tiebreak), sorted best-first. The index is the 1-based table
        position; without the full tiebreak, teams level on every ranking key
        share the position of the first of them.
    """
    if use_full_tiebreak is None:
        use_full_tiebreak = full_tiebreak

    # Drop only matches without a result; a NaN in an unrelated column (date,
    # score text, ...) must not remove a played match from the table.
    m = matches.dropna(subset=["home", "away", "home_goals", "away_goals"]).copy()
    m["home_goals"] = pd.to_numeric(m["home_goals"])
    m["away_goals"] = pd.to_numeric(m["away_goals"])

    # Wins and points for each side of every match
    drawn = (m["home_goals"] == m["away_goals"]).astype(int)
    m["home_wins"] = (m["home_goals"] > m["away_goals"]).astype(int)
    m["away_wins"] = (m["home_goals"] < m["away_goals"]).astype(int)
    m["home_points"] = 3 * m["home_wins"] + drawn
    m["away_points"] = 3 * m["away_wins"] + drawn

    # Stack the home and away perspectives into one row per (team, match)
    per_side = []
    for side, other in (("home", "away"), ("away", "home")):
        per_side.append(
            m[
                [side, side + "_points", side + "_goals", other + "_goals", side + "_wins"]
            ].rename(
                columns={
                    side: "team",
                    side + "_points": "points",
                    side + "_goals": "goals_for",
                    other + "_goals": "goals_against",
                    side + "_wins": "wins",
                }
            )
        )
    results = pd.concat(per_side)

    # Aggregate per team. ``team`` is turned back into a regular column so both
    # branches below can address it by name.
    table = (
        results.groupby("team")
        .agg({"points": "sum", "goals_for": "sum", "goals_against": "sum", "wins": "sum"})
        .reset_index()
    )
    table["goals_diff"] = table["goals_for"] - table["goals_against"]

    rank_keys = ["points", "goals_diff", "wins", "goals_for"]

    if use_full_tiebreak:
        # Head-to-head tiebreakers only matter for teams that are level on every
        # key sorted before them, so the tie group is defined on all four keys.
        tie_points = []
        tie_goals = []
        for _, row in table.iterrows():
            team = row["team"]
            level = table["team"] != team
            for key in rank_keys:
                level = level & (table[key] == row[key])
            rivals = table.loc[level, "team"].tolist()

            if len(rivals) > 1:
                # Three or more teams level: no head-to-head is computed; a
                # random draw is used instead (not reproducible unless the
                # ``random`` module is seeded by the caller).
                tie_points.append(random.randint(0, 10))
                tie_goals.append(0)
            elif len(rivals) == 1:
                pair = [team, rivals[0]]
                head_to_head = m[m["home"].isin(pair) & m["away"].isin(pair)]
                at_home = head_to_head["home"] == team
                tie_points.append(
                    int(
                        head_to_head.loc[at_home, "home_points"].sum()
                        + head_to_head.loc[~at_home, "away_points"].sum()
                    )
                )
                tie_goals.append(
                    head_to_head.loc[at_home, "home_goals"].sum()
                    + head_to_head.loc[~at_home, "away_goals"].sum()
                )
            else:
                tie_points.append(0)
                tie_goals.append(0)
        table["tie_points"] = tie_points
        table["tie_goals"] = tie_goals

        # Sort final table
        table = table.sort_values(
            by=rank_keys + ["tie_points", "tie_goals"],
            ascending=False,
        ).reset_index(drop=True)
        table.index = table.index + 1
        return table

    # Sort final table
    table = table.sort_values(by=rank_keys, ascending=False).reset_index(drop=True)

    # Teams identical on every ranking key share the position of the team above
    # them. Reusing the already-adjusted previous position makes three-way ties
    # collapse to a single position too.
    key_rows = table[rank_keys].to_numpy()
    positions = list(range(1, len(table) + 1))
    for i in range(1, len(positions)):
        if (key_rows[i] == key_rows[i - 1]).all():
            positions[i] = positions[i - 1]
    table.index = positions

    return table

In [7]:
# Timing calculations
import time

In [10]:
# Original
# Time 100 table builds. perf_counter is a monotonic, high-resolution clock
# meant for measuring intervals, unlike time.time() which follows the wall clock.
start = time.perf_counter()
for i in range(100):
    calc_table(df.dropna())
end = time.perf_counter()

print(end - start)

3.3795735836029053


In [72]:
# Remove home goals
# Time 100 table builds. perf_counter is a monotonic, high-resolution clock
# meant for measuring intervals, unlike time.time() which follows the wall clock.
start = time.perf_counter()
for i in range(100):
    calc_table(df.dropna())
end = time.perf_counter()

print(end - start)

2.7381861209869385


In [71]:
# Current table
# Build the table once so the CSV on disk and the frame displayed below are the
# same result (two separate calls can disagree when calc_table draws random
# tiebreak values).
current_table = calc_table(df.dropna())
current_table.to_csv('table.csv')
current_table

Unnamed: 0,team,points,goals_for,goals_against,wins,goals_diff
1,POR,38,46.0,21.0,10,25.0
2,RGN,37,29.0,19.0,10,10.0
3,KC,36,29.0,28.0,10,1.0
4,SD,35,32.0,21.0,10,11.0
5,HOU,33,33.0,26.0,9,7.0
6,NC,31,46.0,33.0,9,13.0
7,CHI,30,32.0,28.0,8,4.0
8,LA,29,23.0,25.0,8,-2.0
9,ORL,22,22.0,42.0,5,-20.0
10,LOU,20,22.0,35.0,4,-13.0


In [37]:
# Count how many matches ended with each scoreline; matches without a score
# are left out because value_counts skips missing values.
df["score"].value_counts(dropna=True)

(2, 2)    16
(1, 0)    15
(0, 1)    13
(1, 1)    11
(1, 2)    10
(2, 1)     8
(0, 0)     7
(3, 0)     6
(0, 2)     5
(0, 3)     5
(2, 0)     4
(2, 3)     3
(4, 0)     3
(1, 3)     2
(3, 1)     2
(2, 4)     2
(0, 4)     2
(4, 1)     2
(5, 0)     2
(3, 4)     2
(3, 3)     2
(3, 2)     1
(5, 1)     1
(4, 3)     1
(6, 0)     1
Name: score, dtype: int64

In [38]:
# Candidate (home_goals, away_goals) results tried for every unplayed match:
# two draws, a one-goal win for each side and a four-goal win for each side.
scores = [(0, 0), (2, 2), (1, 0), (0, 1), (4, 0), (0, 4)]

In [39]:
# Number of fixtures that do not have a score yet
remaining = df["score"].isna().sum()
remaining

6

In [40]:
# Calculate score permutations
# Tally of finishing positions: one row per team, one column per table position
# (1 = top), every cell starting at integer 0. Home teams come first in order of
# appearance; teams that only ever appear as the away side are appended so the
# tally below cannot hit a missing row.
teams = pd.concat([df["home"], df["away"]]).unique()
# NOTE(review): `empty` is bound to the same object as `ranks`, so it stops
# being empty once the tally runs - confirm whether a separate copy was intended.
ranks = empty = pd.DataFrame(0, index=teams, columns=range(1, len(teams) + 1))

def process_combination(comb):
    """Return the finishing order for one assignment of results to unplayed games.

    Parameters
    ----------
    comb : sequence of (home_goals, away_goals) tuples
        One result per match in ``df`` whose ``score`` is missing, in row order.

    Returns
    -------
    pandas.Series
        The ``team`` column of the table produced by ``calc_table``.
    """
    df2 = df.copy()
    pending = df2["score"].isnull()
    outcomes = list(comb)

    # Fill the goal columns as well as the score: calc_table drops rows with
    # missing values, so filling only ``score`` would leave the simulated
    # matches out of the table entirely.
    df2.loc[pending, "score"] = pd.Series(
        outcomes, index=df2.index[pending], dtype=object
    )
    df2.loc[pending, "home_goals"] = [result[0] for result in outcomes]
    df2.loc[pending, "away_goals"] = [result[1] for result in outcomes]

    rank = calc_table(df2)
    return rank["team"]

# Evaluate every combination of results for the remaining games in worker
# processes and count how often each team lands in each table position.
# The context manager shuts the pool down once all results have been consumed,
# so repeated runs of this cell do not leave worker processes behind.
with multiprocessing.Pool(16) as p:
    results = p.imap_unordered(
        process_combination, itertools.product(scores, repeat=remaining)
    )
    for result in results:
        # Position is the 1-based place of the team in the returned order
        for i, team in enumerate(result):
            ranks.at[team, i + 1] += 1

ranks

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
LA,0,0,0,0,0,5184,10368,31104,0,0,0,0
POR,32832,12096,1728,0,0,0,0,0,0,0,0,0
WAS,0,0,0,0,0,0,0,0,3456,13824,29376,0
HOU,0,0,2592,7776,23328,11664,1296,0,0,0,0,0
ORL,0,0,0,0,0,0,0,0,32832,12096,1728,0
CHI,0,0,0,0,2592,9072,19440,15552,0,0,0,0
RGN,10368,19008,13824,3456,0,0,0,0,0,0,0,0
KC,3456,8640,17280,13824,3456,0,0,0,0,0,0,0
SD,0,6912,11232,21600,6912,0,0,0,0,0,0,0
LOU,0,0,0,0,0,0,0,0,10368,20736,15552,0


In [41]:
# Save the position tally to disk
ranks.to_csv(path_or_buf="possibilities.csv")