In [1]:
from collections import defaultdict
import itertools
import numpy as np
import pandas as pd

In [110]:
table = pd.DataFrame(
    [
        ["Colombia", "A", 3, 4, 3],
        ["France", "A", 3, 4, 4],
        ["Canada", "A", 0, 4, 2],
        ["New Zealand", "A", 0, 1, 4],
        ["United States", "B", 6, 7, 1],
        ["Germany", "B", 3, 4, 4],
        ["Australia", "B", 3, 6, 8],
        ["Zambia", "B", 0, 5, 9],
        ["Spain", "C", 6, 3, 1],
        ["Japan", "C", 3, 3, 3],
        ["Brazil", "C", 3, 2, 2],
        ["Nigeria", "C", 0, 0, 2],
    ],
    columns=["team", "group", "points", "gf", "ga"]
)
table = table.set_index("team")
table

Unnamed: 0_level_0,group,points,gf,ga
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Colombia,A,3,4,3
France,A,3,4,4
Canada,A,0,4,2
New Zealand,A,0,1,4
United States,B,6,7,1
Germany,B,3,4,4
Australia,B,3,6,8
Zambia,B,0,5,9
Spain,C,6,3,1
Japan,C,3,3,3


In [111]:
schedule = pd.DataFrame(
    [
        ["Japan", "Nigeria"],
        ["Brazil", "Spain"],
        ["Zambia", "Germany"],
        ["Australia", "United States"],
        ["New Zealand", "France"],
        ["Colombia", "Canada"],
    ],
    columns=["home", "away"],
)
schedule

Unnamed: 0,home,away
0,Japan,Nigeria
1,Brazil,Spain
2,Zambia,Germany
3,Australia,United States
4,New Zealand,France
5,Colombia,Canada


In [112]:
# Return a list of teams that advance based on the table
def calc_advance(t):
    table = t.copy()
    table["gd"] = table["gf"] - table["ga"]
    table = table.sort_values(["points", "gd", "gf"], ascending=False)

    res = []
    for g in ["A", "B", "C"]:
        res = res + list(table[table["group"] == g].index.values[0:2])
    
    # Remove all 1 and 2 seeds
    table = table[~table.index.isin(res)]
    
    # Add the top 3rd seeded team
    third_group = table.iloc[0]["group"]
    res = res + list(table.index.values[0:1])
    
    # Add the next best 3rd seeded team from other groups
    table = table[~(table["group"] == third_group)]
    res = res + list(table.index.values[0:1])
    
    return res

In [113]:
def update_table(scores):
    results = schedule.copy()
    
    results["home_score"] = list(zip(*scores))[0]
    results["away_score"] = list(zip(*scores))[1]
    
    new_table = table.copy()
    for i, row in results.iterrows():
        new_table.at[row["home"], "gf"] += row["home_score"] 
        new_table.at[row["away"], "gf"] += row["away_score"] 
        new_table.at[row["home"], "ga"] += row["away_score"] 
        new_table.at[row["away"], "ga"] += row["home_score"] 
        
        if row["home_score"] == row["away_score"]:
            new_table.at[row["home"], "points"] += 1
            new_table.at[row["away"], "points"] += 1
        elif row["home_score"] > row["away_score"]:
            new_table.at[row["home"], "points"] += 3
        else:
            new_table.at[row["away"], "points"] += 3
    return new_table

In [125]:
# List of all possible scores to assign to games
val = 8
scores = list([
    (0, 0),
    (val, 0),
    (0, val),
])

In [126]:
# calculate who advances based on all the results
poss = pd.DataFrame(itertools.product(scores, repeat=6))
poss["advance"] = poss.apply(
    lambda row: calc_advance(update_table(list(row[0:6]))), axis=1
)
poss

Unnamed: 0,0,1,2,3,4,5,advance
0,"(0, 0)","(0, 0)","(0, 0)","(0, 0)","(0, 0)","(0, 0)","[Colombia, France, United States, Germany, Spa..."
1,"(0, 0)","(0, 0)","(0, 0)","(0, 0)","(0, 0)","(8, 0)","[Colombia, France, United States, Germany, Spa..."
2,"(0, 0)","(0, 0)","(0, 0)","(0, 0)","(0, 0)","(0, 8)","[France, Canada, United States, Germany, Spain..."
3,"(0, 0)","(0, 0)","(0, 0)","(0, 0)","(8, 0)","(0, 0)","[Colombia, New Zealand, United States, Germany..."
4,"(0, 0)","(0, 0)","(0, 0)","(0, 0)","(8, 0)","(8, 0)","[Colombia, New Zealand, United States, Germany..."
...,...,...,...,...,...,...,...
724,"(0, 8)","(0, 8)","(0, 8)","(0, 8)","(8, 0)","(8, 0)","[Colombia, New Zealand, United States, Germany..."
725,"(0, 8)","(0, 8)","(0, 8)","(0, 8)","(8, 0)","(0, 8)","[Canada, New Zealand, United States, Germany, ..."
726,"(0, 8)","(0, 8)","(0, 8)","(0, 8)","(0, 8)","(0, 0)","[France, Colombia, United States, Germany, Spa..."
727,"(0, 8)","(0, 8)","(0, 8)","(0, 8)","(0, 8)","(8, 0)","[Colombia, France, United States, Germany, Spa..."


In [127]:
# Split into country specific columns
for country in table.index:
    poss[country] = poss["advance"].str.contains(country, regex=False)
poss

Unnamed: 0,0,1,2,3,4,5,advance,Colombia,France,Canada,New Zealand,United States,Germany,Australia,Zambia,Spain,Japan,Brazil,Nigeria
0,"(0, 0)","(0, 0)","(0, 0)","(0, 0)","(0, 0)","(0, 0)","[Colombia, France, United States, Germany, Spa...",True,True,False,False,True,True,True,False,True,True,True,False
1,"(0, 0)","(0, 0)","(0, 0)","(0, 0)","(0, 0)","(8, 0)","[Colombia, France, United States, Germany, Spa...",True,True,False,False,True,True,True,False,True,True,True,False
2,"(0, 0)","(0, 0)","(0, 0)","(0, 0)","(0, 0)","(0, 8)","[France, Canada, United States, Germany, Spain...",False,True,True,False,True,True,True,False,True,True,True,False
3,"(0, 0)","(0, 0)","(0, 0)","(0, 0)","(8, 0)","(0, 0)","[Colombia, New Zealand, United States, Germany...",True,False,False,True,True,True,True,False,True,True,True,False
4,"(0, 0)","(0, 0)","(0, 0)","(0, 0)","(8, 0)","(8, 0)","[Colombia, New Zealand, United States, Germany...",True,False,False,True,True,True,True,False,True,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
724,"(0, 8)","(0, 8)","(0, 8)","(0, 8)","(8, 0)","(8, 0)","[Colombia, New Zealand, United States, Germany...",True,True,False,True,True,True,False,False,True,True,False,True
725,"(0, 8)","(0, 8)","(0, 8)","(0, 8)","(8, 0)","(0, 8)","[Canada, New Zealand, United States, Germany, ...",True,False,True,True,True,True,False,False,True,True,False,True
726,"(0, 8)","(0, 8)","(0, 8)","(0, 8)","(0, 8)","(0, 0)","[France, Colombia, United States, Germany, Spa...",True,True,False,False,True,True,True,False,True,True,False,True
727,"(0, 8)","(0, 8)","(0, 8)","(0, 8)","(0, 8)","(8, 0)","[Colombia, France, United States, Germany, Spa...",True,True,False,False,True,True,True,False,True,True,False,True


In [128]:
# Write to file
poss = poss.drop(columns=["advance"])
poss.columns = list(schedule["home"] + "-" + schedule["away"]) + list(poss.columns[6:])
poss.to_csv("olympics_groups_" + str(val) + ".csv")

In [129]:
# Calculate how many ways each team advances
poss[poss.columns[7:]].sum()

France           576
Canada           243
New Zealand      243
United States    729
Germany          537
Australia        558
Zambia           243
Spain            729
Japan            532
Brazil           578
Nigeria          243
dtype: int64