In [1]:
import pandas as pd
from collections import defaultdict

In [2]:
# Load the appearance log (one row per player stint, with "enter" and "exit"
# values) and derive each stint's length as exit minus enter.
df = pd.read_csv("minutes.csv").assign(
    minutes=lambda stints: stints["exit"] - stints["enter"]
)
df

Unnamed: 0,date,player,enter,exit,minutes
0,2021-09-16,Adriana Franch,0,90,90
1,2021-09-16,Crystal Dunn,0,63,63
2,2021-09-16,Tierna Davidson,0,90,90
3,2021-09-16,Becky Sauerbrunn,0,72,72
4,2021-09-16,Kelley O'Hara,0,90,90
...,...,...,...,...,...
508,2023-04-11,Julie Ertz,46,90,44
509,2023-04-11,Ashley Hatch,46,90,44
510,2023-04-11,Casey Krueger,46,90,44
511,2023-04-11,Kristie Mewis,60,90,30


In [3]:
# Assert that all matches have 990 minutes.
# The "minutes" column is selected explicitly before aggregating: the first
# positional parameter of GroupBy.sum is `numeric_only`, so `.sum("minutes")`
# does not pick a column -- it merely passes a truthy flag.
match_minutes = df.groupby("date")["minutes"].sum()
bad_matches = match_minutes[match_minutes != 990]
assert bad_matches.empty, f"matches not totalling 990 minutes:\n{bad_matches}"

# Number of offending matches (0 when the check above passes).
len(bad_matches)

0

In [4]:
# Total minutes per player, smallest total first, as a one-column frame
# indexed by player (later cells look totals up via df_mins.loc[player, "minutes"]).
# Selecting [["minutes"]] before .sum() keeps only that column, so no
# reset_index/set_index round-trip is needed and nothing relies on the string
# "minutes" being interpreted as GroupBy.sum's `numeric_only` flag.
df_mins = df.groupby("player")[["minutes"]].sum().sort_values("minutes")
df_mins

Unnamed: 0_level_0,minutes
player,Unnamed: 1_level_1
Bethany Balcer,12
Imani Dorsey,13
Morgan Weaver,36
Sam Mewis,44
Morgan Gautrat,46
Julie Ertz,67
Jaelin Howell,83
Aubrey Kingsbury,90
Tobin Heath,101
Alyssa Thompson,114


In [5]:
def overlap_minutes(enter_a, exit_a, enter_b, exit_b):
    """Return how long two on-field stints coincide, or 0 if they never do."""
    return max(min(exit_a, exit_b) - max(enter_a, enter_b), 0)


# Not needed by the computation below; kept so that any other cell relying on
# this (date, player)-indexed view keeps working.
df_index = df.set_index(["date", "player"])

players = df["player"].unique()
total_minutes = df_mins["minutes"].to_dict()

# Seed every ordered pair up front, in first-appearance order, so that players
# who never shared the field still show up with 0. The diagonal (p, p) holds
# the player's own total minutes.
pairs = defaultdict(int)
for p1 in players:
    for p2 in players:
        pairs[(p1, p2)] = total_minutes[p1] if p1 == p2 else 0

# One pass per match: compare each pair of stints once and credit the shared
# time to both orderings. This avoids rescanning the whole index for every
# (player, player, date) combination, and it also copes with a player having
# more than one stint on the same date.
for _, match in df.groupby("date", sort=False):
    stints = list(
        match[["player", "enter", "exit"]].itertuples(index=False, name=None)
    )
    for i, (p1, enter1, exit1) in enumerate(stints):
        for p2, enter2, exit2 in stints[i + 1 :]:
            if p1 == p2:
                # Two stints of the same player; the diagonal is already set.
                continue
            shared = overlap_minutes(enter1, exit1, enter2, exit2)
            pairs[(p1, p2)] += shared
            pairs[(p2, p1)] += shared

pairs

defaultdict(int,
            {('Adriana Franch', 'Adriana Franch'): 180,
             ('Adriana Franch', 'Crystal Dunn'): 63,
             ('Adriana Franch', 'Tierna Davidson'): 180,
             ('Adriana Franch', 'Becky Sauerbrunn'): 162,
             ('Adriana Franch', "Kelley O'Hara"): 180,
             ('Adriana Franch', 'Kristie Mewis'): 103,
             ('Adriana Franch', 'Andi Sullivan'): 90,
             ('Adriana Franch', 'Rose Lavelle'): 149,
             ('Adriana Franch', 'Mallory Swanson'): 103,
             ('Adriana Franch', 'Carli Lloyd'): 117,
             ('Adriana Franch', 'Lynn Williams'): 74,
             ('Adriana Franch', 'Catarina Macario'): 77,
             ('Adriana Franch', 'Abby Dahlkemper'): 18,
             ('Adriana Franch', 'Alex Morgan'): 94,
             ('Adriana Franch', 'Tobin Heath'): 62,
             ('Adriana Franch', 'Sophia Smith'): 75,
             ('Adriana Franch', 'Emily Sonnett'): 27,
             ('Adriana Franch', 'Jane Campbell'): 0,


In [6]:
# Flatten the {(p1, p2): minutes} mapping into one row per ordered pair,
# write it to disk, then display the resulting table.
pair_rows = [(first, second, shared) for (first, second), shared in pairs.items()]
df_pairs = pd.DataFrame(pair_rows, columns=["p1", "p2", "minutes"])
df_pairs.to_csv("pairs.csv", index=False)
df_pairs

Unnamed: 0,p1,p2,minutes
0,Adriana Franch,Adriana Franch,180
1,Adriana Franch,Crystal Dunn,63
2,Adriana Franch,Tierna Davidson,180
3,Adriana Franch,Becky Sauerbrunn,162
4,Adriana Franch,Kelley O'Hara,180
...,...,...,...
1931,Julie Ertz,Carson Pickett,0
1932,Julie Ertz,Hailie Mace,0
1933,Julie Ertz,Sam Coffey,0
1934,Julie Ertz,Alyssa Thompson,44


In [7]:
# Row(s) holding the largest minutes value; self-pairs (p1 == p2) are included.
top_minutes = df_pairs["minutes"].max()
df_pairs.loc[df_pairs["minutes"].eq(top_minutes)]

Unnamed: 0,p1,p2,minutes
270,Andi Sullivan,Andi Sullivan,1970


In [13]:
# Pairs of two different players with more than 1300 minutes recorded together.
# Each pairing shows up twice, once per ordering of (p1, p2).
over_threshold = df_pairs["minutes"].gt(1300)
different_players = df_pairs["p1"].ne(df_pairs["p2"])
df_pairs.loc[over_threshold & different_players]

Unnamed: 0,p1,p2,minutes
271,Andi Sullivan,Rose Lavelle,1337
287,Andi Sullivan,Alana Cook,1368
314,Rose Lavelle,Andi Sullivan,1337
327,Rose Lavelle,Lindsey Horan,1390
373,Mallory Swanson,Emily Fox,1309
843,Lindsey Horan,Rose Lavelle,1390
932,Emily Fox,Mallory Swanson,1309
1018,Alana Cook,Andi Sullivan,1368
