In [161]:
import pandas as pd
import datetime

In [162]:
# Load the raw goal log from "2022_goals.csv"; later cells read the
# date, scorer and assist columns from this frame.
df = pd.read_csv(filepath_or_buffer="2022_goals.csv")
df

Unnamed: 0,date,scorer,assist
0,2022-04-29T22:30:00,Vanessa Gilles,Jun Endo
1,2022-04-29T22:30:00,Jun Endo,Savannah McCaskill
2,2022-04-29T22:30:00,Debinha,Emily Gray
3,2022-04-30T18:00:00,Morgan Weaver,
4,2022-04-30T18:00:00,Christine Sinclair,
...,...,...,...
375,2022-10-23T17:00:00,Crystal Dunn,
376,2022-10-23T19:30:00,Alexis Loera,
377,2022-10-23T19:30:00,Kristen Hamilton,
378,2022-10-29T19:59:00,Sophia Smith,


In [163]:
# Keep only regular-season goals: parse the timestamps, then drop every
# row dated on or after 2022-10-03.
df["date"] = pd.to_datetime(df["date"])
is_regular_season = df["date"].lt(datetime.datetime(2022, 10, 3))
df = df.loc[is_regular_season]


In [164]:
# Goals per scorer: one row per player ("name") with the number of rows in
# which they appear as the scorer, most prolific first.
goals = (
    df.groupby("scorer")["scorer"]
    .count()
    .rename_axis("name")
    .to_frame("goals")
    .sort_values(by="goals", ascending=False)
    .reset_index()
)
goals

Unnamed: 0,name,goals
0,Alex Morgan,15
1,Sophia Smith,14
2,Debinha,12
3,Diana Ordóñez,11
4,Mallory Pugh,11
...,...,...
129,Olivia Athens,1
130,Paige Nielsen (OG),1
131,Kaleigh Riehl,1
132,Rachel Hill,1


In [165]:
# Assists per player: one row per player ("name") with the number of rows in
# which they are credited with the assist. Rows with no assist recorded are
# left out because groupby skips missing keys.
assists = (
    df.groupby("assist")["assist"]
    .count()
    .rename_axis("name")
    .to_frame("assist")
    .sort_values(by="assist", ascending=False)
    .reset_index()
)
assists

Unnamed: 0,name,assist
0,Mallory Pugh,6
1,Carson Pickett,6
2,Yazmeen Ryan,5
3,Ashley Sanchez,5
4,Kerolin,4
...,...,...
115,Jasmyne Spencer,1
116,Jaelene Daniels,1
117,Isabel Rodriguez,1
118,Imani Dorsey,1


In [166]:
# Build one table with both goals and assists per player. The outer join
# keeps players who appear in only one of the two tables; their missing
# count becomes 0. Rows are ordered by goals, then assists, both descending.
goals_by_name = goals.set_index("name")
assists_by_name = assists.set_index("name")
stats = (
    goals_by_name.join(assists_by_name, how="outer")
    .fillna(0)
    .astype(int)
    .sort_values(by=["goals", "assist"], ascending=False)
    .reset_index()
)
stats

Unnamed: 0,name,goals,assist
0,Alex Morgan,15,2
1,Sophia Smith,14,3
2,Debinha,12,4
3,Mallory Pugh,11,6
4,Diana Ordóñez,11,0
...,...,...,...
170,Megan Reid,0,1
171,Paige Nielsen,0,1
172,Quinn,0,1
173,Shuang Wang,0,1


In [167]:
# Count goals per directed (scorer, assist) pair, i.e. A scoring from B's
# assist is counted separately from B scoring from A's assist. Rows with
# any missing value (such as unassisted goals) are dropped first.
df_combo_goals = df.dropna()
df_combo_goals = (
    df_combo_goals.groupby(["scorer", "assist"])["scorer"]
    .count()
    .rename("goals")
    .reset_index()
)
df_combo_goals

Unnamed: 0,scorer,assist,goals
0,Abby Erceg,Carson Pickett,1
1,Alex Chidiac,Jaelin Howell,1
2,Alex Morgan,Emily van Egmond,1
3,Alex Morgan,Kailen Sheridan,1
4,Alex Morgan,Sofia Jakobsson,2
...,...,...,...
204,Vanessa Gilles,Jun Endo,1
205,Yazmeen Ryan,Madison Pogarch,1
206,Yazmeen Ryan,Morgan Weaver,1
207,Yuki Nagasato,Mallory Pugh,2


In [168]:
# Spot check: show the scorer stored under index label 5 once rows with
# missing values have been removed.
df_combos = df.dropna()
df_combos.at[5, "scorer"]

'Sophia Smith'

In [169]:
# Put every assisted goal into a canonical (p1, p2) order so that the same two
# players always form the same pair regardless of who scored: p1 is whichever
# of the two sits higher in `stats` (smaller row position), p2 the other.
df_combos = df.dropna().rename(columns={"scorer": "p1", "assist": "p2"})

# Row position of each player in `stats` (0 = top of the table). Looking the
# names up with .loc raises KeyError if a player is missing from `stats`,
# so a mismatch between the two tables still fails loudly.
stats_rank = pd.Series(stats.index, index=stats["name"])
p1_rank = stats_rank.loc[df_combos["p1"]].to_numpy()
p2_rank = stats_rank.loc[df_combos["p2"]].to_numpy()

# Swap the two names wherever the assister outranks the scorer. This is done
# column-wise instead of row-by-row with iterrows(), which rescanned `stats`
# twice per goal and wrote into the frame while iterating over it.
needs_swap = p2_rank < p1_rank
df_combos = df_combos.assign(
    p1=df_combos["p1"].where(~needs_swap, df_combos["p2"]),
    p2=df_combos["p2"].where(~needs_swap, df_combos["p1"]),
)
df_combos

Unnamed: 0,date,p1,p2
0,2022-04-29 22:30:00,Jun Endo,Vanessa Gilles
1,2022-04-29 22:30:00,Savannah McCaskill,Jun Endo
2,2022-04-29 22:30:00,Debinha,Emily Gray
5,2022-04-30 18:00:00,Sophia Smith,Meghan Klingenberg
6,2022-04-30 20:00:00,Ella Stevens,Danielle Colaprico
...,...,...,...
362,2022-10-01 22:00:00,Megan Rapinoe,Jordyn Huitema
363,2022-10-01 22:00:00,Jordyn Huitema,Quinn
364,2022-10-01 22:00:00,Bethany Balcer,Jessica Fishlock
365,2022-10-02 18:00:00,Mallory Pugh,Sarah Luebbert


In [170]:
# Number of goals each ordered (p1, p2) pair combined on, largest first.
combos = (
    df_combos.groupby(["p1", "p2"])["p1"]
    .count()
    .rename("combos")
    .to_frame()
    .sort_values(by="combos", ascending=False)
    .reset_index()
)
combos

Unnamed: 0,p1,p2,combos
0,Diana Ordóñez,Carson Pickett,4
1,Debinha,Kerolin,3
2,Midge Purce,Ifeoma Onumonu,3
3,Nadia Nadim,Jessica McDonald,3
4,Mallory Pugh,Bianca St-Georges,3
...,...,...,...
191,Hannah Betfort,Madison Pogarch,1
192,Hina Sugita,Christine Sinclair,1
193,Hina Sugita,Janine Beckie,1
194,Hina Sugita,Meghan Klingenberg,1


In [171]:
# Resulting columns:
#   p1, p2, combos, p1g, p1a, p2g, p2a, p1g_p2a, p2g_p1a, goals

# Season totals from `stats`, relabelled once for each side of the pair so
# they can be joined directly on the player column.
p1_totals = stats.rename(columns={"name": "p1", "goals": "p1g", "assist": "p1a"})
p2_totals = stats.rename(columns={"name": "p2", "goals": "p2g", "assist": "p2a"})
merged = combos.merge(p1_totals, on="p1", how="left").merge(
    p2_totals, on="p2", how="left"
)

# Goals scored by p1 from a p2 assist; pairs with no such goal get 0.
scored_by_p1 = df_combo_goals.rename(
    columns={"scorer": "p1", "assist": "p2", "goals": "p1g_p2a"}
)
merged = merged.merge(scored_by_p1, on=["p1", "p2"], how="left").fillna(0)
merged["p1g_p2a"] = merged["p1g_p2a"].astype(int)

# Goals scored by p2 from a p1 assist; pairs with no such goal get 0.
scored_by_p2 = df_combo_goals.rename(
    columns={"scorer": "p2", "assist": "p1", "goals": "p2g_p1a"}
)
merged = merged.merge(scored_by_p2, on=["p1", "p2"], how="left").fillna(0)
merged["p2g_p1a"] = merged["p2g_p1a"].astype(int)

# Combined season goal total of the two players.
merged["goals"] = merged["p1g"] + merged["p2g"]
merged

Unnamed: 0,p1,p2,combos,p1g,p1a,p2g,p2a,p1g_p2a,p2g_p1a,goals
0,Diana Ordóñez,Carson Pickett,4,11,0,1,6,4,0,12
1,Debinha,Kerolin,3,12,4,6,4,2,1,18
2,Midge Purce,Ifeoma Onumonu,3,3,3,2,2,1,2,5
3,Nadia Nadim,Jessica McDonald,3,6,0,3,4,3,0,9
4,Mallory Pugh,Bianca St-Georges,3,11,6,2,2,2,1,13
...,...,...,...,...,...,...,...,...,...,...
191,Hannah Betfort,Madison Pogarch,1,1,0,0,2,1,0,1
192,Hina Sugita,Christine Sinclair,1,5,4,5,0,0,1,10
193,Hina Sugita,Janine Beckie,1,5,4,0,2,1,0,5
194,Hina Sugita,Meghan Klingenberg,1,5,4,0,2,1,0,5


In [174]:
# Rank pairs by how often they combined; ties are broken by the pair's total
# goals and then by p1's own goal count (all descending).
merged = merged.sort_values(
    by=["combos", "goals", "p1g"],
    ascending=[False, False, False],
)
merged

Unnamed: 0,p1,p2,combos,p1g,p1a,p2g,p2a,p1g_p2a,p2g_p1a,goals
0,Diana Ordóñez,Carson Pickett,4,11,0,1,6,4,0,12
6,Alex Morgan,Taylor Kornieck,3,15,2,3,3,3,0,18
1,Debinha,Kerolin,3,12,4,6,4,2,1,18
10,Sophia Smith,Olivia Lynn Moultrie,3,14,3,3,4,2,1,17
8,Mallory Pugh,Yuki Nagasato,3,11,6,3,3,1,2,14
...,...,...,...,...,...,...,...,...,...,...
146,Brianna Pinto,Ryan Williams,1,1,2,0,3,1,0,1
156,Jodie Taylor,Katie Johnson,1,1,0,0,3,1,0,1
157,Joelle Anderson,Cali Jean Farquharson,1,1,0,0,1,1,0,1
161,Kaleigh Riehl,Kelsey Turnbow,1,1,0,0,2,1,0,1


In [175]:
# Write the ranked pair table to disk without the index column.
merged.to_csv(path_or_buf="2022_combos.csv", index=False)