In [1]:
import pandas as pd
import datetime
from common import Data

In [4]:
# Load the goal records through the project's Data helper and display them.
# NOTE(review): the rendered outputs in this notebook suggest the `player` and
# `assist` columns may spell the same person differently (e.g. "Kerolin" vs
# "Kerolin Nicoli Israel Ferraz", "Mille Gejl Jensen" vs "Mille Gejl"). If so,
# that player's goals and assists land on separate rows in the name-based
# tables built from this frame — confirm against the data source.
df = Data.get_nwsl_goals()
df

Unnamed: 0,season,date,home,away,player,team,assist,period,minute,second,x,y
0,2023,2023-03-25,NC,KC,Mille Gejl Jensen,NC,Narumi Miura,1,22,24,70.4,63.8
1,2023,2023-03-25,SD,CHI,Yuki Ogimi Nagasato,CHI,,1,17,41,97.6,47.4
2,2023,2023-03-25,SD,CHI,Amirah Ali,SD,,1,21,34,98.3,48.4
3,2023,2023-03-25,SD,CHI,Jaedyn Shaw,SD,,1,31,7,81.6,50.4
4,2023,2023-03-25,SD,CHI,Mallory Pugh Swanson,CHI,,1,48,18,88.5,50.0
...,...,...,...,...,...,...,...,...,...,...,...,...
335,2023,2023-10-15,ORL,HOU,Marta,ORL,,2,86,5,88.5,50.0
336,2023,2023-10-15,SD,LOU,Jaedyn Shaw,SD,Alex Morgan,1,25,46,92.6,53.6
337,2023,2023-10-15,SD,LOU,Alex Morgan,SD,Madison Pogarch,2,46,19,84.3,62.7
338,2023,2023-10-15,WAS,NC,Tyler Lussi,NC,Manaka Matsukubo,1,24,43,95.0,54.3


In [5]:
# Keep regular-season goals only: parse the date column, then keep the rows
# dated strictly before the cutoff.
df["date"] = pd.to_datetime(df["date"])
regular_season_cutoff = datetime.datetime(2023, 10, 18)
df = df.loc[df["date"].lt(regular_season_cutoff)]

In [6]:
# Goals per scorer, highest totals first.
# Rows are grouped on the scorer's name; the group size is the goal count.
goals_per_player = df.groupby("player").size()
goals = (
    goals_per_player.rename("goals")
    .rename_axis("name")
    .sort_values(ascending=False)
    .reset_index()
)
goals

Unnamed: 0,name,goals
0,Sophia Smith,11
1,Kerolin,10
2,Debinha,9
3,Ashley Hatch,9
4,Alex Morgan,7
...,...,...
130,Narumi Miura,1
131,Natalie Jacobs,1
132,Kelli Hubly,1
133,Olivia Van der Jagt,1


In [7]:
# Assists per player, highest totals first.
# Unassisted goals have no assister name, so groupby leaves them out.
assists_per_player = df.groupby("assist").size()
assists = (
    assists_per_player.rename("assist")
    .rename_axis("name")
    .sort_values(ascending=False)
    .reset_index()
)
assists

Unnamed: 0,name,assist
0,Sam Coffey,8
1,Claire Emslie,5
2,Julia Bianchi,5
3,Alex Morgan,5
4,Megan Rapinoe,5
...,...,...
113,Michelle Cooper,1
114,Ella Stevens,1
115,Mikayla Cluff,1
116,Mille Gejl,1


In [8]:
# One row per player with both totals. The outer join keeps players who appear
# in only one of the two tables; their missing total becomes 0.
goals_by_name = goals.set_index("name")
assists_by_name = assists.set_index("name")
stats = goals_by_name.join(assists_by_name, how="outer")
stats = stats.fillna(0).astype(int)
# Rank by goals, then assists; the fresh 0..n-1 index doubles as that rank.
stats = stats.sort_values(by=["goals", "assist"], ascending=False).reset_index()
stats

Unnamed: 0,name,goals,assist
0,Sophia Smith,11,5
1,Kerolin,10,0
2,Ashley Hatch,9,2
3,Debinha,9,0
4,Alex Morgan,7,5
...,...,...,...
186,Rebecca Holloway,0,1
187,Rose Lavelle,0,1
188,Sarah Griffith,0,1
189,Sinead Farrelly,0,1


In [9]:
# Count goals per ordered (scorer, assister) pair. Direction matters here:
# "A assisted by B" and "B assisted by A" are tallied separately.
# Only the two name columns decide whether a goal belongs to a pairing; a bare
# dropna() would also discard assisted goals that are merely missing an
# unrelated field such as a coordinate.
df_combo_goals = df.dropna(subset=["player", "assist"])
df_combo_goals = (
    df_combo_goals.groupby(["player", "assist"])
    .size()
    .rename("goals")
    .reset_index()
)
df_combo_goals

Unnamed: 0,player,assist,goals
0,Abby Erceg,Shuang Wang,2
1,Adriana Leal da Silva,Ally Watt,1
2,Adriana Leal da Silva,Erika Tymrak,1
3,Adriana Leal da Silva,Julie Doyle,1
4,Adriana Leal da Silva,Marta Vieira da Silva Veiga,1
...,...,...,...
197,Veronica Latsko,Megan Rapinoe,3
198,Victoria Pickett,Olivia Wingate,1
199,Yazmeen Ryan,Kristen Edmonds,1
200,Yuki Ogimi Nagasato,Casey Krueger,1


In [10]:
# Put the higher-ranked player of each scorer/assister pair in p1, so a pair is
# keyed the same way whichever of the two scored. Rank is the row position in
# `stats` (sorted by goals, then assists, descending): smaller means higher.
# The null check is limited to the two name columns; a bare dropna() would also
# discard assisted goals that are merely missing an unrelated field.
assisted_goals = df.dropna(subset=["player", "assist"])

# A single lookup table replaces a scan of `stats` for every row. Names are
# unique in `stats` because it is built by joining two name-indexed tables.
rank_by_name = pd.Series(stats.index.to_numpy(), index=stats["name"].to_numpy())
scorer_rank = assisted_goals["player"].map(rank_by_name)
assister_rank = assisted_goals["assist"].map(rank_by_name)

# A name absent from `stats` would map to NaN and silently skip the swap, so
# fail loudly instead.
unranked = scorer_rank.isna() | assister_rank.isna()
if unranked.any():
    raise KeyError(
        f"{int(unranked.sum())} assisted goals reference a player missing from stats"
    )

# Swap the two names wherever the assister outranks the scorer. Both new
# columns are computed from the untouched frame, so neither sees the other's
# replacement.
swap = assister_rank < scorer_rank
df_combos = assisted_goals.assign(
    player=assisted_goals["player"].where(~swap, assisted_goals["assist"]),
    assist=assisted_goals["assist"].where(~swap, assisted_goals["player"]),
).rename(columns={"player": "p1", "assist": "p2"})
df_combos

Unnamed: 0,season,date,home,away,p1,team,p2,period,minute,second,x,y
0,2023,2023-03-25,NC,KC,Mille Gejl Jensen,NC,Narumi Miura,1,22,24,70.4,63.8
6,2023,2023-03-26,LA,NJY,Alyssa Thompson,LA,Dani Weatherholt,1,10,44,78.7,69.6
8,2023,2023-03-26,LA,NJY,Lynn Williams,NJY,Midge Purce,2,64,1,86.4,34.0
10,2023,2023-03-26,POR,ORL,Sophia Smith,POR,Christine Sinclair,1,21,28,90.3,48.9
12,2023,2023-03-26,POR,ORL,Sophia Smith,POR,Michele Vasconcelos,2,75,7,96.3,50.4
...,...,...,...,...,...,...,...,...,...,...,...,...
332,2023,2023-10-15,NJY,KC,Yazmeen Ryan,NJY,Kristen Edmonds,1,14,18,93.1,34.4
333,2023,2023-10-15,NJY,KC,Alexa Spaanstra,KC,Alex Loera,1,25,58,84.9,37.3
336,2023,2023-10-15,SD,LOU,Alex Morgan,SD,Jaedyn Shaw,1,25,46,92.6,53.6
337,2023,2023-10-15,SD,LOU,Alex Morgan,SD,Madison Pogarch,2,46,19,84.3,62.7


In [11]:
# Number of goals each (p1, p2) pairing produced, most productive first
pair_counts = df_combos.groupby(["p1", "p2"]).size().rename("combos")
combos = pair_counts.sort_values(ascending=False).reset_index()
combos

Unnamed: 0,p1,p2,combos
0,Lynn Williams,Midge Purce,3
1,Savannah McCaskill,Katie Johnson,3
2,Megan Rapinoe,Veronica Latsko,3
3,Sophia Smith,Morgan Weaver,3
4,Jaedyn Shaw,Makenzy Doniak,3
...,...,...,...
186,Debinha,Mimmi Larsson,1
187,Debinha,Vanessa DiBernardo,1
188,Ebony Salmon,María Sánchez,1
189,Ella Stevens,Julia Bianchi,1


In [12]:
# Build one row per pairing with the columns
#   p1, p2, combos, p1g, p1a, p2g, p2a, p1g_p2a, p2g_p1a, goals
# Each lookup table is renamed to the target column names first, so the merges
# need no rename/drop afterwards.

# Season totals (goals, assists) for each side of the pairing.
p1_totals = stats.rename(columns={"name": "p1", "goals": "p1g", "assist": "p1a"})
p2_totals = stats.rename(columns={"name": "p2", "goals": "p2g", "assist": "p2a"})
merged = combos.merge(p1_totals, on="p1", how="left").merge(
    p2_totals, on="p2", how="left"
)

# Goals p1 scored from a p2 assist; pairings with none get 0.
p1_from_p2 = df_combo_goals.rename(
    columns={"player": "p1", "assist": "p2", "goals": "p1g_p2a"}
)
merged = merged.merge(p1_from_p2, on=["p1", "p2"], how="left").fillna(0)
merged["p1g_p2a"] = merged["p1g_p2a"].astype(int)

# The reverse direction: goals p2 scored from a p1 assist.
p2_from_p1 = df_combo_goals.rename(
    columns={"player": "p2", "assist": "p1", "goals": "p2g_p1a"}
)
merged = merged.merge(p2_from_p1, on=["p1", "p2"], how="left").fillna(0)
merged["p2g_p1a"] = merged["p2g_p1a"].astype(int)

# Combined season goals of the two players.
merged["goals"] = merged["p1g"] + merged["p2g"]
merged

Unnamed: 0,p1,p2,combos,p1g,p1a,p2g,p2a,p1g_p2a,p2g_p1a,goals
0,Lynn Williams,Midge Purce,3,7,2,4,2,2,1,11
1,Savannah McCaskill,Katie Johnson,3,4,3,3,1,1,2,7
2,Megan Rapinoe,Veronica Latsko,3,4,5,4,2,0,3,8
3,Sophia Smith,Morgan Weaver,3,11,5,7,4,2,1,18
4,Jaedyn Shaw,Makenzy Doniak,3,6,3,3,2,1,2,9
...,...,...,...,...,...,...,...,...,...,...
186,Debinha,Mimmi Larsson,1,9,0,2,2,1,0,11
187,Debinha,Vanessa DiBernardo,1,9,0,0,2,1,0,9
188,Ebony Salmon,María Sánchez,1,1,2,0,4,1,0,1
189,Ella Stevens,Julia Bianchi,1,4,1,0,5,1,0,4


In [13]:
# Rank pairings by shared goals; ties fall back to the pair's combined season
# goals, then to p1's own goal total (all descending).
sort_keys = ["combos", "goals", "p1g"]
merged = merged.sort_values(by=sort_keys, ascending=False)
merged

Unnamed: 0,p1,p2,combos,p1g,p1a,p2g,p2a,p1g_p2a,p2g_p1a,goals
3,Sophia Smith,Morgan Weaver,3,11,5,7,4,2,1,18
0,Lynn Williams,Midge Purce,3,7,2,4,2,2,1,11
4,Jaedyn Shaw,Makenzy Doniak,3,6,3,3,2,1,2,9
2,Megan Rapinoe,Veronica Latsko,3,4,5,4,2,0,3,8
1,Savannah McCaskill,Katie Johnson,3,4,3,3,1,1,2,7
...,...,...,...,...,...,...,...,...,...,...
138,Ally Prisock,Caprice Dydasco,1,1,0,0,1,1,0,1
143,Andressa,Nichelle Prince,1,1,0,0,2,1,0,1
161,Jordan Elisabeth Baggett,Lauren Milliet,1,1,0,0,1,1,0,1
168,Haley Hopkins,Kerolin Nicoli Israel Ferraz,1,1,1,0,3,1,0,1


In [14]:
# Write the pairing table to disk; the DataFrame index is left out of the file.
merged.to_csv(path_or_buf="data/2023_combos.csv", index=False)