In [45]:
import pandas as pd
import datetime
from common import Data

In [46]:
df = Data.get_nwsl_matches()
df = df[
    ~df["season"].str.contains("Fall")
    & ~df["season"].str.contains("Challenge")
    & ~df["is_playoffs"]
]

df

Unnamed: 0,date,time,season,home,away,home_score,away_score,home_penalty_score,away_penalty_score,home_xg,away_xg,referee,attendance,stadium,is_forfeit,is_extra_time,is_pks,is_playoffs
0,2013-04-13,19:35,2013,KC,POR,1,1,0.0,0.0,,,Kari Seitz,6784.0,Shawnee Mission District Stadium,False,False,False,False
1,2013-04-14,17:00,2013,CHI,RGN,1,1,0.0,0.0,,,Josh Wilkens,1255.0,Benedictine U. Sports Complex Stadium,False,False,False,False
2,2013-04-14,18:00,2013,NJNY,WNY,1,0,0.0,0.0,,,John McCloskey,2611.0,Yurcak Field,False,False,False,False
3,2013-04-14,18:30,2013,BOS,WAS,1,1,0.0,0.0,,,Hernan Aguilar,2634.0,Dilboy Stadium,False,False,False,False
4,2013-04-20,19:00,2013,WAS,WNY,1,1,0.0,0.0,,,Kari Seitz,4569.0,Maryland SoccerPlex,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1460,2024-11-02,17:00,2024,ORL,RGN,3,2,0.0,0.0,2.9,1.4,Melinda Sopka,7765.0,Inter&Co Stadium,False,False,False,False
1461,2024-11-02,19:30,2024,NC,WAS,0,1,0.0,0.0,0.4,0.7,Elvis Osmanovic,10026.0,Sahlen's Stadium at WakeMed Soccer Park,False,False,False,False
1462,2024-11-02,20:30,2024,HOU,BAY,2,3,0.0,0.0,1.1,2.1,Matthew Thompson,8176.0,Shell Energy Stadium,False,False,False,False
1463,2024-11-03,14:00,2024,CHI,KCC,1,3,0.0,0.0,1.3,1.9,Jaclyn Metz,5595.0,SeatGeek Stadium,False,False,False,False


In [47]:
# Filter to desired columns
def get_points(team, opp):
    if team > opp:
        return 3
    elif team < opp:
        return 0
    else:
        return 1


home = df.copy()
home["points"] = home.apply(
    lambda x: get_points(x["home_score"], x["away_score"]), axis=1
)
home = home[["date", "home", "season", "points"]]
home = home.rename(columns={"home": "team"})

away = df.copy()
away["points"] = away.apply(
    lambda x: get_points(x["away_score"], x["home_score"]), axis=1
)
away = away[["date", "away", "season", "points"]]
away = away.rename(columns={"away": "team"})

df = pd.concat([home, away])
df

Unnamed: 0,date,team,season,points
0,2013-04-13,KC,2013,1
1,2013-04-14,CHI,2013,1
2,2013-04-14,NJNY,2013,3
3,2013-04-14,BOS,2013,1
4,2013-04-20,WAS,2013,1
...,...,...,...,...
1460,2024-11-02,RGN,2024,0
1461,2024-11-02,WAS,2024,3
1462,2024-11-02,BAY,2024,3
1463,2024-11-03,KCC,2024,3


In [48]:
def cumulative_sum(lists):
    cu_list = []
    length = len(lists)
    cu_list = [sum(lists[0:x:1]) for x in range(0, length + 1)]
    return cu_list[1:]


def get_points(team, season):
    subset = df[(df["team"] == team) & (df["season"] == season)]
    return cumulative_sum(list(subset["points"]))

matches_per_season = {
    "2013": 22,
    "2014": 24,
    "2015": 20,
    "2016": 20,
    "2017": 24,
    "2018": 24,
    "2019": 24,
    "2021": 24,
    "2022": 22,
    "2023": 22,
    "2024": 26,
}

# Aggregate by team, season
results = pd.DataFrame([], columns=["team", "season", "points"])
for i, row in df.groupby(["team", "season"]).count().reset_index().iterrows():
    points = get_points(row["team"], row["season"])
    if len(points) != matches_per_season[str(row["season"])]:
        raise Error("incorrect number of matches")
    results.loc[len(results.index)] = [
        row["team"],
        row["season"],
        points,
    ]

results = results.sort_values(["season", "team"])
results

Unnamed: 0,team,season,points
1,BOS,2013,"[1, 4, 7, 8, 8, 8, 11, 11, 12, 15, 18, 21, 22,..."
6,CHI,2013,"[1, 1, 1, 4, 4, 5, 8, 9, 12, 15, 16, 16, 17, 1..."
27,KC,2013,"[1, 4, 7, 7, 7, 10, 13, 14, 17, 20, 20, 23, 23..."
50,NJNY,2013,"[3, 4, 7, 10, 10, 11, 12, 15, 16, 16, 19, 22, ..."
69,POR,2013,"[3, 3, 6, 6, 9, 12, 12, 13, 14, 17, 17, 18, 21..."
...,...,...,...
79,POR,2024,"[0, 1, 4, 7, 10, 13, 13, 16, 16, 16, 16, 19, 2..."
90,RGN,2024,"[3, 3, 6, 7, 7, 8, 9, 10, 13, 13, 13, 13, 16, ..."
93,SD,2024,"[0, 3, 6, 9, 10, 11, 11, 11, 12, 12, 15, 15, 1..."
96,UTA,2024,"[0, 3, 3, 4, 4, 4, 4, 5, 8, 8, 11, 14, 14, 14,..."


In [49]:
# Write to a file
results.to_csv("final.csv", index=False)