In [1]:
from common import Data, Playoffs
import pandas as pd

In [2]:
# Per-season league format, keyed by season label (kept as strings):
#   spots -> number of playoff berths available that season
#   teams -> number of clubs in the league that season
# There is no 2020 entry, so any later filter on `df_spots.index` drops 2020.
df_spots = pd.DataFrame.from_dict(
    {
        "2013": (4, 8),
        "2014": (4, 9),
        "2015": (4, 9),
        "2016": (4, 10),
        "2017": (4, 10),
        "2018": (4, 9),
        "2019": (4, 9),
        "2021": (6, 10),
        "2022": (6, 12),
        "2023": (6, 12),
    },
    orient="index",
    columns=["spots", "teams"],
).rename_axis("season")

In [3]:
# Load every match, then keep only regular-season rows (playoff matches are
# excluded) from the seasons that have an entry in `df_spots`.
# NOTE(review): `isin` compares against the string season labels of
# `df_spots.index`, so this assumes `df["season"]` holds strings too - confirm.
df = Data.get_nwsl_matches().loc[
    lambda matches: matches["season"].isin(df_spots.index)
    & ~matches["is_playoffs"]
]

In [5]:
# Work out, season by season, the date on which each playoff team clinched its
# berth. The result is one row per (season, team) with the clinching date.
#
# Rows are collected in a plain list and turned into a DataFrame once at the
# end, instead of enlarging the frame one row at a time inside the loops.
clinched_records = []

for season in df_spots.index:
    df_season = df[df["season"] == season]
    teams = list(df_season["home"].unique())
    # Sort explicitly: the half-way slice and the "last date" check below both
    # rely on chronological order, which `unique()` alone does not guarantee
    # (it keeps the row order of the loaded data).
    dates = df_season["date"].sort_values().unique()
    spots = df_spots.loc[season, "spots"]
    clinched_teams = []

    # Only the second half of the match dates is examined - presumably because
    # no team can clinch earlier than that (TODO confirm).
    dates_start = len(dates) // 2
    print(season, "first date:", dates[dates_start])

    for date in dates[dates_start:]:
        # Blank out the scores of every match played after this date so the
        # helpers treat those matches as not yet played.
        df_temp = df_season.copy()
        df_temp.loc[df_temp["date"] > date, ["home_score", "away_score"]] = None

        # For each team that has not clinched yet, compute its lowest
        # (worst-case) final position given the results known so far.
        g = Playoffs.calc_games_matrix(df_temp, teams)
        p0 = Playoffs.calc_initial_points(df_temp, teams)
        for team in teams:
            if team not in clinched_teams:
                lowest = Playoffs.calculate_lowest_finish(team, teams, g, p0)

                # If even the lowest possible position is within the playoff
                # places (position <= spots), the team has clinched on this date.
                if lowest <= spots:
                    clinched_teams.append(team)
                    clinched_records.append([season, team, date])

        # Once enough teams have clinched, stop scanning this season. `>=`
        # (rather than `==`) still exits if the count ever overshoots.
        if len(clinched_teams) >= spots:
            break

        # If not enough teams have clinched through regular means by the final
        # date, fill the remaining berths from the final standings. Might need
        # to update based on tie breakers.
        if date == dates[-1]:
            standings = Data.get_nwsl_standings(df_temp)
            # NOTE(review): assumes standings are indexed by position starting
            # at 1, so `.loc[1:spots]` selects the top `spots` teams - confirm.
            for team in standings.loc[1:spots, "team"]:
                if team not in clinched_teams:
                    clinched_teams.append(team)
                    clinched_records.append([season, team, date])

df_clinched = pd.DataFrame(clinched_records, columns=["season", "team", "date"])
df_clinched

2013 first date: 2013-06-23 00:00:00
2014 first date: 2014-06-19 00:00:00
2015 first date: 2015-07-12 00:00:00
2016 first date: 2016-06-24 00:00:00
2017 first date: 2017-07-08 00:00:00
2018 first date: 2018-06-23 00:00:00
2019 first date: 2019-07-19 00:00:00
2021 first date: 2021-08-07 00:00:00
2022 first date: 2022-07-16 00:00:00
2023 first date: 2023-06-18 00:00:00


Unnamed: 0,season,team,date
0,2013,KC,2013-08-04
1,2013,NJNY,2013-08-07
2,2013,POR,2013-08-07
3,2013,WNY,2013-08-07
4,2014,RGN,2014-07-17
5,2014,KC,2014-08-02
6,2014,POR,2014-08-17
7,2014,CHI,2014-08-20
8,2015,RGN,2015-08-12
9,2015,CHI,2015-08-23


In [6]:
# Persist the clinching table as CSV; the positional row index is not written.
df_clinched.to_csv(
    path_or_buf="data/historical_clinching.csv",
    index=False,
)