In [60]:
import pandas as pd

In [61]:
team = "POR"

In [62]:
df = pd.read_csv("data/all_matches.csv")
df["kickoff"] = pd.to_datetime(df["kickoff"])
df

Unnamed: 0,kickoff,season,home_team,away_team,home_team_score,away_team_score,home_penalty_score,away_penalty_score,is_playoff
0,2013-04-14 17:00:00,2013,CHI,RGN,1,1,0,0,False
1,2013-04-14 18:00:00,2013,NJY,WNY,1,0,0,0,False
2,2013-04-14 18:30:00,2013,BOS,WAS,1,1,0,0,False
3,2013-04-13 19:35:00,2013,KC,POR,1,1,0,0,False
4,2013-04-20 19:00:00,2013,WAS,WNY,1,1,0,0,False
...,...,...,...,...,...,...,...,...,...
1101,2022-10-16 17:00:00,2022,HOU,KCC,1,2,0,0,True
1102,2022-10-16 22:00:00,2022,SD,CHI,2,1,0,0,True
1103,2022-10-23 17:00:00,2022,POR,SD,2,1,0,0,True
1104,2022-10-23 19:30:00,2022,RGN,KCC,0,2,0,0,True


In [63]:
# Filter to the desired team
filtered = df[(df["home_team"] == team) | (df["away_team"] == team)]

# Filter to the regular season
filtered = filtered[filtered["season"].isin(list(map(str, range(2013, 2023))))]

filtered = filtered[~filtered["is_playoff"]]

filtered

Unnamed: 0,kickoff,season,home_team,away_team,home_team_score,away_team_score,home_penalty_score,away_penalty_score,is_playoff
0,2013-04-14 17:00:00,2013,CHI,RGN,1,1,0,0,False
9,2013-04-27 19:00:00,2013,CHI,POR,0,2,0,0,False
11,2013-05-04 18:00:00,2013,BOS,CHI,4,1,0,0,False
14,2013-05-08 19:00:00,2013,NJY,CHI,1,1,0,0,False
18,2013-05-12 17:00:00,2013,CHI,POR,0,2,0,0,False
...,...,...,...,...,...,...,...,...,...
1072,2022-09-09 22:00:00,2022,RGN,CHI,2,2,0,0,False
1078,2022-09-14 20:00:00,2022,CHI,KCC,4,0,0,0,False
1082,2022-09-17 20:00:00,2022,CHI,HOU,0,1,0,0,False
1091,2022-09-25 16:00:00,2022,POR,CHI,3,0,0,0,False


In [64]:
def map_to_result(row):
    went_to_penalties = row["home_penalty_score"] > 0 or row["away_penalty_score"] > 0
    score = "_team_score" if not went_to_penalties else "_penalty_score"
    if row["home" + score] == row["away" + score]:
        return "D"
    elif row["home" + score] > row["away" + score]:
        if row["home_team"] == team:
            return "W"
        else:
            return "L"
    elif row["home" + score] < row["away" + score]:
        if row["home_team"] == team:
            return "L"
        else:
            return "W"

filtered["result"] = filtered.apply(map_to_result, axis=1)
filtered

Unnamed: 0,kickoff,season,home_team,away_team,home_team_score,away_team_score,home_penalty_score,away_penalty_score,is_playoff,result
0,2013-04-14 17:00:00,2013,CHI,RGN,1,1,0,0,False,D
9,2013-04-27 19:00:00,2013,CHI,POR,0,2,0,0,False,L
11,2013-05-04 18:00:00,2013,BOS,CHI,4,1,0,0,False,L
14,2013-05-08 19:00:00,2013,NJY,CHI,1,1,0,0,False,D
18,2013-05-12 17:00:00,2013,CHI,POR,0,2,0,0,False,L
...,...,...,...,...,...,...,...,...,...,...
1072,2022-09-09 22:00:00,2022,RGN,CHI,2,2,0,0,False,D
1078,2022-09-14 20:00:00,2022,CHI,KCC,4,0,0,0,False,W
1082,2022-09-17 20:00:00,2022,CHI,HOU,0,1,0,0,False,L
1091,2022-09-25 16:00:00,2022,POR,CHI,3,0,0,0,False,L


In [65]:
filtered.groupby("result").count()

Unnamed: 0_level_0,kickoff,season,home_team,away_team,home_team_score,away_team_score,home_penalty_score,away_penalty_score,is_playoff
result,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
D,58,58,58,58,58,58,58,58,58
L,58,58,58,58,58,58,58,58,58
W,88,88,88,88,88,88,88,88,88


In [66]:
# Genereate a table with date, season, team score, opponent score, result
out = filtered
out["date"] = out["kickoff"].dt.date

out["team_score"] = out.apply(
    lambda row: row["home_team_score"]
    if team == row["home_team"]
    else row["away_team_score"],
    axis=1,
)
out["opp_score"] = out.apply(
    lambda row: row["home_team_score"]
    if team != row["home_team"]
    else row["away_team_score"],
    axis=1,
)

out[["date", "season", "team_score", "opp_score", "result"]].to_csv(
    "data/" + team + "_reg_matches.csv", index=False
)