# NFL Historic to Present Matches - Processing

In [1]:
import pandas as pd
import time
import re

In [6]:
# Load the raw match results from the CSV export.
out_df = pd.read_csv("historic_match_scores.csv")
# Parse the match dates. pd.to_datetime is used instead of
# astype("datetime64") because pandas 2.x rejects casts to a unit-less
# datetime64 dtype with a TypeError.
out_df["Date"] = pd.to_datetime(out_df["Date"])
# Keep week labels as strings with surrounding whitespace removed.
out_df["Week"] = out_df["Week"].astype("str").str.strip()
# Remove the two unnamed columns from the export.
out_df = out_df.drop(columns=["Unnamed: 4", "Unnamed: 8"])

* PtsW - Points scored by the winning team
* PtsL - Points scored by the losing team
* YdsW - Yards gained by the winning team
* YdsL - Yards gained by the losing team
* TOW - Turnovers by winning Team
* TOL - Turnovers by losing Team

* Wild Card - Added in 1978 (March), being the 1977 season

In [7]:
def get_home_team(Winner, Loser, AtHome):
    """Pick the home team of a match from its winner/loser pair.

    Args:
        Winner: Name of the winning (or tied) team.
        Loser: Name of the losing (or tied) team.
        AtHome: Location marker of the match; the value "@" selects the
            loser as the home team.

    Returns:
        ``Loser`` when ``AtHome`` equals "@", otherwise ``Winner``.
    """
    return Loser if AtHome == "@" else Winner

In [8]:
# Resolve the home team of every match row by row, then store it as a column.
home_team_by_match = out_df.apply(
    lambda match: get_home_team(
        match["Winner/tie"], match["Loser/tie"], match["AtHome"]
    ),
    axis=1,
)
out_df["HomeTeam"] = home_team_by_match

In [9]:
def get_season(DateField):
    """Return the season year that a match date belongs to.

    Matches played in January, February or March are counted towards the
    season of the previous calendar year; every other month belongs to the
    season of its own calendar year.

    Args:
        DateField (datetime): The date on which the match was played.

    Returns:
        Int: The year of the season being played.
    """
    played_in_first_quarter = 1 <= DateField.month <= 3
    return DateField.year - 1 if played_in_first_quarter else DateField.year

In [10]:
# Derive the season year of every match from its date.
season_by_match = out_df.apply(
    lambda match: get_season(match["Date"]),
    axis=1,
)
out_df["Season"] = season_by_match

In [19]:
# Shorten the conference championship label so it reads "Champ".
week_label_aliases = {"Conf. Champ": "Champ"}
out_df["Week"] = out_df["Week"].replace(week_label_aliases)

## 2020 Season

In [20]:
# Keep only the matches whose season is 2020 and report how many there are.
in_2020_season = out_df["Season"] == 2020
df_2020 = out_df[in_2020_season]
print(f"Number of games played: {len(df_2020)}")

Number of games played: 269


In [21]:
# Count the 2020 matches per week label, most frequent label first.
games_per_week_2020 = df_2020.groupby(["Week"]).size()
wk_occurence_2020 = (
    games_per_week_2020
    .reset_index(name="count")
    .sort_values(by="count", ascending=False)
)

### Week Types

In [22]:
# For every season, list the distinct week labels (as strings) in the order
# in which they first appear in the data.
unique_weeks = {
    season: [
        str(label)
        for label in out_df.loc[out_df["Season"] == season, "Week"].unique().tolist()
    ]
    for season in out_df["Season"].unique()
}

In [23]:
# Sorted list of every distinct week label seen in any season.
all_time_unique = sorted(set().union(*unique_weeks.values()))

### Max Week

* We want to obtain the highest week number present in each season
* Following this we can convert the post-season games to a week number, taking into account the fact that the number of weeks varies by season

In [25]:
# Highest numeric week label per season. Labels that contain a letter
# (the named post-season rounds) are ignored.
contains_letter = re.compile('[a-zA-Z]')
max_week = {}
for season in out_df["Season"].unique():
    season_week_labels = out_df.loc[out_df["Season"] == season, "Week"].astype("str")
    max_week[season] = max(
        int(label)
        for label in season_week_labels
        if contains_letter.search(label) is None
    )

### Replacement Keys

In [22]:
# Build, per season, the mapping from post-season round label to the week
# number it should receive. Rounds are numbered consecutively, starting one
# past the season's highest numeric week.
week_correction_dict = {}
for season in max_week.keys():
    last_numeric_week = max_week[season]
    season_week_labels = unique_weeks[season]

    post_season_rounds = []
    # Seasons after 1978 always get a Wild Card entry. Earlier seasons get
    # one only when their data actually contains "Wild Card" games; before
    # this check such games were left without any week number.
    if season > 1978 or "Wild Card" in season_week_labels:
        post_season_rounds.append("Wild Card")
    post_season_rounds.extend(["Division", "Champ"])
    # Seasons from 1966 onward additionally get a Super Bowl entry.
    if season >= 1966:
        post_season_rounds.append("Super Bowl")

    week_correction_dict[season] = {
        round_label: last_numeric_week + offset
        for offset, round_label in enumerate(post_season_rounds, start=1)
    }

In [47]:
def do_complete_week_col(season_value, week_value):
    """Translate a post-season round label into its week number.

    The replacements for the season are read from the module-level
    ``week_correction_dict``. A label that has no replacement for that
    season (for example a plain numeric week) is returned unchanged.

    Args:
        season_value: Season key into ``week_correction_dict``.
        week_value (str): Week label of the match.

    Returns:
        str: The replacement week number as a string, or the original label
        when the season has no replacement for it.

    Raises:
        KeyError: If ``season_value`` has no entry in
            ``week_correction_dict``.
    """
    season_corrections = week_correction_dict[season_value]
    # Look the label up directly. The earlier loop re-applied str.replace to
    # the original label on every pass and overwrote its result, so only the
    # last mapping of the season could ever take effect.
    return str(season_corrections.get(week_value, week_value))

In [48]:
# Compute the corrected week label of every match from its season and week.
modified_week_by_match = out_df.apply(
    lambda match: do_complete_week_col(match["Season"], match["Week"]),
    axis=1,
)
out_df["ModifiedWeek"] = modified_week_by_match

In [60]:
# Show the processed frame as the cell output.
out_df

## Super Bowl History

In [61]:
# Select the matches whose week label is exactly "Super Bowl".
is_super_bowl = out_df["Week"] == "Super Bowl"
sb_games = out_df[is_super_bowl]

TypeError: 'NoneType' object is not subscriptable