In [11]:
import pandas as pd
import numpy as np
import csv

In [12]:
TARGET_FOLDER = "intermediate-data/"
SOURCE_FOLDER = "raw-data/"
SEARCH_YEARS = ['2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022']


In [22]:
# standardizes team name for indexing
def standardize_nfl_team_name (original_series) : 
    converted_team_names = []
    for raw_content in original_series :
        new_team_name = ''
        if raw_content == "Eagles" or raw_content == "PHI" :
            new_team_name =  "Philadelphia Eagles"
        elif raw_content == "Seahawks" or raw_content == "SEA" :
            new_team_name =  "Seattle Seahawks"
        elif raw_content == "Titans" or raw_content == "TEN" :
            new_team_name =  "Tennessee Titans"
        elif raw_content == "Broncos" or raw_content == "DEN" :
            new_team_name =  "Denver Broncos"
        elif raw_content == "Giants" or raw_content == "NYG" :
            new_team_name =  "New York Giants"
        elif raw_content == "Texans" or raw_content == "HOU" :
            new_team_name =  "Houston Texans"
        elif raw_content == "Saints" or raw_content == "NO" :
            new_team_name =  "New Orleans Saints"
        elif raw_content == "Vikings" or raw_content == "MN" :
            new_team_name =  "Minnesota Vikings"
        elif raw_content == "Chargers" or raw_content == "LAC" :
            new_team_name =  "Los Angeles Chargers"
        elif raw_content == "Falcons" or raw_content == "ATL" :
            new_team_name =  "Atlanta Falcons"
        elif raw_content == "Lions" or raw_content == "DET" :
            new_team_name =  "Detroit Lions"
        elif raw_content == "49ers" or raw_content == "SF" :
            new_team_name =  "San Francisco 49ers"
        elif raw_content == "Jets" or raw_content == "NYJ" :
            new_team_name =  "New York Jets"
        elif raw_content == "Packers" or raw_content == "GB" :
            new_team_name =  "Greenbay Packers"
        elif raw_content == "Patriots" or raw_content == "NE" :
            new_team_name =  "New England Patriots"
        elif raw_content == "Commanders" or raw_content == "WAS" :
            new_team_name =  "Washington Commanders"
        elif raw_content == "Bears" or raw_content == "CHI" :
            new_team_name =  "Chicago Bears"
        elif raw_content == "Panthers" or raw_content == "CAR" :
            new_team_name =  "Carolina Panthers"
        elif raw_content == "Cardinals" or raw_content == "ARI" :
            new_team_name =  "Arizona Cardinals"
        elif raw_content == "Cowboys" or raw_content == "DAL" :
            new_team_name =  "Dallas Cowboys"
        elif raw_content == "Colts" or raw_content == "IND" :
            new_team_name =  "Indianapolis Colts"
        elif raw_content == "Chiefs" or raw_content == "KC" :
            new_team_name =  "Kansas City Chiefs"
        elif raw_content == "Colts" or raw_content == "IND" :
            new_team_name =  "Indianapolis Colts"
        elif raw_content == "Rams" or raw_content == "LA" :
            new_team_name =  "Los Angeles Rams"
        elif raw_content == "Buccaneers" or raw_content == "TB" :
            new_team_name =  "Tampa Bay Buccaneers"
        elif raw_content == "Steelers" or raw_content == "PIT" :
            new_team_name =  "Pittsburgh Steelers"
        elif raw_content == "Jaguars" or raw_content == "JAX" :
            new_team_name =  "Jacksonville Jaguars"
        elif raw_content == "Bengals" or raw_content == "CIN" :
            new_team_name =  "Cincinnati Bengals"
        elif raw_content == "Ravens" or raw_content == "BAL" :
            new_team_name =  "Baltimore Ravens"
        elif raw_content == "Bills" or raw_content == "BUF" :
            new_team_name = "Buffalo Bills"
        elif raw_content == "Dolphins" or raw_content == "MIA" :
            new_team_name =  "Miami Dolphins"
        elif raw_content == "Browns" or raw_content == "CLE" :
            new_team_name =  "Cleveland Browns"
        elif raw_content == "Raiders" or raw_content == "LV" :
            new_team_name =  "Las Vegas Raiders"
        else :
            raise Exception(f"No matching index found for [{raw_content}]")

        converted_team_names.append(new_team_name)

    return converted_team_names
        


In [59]:
# Combine nfl team spending data sets into one data set while converting money-string to numeric value
def create_nfl_team_spending_dataframe () :
    final_df = pd.DataFrame()
    for search_year in SEARCH_YEARS :
        # load CSV
        df = pd.read_csv(f"raw-data/nfl_team_spending_{search_year}_table.csv")

        df_columns = df.columns

        for col in df_columns :
            if col == 'Team' :
                # standardize team names
                df[col] = standardize_nfl_team_name(df[col])
            else :
                # Remove currency symbols and commas, then convert to float
                df[col] = df[col].replace(r'[$,]', '', regex=True).astype(int)

            # add year as column
            df['year'] = search_year

        final_df = pd.concat([final_df, df], ignore_index=True)

    # reindex using row count
    # row_index = range(len(final_df))
    # final_df.index = row_index
    final_df.reset_index(drop=True)
    final_df.drop(df.columns[df.columns.str.contains('unnamed', case=False)], axis=1, inplace=True)

    return final_df


In [60]:
df = create_nfl_team_spending_dataframe ()
df.head(65)

Unnamed: 0,Team,QB,RB,WR,TE,OL,Offense,IDL,EDGE,LB,S,CB,Defense,year
0,Philadelphia Eagles,13385137,10203112,19241989,5509036,25664899,74004173,5264666,10241101,10004817,6920158,6032738,38463480,2013
1,Seattle Seahawks,1557085,10799653,16831423,12778788,27955261,69922210,7701509,25013832,7377232,8579619,3507877,52180069,2013
2,Tennessee Titans,6336958,15376098,12686896,6979500,26721984,68101436,6496528,8776293,5740835,10777066,9602477,41393199,2013
3,Denver Broncos,18716295,5070632,10120554,8013902,25754095,67675478,6880138,10234767,8030669,5894346,17559225,48599145,2013
4,New York Giants,21998400,5036739,8677626,2998913,24235900,62947578,5118995,9523813,7916847,12704990,11202110,46466755,2013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,Buffalo Bills,4663940,8362867,8979223,4489668,14455361,40951059,16611040,13270000,5414838,10503423,7069748,52869049,2014
61,Miami Dolphins,5859205,2412352,8242874,6007127,17575011,40096569,15944118,11848139,9006508,5027138,13351955,55177858,2014
62,Cleveland Browns,4171299,2200883,5666354,2620527,23789034,38448097,19697955,14276906,7765000,2500318,10928931,55169110,2014
63,Las Vegas Raiders,1146517,11735884,4609651,1496363,10275023,29263438,5163512,2078750,6548030,6764294,6782330,27336916,2014


In [61]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 320 entries, 0 to 319
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Team     320 non-null    object
 1   QB       320 non-null    int32 
 2   RB       320 non-null    int32 
 3   WR       320 non-null    int32 
 4   TE       320 non-null    int32 
 5   OL       320 non-null    int32 
 6   Offense  320 non-null    int32 
 7   IDL      320 non-null    int32 
 8   EDGE     320 non-null    int32 
 9   LB       320 non-null    int32 
 10  S        320 non-null    int32 
 11  CB       320 non-null    int32 
 12  Defense  320 non-null    int32 
 13  year     320 non-null    object
dtypes: int32(12), object(2)
memory usage: 20.1+ KB


In [63]:
# Save results in file: intermediate/nfl_team_spending_2013_2022.csv
df.to_csv("intermediate-data/nfl_team_spending_2013_2022.csv", index=True)