In [30]:
import pandas as pd
import json
import os

In [31]:
# Load each source CSV into its own pandas DataFrame.
# The files are read in the order listed and unpacked positionally.
df_salaries, df_superbowl, df_playoff, df_reg_season = (
    pd.read_csv(csv_name)
    for csv_name in (
        'nfl_salaries_2011-2022.csv',
        'Super_Bowl_Winners.csv',
        'nfl_playoff_win_loss_records_2011_to_2022.csv',
        'nfl_reg_season_win_loss_records_2011_to_2022.csv',
    )
)

In [32]:
# Display the raw salaries frame exactly as loaded from the CSV.
df_salaries

Unnamed: 0,index,year,team,name,position,cap_hit,cap_percentage
0,1,2011,arizona-cardinals,Larry Fitzgerald,WR,"$2,000,000",13.28
1,2,2011,arizona-cardinals,Levi Brown,LT,"$6,777,500",7.70
2,3,2011,arizona-cardinals,Adrian Wilson,S,"$3,500,000",6.17
3,4,2011,arizona-cardinals,Darnell Dockett,DE,"$2,350,000",4.58
4,5,2011,arizona-cardinals,Derek Anderson,QB,"$4,087,500",4.40
...,...,...,...,...,...,...,...
20663,20664,2022,washington-football-team,David Bada,DT,"$705,000",0.04
20664,20665,2022,washington-football-team,Nate Gerry,OLB,"$57,500",0.03
20665,20666,2022,washington-football-team,Jaret Patterson,RB,"$825,000",0.02
20666,20667,2022,washington-football-team,Alex Akingbulu,T,"$705,000",0.02


In [33]:
# Display the raw playoff win/loss/tie records (one row per year and team).
df_playoff 

Unnamed: 0,Year,Team,Win,Loss,Tie
0,2011,NY Giants,4,0,0
1,2011,New England,2,1,0
2,2011,Baltimore,1,1,0
3,2011,Houston,1,1,0
4,2011,Denver,1,1,0
...,...,...,...,...,...
145,2022,Minnesota,0,1,0
146,2022,Baltimore,0,1,0
147,2022,LA Chargers,0,1,0
148,2022,Seattle,0,1,0


In [34]:
# Display the raw regular-season win/loss/tie records (one row per year and team).
df_reg_season

Unnamed: 0,Year,Team,Win,Loss,Tie
0,2011,Green Bay,15,1,0
1,2011,San Francisco,13,3,0
2,2011,New England,13,3,0
3,2011,New Orleans,13,3,0
4,2011,Baltimore,12,4,0
...,...,...,...,...,...
379,2022,LA Rams,5,12,0
380,2022,Indianapolis,4,12,1
381,2022,Arizona,4,13,0
382,2022,Houston,3,13,1


In [35]:
# Display the Super Bowl winners table (one winning team per year).
df_superbowl

Unnamed: 0,year,team,superbowl wins
0,2011,NY Giants,1
1,2012,Baltimore,1
2,2013,Seattle,1
3,2014,New England,1
4,2015,Denver,1
5,2016,New England,1
6,2017,Philadelphia,1
7,2018,New England,1
8,2019,Kansas City,1
9,2020,Tampa Bay,1


In [36]:
# Clean and standardize the salary data.

# cap_percentage must be a float column for the steps that follow.
df_salaries = df_salaries.astype({'cap_percentage': float})

# Standardize a salary-file team slug to the short name used by the record tables.
def clean_team_name(team):
    """Turn a hyphenated team slug into the display name used by the win/loss tables.

    The last hyphen-separated word (the nickname) is dropped and the remaining
    words are title-cased, e.g. ``'arizona-cardinals'`` -> ``'Arizona'``.
    ``'washington-football-team'`` additionally drops the trailing ``'football'``
    so it becomes ``'Washington'``.

    Cities that host two franchises cannot be identified by city alone, so their
    slugs map directly to the labels the record tables use (``'NY Giants'``,
    ``'NY Jets'``, ``'LA Rams'``, ``'LA Chargers'``).

    Parameters
    ----------
    team : str
        Team slug such as ``'new-england-patriots'``.

    Returns
    -------
    str
        The standardized team name. A value with no hyphen (for example a name
        that was already cleaned) is returned unchanged, which keeps the
        function safe to apply twice.
    """
    # Franchises that share a city need the nickname kept to stay distinguishable.
    shared_city_names = {
        'new-york-giants': 'NY Giants',
        'new-york-jets': 'NY Jets',
        'los-angeles-rams': 'LA Rams',
        'los-angeles-chargers': 'LA Chargers',
    }
    if team in shared_city_names:
        return shared_city_names[team]

    parts = team.split('-')
    if len(parts) == 1:
        # Nothing to strip; previously this path raised IndexError below.
        return team

    city_name = parts[:-1]
    # 'washington-football-team' leaves a trailing 'football' after the nickname is dropped.
    if city_name[-1] == 'football':
        city_name = city_name[:-1]
    return ' '.join(word.title() for word in city_name)

# Keep the raw slugs before overwriting the column: once only the city is left,
# two franchises from the same city can no longer be told apart.
raw_team_slugs = df_salaries['team'].copy()

# Apply the cleaning function to the 'team' column of df_salaries
df_salaries['team'] = raw_team_slugs.apply(clean_team_name)

# Standardize shared-city teams (e.g. New York -> NY Giants / NY Jets) from the
# raw slug. The previous approach labelled only the first 'New York' row of each
# year as NY Giants and every remaining row as NY Jets, which put nearly the
# whole Giants roster on the Jets.
# NOTE(review): assumes the salary file uses these exact slugs — confirm;
# slugs that do not occur are simply never matched.
shared_city_teams = {
    'new-york-giants': 'NY Giants',
    'new-york-jets': 'NY Jets',
    'los-angeles-rams': 'LA Rams',
    'los-angeles-chargers': 'LA Chargers',
}
shared_city_mask = raw_team_slugs.isin(list(shared_city_teams))
df_salaries.loc[shared_city_mask, 'team'] = raw_team_slugs[shared_city_mask].map(shared_city_teams)

# Row labels of the New York players, kept under the original variable name.
ny_indices = df_salaries[df_salaries['team'].isin(['NY Giants', 'NY Jets'])].index

# A bare '-' entry is replaced with '0' so it parses as a number
# (presumably it marks a player with no cap hit — confirm against the data source).
df_salaries['cap_hit'] = df_salaries['cap_hit'].replace('-', '0')

# Strip the currency formatting ("$2,000,000" -> 2000000.0).
# regex=False makes '$' a literal character on every pandas version; relying on
# the default triggers a warning on older releases because '$' is also a regex anchor.
df_salaries['cap_hit'] = (
    df_salaries['cap_hit']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)

df_salaries

  df_salaries['cap_hit'] = df_salaries['cap_hit'].str.replace('$', '').str.replace(',', '').astype(float)


Unnamed: 0,index,year,team,name,position,cap_hit,cap_percentage
0,1,2011,Arizona,Larry Fitzgerald,WR,2000000.0,13.28
1,2,2011,Arizona,Levi Brown,LT,6777500.0,7.70
2,3,2011,Arizona,Adrian Wilson,S,3500000.0,6.17
3,4,2011,Arizona,Darnell Dockett,DE,2350000.0,4.58
4,5,2011,Arizona,Derek Anderson,QB,4087500.0,4.40
...,...,...,...,...,...,...,...
20663,20664,2022,Washington,David Bada,DT,705000.0,0.04
20664,20665,2022,Washington,Nate Gerry,OLB,57500.0,0.03
20665,20666,2022,Washington,Jaret Patterson,RB,825000.0,0.02
20666,20667,2022,Washington,Alex Akingbulu,T,705000.0,0.02


In [37]:
# 2. Create structure for storing data
# `result` is the list that is eventually written to seasons.json; it starts
# empty and is filled with one {year: teams} dict per season later on.
result = []

In [38]:
# Create a dictionary to make data access easier for filling.
# Layout once populated: data_dict[year][team] -> dict with the keys
# 'players', 'superbowl_winner', 'win_loss' and 'playoff_win_loss'.
data_dict = {}

In [39]:
# 3. Populate the structure
# - Salaries: file every player under data_dict[year][team]['players']
for _, salary_row in df_salaries.iterrows():
    season = str(salary_row['year'])  # years are stored as string keys
    club = salary_row['team']
    player_record = {
        'year': season,
        'team': club,
        'name': salary_row['name'],
        'position': salary_row['position'],
        'cap_hit': salary_row['cap_hit'],
        'cap_percentage': str(salary_row['cap_percentage'])
    }

    # Create the season bucket, then the per-team skeleton, the first time each is seen.
    season_teams = data_dict.setdefault(season, {})
    if club not in season_teams:
        season_teams[club] = {
            'players': [],
            'superbowl_winner': '',
            'win_loss': {},
            'playoff_win_loss': {}
        }

    season_teams[club]['players'].append(player_record)


In [40]:
# - Super Bowl Winners: record the champion's name on its own team entry.
# Rows whose year or team has no salary data are skipped.
for _, champion_row in df_superbowl.iterrows():
    season = str(champion_row['year'])
    champion = champion_row['team']
    season_teams = data_dict.get(season, {})
    if champion in season_teams:
        season_teams[champion]['superbowl_winner'] = champion


In [41]:
# - Playoff Win/Loss: attach each team's playoff record for the season.
# Rows whose year or team is not already in data_dict are ignored.
for _, playoff_row in df_playoff.iterrows():
    season = str(playoff_row['Year'])
    club = playoff_row['Team']
    playoff_record = {
        'win': playoff_row['Win'],
        'loss': playoff_row['Loss'],
        'tie': playoff_row['Tie']
    }

    season_teams = data_dict.get(season)
    if season_teams is None or club not in season_teams:
        continue
    season_teams[club]['playoff_win_loss'] = playoff_record


In [42]:
# - Regular Season Win/Loss: attach each team's regular-season record.
# Rows whose year or team is not already in data_dict are ignored.
for _, season_row in df_reg_season.iterrows():
    season = str(season_row['Year'])
    club = season_row['Team']
    season_record = {
        'win': season_row['Win'],
        'loss': season_row['Loss'],
        'tie': season_row['Tie']
    }

    season_teams = data_dict.get(season)
    if season_teams is None or club not in season_teams:
        continue
    season_teams[club]['win_loss'] = season_record


In [43]:
# Convert the populated dictionary into the desired list format:
# one single-key {year: teams} dict per season, in data_dict's insertion order.
# The list is rebuilt from scratch instead of appended to, so re-running this
# cell no longer duplicates every season in the exported file.
result = [{year: teams_data} for year, teams_data in data_dict.items()]

In [44]:
# 4. Export to JSON
# Serialize the list of seasons with 4-space indentation into seasons.json.
with open('seasons.json', 'w') as out_file:
    out_file.write(json.dumps(result, indent=4))