## Collecting Cap Data

This project uses salary-cap data from Spotrac, a comprehensive site that publishes cap figures for every NFL team. Spotrac's data goes back to 2011, offering a decent range of historical seasons.

In [23]:
# IMPORTS
import csv
import requests
import bs4
import pandas as pd

In [24]:
# Seasons to collect: 2011 through 2024 inclusive (the range end is exclusive).
years = list(range(2011, 2025))

In [25]:
# Team abbreviation -> full franchise name.
teams = {
    'ARI': 'Arizona Cardinals',
    'ATL': 'Atlanta Falcons',
    'BAL': 'Baltimore Ravens',
    'BUF': 'Buffalo Bills',
    'CAR': 'Carolina Panthers',
    'CHI': 'Chicago Bears',
    'CIN': 'Cincinnati Bengals',
    'CLE': 'Cleveland Browns',
    'DAL': 'Dallas Cowboys',
    'DEN': 'Denver Broncos',
    'DET': 'Detroit Lions',
    'GB': 'Green Bay Packers',
    'HOU': 'Houston Texans',
    'IND': 'Indianapolis Colts',
    'JAX': 'Jacksonville Jaguars',
    'KC': 'Kansas City Chiefs',
    'LV': 'Las Vegas Raiders',
    'LAC': 'Los Angeles Chargers',
    'LAR': 'Los Angeles Rams',
    'MIA': 'Miami Dolphins',
    'MIN': 'Minnesota Vikings',
    'NE': 'New England Patriots',
    'NO': 'New Orleans Saints',
    'NYG': 'New York Giants',
    'NYJ': 'New York Jets',
    'PHI': 'Philadelphia Eagles',
    'PIT': 'Pittsburgh Steelers',
    'SF': 'San Francisco 49ers',
    'SEA': 'Seattle Seahawks',
    'TB': 'Tampa Bay Buccaneers',
    'TEN': 'Tennessee Titans',
    'WAS': 'Washington Commanders',
}

# Team abbreviation -> URL slug: the full name lower-cased with spaces turned
# into hyphens (e.g. 'Philadelphia Eagles' -> 'philadelphia-eagles').
# If a slug ever stops following that pattern, override that key after this line.
url_teams = {
    abbr: full_name.lower().replace(' ', '-')
    for abbr, full_name in teams.items()
}

### Testing

In [26]:
# Testing
# One team / one season page, used to prototype the scraping steps below.
url = 'https://www.spotrac.com/nfl/philadelphia-eagles/overview/_/year/2024/sort/cap_total'

# Index into `tables` -> what that table holds. Entries 0 and 1 are the ones
# used by the sections below; 2 and 3 are presumably the next tables on the page.
table_ind = {
    0: 'Active Roster',
    1: 'IR',
    2: 'Practice Squad',
    3: 'Dead Money'
}

# A timeout stops a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text

soup = bs4.BeautifulSoup(text, 'html.parser')
tables = soup.find_all('table')

In [27]:
def clean_name(word):
    r"""Return the text that follows the first newline in ``word``.

    Scraped player cells look like 'Johnson\nLane Johnson'; this keeps the
    part after the newline ('Lane Johnson'). A string containing no newline
    is returned unchanged.
    """
    return word.split('\n', 1)[-1]

def clean_header(word):
    r"""Return the first line of ``word`` (everything before the first newline).

    A string containing no newline is returned unchanged. The previous
    slice-based version used ``word[:word.find('\n')]``, which silently
    dropped the last character in that case because ``find`` returns -1.
    """
    return word.split('\n', 1)[0]

### Active Roster

In [28]:
# Active-roster table: the first <table> found on the page.
curr = tables[0]
headers = [th.text.strip() for th in curr.find_all('th')]
# Skip the first <tr> (presumably the header row); each remaining row
# becomes a list of its stripped <td> texts.
rows = [
    [td.text.strip() for td in tr.find_all('td')]
    for tr in curr.find_all('tr')[1:]
]

In [29]:
# Multi-line header cells keep only their first line; single-line ones pass through.
new_headers = [
    clean_header(label) if '\n' in label else label
    for label in headers
]
# The second column holds the player names, so give it a fixed label
# regardless of what the scraped header text says.
new_headers[1] = 'Player'

In [30]:
# Assemble the scraped rows into a frame labelled with the cleaned headers.
active_df = pd.DataFrame(data=rows, columns=new_headers)
active_df.head(n=5)

Unnamed: 0,Unnamed: 1,Player,Pos,Age,Cap Hit,Cap Hit Pct,Dead Cap,Cash Total,Free Agent
0,1,Johnson\nLane Johnson,RT,34,"$15,865,000",6.21%,"($45,065,000)","$20,000,000",2027
1,2,Hurts\nJalen Hurts,QB,26,"$13,558,800",5.31%,"($120,871,200)","$40,000,000",2029
2,3,Brown\nA.J. Brown,WR,27,"$11,878,894",4.65%,"($73,220,682)","$21,250,000",2030
3,4,Mailata\nJordan Mailata,LT,27,"$11,616,000",4.55%,"($77,892,000)","$21,875,000",2029
4,5,Slay\nDarius Slay,CB,33,"$10,666,000",4.18%,"($33,357,932)","$11,350,000",2026


In [31]:
# Drop the column whose header is the empty string (a 1, 2, 3... counter in
# the preview above). Rebinding with errors='ignore' instead of an in-place
# drop keeps this cell safe to re-run: a second run is a no-op, not a KeyError.
active_df = active_df.drop(columns=[''], errors='ignore')

In [32]:
# Convert the money columns from display strings ('$15,865,000') to floats.
for col in ['Cap Hit', 'Dead Cap', 'Cash Total']:
    if pd.api.types.is_numeric_dtype(active_df[col]):
        # Already converted on an earlier run of this cell; .str would fail on floats.
        continue
    # regex=False: '$', '(' and ')' are regex metacharacters, so make the
    # literal match explicit instead of relying on the pandas-version default.
    amounts = (
        active_df[col]
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
    )
    if col == 'Dead Cap':
        # Dead cap is shown in parentheses, e.g. '($45,065,000)'; keep the magnitude.
        amounts = (
            amounts
            .str.replace('(', '', regex=False)
            .str.replace(')', '', regex=False)
        )
        # A cell that is exactly '-' means "no dead cap". Whole-value replace
        # (not .str.replace) so a hyphen inside a real number is left alone.
        amounts = amounts.replace('-', '0')
    active_df[col] = amounts.astype(float)

In [33]:
# Preview the frame after the money columns were converted to floats.
active_df.head(n=5)

Unnamed: 0,Player,Pos,Age,Cap Hit,Cap Hit Pct,Dead Cap,Cash Total,Free Agent
0,Johnson\nLane Johnson,RT,34,15865000.0,6.21%,45065000.0,20000000.0,2027
1,Hurts\nJalen Hurts,QB,26,13558800.0,5.31%,120871200.0,40000000.0,2029
2,Brown\nA.J. Brown,WR,27,11878894.0,4.65%,73220682.0,21250000.0,2030
3,Mailata\nJordan Mailata,LT,27,11616000.0,4.55%,77892000.0,21875000.0,2029
4,Slay\nDarius Slay,CB,33,10666000.0,4.18%,33357932.0,11350000.0,2026


In [34]:
# '6.21%' -> 6.21
pct_text = active_df['Cap Hit Pct'].str.replace('%', '')
active_df['Cap Hit Pct'] = pct_text.astype(float)
# 'Johnson\nLane Johnson' -> 'Lane Johnson'
active_df['Player'] = active_df['Player'].apply(clean_name)
active_df.head(n=5)

Unnamed: 0,Player,Pos,Age,Cap Hit,Cap Hit Pct,Dead Cap,Cash Total,Free Agent
0,Lane Johnson,RT,34,15865000.0,6.21,45065000.0,20000000.0,2027
1,Jalen Hurts,QB,26,13558800.0,5.31,120871200.0,40000000.0,2029
2,A.J. Brown,WR,27,11878894.0,4.65,73220682.0,21250000.0,2030
3,Jordan Mailata,LT,27,11616000.0,4.55,77892000.0,21875000.0,2029
4,Darius Slay,CB,33,10666000.0,4.18,33357932.0,11350000.0,2026


### IR

In [35]:
# IR table: the second <table> found on the page.
curr = tables[1]
headers = [th.text.strip() for th in curr.find_all('th')]
# Skip the first <tr> (presumably the header row); each remaining row
# becomes a list of its stripped <td> texts.
rows = [
    [td.text.strip() for td in tr.find_all('td')]
    for tr in curr.find_all('tr')[1:]
]

In [36]:
# Multi-line header cells keep only their first line; single-line ones pass through.
new_headers = [
    clean_header(label) if '\n' in label else label
    for label in headers
]
# The second column holds the player names, so give it a fixed label
# regardless of what the scraped header text says.
new_headers[1] = 'Player'

In [37]:
# Assemble the scraped IR rows into a frame labelled with the cleaned headers.
ir_df = pd.DataFrame(data=rows, columns=new_headers)
ir_df.head(n=5)

Unnamed: 0,Unnamed: 1,Player,Pos,Age,Cap Hit,Cap Hit Pct,Dead Cap,Cash Total,Free Agent
0,1,Graham\nBrandon Graham,OLB,36,"$8,198,000",3.21%,"($17,216,000)","$4,000,000",2025
1,2,Bradberry\nJames Bradberry,S,31,"$4,305,000",1.69%,"($15,118,000)","$8,850,000",2026
2,3,Young\nByron Young,DT,24,"$1,007,669",0.39%,"($1,007,669)","$1,007,669",2027
3,4,Covey\nBritain Covey,WR,27,"$985,000",0.39%,-,"$985,000",2025
4,5,VanSumeren\nBen VanSumeren,ILB,24,"$915,000",0.36%,-,"$915,000",2025


In [38]:
# Convert the money columns from display strings ('$8,198,000') to floats.
for col in ['Cap Hit', 'Dead Cap', 'Cash Total']:
    if pd.api.types.is_numeric_dtype(ir_df[col]):
        # Already converted on an earlier run of this cell; .str would fail on floats.
        continue
    # regex=False: '$', '(' and ')' are regex metacharacters, so make the
    # literal match explicit instead of relying on the pandas-version default.
    amounts = (
        ir_df[col]
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
    )
    if col == 'Dead Cap':
        # Dead cap is shown in parentheses, e.g. '($17,216,000)'; keep the magnitude.
        amounts = (
            amounts
            .str.replace('(', '', regex=False)
            .str.replace(')', '', regex=False)
        )
        # A cell that is exactly '-' means "no dead cap" (see the preview above).
        # Whole-value replace (not .str.replace) so a hyphen inside a real
        # number is left alone.
        amounts = amounts.replace('-', '0')
    ir_df[col] = amounts.astype(float)

In [39]:
# Drop the column whose header is the empty string (a 1, 2, 3... counter in
# the preview above). Rebinding with errors='ignore' instead of an in-place
# drop keeps this cell safe to re-run: a second run is a no-op, not a KeyError.
ir_df = ir_df.drop(columns=[''], errors='ignore')

In [None]:
# '3.21%' -> 3.21
pct_text = ir_df['Cap Hit Pct'].str.replace('%', '')
ir_df['Cap Hit Pct'] = pct_text.astype(float)
# 'Graham\nBrandon Graham' -> 'Brandon Graham'
ir_df['Player'] = ir_df['Player'].apply(clean_name)

Unnamed: 0,Player,Pos,Age,Cap Hit,Cap Hit Pct,Dead Cap,Cash Total,Free Agent
0,Brandon Graham,OLB,36,8198000.0,3.21,17216000.0,4000000.0,2025
1,James Bradberry,S,31,4305000.0,1.69,15118000.0,8850000.0,2026
2,Byron Young,DT,24,1007669.0,0.39,1007669.0,1007669.0,2027
3,Britain Covey,WR,27,985000.0,0.39,0.0,985000.0,2025
4,Ben VanSumeren,ILB,24,915000.0,0.36,0.0,915000.0,2025


In [None]:
# Preview the cleaned IR frame.
ir_df.head(n=5)

Link for the Super Bowl (SB) winner of each year: https://www.pro-football-reference.com/years/

Link for the standings of each year (2024 shown as the example): https://www.pro-football-reference.com/years/2024/