# Positional Spending Data
**Name:** Jaime Avendaño  
**Date:** 5/20/2021  
**Data:** https://overthecap.com/positional-spending/  
**NFL Logos:** https://raw.githubusercontent.com/statsbylopez/BlogPosts/master/nfl_teamlogos.csv  
<br><br>
This notebook scrapes the positional spending data from overthecap.com and stores it in a parquet file to be used for analysis.  
NFL Logos are also pulled and resized for use in visualizations.

In [19]:
import os
import urllib.request
from io import StringIO

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import requests
from bs4 import BeautifulSoup
from PIL import Image

import janitor

In [24]:
# Download the positional-spending page.
# - timeout: requests waits indefinitely by default, so a stalled connection
#   would hang the cell forever.
# - raise_for_status(): turns an HTTP error response (4xx/5xx) into an
#   immediate exception here instead of a confusing parse failure later.
spending_url = 'https://overthecap.com/positional-spending/'
page = requests.get(spending_url, timeout=30)
page.raise_for_status()
page

<Response [200]>

In [25]:
# Collect every <table> element from the downloaded page and let pandas parse
# each one into its own DataFrame (dfs is a list of DataFrames).
soup = BeautifulSoup(page.content, 'html.parser')
position_spending_content = soup.find_all('table')
# read_html expects a path, URL or file-like object; passing literal HTML as a
# plain string is deprecated (pandas >= 2.1), so wrap the markup in StringIO.
dfs = pd.read_html(StringIO(str(position_spending_content)))

In [26]:
# Tag each parsed table with a season: the first table gets 2013 and every
# following table the next year, then stack them into one frame.
# NOTE(review): this relies on the page listing its tables in consecutive
# season order starting at 2013 -- confirm against the site if it changes.
first_season = 2013
for season, season_table in enumerate(dfs, start=first_season):
    season_table['year'] = season
nfl_df = pd.concat(dfs)
nfl_df.shape

(383, 14)

In [27]:
# Standardise the column labels with pyjanitor's clean_names(), then keep only
# the seasons up to and including 2021.
# DataFrame.query is used instead of pyjanitor's filter_on: the pyjanitor docs
# mark filter_on for deprecation in favour of query, and the expression
# string (and therefore the selected rows) is the same.
nfl_df = (
    nfl_df
    .clean_names()
    .query('year <= 2021')
)
nfl_df.shape

(288, 14)

In [30]:
# Convert the spending columns to integers by stripping '$' and ',' characters.
# The columns are picked by position: everything between the first column and
# the last one (the 'year' column appended earlier).
money_cols = nfl_df.columns[1:-1]
# Two fixes compared with `nfl_df.loc[:, cols] = ...`:
# - the pattern is a raw string, because '\$' in a normal string literal is an
#   invalid escape sequence (SyntaxWarning on recent Python versions);
# - assigning through nfl_df[cols] replaces the columns, so they take the new
#   integer dtype. On pandas >= 2.0, `.loc[:, cols] = ...` writes into the
#   existing columns in place and they stay object dtype.
nfl_df[money_cols] = (
    nfl_df[money_cols]
    .replace(r'[\$,]', '', regex=True)
    .astype(int)
)
nfl_df.head()

Unnamed: 0,team,qb,rb,wr,te,ol,offense,idl,edge,lb,s,cb,defense,year
0,Eagles,13385137,10203112,19241989,5509036,25664899,74004173,5264666,10241101,10004817,6920158,6032738,38463480,2013
1,Seahawks,1557085,10799653,16831423,12778788,27955261,69922210,7701509,25013832,7377232,8579619,3507877,52180069,2013
2,Titans,6336958,15376098,12686896,6979500,26721984,68101436,6496528,8776293,5740835,10777066,9602477,41393199,2013
3,Broncos,18716295,5070632,10120554,8013902,24318758,66240141,6880138,5313554,12951882,5894346,17559225,48599145,2013
4,Giants,21998400,5036739,8677626,2998913,24235900,62947578,5118995,9523813,7916847,12704990,11202110,46466755,2013


In [7]:
# Salary cap per season as [year, cap] pairs; the cap is entered in millions
# and scaled by 1e6 below.
# Data from https://en.wikipedia.org/wiki/Salary_cap
nfl_salary_cap = [
    [2013, 123],
    [2014, 133],
    [2015, 143.28],
    [2016, 155.27],
    [2017, 167],
    [2018, 177.2],
    [2019, 188.2],
    [2020, 198.2],
    [2021, 182.5],
]
cap_df = (
    pd.DataFrame(nfl_salary_cap, columns=['year', 'cap'])
    .assign(cap=lambda caps: caps['cap'] * 1e6)
)
cap_df

Unnamed: 0,year,cap
0,2013,123000000.0
1,2014,133000000.0
2,2015,143280000.0
3,2016,155270000.0
4,2017,167000000.0
5,2018,177200000.0
6,2019,188200000.0
7,2020,198200000.0
8,2021,182500000.0


In [8]:
# Attach each season's salary cap to every team row and store the team name
# as a categorical column.
# - drop(..., errors='ignore') removes a 'cap' column left by an earlier run of
#   this cell; without it a second run fails because join() refuses
#   overlapping column names.
# - join() with on='year' looks each row's year up in cap_df's year index.
nfl_df = (
    nfl_df
    .drop(columns='cap', errors='ignore')
    .join(cap_df.set_index('year'), on='year')
    .assign(team=lambda frame: frame['team'].astype('category'))
)
# A season that is absent from cap_df would silently get a NaN cap; fail
# loudly instead so the gap is noticed before the data is written out.
missing_cap_years = sorted(nfl_df.loc[nfl_df['cap'].isna(), 'year'].unique().tolist())
assert not missing_cap_years, f'no salary cap recorded for seasons: {missing_cap_years}'
nfl_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 288 entries, 0 to 31
Data columns (total 15 columns):
 #   Column   Non-Null Count  Dtype   
---  ------   --------------  -----   
 0   team     288 non-null    category
 1   qb       288 non-null    int32   
 2   rb       288 non-null    int32   
 3   wr       288 non-null    int32   
 4   te       288 non-null    int32   
 5   ol       288 non-null    int32   
 6   offense  288 non-null    int32   
 7   idl      288 non-null    int32   
 8   edge     288 non-null    int32   
 9   lb       288 non-null    int32   
 10  s        288 non-null    int32   
 11  cb       288 non-null    int32   
 12  defense  288 non-null    int32   
 13  year     288 non-null    int64   
 14  cap      288 non-null    float64 
dtypes: category(1), float64(1), int32(12), int64(1)
memory usage: 22.0 KB


In [9]:
# Write the combined spending table to a parquet file in the working
# directory so it can be loaded later for analysis.
parquet_path = 'nfl_df.parquet'
nfl_df.to_parquet(parquet_path)

## Pulling NFL Logos
Code based on: https://gist.github.com/Deryck97/dff8d33e9f841568201a2a0d5519ac5e

In [2]:
# Load the team-logo lookup table (team, team_code and url columns) straight
# from the CSV hosted on GitHub.
logo_csv_url = 'https://raw.githubusercontent.com/statsbylopez/BlogPosts/master/nfl_teamlogos.csv'
logo_url = pd.read_csv(logo_csv_url)

In [17]:
# Short team name = last word of the full name
# (e.g. 'Arizona Cardinals' -> 'Cardinals').
logo_url['team_short'] = logo_url['team'].str.split().str[-1]
# A full name whose last word is 'Team' gets the short name 'Washington'.
# This is done with a single .loc assignment on the frame: the chained form
# `logo_url.team_short[mask] = ...` assigns through an intermediate Series,
# which raises SettingWithCopyWarning and has no effect at all when pandas
# Copy-on-Write is enabled.
logo_url.loc[logo_url['team_short'] == 'Team', 'team_short'] = 'Washington'
logo_url.head()

Unnamed: 0,team,team_code,url,team_short
0,Arizona Cardinals,ARI,https://upload.wikimedia.org/wikipedia/en/thum...,Cardinals
1,Atlanta Falcons,ATL,https://upload.wikimedia.org/wikipedia/en/thum...,Falcons
2,Baltimore Ravens,BAL,https://upload.wikimedia.org/wikipedia/en/thum...,Ravens
3,Buffalo Bills,BUF,https://upload.wikimedia.org/wikipedia/en/thum...,Bills
4,Carolina Panthers,CAR,https://upload.wikimedia.org/wikipedia/en/thum...,Panthers


In [23]:
# Download every team logo to ./Logos/<team_short>.png and shrink it in place
# so it fits inside a 100x100 pixel box.
logo_dir = os.path.join(os.getcwd(), 'Logos')
# urlretrieve does not create missing directories, so make sure the target
# folder exists before the first download.
os.makedirs(logo_dir, exist_ok=True)

for team_short, url in zip(logo_url['team_short'], logo_url['url']):
    img_path = os.path.join(logo_dir, team_short + '.png')
    urllib.request.urlretrieve(url, img_path)
    # The context manager releases the file handle once the image is saved.
    with Image.open(img_path) as img:
        # thumbnail() resizes in place and keeps the aspect ratio.
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
        # resampling filter under its current name.
        img.thumbnail((100, 100), Image.LANCZOS)
        img.save(img_path)