## Setup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()

%load_ext autoreload
%autoreload 2

## 01 Load the data

In [2]:
file_name = 'data/nfl.csv'
nfl = pd.read_csv(file_name)

In [3]:
nfl.head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Tremon Smith,Philadelphia Eagles,RB,7/20/1996,570000
1,Shawn Williams,Cincinnati Bengals,SS,5/13/1991,3500000
2,Adam Butler,New England Patriots,DT,4/12/1994,645000
3,Derek Wolfe,Denver Broncos,DE,2/24/1990,8000000
4,Jake Ryan,Jacksonville Jaguars,OLB,2/27/1992,1000000


In [4]:
nfl.dtypes

Name        object
Team        object
Position    object
Birthday    object
Salary       int64
dtype: object

In [5]:
# Check for missing values
nfl.isnull().sum()

Name        0
Team        0
Position    0
Birthday    0
Salary      0
dtype: int64

In [7]:
# Convert the 'date' column to datetime format
nfl['Birthday'] = pd.to_datetime(nfl['Birthday'])

In [8]:
nfl.dtypes

Name                object
Team                object
Position            object
Birthday    datetime64[ns]
Salary               int64
dtype: object

## 02 Set the index

In [9]:
nfl.head()

Unnamed: 0,Name,Team,Position,Birthday,Salary
0,Tremon Smith,Philadelphia Eagles,RB,1996-07-20,570000
1,Shawn Williams,Cincinnati Bengals,SS,1991-05-13,3500000
2,Adam Butler,New England Patriots,DT,1994-04-12,645000
3,Derek Wolfe,Denver Broncos,DE,1990-02-24,8000000
4,Jake Ryan,Jacksonville Jaguars,OLB,1992-02-27,1000000


In [10]:
# Set the DataFrame index to store the player names 'Name'
nfl = nfl.set_index('Name')

In [11]:
nfl.head()

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tremon Smith,Philadelphia Eagles,RB,1996-07-20,570000
Shawn Williams,Cincinnati Bengals,SS,1991-05-13,3500000
Adam Butler,New England Patriots,DT,1994-04-12,645000
Derek Wolfe,Denver Broncos,DE,1990-02-24,8000000
Jake Ryan,Jacksonville Jaguars,OLB,1992-02-27,1000000


## 03 Count the number of players

In [17]:
nfl.head()

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tremon Smith,Philadelphia Eagles,RB,1996-07-20,570000
Shawn Williams,Cincinnati Bengals,SS,1991-05-13,3500000
Adam Butler,New England Patriots,DT,1994-04-12,645000
Derek Wolfe,Denver Broncos,DE,1990-02-24,8000000
Jake Ryan,Jacksonville Jaguars,OLB,1992-02-27,1000000


In [20]:
nfl.shape, nfl['Team'].nunique()

((1655, 4), 32)

In [22]:
# How can we count the number of players per team in this data set?
nfl['Team'].value_counts().head()   

Team
New York Jets          58
Kansas City Chiefs     56
Washington Redskins    56
New Orleans Saints     55
San Francisco 49Ers    55
Name: count, dtype: int64

## 04 Five highest-paid players

In [24]:
# Who are the five highest-paid players?
nfl.nlargest(5, 'Salary')[['Team', 'Position', 'Salary']]

Unnamed: 0_level_0,Team,Position,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Kirk Cousins,Minnesota Vikings,QB,27500000
Marcus Mariota,Tennessee Titans,QB,20922000
Jameis Winston,Tampa Bay Buccaneers,QB,20922000
Derek Carr,Oakland Raiders,QB,19900000
Jimmy Garoppolo,San Francisco 49Ers,QB,17200000


## 05 Sort the values

In [25]:
# How can we sort the data set first by teams in alphabetical order and then by salary in descending order?
nfl.sort_values(by=['Team', 'Salary'], ascending=[True, False]).head()

Unnamed: 0_level_0,Team,Position,Birthday,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Chandler Jones,Arizona Cardinals,OLB,1990-02-27,16500000
Patrick Peterson,Arizona Cardinals,CB,1990-07-11,11000000
Larry Fitzgerald,Arizona Cardinals,WR,1983-08-31,11000000
David Johnson,Arizona Cardinals,RB,1991-12-16,5700000
Justin Pugh,Arizona Cardinals,G,1990-08-15,5000000


## 06 Oldest player

In [28]:
# Who is the oldest player on the New York Jets roster, and what is his birthday?

# Add a new column 'Age' to the DataFrame
nfl['Age'] = (pd.to_datetime('today') - nfl['Birthday']).dt.days / 365

# Find the oldest player in the New York Jets team
mask_jets = nfl['Team'] == 'New York Jets'
oldest_jets_player = nfl[mask_jets].nlargest(1, 'Age')[['Age', 'Birthday']]

nfl[mask_jets].sort_values(by='Age', ascending=False).head()

Unnamed: 0_level_0,Team,Position,Birthday,Salary,Age
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ryan Kalil,New York Jets,C,1985-03-29,2400000,40.317808
Steve McLendon,New York Jets,NT,1986-01-03,1500000,39.550685
Albert McClellan,New York Jets,ILB,1986-06-04,805000,39.134247
Demaryius Thomas,New York Jets,WR,1987-10-25,1200000,37.742466
Bilal Powell,New York Jets,RB,1988-10-04,930000,36.79726


In [29]:
nfl.loc[mask_jets, ['Age', 'Birthday']].sort_values(by='Age', ascending=False).head()

Unnamed: 0_level_0,Age,Birthday
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ryan Kalil,40.317808,1985-03-29
Steve McLendon,39.550685,1986-01-03
Albert McClellan,39.134247,1986-06-04
Demaryius Thomas,37.742466,1987-10-25
Bilal Powell,36.79726,1988-10-04
