In [172]:
import pandas as pd
import numpy as np

In [173]:
prefix = 'Career_Stats_'
defensive = pd.read_csv(prefix + 'Defensive.csv')
kickers = pd.read_csv(prefix + 'Field_Goal_Kickers.csv')
oline = pd.read_csv(prefix + 'Offensive_Line.csv')
qbs = pd.read_csv(prefix + 'Passing.csv', thousands=',')
receivers = pd.read_csv(prefix + 'Receiving.csv', thousands=',')
rushing = pd.read_csv(prefix + 'Rushing.csv')

def clean(df):
    df = df.replace(to_replace='--', value=np.nan)
    df = df.replace({',' : ''}, regex=True)
    for col in df.columns[5:]:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df

defensive = clean(defensive)
kickers = clean(kickers)
oline = clean(oline)
qbs = clean(qbs)
receivers = clean(receivers)
rushing = clean(rushing)

In [174]:
active_def = defensive.loc[defensive['Year'] >= 2006]
active_kickers = kickers.loc[kickers['Year'] >= 2006]
active_oline = oline.loc[oline['Year'] >= 2006]
active_qbs = qbs.loc[qbs['Year'] >= 2006]
active_receivers = receivers.loc[receivers['Year'] >= 2006]
active_rush = rushing.loc[rushing['Year'] >= 2006]

def get_active_players(df):
    non_active_players = []
    for name, group in df.groupby(['Name']):
        if (not 2016 in group['Year'].values):
            non_active_players.append(name)
    return df[~df['Name'].isin(non_active_players)]

active_def = get_active_players(active_def)
active_kickers = get_active_players(active_kickers)
active_oline = get_active_players(active_oline)
active_qbs = get_active_players(active_qbs)
active_receivers = get_active_players(active_receivers)
active_rushers = get_active_players(active_rush)

In [175]:
def get_under3(df):
    non_under3 = []
    for name, group in df.groupby(['Name']):
        if (len(group['Year']) > 3):
            non_under3.append(name)
    df = df[~df['Name'].isin(non_under3)]
    df = df.loc[df['Year'] == 2016]
    return df

young_def = get_under3(active_def)
young_kickers = get_under3(active_kickers)
young_oline = get_under3(active_oline)
young_qbs = get_under3(active_qbs)
young_receivers = get_under3(active_receivers)
young_rbs = get_under3(active_rushers)

def get_over6(df):
    over6 = []
    for name, group in df.groupby(['Name']):
        if (len(group['Year']) >= 6):
            over6.append(name)
    df = df[df['Name'].isin(over6)]
    df = df.loc[df['Year'] == 2016]
    return df

vet_def = get_over6(active_def)
vet_kickers = get_over6(active_kickers)
vet_oline = get_over6(active_oline)
vet_qbs = get_over6(active_qbs)
vet_receivers = get_over6(active_receivers)
vet_rbs = get_over6(active_rush)

In [176]:
# Top running backs
young_rbs = young_rbs.dropna(subset=['Rushing Yards' , 'Yards Per Carry', 'Rushing TDs'], how='any')
young_rbs_by_yds = young_rbs.sort_values(['Rushing Yards'], ascending=False)
display(young_rbs_by_yds)

vet_rbs = vet_rbs.dropna(subset=['Rushing Yards', 'Yards Per Carry', 'Rushing TDs'], how='any')
vet_rbs_by_yds = vet_rbs.sort_values(['Rushing Yards'], ascending=False)
display(vet_rbs_by_yds)

Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Rushing Attempts,Rushing Attempts Per Game,Rushing Yards,Yards Per Carry,Rushing Yards Per Game,Rushing TDs,Longest Rushing Run,Rushing First Downs,Percentage of Rushing First Downs,Rushing More Than 20 Yards,Rushing More Than 40 Yards,Fumbles
6661,ezekielelliott/2555224,Elliott Ezekiel,RB,2016,Dallas Cowboys,15,322.0,21.5,1631.0,5.1,108.7,15.0,,91.0,28.3,14.0,3.0,5.0
2915,jordanhoward/2555418,Howard Jordan,RB,2016,Chicago Bears,15,252.0,16.8,1313.0,5.2,87.5,6.0,69.0,70.0,27.8,10.0,2.0,1.0
14346,jayajayi/2552582,Ajayi Jay,RB,2016,Miami Dolphins,15,260.0,17.3,1272.0,4.9,84.8,8.0,,60.0,23.1,10.0,4.0,4.0
12600,devontafreeman/2543583,Freeman Devonta,RB,2016,Atlanta Falcons,16,227.0,14.2,1079.0,4.8,67.4,11.0,,61.0,26.9,7.0,2.0,1.0
10016,melvingordon/2552469,Gordon Melvin,RB,2016,San Diego Chargers,13,254.0,19.5,997.0,3.9,76.7,10.0,48.0,61.0,24.0,7.0,3.0,2.0
1438,carloshyde/2543743,Hyde Carlos,RB,2016,San Francisco 49ers,13,217.0,16.7,988.0,4.6,76.0,6.0,47.0,48.0,22.1,7.0,2.0,4.0
5690,isaiahcrowell/2550189,Crowell Isaiah,RB,2016,Cleveland Browns,16,198.0,12.4,952.0,4.8,59.5,7.0,,45.0,22.7,8.0,3.0,2.0
5710,spencerware/2540204,Ware Spencer,RB,2016,Kansas City Chiefs,14,214.0,15.3,921.0,4.3,65.8,3.0,46.0,47.0,22.0,3.0,2.0,3.0
2713,toddgurley/2552475,Gurley Todd,RB,2016,Los Angeles Rams,16,278.0,17.4,885.0,3.2,55.3,6.0,,48.0,17.3,2.0,0.0,2.0
4852,jeremyhill/2543603,Hill Jeremy,RB,2016,Cincinnati Bengals,15,222.0,14.8,839.0,3.8,55.9,9.0,,42.0,18.9,5.0,3.0,0.0


Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Rushing Attempts,Rushing Attempts Per Game,Rushing Yards,Yards Per Carry,Rushing Yards Per Game,Rushing TDs,Longest Rushing Run,Rushing First Downs,Percentage of Rushing First Downs,Rushing More Than 20 Yards,Rushing More Than 40 Yards,Fumbles
10310,demarcomurray/2495207,Murray DeMarco,RB,2016,Tennessee Titans,16,293.0,18.3,1287.0,4.4,80.4,9.0,,64.0,21.8,4.0,2.0,3.0
2769,leseanmccoy/79607,McCoy LeSean,RB,2016,Buffalo Bills,15,234.0,15.6,1267.0,5.4,84.5,13.0,,55.0,23.5,11.0,3.0,3.0
2412,davidjohnson/2553435,Johnson David,RB,2016,Arizona Cardinals,16,293.0,18.3,1239.0,4.2,77.4,16.0,,72.0,24.6,6.0,2.0,5.0
2625,legarretteblount/497149,Blount LeGarrette,RB,2016,New England Patriots,16,299.0,18.7,1161.0,3.9,72.6,18.0,44.0,67.0,22.4,7.0,3.0,2.0
7741,markingram/2495466,Ingram Mark,RB,2016,New Orleans Saints,16,205.0,12.8,1043.0,5.1,65.2,6.0,,49.0,23.9,4.0,2.0,2.0
8070,frankgore/2506404,Gore Frank,RB,2016,Indianapolis Colts,16,263.0,16.4,1025.0,3.9,64.1,4.0,22.0,50.0,19.0,1.0,0.0,2.0
3106,jonathanstewart/949,Stewart Jonathan,RB,2016,Carolina Panthers,13,218.0,16.8,824.0,3.8,63.4,9.0,47.0,47.0,21.6,6.0,1.0,3.0
14742,mattforte/234,Forte Matt,RB,2016,New York Jets,14,218.0,15.6,813.0,3.7,58.1,7.0,32.0,35.0,16.1,5.0,0.0,1.0
5460,bilalpowell/2495328,Powell Bilal,RB,2016,New York Jets,16,131.0,8.2,722.0,5.5,45.1,3.0,,45.0,34.4,4.0,0.0,0.0
11304,ryanmathews/497188,Mathews Ryan,RB,2016,Philadelphia Eagles,13,155.0,11.9,661.0,4.3,50.8,8.0,30.0,38.0,24.5,6.0,0.0,3.0


In [177]:
### Top receivers
young_receivers = young_receivers.dropna(subset=['Receptions', 'Receiving Yards', 'Receiving TDs'], how='any')
young_wrs_by_yds = young_receivers.sort_values(['Receiving Yards'], ascending=False)
display(young_wrs_by_yds)

vet_receivers = vet_receivers.dropna(subset=['Receptions', 'Receiving Yards', 'Receiving TDs'], how='all')
vet_wrs_by_yds = vet_receivers.sort_values(['Receiving Yards'], ascending=False)
display(vet_wrs_by_yds)

Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Receptions,Receiving Yards,Yards Per Reception,Yards Per Game,Longest Reception,Receiving TDs,Receptions Longer than 20 Yards,Receptions Longer than 40 Yards,First Down Receptions,Fumbles
5985,odellbeckham/2543496,Beckham Odell,WR,2016,New York Giants,16,101.0,1367.0,13.5,85.4,,10.0,20.0,6.0,65.0,2.0
11472,mikeevans/2543468,Evans Mike,WR,2016,Tampa Bay Buccaneers,16,96.0,1321.0,13.8,82.6,,12.0,15.0,1.0,81.0,0.0
1125,brandincooks/2543498,Cooks Brandin,WR,2016,New Orleans Saints,16,78.0,1173.0,15.0,73.3,,8.0,15.0,6.0,45.0,1.0
409,amaricooper/2552487,Cooper Amari,WR,2016,Oakland Raiders,16,83.0,1153.0,13.9,72.1,,5.0,21.0,2.0,47.0,2.0
6049,michaelthomas/2556370,Thomas Michael,WR,2016,New Orleans Saints,15,92.0,1137.0,12.4,75.8,46.0,9.0,18.0,1.0,62.0,2.0
3905,jarvislandry/2543488,Landry Jarvis,WR,2016,Miami Dolphins,16,94.0,1136.0,12.1,71.0,71.0,4.0,16.0,3.0,52.0,1.0
10092,tyrellwilliams/2553913,Williams Tyrell,WR,2016,San Diego Chargers,16,69.0,1059.0,15.3,66.2,,7.0,19.0,6.0,48.0,0.0
4278,davanteadams/2543495,Adams Davante,WR,2016,Green Bay Packers,16,75.0,997.0,13.3,62.3,,12.0,17.0,4.0,45.0,2.0
6429,adamthielen/2541785,Thielen Adam,WR,2016,Minnesota Vikings,16,69.0,967.0,14.0,60.4,,5.0,16.0,2.0,44.0,1.0
15618,kelvinbenjamin/2543471,Benjamin Kelvin,WR,2016,Carolina Panthers,16,63.0,941.0,14.9,58.8,50.0,7.0,10.0,3.0,50.0,1.0


Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Receptions,Receiving Yards,Yards Per Reception,Yards Per Game,Longest Reception,Receiving TDs,Receptions Longer than 20 Yards,Receptions Longer than 40 Yards,First Down Receptions,Fumbles
5419,juliojones/2495454,Jones Julio,WR,2016,Atlanta Falcons,14,83.0,1409.0,17.0,100.6,,6.0,27.0,5.0,64.0,0.0
3494,antoniobrown/2508061,Brown Antonio,WR,2016,Pittsburgh Steelers,15,106.0,1284.0,12.1,85.6,51.0,12.0,22.0,3.0,64.0,0.0
15807,jordynelson/1032,Nelson Jordy,WR,2016,Green Bay Packers,16,97.0,1257.0,13.0,78.6,60.0,14.0,19.0,4.0,62.0,1.0
5425,dougbaldwin/2530747,Baldwin Doug,WR,2016,Seattle Seahawks,16,94.0,1128.0,12.0,70.5,59.0,7.0,16.0,5.0,52.0,1.0
770,julianedelman/238498,Edelman Julian,WR,2016,New England Patriots,16,98.0,1106.0,11.3,69.1,,3.0,11.0,1.0,55.0,1.0
1745,demaryiusthomas/497328,Thomas Demaryius,WR,2016,Denver Broncos,16,90.0,1083.0,12.0,67.7,,5.0,10.0,3.0,50.0,2.0
16830,goldentate/497326,Tate Golden,WR,2016,Detroit Lions,16,91.0,1077.0,11.8,67.3,67.0,4.0,13.0,4.0,46.0,1.0
3501,gregolsen/2495700,Olsen Greg,TE,2016,Carolina Panthers,16,80.0,1073.0,13.4,67.1,,3.0,14.0,1.0,54.0,0.0
11370,pierregarcon/2346,Garcon Pierre,WR,2016,Washington Redskins,16,79.0,1041.0,13.2,65.1,,3.0,15.0,3.0,52.0,0.0
2116,emmanuelsanders/497322,Sanders Emmanuel,WR,2016,Denver Broncos,16,79.0,1032.0,13.1,64.5,64.0,5.0,12.0,2.0,50.0,1.0


In [178]:
### Top quarterbacks
display(young_qbs)
young_qbs = young_qbs.dropna(subset=['Passing Yards', 'TD Passes', 'Ints'], how='any')
young_qbs_by_yds = young_qbs.sort_values(['Passing Yards'], ascending=False)
display(young_qbs_by_yds)

vet_qbs = vet_qbs.dropna(subset=['Passing Yards', 'TD Passes', 'Ints'], how='any')
vet_qbs_by_yds = vet_qbs.sort_values(['Passing Yards'], ascending=False)
display(vet_qbs_by_yds)

Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Passes Attempted,Passes Completed,Completion Percentage,Pass Attempts Per Game,...,TD Passes,Percentage of TDs per Attempts,Ints,Int Rate,Longest Pass,Passes Longer than 20 Yards,Passes Longer than 40 Yards,Sacks,Sacked Yards Lost,Passer Rating
16,tomsavage/2543640,Savage Tom,QB,2016,Houston Texans,3,73.0,46.0,63.0,24.3,...,0.0,0.0,0.0,0.0,32.0,5.0,0.0,5.0,36.0,80.9
394,connorcook/2555332,Cook Connor,QB,2016,Oakland Raiders,1,21.0,14.0,66.7,21.0,...,1.0,4.8,1.0,4.8,,1.0,0.0,2.0,7.0,83.4
400,jamisoncrowder/2552415,Crowder Jamison,WR,2016,Washington Redskins,16,,,,0.0,...,,,,,,,,,,0.0
918,bradwing/2540208,Wing Brad,P,2016,New York Giants,16,,,,0.0,...,,,,,,,,,,0.0
996,williesnead/2550256,Snead Willie,WR,2016,New Orleans Saints,15,1.0,1.0,100.0,0.1,...,1.0,100.0,0.0,0.0,,1.0,1.0,0.0,0.0,158.3
1243,dakprescott/2555260,Prescott Dak,QB,2016,Dallas Cowboys,16,459.0,311.0,67.8,28.7,...,23.0,5.0,4.0,0.9,,38.0,8.0,25.0,143.0,104.9
1454,marcusmariota/2552466,Mariota Marcus,QB,2016,Tennessee Titans,15,451.0,276.0,61.2,30.1,...,26.0,5.8,9.0,2.0,60.0,50.0,7.0,23.0,156.0,95.6
1736,derekcarr/2543499,Carr Derek,QB,2016,Oakland Raiders,15,560.0,357.0,63.8,37.3,...,28.0,5.0,6.0,1.1,,50.0,8.0,16.0,79.0,96.7
1814,jarvislandry/2543488,Landry Jarvis,WR,2016,Miami Dolphins,16,1.0,0.0,0.0,0.1,...,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,39.6
2515,carsonwentz/2555259,Wentz Carson,QB,2016,Philadelphia Eagles,16,607.0,379.0,62.4,37.9,...,16.0,2.6,14.0,2.3,,39.0,6.0,33.0,213.0,79.3


Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Passes Attempted,Passes Completed,Completion Percentage,Pass Attempts Per Game,...,TD Passes,Percentage of TDs per Attempts,Ints,Int Rate,Longest Pass,Passes Longer than 20 Yards,Passes Longer than 40 Yards,Sacks,Sacked Yards Lost,Passer Rating
7335,jameiswinston/2552033,Winston Jameis,QB,2016,Tampa Bay Buccaneers,16,567.0,345.0,60.8,35.4,...,28.0,4.9,18.0,3.2,,43.0,4.0,35.0,239.0,86.1
1736,derekcarr/2543499,Carr Derek,QB,2016,Oakland Raiders,15,560.0,357.0,63.8,37.3,...,28.0,5.0,6.0,1.1,,50.0,8.0,16.0,79.0,96.7
6263,blakebortles/2543477,Bortles Blake,QB,2016,Jacksonville Jaguars,16,625.0,368.0,58.9,39.1,...,23.0,3.7,16.0,2.6,,43.0,4.0,34.0,197.0,78.8
2515,carsonwentz/2555259,Wentz Carson,QB,2016,Philadelphia Eagles,16,607.0,379.0,62.4,37.9,...,16.0,2.6,14.0,2.3,,39.0,6.0,33.0,213.0,79.3
1243,dakprescott/2555260,Prescott Dak,QB,2016,Dallas Cowboys,16,459.0,311.0,67.8,28.7,...,23.0,5.0,4.0,0.9,,38.0,8.0,25.0,143.0,104.9
1454,marcusmariota/2552466,Mariota Marcus,QB,2016,Tennessee Titans,15,451.0,276.0,61.2,30.1,...,26.0,5.8,9.0,2.0,60.0,50.0,7.0,23.0,156.0,95.6
5532,trevorsiemian/2553457,Siemian Trevor,QB,2016,Denver Broncos,14,486.0,289.0,59.5,34.7,...,18.0,3.7,10.0,2.1,,37.0,8.0,31.0,187.0,84.6
4214,codykessler/2555387,Kessler Cody,QB,2016,Cleveland Browns,9,195.0,128.0,65.6,21.7,...,6.0,3.1,2.0,1.0,44.0,17.0,4.0,21.0,140.0,92.3
8108,jaredgoff/2555334,Goff Jared,QB,2016,Los Angeles Rams,7,205.0,112.0,54.6,29.3,...,5.0,2.4,7.0,3.4,66.0,12.0,1.0,26.0,222.0,63.6
7460,brycepetty/2552369,Petty Bryce,QB,2016,New York Jets,6,133.0,75.0,56.4,22.2,...,3.0,2.3,7.0,5.3,52.0,9.0,4.0,13.0,79.0,60.0


Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Passes Attempted,Passes Completed,Completion Percentage,Pass Attempts Per Game,...,TD Passes,Percentage of TDs per Attempts,Ints,Int Rate,Longest Pass,Passes Longer than 20 Yards,Passes Longer than 40 Yards,Sacks,Sacked Yards Lost,Passer Rating
956,drewbrees/2504775,Brees Drew,QB,2016,New Orleans Saints,16,673.0,471.0,70.0,42.1,...,37.0,5.5,15.0,2.2,,70.0,10.0,27.0,184.0,101.7
3321,mattryan/310,Ryan Matt,QB,2016,Atlanta Falcons,16,534.0,373.0,69.9,33.4,...,38.0,7.1,7.0,1.3,,69.0,17.0,37.0,235.0,117.1
8469,aaronrodgers/2506363,Rodgers Aaron,QB,2016,Green Bay Packers,16,610.0,401.0,65.7,38.1,...,40.0,6.6,7.0,1.1,,57.0,10.0,35.0,246.0,104.2
976,philiprivers/2506121,Rivers Philip,QB,2016,San Diego Chargers,16,578.0,349.0,60.4,36.1,...,33.0,5.7,21.0,3.6,59.0,57.0,16.0,36.0,188.0,87.9
2009,matthewstafford/79860,Stafford Matthew,QB,2016,Detroit Lions,16,594.0,388.0,65.3,37.1,...,24.0,4.0,10.0,1.7,,52.0,10.0,37.0,216.0,93.3
6394,joeflacco/382,Flacco Joe,QB,2016,Baltimore Ravens,16,672.0,436.0,64.9,42.0,...,20.0,3.0,15.0,2.2,,40.0,11.0,33.0,243.0,83.5
6680,carsonpalmer/2505245,Palmer Carson,QB,2016,Arizona Cardinals,15,597.0,364.0,61.0,39.8,...,26.0,4.4,14.0,2.3,,48.0,6.0,40.0,281.0,87.2
5158,andydalton/2495143,Dalton Andy,QB,2016,Cincinnati Bengals,16,563.0,364.0,64.7,35.2,...,18.0,3.2,8.0,1.4,,53.0,13.0,41.0,264.0,91.8
4648,elimanning/2505996,Manning Eli,QB,2016,New York Giants,16,598.0,377.0,63.0,37.4,...,26.0,4.3,16.0,2.7,,46.0,11.0,21.0,142.0,86.0
5820,sambradford/497095,Bradford Sam,QB,2016,Minnesota Vikings,15,552.0,395.0,71.6,36.8,...,20.0,3.6,5.0,0.9,,49.0,8.0,37.0,276.0,99.3


In [201]:
# Secondary
young_dbs = young_def.loc[(active_def['Position'] == 'SS') | (active_def['Position'] == 'FS') | (active_def['Position'] == 'CB')]
# Lineman
young_dline = young_def.loc[(active_def['Position'] == 'DE') | (active_def['Position'] == 'DT') | (active_def['Position'] == 'NT')]
# Linebackers
young_linebackers = young_def.loc[(active_def['Position'] == 'MLB') | (active_def['Position'] == 'ILB') 
                             | (active_def['Position'] == 'OLB')]

# Secondary
vet_dbs = vet_def.loc[(active_def['Position'] == 'SS') | (active_def['Position'] == 'FS') | (active_def['Position'] == 'CB')]
# Lineman
vet_dline = vet_def.loc[(active_def['Position'] == 'DE') | (active_def['Position'] == 'DT') | (active_def['Position'] == 'NT')]
# Linebackers
vet_linebackers = vet_def.loc[(active_def['Position'] == 'MLB') | (active_def['Position'] == 'ILB') 
                             | (active_def['Position'] == 'OLB')]

young_dbs = young_dbs.dropna(subset=['Ints', 'Passes Defended'], how='any')
young_dbs_by_pd = young_dbs.sort_values(['Passes Defended'], ascending=False)
display(young_dbs_by_pd.head())

vet_dbs = vet_dbs.dropna(subset=['Ints', 'Passes Defended'], how='any')
vet_dbs_by_pd = vet_dbs.sort_values(['Passes Defended'], ascending=False)
display(vet_dbs_by_pd.head())

young_dline = young_dline.dropna(subset=['Total Tackles', 'Sacks'], how='any')
young_dline_by_sacks = young_dline.sort_values(['Sacks'], ascending=False)
display(young_dline_by_sacks.head())

vet_dline = vet_dline.dropna(subset=['Total Tackles', 'Sacks'], how='any')
vet_dline_by_sacks = vet_dline.sort_values(['Sacks'], ascending=False)
display(vet_dline_by_sacks.head())

young_lbs = young_linebackers.dropna(subset=['Total Tackles', 'Ints', 'Sacks', 'Passes Defended'], how='any')
young_lbs_by_tackles = young_lbs.sort_values(['Total Tackles'], ascending=False)
display(young_lbs_by_tackles.head())

vet_lbs = vet_linebackers.dropna(subset=['Total Tackles', 'Ints', 'Sacks', 'Passes Defended'], how='any')
vet_lbs_by_tackles = vet_lbs.sort_values(['Total Tackles'], ascending=False)
display(vet_lbs_by_tackles.head())

Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Total Tackles,Solo Tackles,Assisted Tackles,Sacks,Safties,Passes Defended,Ints,Ints for TDs,Int Yards,Yards Per Int,Longest Int Return
9736,marcuspeters/2552488,Peters Marcus,CB,2016,Kansas City Chiefs,15,45.0,35.0,10.0,0.0,,20.0,6.0,0.0,63.0,10.5,28.0
13274,malcolmbutler/2550613,Butler Malcolm,CB,2016,New England Patriots,16,63.0,48.0,15.0,1.0,,17.0,4.0,0.0,28.0,7.0,21.0
20539,jalenramsey/2555280,Ramsey Jalen,CB,2016,Jacksonville Jaguars,16,65.0,55.0,10.0,0.0,0.0,14.0,2.0,1.0,65.0,32.5,35.0
14170,artieburns/2555344,Burns Artie,CB,2016,Pittsburgh Steelers,16,65.0,52.0,13.0,0.0,,13.0,3.0,0.0,13.0,4.3,9.0
2542,landoncollins/2552454,Collins Landon,SS,2016,New York Giants,16,125.0,100.0,25.0,4.0,0.0,13.0,5.0,1.0,72.0,14.4,


Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Total Tackles,Solo Tackles,Assisted Tackles,Sacks,Safties,Passes Defended,Ints,Ints for TDs,Int Yards,Yards Per Int,Longest Int Return
16412,brentgrimes/2506861,Grimes Brent,CB,2016,Tampa Bay Buccaneers,16,57.0,51.0,6.0,0.0,0.0,24.0,4.0,1.0,37.0,9.2,
880,dominiquerodgers-cromartie/306,Rodgers-Cromartie Dominique,CB,2016,New York Giants,15,49.0,41.0,8.0,1.0,,21.0,6.0,0.0,28.0,4.7,28.0
22325,leodismckelvin/1298,McKelvin Leodis,CB,2016,Philadelphia Eagles,13,43.0,32.0,11.0,0.0,0.0,16.0,2.0,1.0,29.0,14.5,
6539,byronmaxwell/2495310,Maxwell Byron,CB,2016,Miami Dolphins,13,53.0,43.0,10.0,0.0,,15.0,2.0,0.0,29.0,14.5,27.0
960,tramainebrock/2507917,Brock Tramaine,CB,2016,San Francisco 49ers,16,59.0,49.0,10.0,0.0,,14.0,1.0,0.0,39.0,39.0,39.0


Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Total Tackles,Solo Tackles,Assisted Tackles,Sacks,Safties,Passes Defended,Ints,Ints for TDs,Int Yards,Yards Per Int,Longest Int Return
6161,daniellehunter/2552603,Hunter Danielle,DE,2016,Minnesota Vikings,16,56.0,34.0,22.0,12.5,1.0,1.0,,,,0.0,
19562,khalilmack/2543463,Mack Khalil,DE,2016,Oakland Raiders,16,73.0,54.0,19.0,11.0,0.0,3.0,1.0,1.0,6.0,6.0,
1962,joeybosa/2555249,Bosa Joey,DE,2016,San Diego Chargers,12,41.0,29.0,12.0,10.5,,0.0,,,,0.0,
21044,frankclark/2552629,Clark Frank,DE,2016,Seattle Seahawks,15,47.0,25.0,22.0,10.0,,0.0,,,,0.0,
15106,trentmurphy/2543503,Murphy Trent,DE,2016,Washington Redskins,16,47.0,29.0,18.0,9.0,,1.0,,,,0.0,


Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Total Tackles,Solo Tackles,Assisted Tackles,Sacks,Safties,Passes Defended,Ints,Ints for TDs,Int Yards,Yards Per Int,Longest Int Return
14439,cliffavril/1330,Avril Cliff,DE,2016,Seattle Seahawks,16,39.0,20.0,19.0,11.5,,3.0,,,,0.0,
15196,cameronwake/2506314,Wake Cameron,DE,2016,Miami Dolphins,16,29.0,22.0,7.0,11.5,,2.0,1.0,0.0,12.0,12.0,12.0
14123,marioaddison/2530474,Addison Mario,DE,2016,Carolina Panthers,14,27.0,22.0,5.0,9.5,1.0,1.0,,,,0.0,
4917,genoatkins/496762,Atkins Geno,DT,2016,Cincinnati Bengals,16,32.0,21.0,11.0,9.0,,0.0,,,,0.0,
2638,calaiscampbell/744,Campbell Calais,DT,2016,Arizona Cardinals,16,53.0,34.0,19.0,8.0,1.0,6.0,1.0,0.0,1.0,1.0,1.0


Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Total Tackles,Solo Tackles,Assisted Tackles,Sacks,Safties,Passes Defended,Ints,Ints for TDs,Int Yards,Yards Per Int,Longest Int Return
5171,prestonsmith/2552276,Smith Preston,OLB,2016,Washington Redskins,16,38.0,22.0,16.0,4.5,,3.0,1.0,0.0,22.0,22.0,22.0
2030,ryanshazier/2543486,Shazier Ryan,ILB,2016,Pittsburgh Steelers,13,87.0,55.0,32.0,3.5,,9.0,3.0,0.0,0.0,0.0,0.0
18677,kwonalexander/2552592,Alexander Kwon,MLB,2016,Tampa Bay Buccaneers,16,145.0,108.0,37.0,3.0,0.0,7.0,1.0,1.0,38.0,38.0,
19580,erickendricks/2552312,Kendricks Eric,MLB,2016,Minnesota Vikings,15,109.0,70.0,39.0,2.5,0.0,9.0,1.0,1.0,77.0,77.0,
5052,lorenzomauldin/2552297,Mauldin Lorenzo,OLB,2016,New York Jets,11,16.0,7.0,9.0,2.5,,2.0,1.0,0.0,0.0,0.0,0.0


Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Total Tackles,Solo Tackles,Assisted Tackles,Sacks,Safties,Passes Defended,Ints,Ints for TDs,Int Yards,Yards Per Int,Longest Int Return
20895,lorenzoalexander/2506268,Alexander Lorenzo,OLB,2016,Buffalo Bills,16,76.0,56.0,20.0,12.5,,6.0,1.0,0.0,28.0,28.0,28.0
20733,jamesharrison/2504844,Harrison James,OLB,2016,Pittsburgh Steelers,15,53.0,39.0,14.0,5.0,,1.0,1.0,0.0,0.0,0.0,0.0
17994,thomasdavis/2506352,Davis Thomas,OLB,2016,Carolina Panthers,16,106.0,73.0,33.0,2.5,0.0,4.0,3.0,0.0,35.0,11.7,31.0
7326,wesleywoodyard/2354,Woodyard Wesley,ILB,2016,Tennessee Titans,16,57.0,42.0,15.0,2.0,,5.0,1.0,0.0,21.0,21.0,21.0
12761,paulposluszny/2495712,Posluszny Paul,MLB,2016,Jacksonville Jaguars,16,133.0,97.0,36.0,1.5,,3.0,1.0,0.0,0.0,0.0,0.0


In [194]:
young_kickers = young_kickers.dropna(subset=['FGs Made', 'FGs Attempted'], how='any')
young_kickers_by_made = young_kickers.sort_values(['FG Percentage'], ascending=False)
display(young_kickers_by_made)

vet_kickers = vet_kickers.dropna(subset=['FGs Made', 'FGs Attempted'], how='any')
vet_kickers_by_made = vet_kickers.sort_values(['FG Percentage'], ascending=False)
display(vet_kickers_by_made.iloc(0)[0]['Name'])

Unnamed: 0,Player Id,Name,Position,Year,Team,Games Played,Kicks Blocked,Longest FG Made,FGs Made,FGs Attempted,...,FGs Made 40-49 Yards,FGs Attempted 40-49 Yards,FG Percentage 40-49 Yards,FGs Made 50+ Yards,FGs Attempted 50+ Yards,FG Percentage 50+ Yards,Extra Points Attempted,Extra Points Made,Percentage of Extra Points Made,Extra Points Blocked
1988,cairosantos/2550636,Santos Cairo,K,2016,Kansas City Chiefs,16,0.0,54.0,31.0,35.0,...,6.0,7.0,85.7,2.0,2.0,100.0,39.0,36.0,92.3,1.0
1097,brandonmcmanus/2541556,McManus Brandon,K,2016,Denver Broncos,16,0.0,55.0,29.0,34.0,...,7.0,9.0,77.8,3.0,6.0,50.0,33.0,32.0,97.0,1.0
721,chrisboswell/2550545,Boswell Chris,K,2016,Pittsburgh Steelers,15,1.0,49.0,21.0,25.0,...,11.0,12.0,91.7,0.0,2.0,0.0,36.0,36.0,100.0,0.0
1642,willutz/2556601,Lutz Wil,K,2016,New Orleans Saints,16,2.0,57.0,28.0,34.0,...,8.0,8.0,100.0,3.0,7.0,42.9,50.0,49.0,98.0,1.0
982,joshlambo/2553833,Lambo Josh,K,2016,San Diego Chargers,16,1.0,47.0,26.0,32.0,...,6.0,8.0,75.0,0.0,3.0,0.0,46.0,42.0,91.3,3.0
1121,dustinhopkins/2539227,Hopkins Dustin,K,2016,Washington Redskins,16,0.0,53.0,34.0,42.0,...,7.0,9.0,77.8,3.0,7.0,42.9,39.0,36.0,92.3,0.0
504,codyparkey/2550380,Parkey Cody,K,2016,Cleveland Browns,14,0.0,51.0,20.0,25.0,...,7.0,12.0,58.3,1.0,1.0,100.0,21.0,20.0,95.2,0.0
126,jasonmyers/2553112,Myers Jason,K,2016,Jacksonville Jaguars,16,2.0,56.0,27.0,34.0,...,10.0,11.0,90.9,7.0,12.0,58.3,32.0,29.0,90.6,0.0
1431,andrewfranks/2553648,Franks Andrew,K,2016,Miami Dolphins,16,2.0,55.0,16.0,21.0,...,1.0,3.0,33.3,1.0,2.0,50.0,42.0,41.0,97.6,0.0
907,chandlercatanzaro/2550325,Catanzaro Chandler,K,2016,Arizona Cardinals,16,1.0,60.0,21.0,28.0,...,6.0,8.0,75.0,3.0,6.0,50.0,47.0,43.0,91.5,1.0


'Gould Robbie'

In [195]:
 
### Younger Team
top_qb = young_qbs_by_yds.iloc(0)[0]
top_rb = young_rbs_by_yds.iloc(0)[0]
top_wr1 = young_wrs_by_yds.iloc(0)[0]
top_wr2 = young_wrs_by_yds.iloc(0)[1]
top_db1 = young_dbs_by_pd.iloc(0)[0]
top_db2 = young_dbs_by_pd.iloc(0)[1]
top_lb1 = young_lbs_by_tackles.iloc(0)[0]
top_lb2 = young_lbs_by_tackles.iloc(0)[1]
top_dline1 = young_dline_by_sacks.iloc(0)[0]
top_dline2 = young_dline_by_sacks.iloc(0)[1]
top_kicker = young_kickers_by_made.iloc(0)[0]

youngins = {'QB' : top_qb, 'RB' : top_rb, 'WR1' : top_wr1, 'WR2' : top_wr2, 'DB1' : top_db1, 'DB2' : top_db2, 
           'LB1' : top_lb1, 'LB2' : top_lb2, 'DL1' : top_dline1, 'DL2' : top_dline2, 'K' : top_kicker}

### Older Team
top_qb = vet_qbs_by_yds.iloc(0)[0]
top_rb = vet_rbs_by_yds.iloc(0)[0]
top_wr1 = vet_wrs_by_yds.iloc(0)[0]
top_wr2 = vet_wrs_by_yds.iloc(0)[1]
top_db1 = vet_dbs_by_pd.iloc(0)[0]
top_db2 = vet_dbs_by_pd.iloc(0)[1]
top_lb1 = vet_lbs_by_tackles.iloc(0)[0]
top_lb2 = vet_lbs_by_tackles.iloc(0)[1]
top_dline1 = vet_dline_by_sacks.iloc(0)[0]
top_dline2 = vet_dline_by_sacks.iloc(0)[1]
top_kicker = vet_kickers_by_made.iloc(0)[0]

oldheads = {'QB' : top_qb, 'RB' : top_rb, 'WR1' : top_wr1, 'WR2' : top_wr2, 'DB1' : top_db1, 'DB2' : top_db2, 
           'LB1' : top_lb1, 'LB2' : top_lb2, 'DL1' : top_dline1, 'DL2' : top_dline2, 'K' : top_kicker}

display(youngins)
display(oldheads)

{'QB': Player Id                         jameiswinston/2552033
 Name                                     Winston Jameis
 Position                                             QB
 Year                                               2016
 Team                               Tampa Bay Buccaneers
 Games Played                                         16
 Passes Attempted                                    567
 Passes Completed                                    345
 Completion Percentage                              60.8
 Pass Attempts Per Game                             35.4
 Passing Yards                                      4090
 Passing Yards Per Attempt                           7.2
 Passing Yards Per Game                            255.6
 TD Passes                                            28
 Percentage of TDs per Attempts                      4.9
 Ints                                                 18
 Int Rate                                            3.2
 Longest Pass            

{'QB': Player Id                          drewbrees/2504775
 Name                                      Brees Drew
 Position                                          QB
 Year                                            2016
 Team                              New Orleans Saints
 Games Played                                      16
 Passes Attempted                                 673
 Passes Completed                                 471
 Completion Percentage                             70
 Pass Attempts Per Game                          42.1
 Passing Yards                                   5208
 Passing Yards Per Attempt                        7.7
 Passing Yards Per Game                         325.5
 TD Passes                                         37
 Percentage of TDs per Attempts                   5.5
 Ints                                              15
 Int Rate                                         2.2
 Longest Pass                                     NaN
 Passes Longer than 20

In [216]:
box_scores = pd.read_csv('box_scores.csv')
# box_scores = box_scores[box_scores['date'].str.contains('2016')]
box_scores = box_scores[['home_score', 'visitor_score', 'home_net_yards_rushing', 'visitor_net_yards_rushing', 
                        'home_net_yards_passing', 'visitor_net_yards_passing', 'home_field_goals', 'visitor_field_goals',
                       'home_sack_splits', 'visitor_sack_splits']]
box_scores = box_scores.replace('-', '0-0')
box_scores['home_sacks'] = pd.Series(0, index=box_scores.index)
box_scores['visitor_sacks'] = pd.Series(0, index=box_scores.index)
# box_scores['home_fg_made'] = pd.Series(0, index=box_scores.index)
# box_scores['visitor_fg_made'] = pd.Series(0, index=box_scores.index)
# box_scores['home_fg_att'] = pd.Series(0, index=box_scores.index)
# box_scores['visitor_fg_att'] = pd.Series(0, index=box_scores.index)
box_scores['home_fg%'] = pd.Series(0, index=box_scores.index, dtype='float')
box_scores['visitor_fg%'] = pd.Series(0, index=box_scores.index, dtype='float')
for index, row in box_scores.iterrows():
    home_sack_splits = row['home_sack_splits']
    away_sack_splits = row['visitor_sack_splits']
    home_field_goals = row['home_field_goals']
    away_field_goals = row['visitor_field_goals']
    home_sacks = int(home_sack_splits.split('-')[0])
    away_sacks = int(away_sack_splits.split('-')[0])
    home_fg_made = float(home_field_goals.split('-')[0])
    away_fg_made = float(away_field_goals.split('-')[0])
    home_fg_att = float(home_field_goals.split('-')[1])
    away_fg_att = float(away_field_goals.split('-')[1])
    
    box_scores.at[index, 'home_sacks'] = home_sacks
    box_scores.at[index, 'visitor_sacks'] = away_sacks
    # box_scores.at[index, 'home_fg_made'] = home_fg_made
    # box_scores.at[index, 'visitor_fg_made'] = away_fg_made
    # box_scores.at[index, 'home_fg_att'] = home_fg_att
    # box_scores.at[index, 'visitor_fg_att'] = away_fg_att
    if (home_fg_att != 0):
        box_scores.at[index, 'home_fg%'] = home_fg_made/home_fg_att
    if (away_fg_att != 0):
        box_scores.at[index, 'visitor_fg%'] = away_fg_made/away_fg_att

box_scores = box_scores.drop(columns=['home_field_goals', 'visitor_field_goals', 'home_sack_splits', 'visitor_sack_splits'])
for col in box_scores.columns:
    box_scores[col] = pd.to_numeric(box_scores[col], errors='coerce')
    
for index, row in box_scores.iterrows():
    home_score = row['home_score']
    away_score = row['visitor_score']
    box_scores.at[index, 'winner'] = 0 if home_score > away_score else 1
box_scores = box_scores.drop(columns=['home_score', 'visitor_score'])
display(box_scores)

Unnamed: 0,home_net_yards_rushing,visitor_net_yards_rushing,home_net_yards_passing,visitor_net_yards_passing,home_sacks,visitor_sacks,home_fg%,visitor_fg%,winner
0,127,191,376,198,3,3,1.000000,1.000000,0.0
1,145,64,275,242,5,3,1.000000,0.333333,0.0
2,207,80,191,175,1,3,1.000000,1.000000,0.0
3,72,185,246,170,5,0,0.666667,1.000000,1.0
4,94,79,329,301,3,0,0.500000,0.833333,1.0
5,212,25,190,133,2,2,1.000000,0.000000,0.0
6,86,193,341,167,2,1,1.000000,1.000000,1.0
7,123,139,445,333,1,0,1.000000,1.000000,0.0
8,115,131,206,241,0,3,1.000000,0.000000,0.0
9,67,162,178,243,4,4,0.500000,1.000000,1.0


In [217]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
labels = box_scores['winner']
features = box_scores.drop(columns=['winner'])
feature_list = box_scores.columns

train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.10, random_state = 42)
    
print('Training Features Shape:', train_features.shape)
print('Training Labels Shape:', train_labels.shape)
print('Testing Features Shape:', test_features.shape)
print('Testing Labels Shape:', test_labels.shape)

rf = RandomForestClassifier(n_estimators = 1000, random_state = 42)
rf.fit(train_features, train_labels)


Training Features Shape: (3895, 8)
Training Labels Shape: (3895,)
Testing Features Shape: (433, 8)
Testing Labels Shape: (433,)


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=1000, n_jobs=None,
            oob_score=False, random_state=42, verbose=0, warm_start=False)

In [218]:
predictions = rf.predict(test_features)
print(accuracy_score(test_labels, predictions))
print(box_scores.columns)

0.74364896073903
Index(['home_net_yards_rushing', 'visitor_net_yards_rushing',
       'home_net_yards_passing', 'visitor_net_yards_passing', 'home_sacks',
       'visitor_sacks', 'home_fg%', 'visitor_fg%', 'winner'],
      dtype='object')


In [220]:
def get_per_game(team, pos, category):
    pos1 = team[pos + '1']
    pos2 = team[pos + '2']
    pos1_cat_per_game = pos1[category]/pos1['Games Played']
    pos2_cat_per_game = pos2[category]/pos2['Games Played']
    return pos1_cat_per_game + pos2_cat_per_game

# Young -> Home, Old -> Away
home_rushing = youngins['RB']['Rushing Yards Per Game']
away_rushing = oldheads['RB']['Rushing Yards Per Game']
home_passing = youngins['QB']['Passing Yards Per Game']
away_passing = oldheads['QB']['Passing Yards Per Game']
home_kicking = youngins['K']['FG Percentage']
away_kicking = oldheads['K']['FG Percentage']
home_ints = get_per_game(youngins, 'DB', 'Ints')
away_ints = get_per_game(oldheads, 'DB', 'Ints')
home_sacks = get_per_game(youngins, 'DL', 'Sacks')
away_sacks = get_per_game(oldheads, 'DL', 'Sacks')

data = {'home_net_yards_rushing' : home_rushing, 'visitor_net_yards_rushing' : away_rushing, 'home_net_yards_passing' : home_passing,
       'visitor_net_yards_passing' : away_passing, 'home_sacks' : home_sacks, 'visitor_sacks' : away_sacks, 'home_fg%' : home_kicking,
       'visitor_fg%' : away_kicking}
game_df = pd.DataFrame(data=data, index=[0])
display(game_df)
predictions = rf.predict(game_df)
print(predictions)

Unnamed: 0,home_net_yards_rushing,visitor_net_yards_rushing,home_net_yards_passing,visitor_net_yards_passing,home_sacks,visitor_sacks,home_fg%,visitor_fg%
0,108.7,80.4,255.6,325.5,1.46875,1.4375,88.6,100.0


[0.]
