In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup, Comment
import requests
import time

Below looping over multiple NFL drafts and scraping the table on the webpage to get players' information and links to their personal pro, and college stats pages. 

In [2]:
draft_years = ['2018', '2017', '2016', '2015', '2014', '2013', '2012', '2011', '2010', '2009']
all_drafts = pd.DataFrame()

for year in draft_years:
    
    url = 'https://www.pro-football-reference.com/years/' + year + '/draft.htm'

    response = requests.get(url)
    html = response.text
    soup = BeautifulSoup(html, "lxml")

    table = soup.tbody.find_all('tr', {'class': False})
    draft = []
    for i in range(len(table)):
        draft_pick = {}
        draft_pick['draft_round'] = table[i].th.text
        draft_pick['draft_overall'] = table[i].find('td', {'data-stat': 'draft_pick'}).text
        draft_pick['team_nfl'] = table[i].find('td', {'data-stat': 'team'}).text
        draft_pick['player'] = table[i].find('td', {'data-stat': 'player'}).text
        draft_pick['position'] = table[i].find('td', {'data-stat': 'pos'}).text
        draft_pick['draft_age'] = table[i].find('td', {'data-stat': 'age'}).text
        draft_pick['draft_year'] = int(year)
        draft_pick['school'] = table[i].find('td', {'data-stat': 'college_id'}).text
        try:
            draft_pick['pro_stats_url'] = "https://www.pro-football-reference.com" + table[i].find('td', {'data-stat': 'player'}).a.attrs['href']
        except:
            pass
        try:
            draft_pick['college_stats_url'] = table[i].find('td', {'data-stat': 'college_link'}).a.attrs['href']
        except:
            pass
        draft.append(draft_pick)

    draft_year = pd.DataFrame(draft)
    
    all_drafts = pd.concat([all_drafts, draft_year], ignore_index=True, sort=False)


In [3]:
all_drafts

Unnamed: 0,college_stats_url,draft_age,draft_overall,draft_round,draft_year,player,position,pro_stats_url,school,team_nfl
0,http://www.sports-reference.com/cfb/players/ba...,23,1,1,2018,Baker Mayfield,QB,https://www.pro-football-reference.com/players...,Oklahoma,CLE
1,http://www.sports-reference.com/cfb/players/sa...,21,2,1,2018,Saquon Barkley,RB,https://www.pro-football-reference.com/players...,Penn St.,NYG
2,http://www.sports-reference.com/cfb/players/sa...,21,3,1,2018,Sam Darnold,QB,https://www.pro-football-reference.com/players...,USC,NYJ
3,http://www.sports-reference.com/cfb/players/de...,21,4,1,2018,Denzel Ward,CB,https://www.pro-football-reference.com/players...,Ohio St.,CLE
4,http://www.sports-reference.com/cfb/players/br...,22,5,1,2018,Bradley Chubb,DE,https://www.pro-football-reference.com/players...,North Carolina St.,DEN
5,http://www.sports-reference.com/cfb/players/qu...,22,6,1,2018,Quenton Nelson,G,https://www.pro-football-reference.com/players...,Notre Dame,IND
6,http://www.sports-reference.com/cfb/players/jo...,22,7,1,2018,Josh Allen,QB,https://www.pro-football-reference.com/players...,Wyoming,BUF
7,http://www.sports-reference.com/cfb/players/ro...,21,8,1,2018,Roquan Smith,ILB,https://www.pro-football-reference.com/players...,Georgia,CHI
8,http://www.sports-reference.com/cfb/players/mi...,24,9,1,2018,Mike McGlinchey,T,https://www.pro-football-reference.com/players...,Notre Dame,SFO
9,http://www.sports-reference.com/cfb/players/jo...,21,10,1,2018,Josh Rosen,QB,https://www.pro-football-reference.com/players...,UCLA,ARI


In [4]:
all_drafts['draft_year'].value_counts()

2015    256
2009    256
2018    256
2014    256
2010    255
2013    254
2011    254
2017    253
2016    253
2012    253
Name: draft_year, dtype: int64

In [5]:
all_drafts.to_csv('./data/college/all_drafts.csv')

In [6]:
all_drafts.isnull().sum()

college_stats_url    436
draft_age              0
draft_overall          0
draft_round            0
draft_year             0
player                 0
position               0
pro_stats_url         18
school                 0
team_nfl               0
dtype: int64

In [7]:
all_drafts[all_drafts['college_stats_url'].isnull()==True]['position'].value_counts()

DB     64
T      59
WR     43
G      38
LB     32
DT     32
DE     32
RB     23
OL     19
TE     19
C      17
QB     15
CB     15
S      10
OLB    10
K       2
P       2
NT      2
FB      1
DL      1
Name: position, dtype: int64

Below I looped over the dataframe I had just created above from scraping multiple draft tables on pro-football-reference.com and now was using their 'college_stats_url' from that table and the html embedded in that link to get each player's college stats.  This took me multiple iterations to get right, but when I finally got it working I looped through the tables on each individuals page and got all relevant statistics that were not redundant.  I left out a scoring table that had data contained by all other tables.  

I excluded certain positions (see 'relevant_pos' below) from my scraping for the sake of time, as positions such as Offensive Linemen, or Kickers did not have much or any college stats recorded, and therefore I would not be able to create a predictive model on those positions. 

In [17]:
relevant_pos = ['QB', 'RB', 'FB', 'WR', 'TE', 'CB', 'DE', 'ILB', 'S', 'DT', 'OLB', 'LB']
columns = ['player', 'year', 'school', 'conference', 'class', 'pos', 'games', 'pass_cmp', 'pass_att', 
           'pass_cmp_pct','pass_yds', 'pass_yds_per_att', 'adj_pass_yds_per_att', 'pass_td', 'pass_int', 
           'pass_rating', 'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td', 'rec', 'rec_yds','rec_yds_per_rec', 
           'rec_td', 'scrim_att', 'scrim_yds', 'scrim_yds_per_att', 'scrim_td', 'tackles_solo', 'tackles_assists', 
           'tackles_total', 'tackles_loss', 'sacks', 'def_int', 'def_int_yds', 'def_int_yds_per_int', 'def_int_td', 
           'pass_defended', 'fumbles_rec', 'fumbles_rec_yds', 'fumbles_rec_td', 'fumbles_forced', 'punt_ret', 
           'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td', 'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 
           'kick_ret_td']

all_player_stats = pd.DataFrame(columns = columns)

for index, row in all_drafts.iterrows():
    time.sleep(1)
    try:
        if (pd.isnull(row['college_stats_url'])==False) & (row['position'] in relevant_pos):
            print(row['player'])

            url = row['college_stats_url']
            html = requests.get(url).text
            college_stats_soup = BeautifulSoup(html, 'lxml')
            college_tables_soup = college_stats_soup.find_all('div', {'class': 'table_wrapper'})
            player_df = pd.DataFrame()

            for i in range(len(college_tables_soup)):
                print(i)
                if college_tables_soup[i].find('h2').text in ['Passing', 'Receiving & Rushing', 'Rushing & Receiving',
                                                              'Defense & Fumbles', 'Punt & Kick Returns']:

                    if i == 0:
                        ## regular scraping
                        table = college_tables_soup[i].tbody.find_all('tr')
                        cat_stats = []
                        if college_tables_soup[i].find('h2').text == 'Passing':
                            for trow in range(len(table)):
                                year_stats = {}
                                year_stats['player'] = college_stats_soup.body.find('h1', {'itemprop': 'name'}).text
                                year_stats['year'] = table[trow].find('th', {'data-stat': 'year_id'}).text.replace('*', '')                    
                                year_stats['school'] = table[trow].find('td', {'data-stat': 'school_name'}).text
                                year_stats['conference'] = table[trow].find('td', {'data-stat': 'conf_abbr'}).text
                                year_stats['class'] = table[trow].find('td', {'data-stat': 'class'}).text
                                year_stats['pos'] = table[trow].find('td', {'data-stat': 'pos'}).text
                                year_stats['games'] = table[trow].find('td', {'data-stat': 'g'}).text
                                year_stats['pass_cmp'] = table[trow].find('td', {'data-stat': 'pass_cmp'}).text
                                year_stats['pass_att'] = table[trow].find('td', {'data-stat': 'pass_att'}).text
                                year_stats['pass_cmp_pct'] = table[trow].find('td', {'data-stat': 'pass_cmp_pct'}).text
                                year_stats['pass_yds'] = table[trow].find('td', {'data-stat': 'pass_yds'}).text
                                year_stats['pass_yds_per_att'] = table[trow].find('td', {'data-stat': 'pass_yds_per_att'}).text
                                year_stats['adj_pass_yds_per_att'] = table[trow].find('td', {'data-stat': 'adj_pass_yds_per_att'}).text
                                year_stats['pass_td'] = table[trow].find('td', {'data-stat': 'pass_td'}).text
                                year_stats['pass_int'] = table[trow].find('td', {'data-stat': 'pass_int'}).text
                                year_stats['pass_rating'] = table[trow].find('td', {'data-stat': 'pass_rating'}).text
                                cat_stats.append(year_stats)                    

                        elif (college_tables_soup[i].find('h2').text == 'Receiving & Rushing') | (college_tables_soup[i].find('h2').text == 'Rushing & Receiving'):
                            for trow in range(len(table)):
                                year_stats = {}
                                year_stats['player'] = college_stats_soup.body.find('h1', {'itemprop': 'name'}).text                    
                                year_stats['year'] = table[trow].find('th', {'data-stat': 'year_id'}).text.replace('*', '')
                                year_stats['school'] = table[trow].find('td', {'data-stat': 'school_name'}).text
                                year_stats['conference'] = table[trow].find('td', {'data-stat': 'conf_abbr'}).text
                                year_stats['class'] = table[trow].find('td', {'data-stat': 'class'}).text
                                year_stats['pos'] = table[trow].find('td', {'data-stat': 'pos'}).text
                                year_stats['games'] = table[trow].find('td', {'data-stat': 'g'}).text
                                year_stats['rush_att'] = table[trow].find('td', {'data-stat': 'rush_att'}).text
                                year_stats['rush_yds'] = table[trow].find('td', {'data-stat': 'rush_yds'}).text
                                year_stats['rush_yds_per_att'] = table[trow].find('td', {'data-stat': 'rush_yds_per_att'}).text
                                year_stats['rush_td'] = table[trow].find('td', {'data-stat': 'rush_td'}).text
                                year_stats['rec'] = table[trow].find('td', {'data-stat': 'rec'}).text
                                year_stats['rec_yds'] = table[trow].find('td', {'data-stat': 'rec_yds'}).text
                                year_stats['rec_yds_per_rec'] = table[trow].find('td', {'data-stat': 'rec_yds_per_rec'}).text
                                year_stats['rec_td'] = table[trow].find('td', {'data-stat': 'rec_td'}).text
                                year_stats['scrim_att'] = table[trow].find('td', {'data-stat': 'scrim_att'}).text
                                year_stats['scrim_yds'] = table[trow].find('td', {'data-stat': 'scrim_yds'}).text
                                year_stats['scrim_yds_per_att'] = table[trow].find('td', {'data-stat': 'scrim_yds_per_att'}).text
                                year_stats['scrim_td'] = table[trow].find('td', {'data-stat': 'scrim_td'}).text
                                cat_stats.append(year_stats) 

                        elif college_tables_soup[i].find('h2').text == 'Defense & Fumbles':
                            for trow in range(len(table)):
                                year_stats = {}
                                year_stats['player'] = college_stats_soup.body.find('h1', {'itemprop': 'name'}).text
                                year_stats['year'] = table[trow].find('th', {'data-stat': 'year_id'}).text.replace('*', '')
                                year_stats['school'] = table[trow].find('td', {'data-stat': 'school_name'}).text
                                year_stats['conference'] = table[trow].find('td', {'data-stat': 'conf_abbr'}).text
                                year_stats['class'] = table[trow].find('td', {'data-stat': 'class'}).text
                                year_stats['pos'] = table[trow].find('td', {'data-stat': 'pos'}).text
                                year_stats['games'] = table[trow].find('td', {'data-stat': 'g'}).text
                                year_stats['tackles_solo'] = table[trow].find('td', {'data-stat': 'tackles_solo'}).text
                                year_stats['tackles_assists'] = table[trow].find('td', {'data-stat': 'tackles_assists'}).text
                                year_stats['tackles_total'] = table[trow].find('td', {'data-stat': 'tackles_total'}).text
                                year_stats['tackles_loss'] = table[trow].find('td', {'data-stat': 'tackles_loss'}).text
                                year_stats['sacks'] = table[trow].find('td', {'data-stat': 'sacks'}).text
                                year_stats['def_int'] = table[trow].find('td', {'data-stat': 'def_int'}).text
                                year_stats['def_int_yds'] = table[trow].find('td', {'data-stat': 'def_int_yds'}).text
                                year_stats['def_int_yds_per_int'] = table[trow].find('td', {'data-stat': 'def_int_yds_per_int'}).text
                                year_stats['def_int_td'] = table[trow].find('td', {'data-stat': 'def_int_td'}).text
                                year_stats['pass_defended'] = table[trow].find('td', {'data-stat': 'pass_defended'}).text
                                year_stats['fumbles_rec'] = table[trow].find('td', {'data-stat': 'fumbles_rec'}).text
                                year_stats['fumbles_rec_yds'] = table[trow].find('td', {'data-stat': 'fumbles_rec_yds'}).text
                                year_stats['fumbles_rec_td'] = table[trow].find('td', {'data-stat': 'fumbles_rec_td'}).text
                                year_stats['fumbles_forced'] = table[trow].find('td', {'data-stat': 'fumbles_forced'}).text
                                cat_stats.append(year_stats)

                        elif college_tables_soup[i].find('h2').text == 'Punt & Kick Returns':
                            for trow in range(len(table)):
                                year_stats = {}
                                year_stats['player'] = college_stats_soup.body.find('h1', {'itemprop': 'name'}).text
                                year_stats['year'] = table[trow].find('th', {'data-stat': 'year_id'}).text.replace('*', '')
                                year_stats['school'] = table[trow].find('td', {'data-stat': 'school_name'}).text
                                year_stats['conference'] = table[trow].find('td', {'data-stat': 'conf_abbr'}).text
                                year_stats['class'] = table[trow].find('td', {'data-stat': 'class'}).text
                                year_stats['pos'] = table[trow].find('td', {'data-stat': 'pos'}).text
                                year_stats['games'] = table[trow].find('td', {'data-stat': 'g'}).text
                                year_stats['punt_ret'] = table[trow].find('td', {'data-stat': 'punt_ret'}).text
                                year_stats['punt_ret_yds'] = table[trow].find('td', {'data-stat': 'punt_ret_yds'}).text
                                year_stats['punt_ret_yds_per_ret'] = table[trow].find('td', {'data-stat': 'punt_ret_yds_per_ret'}).text
                                year_stats['punt_ret_td'] = table[trow].find('td', {'data-stat': 'punt_ret_td'}).text
                                year_stats['kick_ret'] = table[trow].find('td', {'data-stat': 'kick_ret'}).text
                                year_stats['kick_ret_yds'] = table[trow].find('td', {'data-stat': 'kick_ret_yds'}).text
                                year_stats['kick_ret_yds_per_ret'] = table[trow].find('td', {'data-stat': 'kick_ret_yds_per_ret'}).text
                                year_stats['kick_ret_td'] = table[trow].find('td', {'data-stat': 'kick_ret_td'}).text
                                cat_stats.append(year_stats)

                        primary_df = pd.DataFrame(cat_stats)

                    else:
                        ## scraping with comment extraction
                        comment = college_tables_soup[i].find_all(string=lambda text:isinstance(text, Comment))[0]
                        comment_soup = BeautifulSoup(comment, 'lxml')
                        table = comment_soup.tbody.find_all('tr')
                        cat_stats = []
                        if college_tables_soup[i].find('h2').text == 'Passing':
                            for trow in range(len(table)):
                                year_stats = {}
                                year_stats['player'] = college_stats_soup.body.find('h1', {'itemprop': 'name'}).text
                                year_stats['year'] = table[trow].find('th', {'data-stat': 'year_id'}).text.replace('*', '')                    
                                year_stats['pass_cmp'] = table[trow].find('td', {'data-stat': 'pass_cmp'}).text
                                year_stats['pass_att'] = table[trow].find('td', {'data-stat': 'pass_att'}).text
                                year_stats['pass_cmp_pct'] = table[trow].find('td', {'data-stat': 'pass_cmp_pct'}).text
                                year_stats['pass_yds'] = table[trow].find('td', {'data-stat': 'pass_yds'}).text
                                year_stats['pass_yds_per_att'] = table[trow].find('td', {'data-stat': 'pass_yds_per_att'}).text
                                year_stats['adj_pass_yds_per_att'] = table[trow].find('td', {'data-stat': 'adj_pass_yds_per_att'}).text
                                year_stats['pass_td'] = table[trow].find('td', {'data-stat': 'pass_td'}).text
                                year_stats['pass_int'] = table[trow].find('td', {'data-stat': 'pass_int'}).text
                                year_stats['pass_rating'] = table[trow].find('td', {'data-stat': 'pass_rating'}).text
                                cat_stats.append(year_stats)

                        elif (college_tables_soup[i].find('h2').text == 'Receiving & Rushing') | (college_tables_soup[i].find('h2').text == 'Rushing & Receiving'):
                            for trow in range(len(table)):
                                year_stats = {}
                                year_stats['player'] = college_stats_soup.body.find('h1', {'itemprop': 'name'}).text                    
                                year_stats['year'] = table[trow].find('th', {'data-stat': 'year_id'}).text.replace('*', '')
                                year_stats['rush_att'] = table[trow].find('td', {'data-stat': 'rush_att'}).text
                                year_stats['rush_yds'] = table[trow].find('td', {'data-stat': 'rush_yds'}).text
                                year_stats['rush_yds_per_att'] = table[trow].find('td', {'data-stat': 'rush_yds_per_att'}).text
                                year_stats['rush_td'] = table[trow].find('td', {'data-stat': 'rush_td'}).text
                                year_stats['rec'] = table[trow].find('td', {'data-stat': 'rec'}).text
                                year_stats['rec_yds'] = table[trow].find('td', {'data-stat': 'rec_yds'}).text
                                year_stats['rec_yds_per_rec'] = table[trow].find('td', {'data-stat': 'rec_yds_per_rec'}).text
                                year_stats['rec_td'] = table[trow].find('td', {'data-stat': 'rec_td'}).text
                                year_stats['scrim_att'] = table[trow].find('td', {'data-stat': 'scrim_att'}).text
                                year_stats['scrim_yds'] = table[trow].find('td', {'data-stat': 'scrim_yds'}).text
                                year_stats['scrim_yds_per_att'] = table[trow].find('td', {'data-stat': 'scrim_yds_per_att'}).text
                                year_stats['scrim_td'] = table[trow].find('td', {'data-stat': 'scrim_td'}).text
                                cat_stats.append(year_stats)

                        elif college_tables_soup[i].find('h2').text == 'Defense & Fumbles':
                            for trow in range(len(table)):
                                year_stats = {}
                                year_stats['player'] = college_stats_soup.body.find('h1', {'itemprop': 'name'}).text
                                year_stats['year'] = table[trow].find('th', {'data-stat': 'year_id'}).text.replace('*', '')
                                year_stats['tackles_solo'] = table[trow].find('td', {'data-stat': 'tackles_solo'}).text
                                year_stats['tackles_assists'] = table[trow].find('td', {'data-stat': 'tackles_assists'}).text
                                year_stats['tackles_total'] = table[trow].find('td', {'data-stat': 'tackles_total'}).text
                                year_stats['tackles_loss'] = table[trow].find('td', {'data-stat': 'tackles_loss'}).text
                                year_stats['sacks'] = table[trow].find('td', {'data-stat': 'sacks'}).text
                                year_stats['def_int'] = table[trow].find('td', {'data-stat': 'def_int'}).text
                                year_stats['def_int_yds'] = table[trow].find('td', {'data-stat': 'def_int_yds'}).text
                                year_stats['def_int_yds_per_int'] = table[trow].find('td', {'data-stat': 'def_int_yds_per_int'}).text
                                year_stats['def_int_td'] = table[trow].find('td', {'data-stat': 'def_int_td'}).text
                                year_stats['pass_defended'] = table[trow].find('td', {'data-stat': 'pass_defended'}).text
                                year_stats['fumbles_rec'] = table[trow].find('td', {'data-stat': 'fumbles_rec'}).text
                                year_stats['fumbles_rec_yds'] = table[trow].find('td', {'data-stat': 'fumbles_rec_yds'}).text
                                year_stats['fumbles_rec_td'] = table[trow].find('td', {'data-stat': 'fumbles_rec_td'}).text
                                year_stats['fumbles_forced'] = table[trow].find('td', {'data-stat': 'fumbles_forced'}).text
                                cat_stats.append(year_stats)

                        elif college_tables_soup[i].find('h2').text == 'Punt & Kick Returns':
                            for trow in range(len(table)):
                                year_stats = {}
                                year_stats['player'] = college_stats_soup.body.find('h1', {'itemprop': 'name'}).text
                                year_stats['year'] = table[trow].find('th', {'data-stat': 'year_id'}).text.replace('*', '')
                                year_stats['punt_ret'] = table[trow].find('td', {'data-stat': 'punt_ret'}).text
                                year_stats['punt_ret_yds'] = table[trow].find('td', {'data-stat': 'punt_ret_yds'}).text
                                year_stats['punt_ret_yds_per_ret'] = table[trow].find('td', {'data-stat': 'punt_ret_yds_per_ret'}).text
                                year_stats['punt_ret_td'] = table[trow].find('td', {'data-stat': 'punt_ret_td'}).text
                                year_stats['kick_ret'] = table[trow].find('td', {'data-stat': 'kick_ret'}).text
                                year_stats['kick_ret_yds'] = table[trow].find('td', {'data-stat': 'kick_ret_yds'}).text
                                year_stats['kick_ret_yds_per_ret'] = table[trow].find('td', {'data-stat': 'kick_ret_yds_per_ret'}).text
                                year_stats['kick_ret_td'] = table[trow].find('td', {'data-stat': 'kick_ret_td'}).text
                                cat_stats.append(year_stats)

                        temp_df = pd.DataFrame(cat_stats)
                        player_df = pd.concat([player_df, temp_df.drop(columns=['player', 'year'])], axis=1)

            player_df = pd.concat([primary_df, player_df], axis=1)

            all_player_stats = pd.concat([all_player_stats, player_df], ignore_index=True, sort=False)


        else:
            print(row['player'], row['position'], 'data not mapped')
    except:
        pass

Baker Mayfield
0
1
2
3
Saquon Barkley
0
1
2
3
4
Sam Darnold
0
1
2
3
Denzel Ward
0
1
Bradley Chubb
0
Quenton Nelson G data not mapped
Josh Allen
0
1
2
3
Roquan Smith
0
Mike McGlinchey T data not mapped
Josh Rosen
0
1
2
3
4
Minkah Fitzpatrick
0
1
2
Vita Vea
0
1
Daron Payne
0
1
2
Marcus Davenport
0
1
Kolton Miller T data not mapped
Tremaine Edmunds
0
Derwin James
0
1
2
Jaire Alexander
0
1
2
Leighton Vander Esch
0
1
Frank Ragnow C data not mapped
Billy Price C data not mapped
Rashaan Evans
0
Isaiah Wynn T data not mapped
D.J. Moore
0
1
2
3
4
5
Hayden Hurst
0
1
2
3
Calvin Ridley
0
1
2
3
Rashaad Penny
0
1
2
3
Terrell Edmunds
0
1
2
Taven Bryan
0
1
Mike Hughes
0
1
2
Sony Michel
0
1
2
3
Lamar Jackson
0
1
2
3
Austin Corbett C data not mapped
Will Hernandez G data not mapped
Nick Chubb
0
1
Darius Leonard LB data not mapped
Braden Smith G data not mapped
Ronald Jones
0
1
2
James Daniels C data not mapped
Courtland Sutton
0
1
2
3
4
Harold Landry
0
Mike Gesicki
0
1
2
Kerryon Johnson
0
1
2
3
4
Dante 

Dorian Johnson G data not mapped
Carl Lawson
0
Josh Reynolds
0
1
2
Mack Hollins
0
1
2
Tarik Cohen RB data not mapped
Ben Gedeon
0
1
2
3
Joe Williams
0
1
Nico Siragusa G data not mapped
Montae Nicholson
0
1
Jalen Reeves-Maybin
0
Samson Ebukam OLB data not mapped
Howard Wilson
0
1
Michael Roberts
0
1
2
3
Josh Malone
0
1
David Sharpe T data not mapped
Julie'n Davenport T data not mapped
Deatrich Wise
0
Donnel Pumphrey
0
1
2
3
4
Ryan Switzer
0
1
2
3
4
Jamaal Williams
0
1
2
Joshua Dobbs
0
1
2
Sean Harlow G data not mapped
Zach Banner T data not mapped
Ryan Glasgow
0
Jehu Chesson
0
1
2
3
Wayne Gallman
0
1
Chad Hansen
0
1
2
Carlos Watkins
0
1
Marlon Mack
0
1
2
Grover Stewart DT data not mapped
Jake Butt
0
1
2
George Kittle
0
1
2
3
Jordan Morgan G data not mapped
Blair Brown
0
Damontae Kazee
0
1
2
Jordan Leggett
0
1
2
3
Desmond King
0
1
2
Corn Elder
0
1
2
Jake Elliott K data not mapped
Jeremy Sprinkle
0
1
2
Jayon Brown
0
1
Brian Hill
0
1
2
3
Will Holden T data not mapped
Nate Hairston
0
1
2
Je

Rico Gathers TE data not mapped
Kevon Seymour
0
1
Will Parks S data not mapped
Travis Feeney
0
1
Ted Karras G data not mapped
Aaron Wallace
0
Brandon Doughty
0
1
2
Donavon Clark G data not mapped
Devin Lucien
0
1
2
3
4
Jonathan Woodard DE data not mapped
Stephen Weatherly
0
1
2
Riley Dixon P data not mapped
Demarcus Ayers
0
1
2
3
4
Daniel Braverman
0
1
2
3
4
Thomas Duarte
0
1
2
Steven Daniels
0
Jalen Mills
0
Vadal Alexander G data not mapped
Lac Edwards P data not mapped
Dwayne Washington
0
1
2
3
Daniel Lasco
0
1
2
3
Devin Fuller
0
1
2
3
4
Trevor Bates LB data not mapped
Alex McCalister
0
1
Charone Peake
0
1
2
Keith Marshall
0
1
2
3
Kenny Lawler
0
1
2
Jayron Kearse
0
Clayton Fejedelem
0
1
Tyler Matakevich
0
Zac Brooks
0
1
2
Austin Blythe C data not mapped
Prince Charles Iworah
0
Scooby Wright
0
Joe Walker
0
1
Beau Sandland TE data not mapped
Kalan Reed
0
1
Jameis Winston
0
1
2
3
Marcus Mariota
0
1
2
3
Dante Fowler
0
Amari Cooper
0
1
2
Brandon Scherff T data not mapped
Leonard Williams


0
Jace Amaro
0
1
2
Jeremiah Attaochu
0
Ego Ferguson
0
Troy Niklas
0
1
2
Davante Adams
0
1
2
3
Bishop Sankey
0
1
2
3
Jeremy Hill
0
1
2
Cody Latimer
0
1
2
Carlos Hyde
0
1
2
3
Stanley Jean-Baptiste DB data not mapped
Jack Mewhort T data not mapped
Kony Ealy
0
1
Allen Robinson
0
1
2
Jimmy Garoppolo QB data not mapped
Jarvis Landry
0
1
2
3
Justin Britt T data not mapped
C.J. Fiedorowicz
0
1
2
3
Morgan Moses OL data not mapped
Billy Turner T data not mapped
Dezmen Southward DB data not mapped
Charles Sims
0
1
2
3
4
Marcus Martin C data not mapped
Christian Kirksey
0
1
Scott Crichton
0
Preston Brown
0
1
2
Jay Bromley
0
Tre Mason
0
1
2
3
Travis Swanson C data not mapped
Chris Borland
0
1
2
3
4
Spencer Long G data not mapped
Terrence Brooks DB data not mapped
Dexter McDougle DB data not mapped
Gabe Jackson G data not mapped
Will Sutton
0
Louis Nix
0
Kareem Martin
0
Khyri Thornton
0
Josh Huff
0
1
2
3
Phillip Gaines DB data not mapped
Will Clarke
0
Chris Watt G data not mapped
Donte Moncrief
0
1


Ryan Nassib
0
1
2
Shamarko Thomas DB data not mapped
Tyler Wilson
0
1
2
3
Barrett Jones G data not mapped
B.W. Webb DB data not mapped
Landry Jones
0
1
2
Earl Watford G data not mapped
Khaseem Greene
0
1
Sean Porter
0
Phillip Thomas DB data not mapped
Gerald Hodges
0
1
Khaled Holmes C data not mapped
J.C. Tretter T data not mapped
Chris Harper
0
1
2
3
Trevardo Williams
0
Johnathan Franklin
0
1
2
William Gholston
0
Malliciah Goodman
0
Quinton Patton
0
1
2
3
John Simon
0
1
Kyle Juszczyk RB data not mapped
Marcus Lattimore
0
1
2
Devin Taylor
0
1
2
Levine Toilolo
0
1
2
Sanders Commings DB data not mapped
Denard Robinson
0
1
2
3
4
Earl Wolff DB data not mapped
Jesse Williams
0
Tharold Simon DB data not mapped
Montori Hughes DT data not mapped
Stepfan Taylor
0
1
2
3
Oday Aboushi T data not mapped
Lavar Edwards
0
1
Jonathan Meeks DB data not mapped
Kenny Stills
0
1
2
3
Steve Williams DB data not mapped
Quanterus Smith DE data not mapped
Steven Means
0
1
A.J. Klein
0
1
Brandon McGee DB data no

Trent Robinson DB data not mapped
Winston Guy DB data not mapped
Cyrus Gray
0
1
2
3
4
B.J. Cunningham
0
1
2
Isaiah Frey DB data not mapped
Ryan Lindley
0
1
2
3
James Hanna
0
1
2
3
Josh Bush DB data not mapped
Danny Trevathan
0
1
Christo Bilukidi DE data not mapped
Markelle Martin DB data not mapped
Dan Herron
0
1
2
3
Charles Mitchell DB data not mapped
Tom Compton T data not mapped
Marvin McNutt
0
1
2
3
4
Nick Mondek OL data not mapped
Jonte Green DB data not mapped
Nate Ebner DB data not mapped
Tommy Streeter
0
1
2
Jason Slowey OL data not mapped
Brandon Washington G data not mapped
Matt McCants T data not mapped
Terrance Ganaway
0
1
2
3
Robert Griffin OL data not mapped
Emmanuel Acho
0
1
Billy Winn
0
LaVon Brazill
0
1
2
3
4
Brad Nortman P data not mapped
Justin Anderson OL data not mapped
Aaron Brown
0
1
Audie Cole
0
1
Scott Solomon
0
Michael Smith
0
1
2
Richard Crawford DB data not mapped
Tim Fugger
0
Kheeston Randall
0
D.J. Campbell DB data not mapped
Jordan Bernstine DB data not m

Jay Finley
0
1
2
Jeremy Beal DL data not mapped
Eric Hagg DB data not mapped
DeMarco Sampson
0
1
2
3
Curtis Holcomb DB data not mapped
Tommie Campbell DB data not mapped
Bill Nagy G data not mapped
Chris Neild
0
Cheta Ozougwu
0
Sam Bradford
0
1
2
3
Ndamukong Suh
0
1
2
Gerald McCoy
0
Trent Williams T data not mapped
Eric Berry DB data not mapped
Russell Okung T data not mapped
Joe Haden DB data not mapped
Rolando McClain
0
1
C.J. Spiller
0
1
2
3
4
Tyson Alualu
0
1
Anthony Davis T data not mapped
Ryan Mathews
0
1
2
3
Brandon Graham
0
1
2
Earl Thomas DB data not mapped
Jason Pierre-Paul
0
1
Derrick Morgan
0
1
Mike Iupati G data not mapped
Maurkice Pouncey C data not mapped
Sean Weatherspoon
0
1
2
Kareem Jackson DB data not mapped
Jermaine Gresham
0
1
2
Demaryius Thomas
0
1
2
Bryan Bulaga T data not mapped
Dez Bryant
0
1
2
Tim Tebow
0
1
2
3
Dan Williams
0
Devin McCourty DB data not mapped
Jared Odrick
0
1
Kyle Wilson DB data not mapped
Jahvid Best
0
1
2
3
Jerry Hughes
0
1
2
Patrick Robinso

Will Beatty T data not mapped
Sean Smith DB data not mapped
Sen'Derrick Marks DT data not mapped
Cody Brown LB data not mapped
Richard Quinn
0
1
2
3
Shonn Greene
0
1
2
3
Bradley Fletcher DB data not mapped
Alex Magee
0
1
Jarron Gilbert
0
Jason Williams LB data not mapped
Michael Johnson
0
1
Matt Shaughnessy
0
Terrance Knighton
0
1
Derek Cox DB data not mapped
Glen Coffee
0
1
2
3
Robert Brewster T data not mapped
DeAndre Levy
0
Antoine Caldwell G data not mapped
Louis Vasquez G data not mapped
Kraig Urbik T data not mapped
Kevin Barnes DB data not mapped
Roy Miller
0
Derrick Williams
0
1
2
3
4
Brandon Tate
0
1
2
3
4
Mike Wallace
0
1
2
3
Ramses Barden WR data not mapped
Asher Allen DB data not mapped
Patrick Turner
0
1
2
3
Lardarius Webb DB data not mapped
Jared Cook
0
1
2
Chris Owens DB data not mapped
Deon Butler
0
1
2
Jerraud Powers DB data not mapped
Corvey Irvin
0
Ryan Mouton DB data not mapped
Rashad Johnson DB data not mapped
Keenan Lewis DB data not mapped
Tyrone McKenzie
0
Chase

In [18]:
all_player_stats

Unnamed: 0,player,year,school,conference,class,pos,games,pass_cmp,pass_att,pass_cmp_pct,...,fumbles_rec_td,fumbles_forced,punt_ret,punt_ret_yds,punt_ret_yds_per_ret,punt_ret_td,kick_ret,kick_ret_yds,kick_ret_yds_per_ret,kick_ret_td
0,Baker Mayfield,2013,Texas Tech,Big 12,FR,QB,8,218,340,64.1,...,,0,,,,,,,,
1,Baker Mayfield,2015,Oklahoma,Big 12,SO,QB,13,269,395,68.1,...,,0,,,,,,,,
2,Baker Mayfield,2016,Oklahoma,Big 12,JR,QB,13,254,358,70.9,...,,0,,,,,,,,
3,Baker Mayfield,2017,Oklahoma,Big 12,SR,QB,14,285,404,70.5,...,,0,,,,,,,,
4,Saquon Barkley,2015,Penn State,Big Ten,FR,RB,11,0,0,,...,,0,,,,,,,,
5,Saquon Barkley,2016,Penn State,Big Ten,SO,RB,14,0,0,,...,,0,,,,,,,,
6,Saquon Barkley,2017,Penn State,Big Ten,JR,RB,13,2,2,100.0,...,,0,,,,,,,,
7,Sam Darnold,2015,USC,Pac-12,FR,QB,,,,,...,,,,,,,,,,
8,Sam Darnold,2016,USC,Pac-12,FR,QB,13,246,366,67.2,...,,0,,,,,,,,
9,Sam Darnold,2017,USC,Pac-12,SO,QB,14,303,480,63.1,...,,0,,,,,,,,


In [10]:
all_player_stats.to_csv('./data/college/all_player_stats.csv')

In [19]:
len(all_player_stats.columns)

50

In [20]:
len(['player', 'year', 'school', 'conference', 'class', 'pos', 'games', 'pass_cmp', 'pass_att', 
           'pass_cmp_pct','pass_yds', 'pass_yds_per_att', 'adj_pass_yds_per_att', 'pass_td', 'pass_int', 
           'pass_rating', 'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td', 'rec', 'rec_yds','rec_yds_per_rec', 
           'rec_td', 'scrim_att', 'scrim_yds', 'scrim_yds_per_att', 'scrim_td', 'tackles_solo', 'tackles_assists', 
           'tackles_total', 'tackles_loss', 'sacks', 'def_int', 'def_int_yds', 'def_int_yds_per_int', 'def_int_td', 
           'pass_defended', 'fumbles_rec', 'fumbles_rec_yds', 'fumbles_rec_td', 'fumbles_forced', 'punt_ret', 
           'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td', 'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 
           'kick_ret_td'])

50

In [21]:
all_player_stats[['player', 'year', 'school', 'conference', 'class', 'pos', 'games', 'pass_cmp', 'pass_att', 
           'pass_cmp_pct','pass_yds', 'pass_yds_per_att', 'adj_pass_yds_per_att', 'pass_td', 'pass_int', 
           'pass_rating', 'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td', 'rec', 'rec_yds','rec_yds_per_rec', 
           'rec_td', 'scrim_att', 'scrim_yds', 'scrim_yds_per_att', 'scrim_td', 'tackles_solo', 'tackles_assists', 
           'tackles_total', 'tackles_loss', 'sacks', 'def_int', 'def_int_yds', 'def_int_yds_per_int', 'def_int_td', 
           'pass_defended', 'fumbles_rec', 'fumbles_rec_yds', 'fumbles_rec_td', 'fumbles_forced', 'punt_ret', 
           'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td', 'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 
           'kick_ret_td']]

Unnamed: 0,player,year,school,conference,class,pos,games,pass_cmp,pass_att,pass_cmp_pct,...,fumbles_rec_td,fumbles_forced,punt_ret,punt_ret_yds,punt_ret_yds_per_ret,punt_ret_td,kick_ret,kick_ret_yds,kick_ret_yds_per_ret,kick_ret_td
0,Baker Mayfield,2013,Texas Tech,Big 12,FR,QB,8,218,340,64.1,...,,0,,,,,,,,
1,Baker Mayfield,2015,Oklahoma,Big 12,SO,QB,13,269,395,68.1,...,,0,,,,,,,,
2,Baker Mayfield,2016,Oklahoma,Big 12,JR,QB,13,254,358,70.9,...,,0,,,,,,,,
3,Baker Mayfield,2017,Oklahoma,Big 12,SR,QB,14,285,404,70.5,...,,0,,,,,,,,
4,Saquon Barkley,2015,Penn State,Big Ten,FR,RB,11,0,0,,...,,0,,,,,,,,
5,Saquon Barkley,2016,Penn State,Big Ten,SO,RB,14,0,0,,...,,0,,,,,,,,
6,Saquon Barkley,2017,Penn State,Big Ten,JR,RB,13,2,2,100.0,...,,0,,,,,,,,
7,Sam Darnold,2015,USC,Pac-12,FR,QB,,,,,...,,,,,,,,,,
8,Sam Darnold,2016,USC,Pac-12,FR,QB,13,246,366,67.2,...,,0,,,,,,,,
9,Sam Darnold,2017,USC,Pac-12,SO,QB,14,303,480,63.1,...,,0,,,,,,,,


In [22]:
all_player_stats.isnull().sum()

player                     0
year                       0
school                     0
conference                 0
class                      0
pos                        0
games                      0
pass_cmp                4329
pass_att                4329
pass_cmp_pct            4329
pass_yds                4329
pass_yds_per_att        4329
adj_pass_yds_per_att    4329
pass_td                 4329
pass_int                4329
pass_rating             4329
rush_att                2585
rush_yds                2585
rush_yds_per_att        2585
rush_td                 2585
rec                     2585
rec_yds                 2585
rec_yds_per_rec         2585
rec_td                  2585
scrim_att               2585
scrim_yds               2585
scrim_yds_per_att       2585
scrim_td                2585
tackles_solo             263
tackles_assists          263
tackles_total            263
tackles_loss             263
sacks                    263
def_int                  263
def_int_yds   