In [101]:
# import needed libraries
from urllib.request import urlopen
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import requests
 

In [102]:
url = 'https://www.pro-football-reference.com'
year = 2022

r = requests.get(url + '/draft/' + str(year) + '-combine.htm')
soup = BeautifulSoup(r.content, 'html.parser')
parsed_table = soup.find_all('table')[0] 

In [120]:
def scrape_combine_data(years):
    
    dataframe = pd.DataFrame()
    
    # loop through each year
    for y in years:
        
        # season to scrape
        year = y
        
        # URL to scrape, notice f string:
        url = f"https://www.pro-football-reference.com/draft/{year}-combine.htm"
        
        # collect HTML data
        html = urlopen(url)
        
        # create beautiful soup object from HTML
        soup = BeautifulSoup(html, features="lxml")
        
        # use getText()to extract the headers into a list
        headers = [th.getText() for th in soup.findAll('tr')[0].findAll('th')]
        headers.append('Year')

        
        # next, grab all data from rows 
        rows = soup.findAll('tr')[1:]
        rows_data = [[td.getText() for td in rows[i].findAll('th')] # add player column
                     + [td.getText() for td in rows[i].findAll('td')] # add all other columns
                     + [year] # add year to 'Year' for each row
                        for i in range(len(rows))] 

        
        # create a dataframe with all aquired info
        dataframe = pd.concat([dataframe, pd.DataFrame(rows_data, columns = headers)], axis=0)
        
        # status
        print('Added to dataframe:', year)
        
    return dataframe
        

In [121]:
years = np.arange(2000,2023)
years

array([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021,
       2022])

In [129]:
df = scrape_combine_data(years)

Added to dataframe: 2000
Added to dataframe: 2001
Added to dataframe: 2002
Added to dataframe: 2003
Added to dataframe: 2004
Added to dataframe: 2005
Added to dataframe: 2006
Added to dataframe: 2007
Added to dataframe: 2008
Added to dataframe: 2009
Added to dataframe: 2010
Added to dataframe: 2011
Added to dataframe: 2012
Added to dataframe: 2013
Added to dataframe: 2014
Added to dataframe: 2015
Added to dataframe: 2016
Added to dataframe: 2017
Added to dataframe: 2018
Added to dataframe: 2019
Added to dataframe: 2020
Added to dataframe: 2021
Added to dataframe: 2022
Added to dataframe: 2000
Added to dataframe: 2001
Added to dataframe: 2002
Added to dataframe: 2003


KeyboardInterrupt: 

In [123]:
df.reset_index()

Unnamed: 0,index,Player,Pos,School,College,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Drafted (tm/rnd/yr),Year
0,0,John Abraham,OLB,South Carolina,,6-4,252,4.55,,,,,,New York Jets / 1st / 13th pick / 2000,2000
1,1,Shaun Alexander,RB,Alabama,College Stats,6-0,218,4.58,,,,,,Seattle Seahawks / 1st / 19th pick / 2000,2000
2,2,Darnell Alford,OT,Boston Col.,,6-4,334,5.56,25.0,23,94,8.48,4.98,Kansas City Chiefs / 6th / 188th pick / 2000,2000
3,3,Kyle Allamon,TE,Texas Tech,,6-2,253,4.97,29.0,,104,7.29,4.49,,2000
4,4,Rashard Anderson,CB,Jackson State,,6-2,206,4.55,34.0,,123,7.18,4.15,Carolina Panthers / 1st / 23rd pick / 2000,2000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7816,325,Devonte Wyatt,DT,Georgia,College Stats,6-3,304,4.77,29.0,,111,,,Green Bay Packers / 1st / 28th pick / 2022,2022
7817,326,Jalen Wydermyer,TE,Texas A&M,College Stats,6-4,255,,,,,,,,2022
7818,327,Cade York,K,LSU,College Stats,6-1,206,,,12,,,,Cleveland Browns / 4th / 124th pick / 2022,2022
7819,328,Nick Zakelj,OT,Fordham,,6-6,316,5.13,28.5,27,110,7.75,4.71,San Francisco 49ers / 6th / 187th pick / 2022,2022


In [124]:
df

Unnamed: 0,Player,Pos,School,College,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Drafted (tm/rnd/yr),Year
0,John Abraham,OLB,South Carolina,,6-4,252,4.55,,,,,,New York Jets / 1st / 13th pick / 2000,2000
1,Shaun Alexander,RB,Alabama,College Stats,6-0,218,4.58,,,,,,Seattle Seahawks / 1st / 19th pick / 2000,2000
2,Darnell Alford,OT,Boston Col.,,6-4,334,5.56,25.0,23,94,8.48,4.98,Kansas City Chiefs / 6th / 188th pick / 2000,2000
3,Kyle Allamon,TE,Texas Tech,,6-2,253,4.97,29.0,,104,7.29,4.49,,2000
4,Rashard Anderson,CB,Jackson State,,6-2,206,4.55,34.0,,123,7.18,4.15,Carolina Panthers / 1st / 23rd pick / 2000,2000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325,Devonte Wyatt,DT,Georgia,College Stats,6-3,304,4.77,29.0,,111,,,Green Bay Packers / 1st / 28th pick / 2022,2022
326,Jalen Wydermyer,TE,Texas A&M,College Stats,6-4,255,,,,,,,,2022
327,Cade York,K,LSU,College Stats,6-1,206,,,12,,,,Cleveland Browns / 4th / 124th pick / 2022,2022
328,Nick Zakelj,OT,Fordham,,6-6,316,5.13,28.5,27,110,7.75,4.71,San Francisco 49ers / 6th / 187th pick / 2022,2022


In [127]:
df = df.reset_index().drop(columns = 'index')

In [128]:
df

Unnamed: 0,Player,Pos,School,College,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Drafted (tm/rnd/yr),Year
0,John Abraham,OLB,South Carolina,,6-4,252,4.55,,,,,,New York Jets / 1st / 13th pick / 2000,2000
1,Shaun Alexander,RB,Alabama,College Stats,6-0,218,4.58,,,,,,Seattle Seahawks / 1st / 19th pick / 2000,2000
2,Darnell Alford,OT,Boston Col.,,6-4,334,5.56,25.0,23,94,8.48,4.98,Kansas City Chiefs / 6th / 188th pick / 2000,2000
3,Kyle Allamon,TE,Texas Tech,,6-2,253,4.97,29.0,,104,7.29,4.49,,2000
4,Rashard Anderson,CB,Jackson State,,6-2,206,4.55,34.0,,123,7.18,4.15,Carolina Panthers / 1st / 23rd pick / 2000,2000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7816,Devonte Wyatt,DT,Georgia,College Stats,6-3,304,4.77,29.0,,111,,,Green Bay Packers / 1st / 28th pick / 2022,2022
7817,Jalen Wydermyer,TE,Texas A&M,College Stats,6-4,255,,,,,,,,2022
7818,Cade York,K,LSU,College Stats,6-1,206,,,12,,,,Cleveland Browns / 4th / 124th pick / 2022,2022
7819,Nick Zakelj,OT,Fordham,,6-6,316,5.13,28.5,27,110,7.75,4.71,San Francisco 49ers / 6th / 187th pick / 2022,2022


In [116]:
df.to_csv('combine_2000_2022.csv')