In [1]:
import urllib
from bs4 import BeautifulSoup as BS
import pandas as pd

## Root URL of the combine results page that is scraped below. The query
## string requests year=all and leaves the pos/college parameters empty
## (presumably meaning "no filter" -- confirm against the site).
url = 'http://nflcombineresults.com/nflcombinedata_expanded.php?year=all&pos=&college='


In [2]:
## Download the raw HTML of the combine results page. The response is
## closed explicitly so the underlying connection is not left open.
response = urllib.urlopen(url)
try:
    html = response.read()
finally:
    response.close()

## Parse the page with Python's built-in HTML parser.
soup = BS(html, 'html.parser')

## The results table itself (the one carrying the "sortable" class).
body = soup.find('table', {'class':'sortable'})

## Look for the header and data sections inside the results table so that
## another table on the page cannot be picked up by mistake. If the
## results table was not found, fall back to searching the whole page.
table_scope = body if body is not None else soup

## Header section holding the column names.
col_body = table_scope.find('thead')

## Data section holding the player rows.
players_body = table_scope.find('tbody')

In [22]:
## Build the list of DataFrame column names from the table header.

# The header cells are marked up as 'td' elements. Take the text of each
# one and clean it in a single pass: spaces become underscores and
# parentheses are removed.
columns = [
    header_cell.text.replace(' ', '_').replace('(', '').replace(')', '')
    for header_cell in col_body.findAll('td')
]

# Display the cleaned names to confirm every header was captured.
columns

[u'Year',
 u'Name',
 u'College',
 u'POS',
 u'Height_in',
 u'Weight_lbs',
 u'Hand_Size_in',
 u'Arm_Length_in',
 u'Wonderlic',
 u'40_Yard',
 u'Bench_Press',
 u'Vert_Leap_in',
 u'Broad_Jump_in',
 u'Shuttle',
 u'3Cone',
 u'60Yd_Shuttle']

In [4]:
## Collect the text of every cell of every player row into one flat list
## (one entry per cell, in table order).

rows = []

## Walk each row of the table body ...
for row in players_body.findAll('tr'):
    ## ... and take only its direct cell elements. Iterating over the row
    ## itself would also visit the text nodes between cells (line breaks,
    ## indentation), which would add spurious entries to the list whenever
    ## the page's HTML is formatted with whitespace.
    for cell in row.findAll(['td', 'th'], recursive=False):
        rows.append(cell.text)  # .text keeps only the cell's text content

## Display the result as a sanity check.
rows

[u'2016',
 u'Mehdi Abdesmad',
 u'Boston College',
 u'DT',
 u'78',
 u'284',
 u'9.75',
 u'33.38',
 u'',
 u'5.10',
 u'25',
 u'29.5',
 u'108',
 u'4.62',
 u'7.55',
 u'',
 u'2016',
 u'Jerell Adams',
 u'South Carolina',
 u'TE',
 u'77',
 u'247',
 u'9.75',
 u'34.38',
 u'',
 u'4.64',
 u'',
 u'32.5',
 u'117',
 u'4.31',
 u'7.05',
 u'11.52',
 u'2016',
 u'Vernon Adams',
 u'Oregon',
 u'QB',
 u'70',
 u'200',
 u'9.13',
 u'30.25',
 u'',
 u'4.83',
 u'',
 u'29.5',
 u'114',
 u'4.20',
 u'6.82',
 u'',
 u'2016',
 u'Bralon Addison',
 u'Oregon',
 u'WR',
 u'69',
 u'197',
 u'9.13',
 u'29.50',
 u'',
 u'4.66',
 u'13',
 u'34.5',
 u'116',
 u'4.14',
 u'6.95',
 u'11.53',
 u'2016',
 u'Dominique Alexander',
 u'Oklahoma',
 u'ILB',
 u'72',
 u'232',
 u'9.00',
 u'32.25',
 u'',
 u'',
 u'17',
 u'28.5',
 u'104',
 u'',
 u'',
 u'',
 u'2016',
 u'Mackensie Alexander',
 u'Clemson',
 u'CB',
 u'70',
 u'190',
 u'9.13',
 u'31.38',
 u'',
 u'',
 u'11',
 u'',
 u'',
 u'',
 u'',
 u'',
 u'2016',
 u'Vadal Alexander',
 u'LSU',
 u'G',
 u'77',
 u

In [5]:
import numpy as np

## Convert the flat list of cell strings into a NumPy array so that it can
## be reshaped into a table before being turned into a DataFrame.
rows_array = np.asarray(rows)

In [6]:
## Inspect the shape of the array before it is reshaped.
rows_array.shape

(90624,)

In [7]:
## Fold the flat array into one row per player. The column count comes
## from the header list and -1 lets NumPy infer the row count, so this
## cell keeps working when the site's table grows or shrinks (the run
## recorded here had 16 columns and 5664 rows). A ValueError is still
## raised if the cell count is not a multiple of the column count.
row_for_df = np.reshape(rows_array, (-1, len(columns)))

In [8]:
## Inspect the shape after reshaping to confirm it is (rows, columns).
row_for_df.shape

(5664, 16)

In [9]:
## Assemble the DataFrame: the reshaped array supplies the data and the
## cleaned header names supply the column labels.
combine_df = pd.DataFrame(data=row_for_df, columns=columns)

In [10]:
## Display the DataFrame to check that it was built as expected.
combine_df

Unnamed: 0,Year,Name,College,POS,Height (in),Weight (lbs),Hand Size (in),Arm Length (in),Wonderlic,40 Yard,Bench Press,Vert Leap (in),Broad Jump (in),Shuttle,3Cone,60Yd Shuttle
0,2016,Mehdi Abdesmad,Boston College,DT,78,284,9.75,33.38,,5.10,25,29.5,108,4.62,7.55,
1,2016,Jerell Adams,South Carolina,TE,77,247,9.75,34.38,,4.64,,32.5,117,4.31,7.05,11.52
2,2016,Vernon Adams,Oregon,QB,70,200,9.13,30.25,,4.83,,29.5,114,4.20,6.82,
3,2016,Bralon Addison,Oregon,WR,69,197,9.13,29.50,,4.66,13,34.5,116,4.14,6.95,11.53
4,2016,Dominique Alexander,Oklahoma,ILB,72,232,9.00,32.25,,,17,28.5,104,,,
5,2016,Mackensie Alexander,Clemson,CB,70,190,9.13,31.38,,,11,,,,,
6,2016,Vadal Alexander,LSU,G,77,326,10.50,35.25,,5.57,25,24.0,95,4.90,8.04,
7,2016,Brandon Allen,Arkansas,QB,73,217,8.88,31.25,,4.84,,28.0,110,4.33,7.06,
8,2016,Jack Allen,Michigan State,C,73,294,10.13,32.25,,5.29,23,26.5,101,4.73,7.90,
9,2016,Geronimo Allison,Illinois,WR,75,196,9.50,32.88,,4.67,,33.0,127,4.28,7.40,11.54


In [11]:
## Export combine_df as UTF-8 encoded CSV to a file named
## 'terry_combine_df'. The path is relative, so the file is written to
## the current working directory.
combine_df.to_csv(path_or_buf='terry_combine_df', encoding='utf-8')