In [1]:
# Import scraping modules
from urllib.request import urlopen
from bs4 import BeautifulSoup


# Import data manipulation modules
import pandas as pd
import numpy as np
# Import data visualization modules
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# URL of page
url = 'https://www.fantasypros.com/nfl/stats/rb.php?scoring=HALF&ownership=y'
# Open URL and pass to BeautifulSoup
html = urlopen(url)
stats_page = BeautifulSoup(html)


In [3]:
# Collect table headers
column_headers = stats_page.findAll('tr')[1]
column_headers = [i.getText() for i in column_headers.findAll('th')]

print(column_headers)

['Rank', 'Player', 'ATT', 'YDS', 'Y/A', 'LG', '20+', 'TD', 'REC', 'TGT', 'YDS', 'Y/R', 'TD', 'FL', 'G', ' FPTS', 'FPTS/G', 'OWN']


In [4]:
# Collect table rows
rows = stats_page.findAll('tr')[2:]
# Get stats from each row
rb_stats = []
for i in range(len(rows)):
  rb_stats.append([col.getText() for col in rows[i].findAll('td')])

In [5]:
df = pd.DataFrame(rb_stats, columns=column_headers)
df


Unnamed: 0,Rank,Player,ATT,YDS,Y/A,LG,20+,TD,REC,TGT,YDS.1,Y/R,TD.1,FL,G,FPTS,FPTS/G,OWN
0,1,Alvin Kamara (NO),187,932,5.0,49,0,16,83,107,756,9.1,5,0,15,336.3,22.4,100.0%
1,2,Derrick Henry (TEN),378,2027,5.4,94,0,17,19,31,114,6.0,0,2,16,323.6,20.2,100.0%
2,3,Dalvin Cook (MIN),312,1557,5.0,70,0,16,44,54,361,8.2,1,3,14,315.8,22.6,100.0%
3,4,David Montgomery (CHI),247,1070,4.3,80,0,8,54,68,438,8.1,2,1,15,237.8,15.9,97.0%
4,5,Aaron Jones (GB),201,1104,5.5,77,0,9,47,63,355,7.6,2,0,14,235.4,16.8,100.0%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279,280,Kenjon Barner (FA),0,0,0,0,0,0,0,1,0,0,0,0,6,0.0,0.0,0.0%
280,281,Tim Flanders (NO),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
281,282,Brennan Clay (DEN),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
282,283,Otis Anderson Jr. (LAR),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0%


In [6]:
df['Team'] = df['Player'].str[-6:]
position = 'RB'

df['Pos'] = position
df




Unnamed: 0,Rank,Player,ATT,YDS,Y/A,LG,20+,TD,REC,TGT,YDS.1,Y/R,TD.1,FL,G,FPTS,FPTS/G,OWN,Team,Pos
0,1,Alvin Kamara (NO),187,932,5.0,49,0,16,83,107,756,9.1,5,0,15,336.3,22.4,100.0%,(NO),RB
1,2,Derrick Henry (TEN),378,2027,5.4,94,0,17,19,31,114,6.0,0,2,16,323.6,20.2,100.0%,(TEN),RB
2,3,Dalvin Cook (MIN),312,1557,5.0,70,0,16,44,54,361,8.2,1,3,14,315.8,22.6,100.0%,(MIN),RB
3,4,David Montgomery (CHI),247,1070,4.3,80,0,8,54,68,438,8.1,2,1,15,237.8,15.9,97.0%,(CHI),RB
4,5,Aaron Jones (GB),201,1104,5.5,77,0,9,47,63,355,7.6,2,0,14,235.4,16.8,100.0%,(GB),RB
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279,280,Kenjon Barner (FA),0,0,0,0,0,0,0,1,0,0,0,0,6,0.0,0.0,0.0%,(FA),RB
280,281,Tim Flanders (NO),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,(NO),RB
281,282,Brennan Clay (DEN),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,(DEN),RB
282,283,Otis Anderson Jr. (LAR),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0%,(LAR),RB


In [7]:
new_columns = df.columns.values
new_columns[-3] = 'Pct Owned'
new_columns[7] = 'Rush TD'
new_columns[3] = 'Rush YDS'
new_columns[-5] = 'Fantasy Points'
new_columns[-4] = 'Fantasy Points Per Game'
new_columns[6] = '20+ Rushes'
new_columns[-10] = 'Rec YDS'
new_columns[-8] = 'Rec TD'
new_columns[-7] = 'Fumbles Lost'
new_columns[-6] = 'Games'
new_columns[2] = 'Rush ATT'
df.columns = new_columns

df.head()


Unnamed: 0,Rank,Player,Rush ATT,Rush YDS,Y/A,LG,20+ Rushes,Rush TD,REC,TGT,Rec YDS,Y/R,Rec TD,Fumbles Lost,Games,Fantasy Points,Fantasy Points Per Game,Pct Owned,Team,Pos
0,1,Alvin Kamara (NO),187,932,5.0,49,0,16,83,107,756,9.1,5,0,15,336.3,22.4,100.0%,(NO),RB
1,2,Derrick Henry (TEN),378,2027,5.4,94,0,17,19,31,114,6.0,0,2,16,323.6,20.2,100.0%,(TEN),RB
2,3,Dalvin Cook (MIN),312,1557,5.0,70,0,16,44,54,361,8.2,1,3,14,315.8,22.6,100.0%,(MIN),RB
3,4,David Montgomery (CHI),247,1070,4.3,80,0,8,54,68,438,8.1,2,1,15,237.8,15.9,97.0%,(CHI),RB
4,5,Aaron Jones (GB),201,1104,5.5,77,0,9,47,63,355,7.6,2,0,14,235.4,16.8,100.0%,(GB),RB


In [9]:
# Create data subset for radar chart
categories = ['Games','Rush ATT','Rush YDS','Y/A','LG','Rush TD','REC','TGT','Rec YDS','Y/R','Rec TD','Fumbles Lost','Fantasy Points','Fantasy Points Per Game','Pct Owned']
df1 = df[['Rank', 'Player','Team','Pos'] + categories]
df1.head()


Unnamed: 0,Rank,Player,Team,Pos,Games,Rush ATT,Rush YDS,Y/A,LG,Rush TD,REC,TGT,Rec YDS,Y/R,Rec TD,Fumbles Lost,Fantasy Points,Fantasy Points Per Game,Pct Owned
0,1,Alvin Kamara (NO),(NO),RB,15,187,932,5.0,49,16,83,107,756,9.1,5,0,336.3,22.4,100.0%
1,2,Derrick Henry (TEN),(TEN),RB,16,378,2027,5.4,94,17,19,31,114,6.0,0,2,323.6,20.2,100.0%
2,3,Dalvin Cook (MIN),(MIN),RB,14,312,1557,5.0,70,16,44,54,361,8.2,1,3,315.8,22.6,100.0%
3,4,David Montgomery (CHI),(CHI),RB,15,247,1070,4.3,80,8,54,68,438,8.1,2,1,237.8,15.9,97.0%
4,5,Aaron Jones (GB),(GB),RB,14,201,1104,5.5,77,9,47,63,355,7.6,2,0,235.4,16.8,100.0%


In [10]:
df1['Player'] = df1['Player'].str.slice(0, -6)
df1.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Rank,Player,Team,Pos,Games,Rush ATT,Rush YDS,Y/A,LG,Rush TD,REC,TGT,Rec YDS,Y/R,Rec TD,Fumbles Lost,Fantasy Points,Fantasy Points Per Game,Pct Owned
0,1,Alvin Kamara,(NO),RB,15,187,932,5.0,49,16,83,107,756,9.1,5,0,336.3,22.4,100.0%
1,2,Derrick Henry,(TEN),RB,16,378,2027,5.4,94,17,19,31,114,6.0,0,2,323.6,20.2,100.0%
2,3,Dalvin Cook,(MIN),RB,14,312,1557,5.0,70,16,44,54,361,8.2,1,3,315.8,22.6,100.0%
3,4,David Montgomery,(CHI),RB,15,247,1070,4.3,80,8,54,68,438,8.1,2,1,237.8,15.9,97.0%
4,5,Aaron Jones,(GB),RB,14,201,1104,5.5,77,9,47,63,355,7.6,2,0,235.4,16.8,100.0%


In [11]:
df1['Rec YDS'] = df1['Rec YDS'].str.replace(',', '')
df1['Rush YDS'] = df1['Rush YDS'].str.replace(',', '')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [12]:
ints = ['Games','Rush ATT','Rush YDS','LG','Rush TD','REC','TGT','Rec YDS','Rec TD','Fumbles Lost']
floats = ['Y/R','Y/A','Fantasy Points','Fantasy Points Per Game',]
df1['Games'] = df1['Games'].astype('int64')

for i in ints:
    df1[i] = df1[i].astype('int64')

for i in floats:
    df1[i] = df1[i].astype('float64')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [13]:
df1.dtypes


Rank                        object
Player                      object
Team                        object
Pos                         object
Games                        int64
Rush ATT                     int64
Rush YDS                     int64
Y/A                        float64
LG                           int64
Rush TD                      int64
REC                          int64
TGT                          int64
Rec YDS                      int64
Y/R                        float64
Rec TD                       int64
Fumbles Lost                 int64
Fantasy Points             float64
Fantasy Points Per Game    float64
Pct Owned                   object
dtype: object

In [14]:
df1

Unnamed: 0,Rank,Player,Team,Pos,Games,Rush ATT,Rush YDS,Y/A,LG,Rush TD,REC,TGT,Rec YDS,Y/R,Rec TD,Fumbles Lost,Fantasy Points,Fantasy Points Per Game,Pct Owned
0,1,Alvin Kamara,(NO),RB,15,187,932,5.0,49,16,83,107,756,9.1,5,0,336.3,22.4,100.0%
1,2,Derrick Henry,(TEN),RB,16,378,2027,5.4,94,17,19,31,114,6.0,0,2,323.6,20.2,100.0%
2,3,Dalvin Cook,(MIN),RB,14,312,1557,5.0,70,16,44,54,361,8.2,1,3,315.8,22.6,100.0%
3,4,David Montgomery,(CHI),RB,15,247,1070,4.3,80,8,54,68,438,8.1,2,1,237.8,15.9,97.0%
4,5,Aaron Jones,(GB),RB,14,201,1104,5.5,77,9,47,63,355,7.6,2,0,235.4,16.8,100.0%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279,280,Kenjon Barner,(FA),RB,6,0,0,0.0,0,0,0,1,0,0.0,0,0,0.0,0.0,0.0%
280,281,Tim Flanders,(NO),RB,0,0,0,0.0,0,0,0,0,0,0.0,0,0,0.0,0.0,
281,282,Brennan Clay,(DEN),RB,0,0,0,0.0,0,0,0,0,0,0.0,0,0,0.0,0.0,
282,283,Otis Anderson Jr.,(LAR),RB,0,0,0,0.0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0%


In [15]:
df1.to_csv('RBstats.csv',index=False)

In [2]:
# URL of page
url = 'https://www.cbssports.com/fantasy/football/draft/averages/both/h2h/RB/'
# Open URL and pass to BeautifulSoup
html = urlopen(url)
stats_page = BeautifulSoup(html)


In [25]:
# Collect table headers
column_headers = stats_page.findAll('tr')[0]
column_headers = [i.getText() for i in column_headers.findAll('th')]

print(column_headers)

['\n                \n                                    Rank\n                \n                            ', '\n                \n                                    Player\n                \n                            ', '\n                \n                                    Trend\n                \n                            ', '\n                \n                                    Avg Pos\n                \n                            ', '\n                \n                                    Hi/Lo\n                \n                            ', '\n                \n                                    Pct\n                \n                            ']


In [20]:
# Collect table rows
rows = stats_page.findAll('tr')[1:]
# Get stats from each row
rb_stats = []
for i in range(len(rows)):
    rb_stats.append([col.getText() for col in rows[i].findAll('td')])
    #rb_stats.strip('\n')


In [50]:
df1 = pd.DataFrame(rb_stats, columns=column_headers)
df1

Unnamed: 0,\n \n Rank\n \n,\n \n Player\n \n,\n \n Trend\n \n,\n \n Avg Pos\n \n,\n \n Hi/Lo\n \n,\n \n Pct\n \n
0,\n 1\n,\n\n\nC. McCaffrey\n\n ...,\n —\n,\n 1.22\n,\n 1/3\n,\n 98.6\n
1,\n 2\n,\n\n\nD. Cook\n\n RB\n ...,\n —\n,\n 2.62\n,\n 2/6\n,\n 98.6\n
2,\n 3\n,\n\n\nD. Henry\n\n RB\n...,\n —\n,\n 3.49\n,\n 1/7\n,\n 98.8\n
3,\n 4\n,\n\n\nA. Kamara\n\n RB\...,\n —\n,\n 4.62\n,\n 3/9\n,\n 98.7\n
4,\n 5\n,\n\n\nE. Elliott\n\n RB...,\n —\n,\n 8.48\n,\n 4/19\n,\n 98.5\n
...,...,...,...,...,...,...
60,\n 211\n,\n\n\nR. Penny\n\n RB\n...,\n —\n,\n 164.80\n,\n 119/220\n,\n 27.2\n
61,\n 213\n,\n\n\nD. Williams\n\n R...,\n\n 8\n\n,\n 164.94\n,\n 114/222\n,\n 27.3\n
62,\n 215\n,\n\n\nR. Burkhead\n\n R...,\n\n -5\n\n,\n 166.80\n,\n 126/227\n,\n 26.7\n
63,\n 216\n,\n\n\nD. Williams\n\n R...,\n\n -12\n\n,\n 166.88\n,\n 102/229\n,\n 26.2\n


In [51]:
df1 = df1.replace(r'\n',' ', regex=True) 

In [53]:
df1 = df1.replace(r'\\n',  ' ', regex=True)

In [54]:
df1

Unnamed: 0,\n \n Rank\n \n,\n \n Player\n \n,\n \n Trend\n \n,\n \n Avg Pos\n \n,\n \n Hi/Lo\n \n,\n \n Pct\n \n
0,1,C. McCaffrey RB ...,—,1.22,1/3,98.6
1,2,D. Cook RB ...,—,2.62,2/6,98.6
2,3,D. Henry RB ...,—,3.49,1/7,98.8
3,4,A. Kamara RB ...,—,4.62,3/9,98.7
4,5,E. Elliott RB ...,—,8.48,4/19,98.5
...,...,...,...,...,...,...
60,211,R. Penny RB ...,—,164.80,119/220,27.2
61,213,D. Williams RB ...,8,164.94,114/222,27.3
62,215,R. Burkhead RB ...,-5,166.80,126/227,26.7
63,216,D. Williams RB ...,-12,166.88,102/229,26.2


In [55]:
df1.to_csv('RBadpstats.csv',index=False)