In [1]:
# Import scraping modules
from urllib.request import urlopen
from bs4 import BeautifulSoup


# Import data manipulation modules
import pandas as pd
import numpy as np
# Import data visualization modules
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [44]:
# URL of page
url = 'https://www.fantasypros.com/nfl/stats/wr.php?scoring=HALF&ownership=y'
# Open URL and pass to BeautifulSoup
html = urlopen(url)
stats_page = BeautifulSoup(html)


In [45]:
# Collect table headers
column_headers = stats_page.findAll('tr')[1]
column_headers = [i.getText() for i in column_headers.findAll('th')]

print(column_headers)

['Rank', 'Player', 'REC', 'TGT', 'YDS', 'Y/R', 'LG', '20+', 'TD', 'ATT', 'YDS', 'TD', 'FL', 'G', ' FPTS', 'FPTS/G', 'OWN']


In [46]:
# Collect table rows
rows = stats_page.findAll('tr')[2:]
# Get stats from each row
wr_stats = []
for i in range(len(rows)):
  wr_stats.append([col.getText() for col in rows[i].findAll('td')])

In [47]:
df = pd.DataFrame(wr_stats, columns=column_headers)
df

Unnamed: 0,Rank,Player,REC,TGT,YDS,Y/R,LG,20+,TD,ATT,YDS.1,TD.1,FL,G,FPTS,FPTS/G,OWN
0,1,Davante Adams (GB),115,149,1374,11.9,56,0,18,0,0,0,1,14,300.9,21.5,100.0%
1,2,Tyreek Hill (KC),87,135,1276,14.7,75,0,15,13,123,2,0,15,285.4,19.0,100.0%
2,3,Stefon Diggs (BUF),127,166,1535,12.1,55,0,8,1,1,0,0,16,265.1,16.6,100.0%
3,4,Calvin Ridley (ATL),90,143,1374,15.3,63,0,9,5,1,0,1,15,236.5,15.8,100.0%
4,5,DeAndre Hopkins (ARI),115,160,1407,12.2,60,0,6,1,1,0,2,16,230.3,14.4,100.0%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,461,DeAndre Carter (WAS),1,2,8,8.0,8,0,0,0,0,0,1,13,-0.7,-0.1,0.0%
461,462,Kendall Hinton (DEN),0,0,0,0,0,0,0,2,7,0,0,1,-0.8,-0.8,0.0%
462,463,Dede Westbrook (MIN),1,1,4,4.0,4,0,0,0,0,0,1,2,-1.1,-0.6,1.0%
463,464,Nsimba Webster (SF),0,0,0,0,0,0,0,0,0,0,1,16,-2.0,-0.1,0.0%


In [48]:
df['Team'] = df['Player'].str[-6:]
position = 'WR'

df['Pos'] = position
df.head()

Unnamed: 0,Rank,Player,REC,TGT,YDS,Y/R,LG,20+,TD,ATT,YDS.1,TD.1,FL,G,FPTS,FPTS/G,OWN,Team,Pos
0,1,Davante Adams (GB),115,149,1374,11.9,56,0,18,0,0,0,1,14,300.9,21.5,100.0%,(GB),WR
1,2,Tyreek Hill (KC),87,135,1276,14.7,75,0,15,13,123,2,0,15,285.4,19.0,100.0%,(KC),WR
2,3,Stefon Diggs (BUF),127,166,1535,12.1,55,0,8,1,1,0,0,16,265.1,16.6,100.0%,(BUF),WR
3,4,Calvin Ridley (ATL),90,143,1374,15.3,63,0,9,5,1,0,1,15,236.5,15.8,100.0%,(ATL),WR
4,5,DeAndre Hopkins (ARI),115,160,1407,12.2,60,0,6,1,1,0,2,16,230.3,14.4,100.0%,(ARI),WR


In [49]:
new_columns = df.columns.values
new_columns[-3] = 'Pct Owned'
new_columns[-8] = 'Rush TD'
new_columns[-9] = 'Rush YDS'
new_columns[-5] = 'Fantasy Points'
new_columns[-4] = 'Fantasy Points Per Game'
new_columns[-10] = 'Rush ATT'
new_columns[-11] = 'Rec TD'
new_columns[-7] = 'Fumbles Lost'
new_columns[-6] = 'Games'
new_columns[4] = 'Rec YDS'
df.columns = new_columns

df.head()


Unnamed: 0,Rank,Player,REC,TGT,Rec YDS,Y/R,LG,20+,Rec TD,Rush ATT,Rush YDS,Rush TD,Fumbles Lost,Games,Fantasy Points,Fantasy Points Per Game,Pct Owned,Team,Pos
0,1,Davante Adams (GB),115,149,1374,11.9,56,0,18,0,0,0,1,14,300.9,21.5,100.0%,(GB),WR
1,2,Tyreek Hill (KC),87,135,1276,14.7,75,0,15,13,123,2,0,15,285.4,19.0,100.0%,(KC),WR
2,3,Stefon Diggs (BUF),127,166,1535,12.1,55,0,8,1,1,0,0,16,265.1,16.6,100.0%,(BUF),WR
3,4,Calvin Ridley (ATL),90,143,1374,15.3,63,0,9,5,1,0,1,15,236.5,15.8,100.0%,(ATL),WR
4,5,DeAndre Hopkins (ARI),115,160,1407,12.2,60,0,6,1,1,0,2,16,230.3,14.4,100.0%,(ARI),WR


In [50]:
# Create data subset for radar chart
categories = ['Games','REC','TGT','Rec YDS','Y/R','LG','Rec TD','Rush ATT','Rush YDS','Rush TD','Fumbles Lost','Fantasy Points','Fantasy Points Per Game','Pct Owned']
df1 = df[['Rank', 'Player','Team','Pos'] + categories]
df1.head()


Unnamed: 0,Rank,Player,Team,Pos,Games,REC,TGT,Rec YDS,Y/R,LG,Rec TD,Rush ATT,Rush YDS,Rush TD,Fumbles Lost,Fantasy Points,Fantasy Points Per Game,Pct Owned
0,1,Davante Adams (GB),(GB),WR,14,115,149,1374,11.9,56,18,0,0,0,1,300.9,21.5,100.0%
1,2,Tyreek Hill (KC),(KC),WR,15,87,135,1276,14.7,75,15,13,123,2,0,285.4,19.0,100.0%
2,3,Stefon Diggs (BUF),(BUF),WR,16,127,166,1535,12.1,55,8,1,1,0,0,265.1,16.6,100.0%
3,4,Calvin Ridley (ATL),(ATL),WR,15,90,143,1374,15.3,63,9,5,1,0,1,236.5,15.8,100.0%
4,5,DeAndre Hopkins (ARI),(ARI),WR,16,115,160,1407,12.2,60,6,1,1,0,2,230.3,14.4,100.0%


In [51]:
df1['Player'] = df1['Player'].str.slice(0, -6)
df1.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Rank,Player,Team,Pos,Games,REC,TGT,Rec YDS,Y/R,LG,Rec TD,Rush ATT,Rush YDS,Rush TD,Fumbles Lost,Fantasy Points,Fantasy Points Per Game,Pct Owned
0,1,Davante Adams,(GB),WR,14,115,149,1374,11.9,56,18,0,0,0,1,300.9,21.5,100.0%
1,2,Tyreek Hill,(KC),WR,15,87,135,1276,14.7,75,15,13,123,2,0,285.4,19.0,100.0%
2,3,Stefon Diggs,(BUF),WR,16,127,166,1535,12.1,55,8,1,1,0,0,265.1,16.6,100.0%
3,4,Calvin Ridley,(ATL),WR,15,90,143,1374,15.3,63,9,5,1,0,1,236.5,15.8,100.0%
4,5,DeAndre Hopkins,(ARI),WR,16,115,160,1407,12.2,60,6,1,1,0,2,230.3,14.4,100.0%


In [52]:
df1['Rec YDS'] = df1['Rec YDS'].str.replace(',', '')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [53]:
ints = ['Games','Rush ATT','Rush YDS','LG','Rush TD','REC','TGT','Rec YDS','Rec TD','Fumbles Lost']
floats = ['Y/R','Fantasy Points','Fantasy Points Per Game',]
df1['Games'] = df1['Games'].astype('int64')

for i in ints:
    df1[i] = df1[i].astype('int64')

for i in floats:
    df1[i] = df1[i].astype('float64')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [54]:
df1.dtypes


Rank                        object
Player                      object
Team                        object
Pos                         object
Games                        int64
REC                          int64
TGT                          int64
Rec YDS                      int64
Y/R                        float64
LG                           int64
Rec TD                       int64
Rush ATT                     int64
Rush YDS                     int64
Rush TD                      int64
Fumbles Lost                 int64
Fantasy Points             float64
Fantasy Points Per Game    float64
Pct Owned                   object
dtype: object

In [55]:
df1

Unnamed: 0,Rank,Player,Team,Pos,Games,REC,TGT,Rec YDS,Y/R,LG,Rec TD,Rush ATT,Rush YDS,Rush TD,Fumbles Lost,Fantasy Points,Fantasy Points Per Game,Pct Owned
0,1,Davante Adams,(GB),WR,14,115,149,1374,11.9,56,18,0,0,0,1,300.9,21.5,100.0%
1,2,Tyreek Hill,(KC),WR,15,87,135,1276,14.7,75,15,13,123,2,0,285.4,19.0,100.0%
2,3,Stefon Diggs,(BUF),WR,16,127,166,1535,12.1,55,8,1,1,0,0,265.1,16.6,100.0%
3,4,Calvin Ridley,(ATL),WR,15,90,143,1374,15.3,63,9,5,1,0,1,236.5,15.8,100.0%
4,5,DeAndre Hopkins,(ARI),WR,16,115,160,1407,12.2,60,6,1,1,0,2,230.3,14.4,100.0%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,461,DeAndre Carter,(WAS),WR,13,1,2,8,8.0,8,0,0,0,0,1,-0.7,-0.1,0.0%
461,462,Kendall Hinton,(DEN),WR,1,0,0,0,0.0,0,0,2,7,0,0,-0.8,-0.8,0.0%
462,463,Dede Westbrook,(MIN),WR,2,1,1,4,4.0,4,0,0,0,0,1,-1.1,-0.6,1.0%
463,464,Nsimba Webster,(SF),WR,16,0,0,0,0.0,0,0,0,0,0,1,-2.0,-0.1,0.0%


In [56]:
df1.to_csv('WRstats.csv',index=False)

In [3]:
# URL of page
url = 'https://www.cbssports.com/fantasy/football/draft/averages/both/h2h/WR/'
# Open URL and pass to BeautifulSoup
html = urlopen(url)
stats_page = BeautifulSoup(html)


In [4]:
# Collect table headers
column_headers = stats_page.findAll('tr')[0]
column_headers = [i.getText() for i in column_headers.findAll('th')]

print(column_headers)

['\n                \n                                    Rank\n                \n                            ', '\n                \n                                    Player\n                \n                            ', '\n                \n                                    Trend\n                \n                            ', '\n                \n                                    Avg Pos\n                \n                            ', '\n                \n                                    Hi/Lo\n                \n                            ', '\n                \n                                    Pct\n                \n                            ']


In [7]:
# Collect table rows
rows = stats_page.findAll('tr')[1:]
# Get stats from each row
wr_stats = []
for i in range(len(rows)):
    wr_stats.append([col.getText() for col in rows[i].findAll('td')])
    #rb_stats.strip('\n')


In [8]:
df1 = pd.DataFrame(wr_stats, columns=column_headers)
df1

Unnamed: 0,\n \n Rank\n \n,\n \n Player\n \n,\n \n Trend\n \n,\n \n Avg Pos\n \n,\n \n Hi/Lo\n \n,\n \n Pct\n \n
0,\n 7\n,\n\n\nD. Adams\n\n WR\n...,\n —\n,\n 9.63\n,\n 4/17\n,\n 99.4\n
1,\n 13\n,\n\n\nT. Hill\n\n WR\n ...,\n —\n,\n 12.72\n,\n 7/19\n,\n 99.5\n
2,\n 17\n,\n\n\nS. Diggs\n\n WR\n...,\n\n -1\n\n,\n 17.86\n,\n 11/28\n,\n 99.5\n
3,\n 19\n,\n\n\nD. Hopkins\n\n WR...,\n —\n,\n 20.68\n,\n 12/32\n,\n 99.5\n
4,\n 20\n,\n\n\nC. Ridley\n\n WR\...,\n —\n,\n 21.94\n,\n 14/32\n,\n 99.5\n
...,...,...,...,...,...,...
65,\n 199\n,\n\n\nT. Marshall Jr.\n\n ...,\n\n 1\n\n,\n 161.34\n,\n 112/207\n,\n 28.2\n
66,\n 210\n,\n\n\nS. Shepard\n\n WR...,\n\n -2\n\n,\n 165.46\n,\n 114/217\n,\n 23.8\n
67,\n 211\n,\n\n\nJ. Reagor\n\n WR\...,\n\n -7\n\n,\n 165.56\n,\n 115/216\n,\n 21.1\n
68,\n 214\n,\n\n\nN. Agholor\n\n WR...,\n\n -15\n\n,\n 165.90\n,\n 111/219\n,\n 22.2\n


In [9]:
df1 = df1.replace(r'\n',' ', regex=True) 

In [10]:
df1 = df1.replace(r'\n',' ', regex=True) 

Unnamed: 0,\n \n Rank\n \n,\n \n Player\n \n,\n \n Trend\n \n,\n \n Avg Pos\n \n,\n \n Hi/Lo\n \n,\n \n Pct\n \n
0,7,D. Adams WR ...,—,9.63,4/17,99.4
1,13,T. Hill WR ...,—,12.72,7/19,99.5
2,17,S. Diggs WR ...,-1,17.86,11/28,99.5
3,19,D. Hopkins WR ...,—,20.68,12/32,99.5
4,20,C. Ridley WR ...,—,21.94,14/32,99.5
...,...,...,...,...,...,...
65,199,T. Marshall Jr. WR...,1,161.34,112/207,28.2
66,210,S. Shepard WR ...,-2,165.46,114/217,23.8
67,211,J. Reagor WR ...,-7,165.56,115/216,21.1
68,214,N. Agholor WR ...,-15,165.90,111/219,22.2


In [11]:
df1.to_csv('WRadpstats.csv',index=False)