In [1]:
# Import scraping modules
from urllib.request import urlopen
from bs4 import BeautifulSoup


# Import data manipulation modules
import pandas as pd
import numpy as np
# Import data visualization modules
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [16]:
# URL of page
url = 'https://www.fantasypros.com/nfl/stats/te.php?scoring=HALF&ownership=y'
# Open URL and pass to BeautifulSoup
html = urlopen(url)
stats_page = BeautifulSoup(html)


In [17]:
# Collect table headers
column_headers = stats_page.findAll('tr')[1]
column_headers = [i.getText() for i in column_headers.findAll('th')]

print(column_headers)

['Rank', 'Player', 'REC', 'TGT', 'YDS', 'Y/R', 'LG', '20+', 'TD', 'ATT', 'YDS', 'TD', 'FL', 'G', ' FPTS', 'FPTS/G', 'OWN']


In [18]:
# Collect table rows
rows = stats_page.findAll('tr')[2:]
# Get stats from each row
te_stats = []
for i in range(len(rows)):
  te_stats.append([col.getText() for col in rows[i].findAll('td')])

In [19]:
df = pd.DataFrame(te_stats, columns=column_headers)
df

Unnamed: 0,Rank,Player,REC,TGT,YDS,Y/R,LG,20+,TD,ATT,YDS.1,TD.1,FL,G,FPTS,FPTS/G,OWN
0,1,Travis Kelce (KC),105,145,1416,13.5,45,0,11,0,0,0,1,15,260.3,17.4,100.0%
1,2,Darren Waller (LV),107,145,1196,11.2,38,0,9,0,0,0,2,16,225.1,14.1,100.0%
2,3,Robert Tonyan (GB),52,59,586,11.3,45,0,11,0,0,0,0,16,150.6,9.4,91.0%
3,4,T.J. Hockenson (DET),67,101,723,10.8,51,0,6,1,0,0,1,16,141.8,8.9,99.0%
4,5,Mark Andrews (BAL),58,88,701,12.1,39,0,7,0,0,0,0,14,141.1,10.1,99.0%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
258,259,Khari Lee (DET),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0%
259,260,Kelvin Benjamin (NYG),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0%
260,261,Xavier Grimble (BAL),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0%
261,262,Kyle Nelson (FA),0,0,0,0,0,0,0,0,0,0,0,3,0.0,0.0,0.0%


In [20]:
df['Team'] = df['Player'].str[-6:]
position = 'TE'

df['Pos'] = position
df.head()

Unnamed: 0,Rank,Player,REC,TGT,YDS,Y/R,LG,20+,TD,ATT,YDS.1,TD.1,FL,G,FPTS,FPTS/G,OWN,Team,Pos
0,1,Travis Kelce (KC),105,145,1416,13.5,45,0,11,0,0,0,1,15,260.3,17.4,100.0%,(KC),TE
1,2,Darren Waller (LV),107,145,1196,11.2,38,0,9,0,0,0,2,16,225.1,14.1,100.0%,(LV),TE
2,3,Robert Tonyan (GB),52,59,586,11.3,45,0,11,0,0,0,0,16,150.6,9.4,91.0%,(GB),TE
3,4,T.J. Hockenson (DET),67,101,723,10.8,51,0,6,1,0,0,1,16,141.8,8.9,99.0%,(DET),TE
4,5,Mark Andrews (BAL),58,88,701,12.1,39,0,7,0,0,0,0,14,141.1,10.1,99.0%,(BAL),TE


In [21]:
new_columns = df.columns.values
new_columns[-3] = 'Pct Owned'
new_columns[-5] = 'Fantasy Points'
new_columns[-4] = 'Fantasy Points Per Game'
new_columns[-11] = 'Rec TD'
new_columns[-7] = 'Fumbles Lost'
new_columns[-6] = 'Games'
new_columns[4] = 'Rec YDS'
new_columns[-10] = 'Rush ATT'
new_columns[-8] = 'Rush TD'
new_columns[-9] = 'Rush YDS'
df.columns = new_columns

df.head()


Unnamed: 0,Rank,Player,REC,TGT,Rec YDS,Y/R,LG,20+,Rec TD,Rush ATT,Rush YDS,Rush TD,Fumbles Lost,Games,Fantasy Points,Fantasy Points Per Game,Pct Owned,Team,Pos
0,1,Travis Kelce (KC),105,145,1416,13.5,45,0,11,0,0,0,1,15,260.3,17.4,100.0%,(KC),TE
1,2,Darren Waller (LV),107,145,1196,11.2,38,0,9,0,0,0,2,16,225.1,14.1,100.0%,(LV),TE
2,3,Robert Tonyan (GB),52,59,586,11.3,45,0,11,0,0,0,0,16,150.6,9.4,91.0%,(GB),TE
3,4,T.J. Hockenson (DET),67,101,723,10.8,51,0,6,1,0,0,1,16,141.8,8.9,99.0%,(DET),TE
4,5,Mark Andrews (BAL),58,88,701,12.1,39,0,7,0,0,0,0,14,141.1,10.1,99.0%,(BAL),TE


In [22]:
# Create data subset for radar chart
categories = ['Games','REC','TGT','Rec YDS','Y/R','LG','Rec TD','Rush ATT','Rush YDS','Rush TD','Fumbles Lost','Fantasy Points','Fantasy Points Per Game','Pct Owned']
df1 = df[['Rank', 'Player','Team','Pos'] + categories]
df1.head()


Unnamed: 0,Rank,Player,Team,Pos,Games,REC,TGT,Rec YDS,Y/R,LG,Rec TD,Rush ATT,Rush YDS,Rush TD,Fumbles Lost,Fantasy Points,Fantasy Points Per Game,Pct Owned
0,1,Travis Kelce (KC),(KC),TE,15,105,145,1416,13.5,45,11,0,0,0,1,260.3,17.4,100.0%
1,2,Darren Waller (LV),(LV),TE,16,107,145,1196,11.2,38,9,0,0,0,2,225.1,14.1,100.0%
2,3,Robert Tonyan (GB),(GB),TE,16,52,59,586,11.3,45,11,0,0,0,0,150.6,9.4,91.0%
3,4,T.J. Hockenson (DET),(DET),TE,16,67,101,723,10.8,51,6,1,0,0,1,141.8,8.9,99.0%
4,5,Mark Andrews (BAL),(BAL),TE,14,58,88,701,12.1,39,7,0,0,0,0,141.1,10.1,99.0%


In [23]:
df1['Player'] = df1['Player'].str.slice(0, -6)
df1.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Rank,Player,Team,Pos,Games,REC,TGT,Rec YDS,Y/R,LG,Rec TD,Rush ATT,Rush YDS,Rush TD,Fumbles Lost,Fantasy Points,Fantasy Points Per Game,Pct Owned
0,1,Travis Kelce,(KC),TE,15,105,145,1416,13.5,45,11,0,0,0,1,260.3,17.4,100.0%
1,2,Darren Waller,(LV),TE,16,107,145,1196,11.2,38,9,0,0,0,2,225.1,14.1,100.0%
2,3,Robert Tonyan,(GB),TE,16,52,59,586,11.3,45,11,0,0,0,0,150.6,9.4,91.0%
3,4,T.J. Hockenson,(DET),TE,16,67,101,723,10.8,51,6,1,0,0,1,141.8,8.9,99.0%
4,5,Mark Andrews,(BAL),TE,14,58,88,701,12.1,39,7,0,0,0,0,141.1,10.1,99.0%


In [24]:
df1['Rec YDS'] = df1['Rec YDS'].str.replace(',', '')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [25]:
ints = ['Games','Rush ATT','Rush YDS','LG','Rush TD','REC','TGT','Rec YDS','Rec TD','Fumbles Lost']
floats = ['Y/R','Fantasy Points','Fantasy Points Per Game',]
df1['Games'] = df1['Games'].astype('int64')

for i in ints:
    df1[i] = df1[i].astype('int64')

for i in floats:
    df1[i] = df1[i].astype('float64')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [26]:
df1.dtypes

Rank                        object
Player                      object
Team                        object
Pos                         object
Games                        int64
REC                          int64
TGT                          int64
Rec YDS                      int64
Y/R                        float64
LG                           int64
Rec TD                       int64
Rush ATT                     int64
Rush YDS                     int64
Rush TD                      int64
Fumbles Lost                 int64
Fantasy Points             float64
Fantasy Points Per Game    float64
Pct Owned                   object
dtype: object

In [27]:
df1

Unnamed: 0,Rank,Player,Team,Pos,Games,REC,TGT,Rec YDS,Y/R,LG,Rec TD,Rush ATT,Rush YDS,Rush TD,Fumbles Lost,Fantasy Points,Fantasy Points Per Game,Pct Owned
0,1,Travis Kelce,(KC),TE,15,105,145,1416,13.5,45,11,0,0,0,1,260.3,17.4,100.0%
1,2,Darren Waller,(LV),TE,16,107,145,1196,11.2,38,9,0,0,0,2,225.1,14.1,100.0%
2,3,Robert Tonyan,(GB),TE,16,52,59,586,11.3,45,11,0,0,0,0,150.6,9.4,91.0%
3,4,T.J. Hockenson,(DET),TE,16,67,101,723,10.8,51,6,1,0,0,1,141.8,8.9,99.0%
4,5,Mark Andrews,(BAL),TE,14,58,88,701,12.1,39,7,0,0,0,0,141.1,10.1,99.0%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
258,259,Khari Lee,(DET),TE,0,0,0,0,0.0,0,0,0,0,0,0,0.0,0.0,0.0%
259,260,Kelvin Benjamin,(NYG),TE,0,0,0,0,0.0,0,0,0,0,0,0,0.0,0.0,0.0%
260,261,Xavier Grimble,(BAL),TE,0,0,0,0,0.0,0,0,0,0,0,0,0.0,0.0,0.0%
261,262,Kyle Nelson,(FA),TE,3,0,0,0,0.0,0,0,0,0,0,0,0.0,0.0,0.0%


In [28]:
df1.to_csv('TEstats.csv',index=False)

In [2]:
# URL of page
url = 'https://www.cbssports.com/fantasy/football/draft/averages/both/h2h/TE/'
# Open URL and pass to BeautifulSoup
html = urlopen(url)
stats_page = BeautifulSoup(html)


In [3]:
# Collect table headers
column_headers = stats_page.findAll('tr')[0]
column_headers = [i.getText() for i in column_headers.findAll('th')]

print(column_headers)

['\n                \n                                    Rank\n                \n                            ', '\n                \n                                    Player\n                \n                            ', '\n                \n                                    Trend\n                \n                            ', '\n                \n                                    Avg Pos\n                \n                            ', '\n                \n                                    Hi/Lo\n                \n                            ', '\n                \n                                    Pct\n                \n                            ']


In [4]:
# Collect table rows
rows = stats_page.findAll('tr')[1:]
# Get stats from each row
te_stats = []
for i in range(len(rows)):
    te_stats.append([col.getText() for col in rows[i].findAll('td')])
    #rb_stats.strip('\n')


In [5]:
df1 = pd.DataFrame(te_stats, columns=column_headers)
df1

Unnamed: 0,\n \n Rank\n \n,\n \n Player\n \n,\n \n Trend\n \n,\n \n Avg Pos\n \n,\n \n Hi/Lo\n \n,\n \n Pct\n \n
0,\n 8\n,\n\n\nT. Kelce\n\n TE\n...,\n —\n,\n 10.17\n,\n 4/19\n,\n 99.4\n
1,\n 23\n,\n\n\nG. Kittle\n\n TE\...,\n\n -2\n\n,\n 25.73\n,\n 12/43\n,\n 99.5\n
2,\n 25\n,\n\n\nD. Waller\n\n TE\...,\n\n 1\n\n,\n 29.11\n,\n 17/48\n,\n 99.5\n
3,\n 53\n,\n\n\nM. Andrews\n\n TE...,\n\n 2\n\n,\n 51.87\n,\n 32/84\n,\n 99.3\n
4,\n 55\n,\n\n\nK. Pitts\n\n TE\n...,\n\n -2\n\n,\n 53.56\n,\n 32/81\n,\n 99.6\n
5,\n 66\n,\n\n\nT. Hockenson\n\n ...,\n\n -3\n\n,\n 66.90\n,\n 44/111\n,\n 96.5\n
6,\n 79\n,\n\n\nL. Thomas\n\n TE\...,\n\n 20\n\n,\n 86.97\n,\n 57/141\n,\n 91.1\n
7,\n 85\n,\n\n\nN. Fant\n\n TE\n ...,\n\n -2\n\n,\n 89.47\n,\n 59/141\n,\n 91.1\n
8,\n 98\n,\n\n\nD. Goedert\n\n TE...,\n\n -8\n\n,\n 103.48\n,\n 65/152\n,\n 88.2\n
9,\n 99\n,\n\n\nT. Higbee\n\n TE\...,\n\n -4\n\n,\n 103.79\n,\n 65/153\n,\n 87.5\n


In [6]:
df1 = df1.replace(r'\n',' ', regex=True) 

In [7]:
df1

Unnamed: 0,\n \n Rank\n \n,\n \n Player\n \n,\n \n Trend\n \n,\n \n Avg Pos\n \n,\n \n Hi/Lo\n \n,\n \n Pct\n \n
0,8,T. Kelce TE ...,—,10.17,4/19,99.4
1,23,G. Kittle TE ...,-2,25.73,12/43,99.5
2,25,D. Waller TE ...,1,29.11,17/48,99.5
3,53,M. Andrews TE ...,2,51.87,32/84,99.3
4,55,K. Pitts TE ...,-2,53.56,32/81,99.6
5,66,T. Hockenson TE ...,-3,66.9,44/111,96.5
6,79,L. Thomas TE ...,20,86.97,57/141,91.1
7,85,N. Fant TE ...,-2,89.47,59/141,91.1
8,98,D. Goedert TE ...,-8,103.48,65/152,88.2
9,99,T. Higbee TE ...,-4,103.79,65/153,87.5


In [8]:
df1.to_csv('TEadpstats.csv',index=False)