In [1]:
# Import scraping modules
from urllib.request import urlopen
from bs4 import BeautifulSoup


# Import data manipulation modules
import pandas as pd
import numpy as np
# Import data visualization modules
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# URL of page
url = 'https://www.fantasypros.com/nfl/stats/dst.php?scoring=HALF&ownership=y'
# Open URL and pass to BeautifulSoup
html = urlopen(url)
stats_page = BeautifulSoup(html)


In [5]:
# Collect table headers
column_headers = stats_page.findAll('tr')[0]
column_headers = [i.getText() for i in column_headers.findAll('th')]

print(column_headers)

['Rank', 'Player', 'SACK', 'INT', 'FR', 'FF', 'DEF TD', 'SFTY', 'SPC TD', 'G', ' FPTS', 'FPTS/G', 'OWN']


In [10]:
# Collect table rows
rows = stats_page.findAll('tr')[1:]
# Get stats from each row
dst_stats = []
for i in range(len(rows)):
  dst_stats.append([col.getText() for col in rows[i].findAll('td')])

In [11]:
df = pd.DataFrame(dst_stats, columns=column_headers)
df

Unnamed: 0,Rank,Player,SACK,INT,FR,FF,DEF TD,SFTY,SPC TD,G,FPTS,FPTS/G,OWN
0,1,Los Angeles Rams (LAR),53,14,8,12,4,1,0,16,151.0,9.4,99.0%
1,2,Pittsburgh Steelers (PIT),56,18,9,12,3,1,0,17,148.0,8.7,96.0%
2,3,Indianapolis Colts (IND),40,15,10,11,4,3,2,16,146.0,9.1,90.0%
3,4,Miami Dolphins (MIA),41,18,10,14,2,0,1,16,141.0,8.8,50.0%
4,5,Baltimore Ravens (BAL),39,10,12,22,3,2,1,16,141.0,8.8,97.0%
5,6,Washington Football Team (WAS),47,16,7,13,3,1,0,16,127.0,7.9,96.0%
6,7,New Orleans Saints (NO),45,18,8,6,1,1,0,16,124.0,7.8,62.0%
7,8,Buffalo Bills (BUF),38,15,11,14,3,0,1,16,118.0,7.4,79.0%
8,9,Tampa Bay Buccaneers (TB),48,15,10,15,1,1,0,16,117.0,7.3,96.0%
9,10,Arizona Cardinals (ARI),48,11,10,12,0,1,0,16,108.0,6.8,12.0%


In [14]:
position = 'DEF'
df['Pos'] = position
df.head()

Unnamed: 0,Rank,Player,SACK,INT,FR,FF,DEF TD,SFTY,SPC TD,G,FPTS,FPTS/G,OWN,Pos
0,1,Los Angeles Rams (LAR),53,14,8,12,4,1,0,16,151.0,9.4,99.0%,DEF
1,2,Pittsburgh Steelers (PIT),56,18,9,12,3,1,0,17,148.0,8.7,96.0%,DEF
2,3,Indianapolis Colts (IND),40,15,10,11,4,3,2,16,146.0,9.1,90.0%,DEF
3,4,Miami Dolphins (MIA),41,18,10,14,2,0,1,16,141.0,8.8,50.0%,DEF
4,5,Baltimore Ravens (BAL),39,10,12,22,3,2,1,16,141.0,8.8,97.0%,DEF


In [18]:
new_columns = df.columns.values
new_columns[-2] = 'Pct_Owned'
new_columns[-4] = 'Fantasy_Points'
new_columns[-3] = 'Fantasy_Points_Per_Game'
new_columns[-5] = 'Games'
new_columns[-8] = 'Def_TD'
new_columns[-6] = 'Special_Team_TD'
new_columns[1] = 'Team'


df.columns = new_columns

df.head()


Unnamed: 0,Rank,Team,Sack,INT,FR,FF,Def_TD,SFTY,Special_Team_TD,Games,Fantasy_Points,Fantasy_Points_Per_Game,Pct_Owned,Pos
0,1,Los Angeles Rams (LAR),53,14,8,12,4,1,0,16,151.0,9.4,99.0%,DEF
1,2,Pittsburgh Steelers (PIT),56,18,9,12,3,1,0,17,148.0,8.7,96.0%,DEF
2,3,Indianapolis Colts (IND),40,15,10,11,4,3,2,16,146.0,9.1,90.0%,DEF
3,4,Miami Dolphins (MIA),41,18,10,14,2,0,1,16,141.0,8.8,50.0%,DEF
4,5,Baltimore Ravens (BAL),39,10,12,22,3,2,1,16,141.0,8.8,97.0%,DEF


In [19]:
# Create data subset for radar chart
categories = ['Games','Sack','INT','FR','FF','Def_TD','Special_Team_TD','Fantasy_Points','Fantasy_Points_Per_Game','Pct_Owned']
df1 = df[['Rank', 'Team','Pos'] + categories]
df1.head()

Unnamed: 0,Rank,Team,Pos,Games,Sack,INT,FR,FF,Def_TD,Special_Team_TD,Fantasy_Points,Fantasy_Points_Per_Game,Pct_Owned
0,1,Los Angeles Rams (LAR),DEF,16,53,14,8,12,4,0,151.0,9.4,99.0%
1,2,Pittsburgh Steelers (PIT),DEF,17,56,18,9,12,3,0,148.0,8.7,96.0%
2,3,Indianapolis Colts (IND),DEF,16,40,15,10,11,4,2,146.0,9.1,90.0%
3,4,Miami Dolphins (MIA),DEF,16,41,18,10,14,2,1,141.0,8.8,50.0%
4,5,Baltimore Ravens (BAL),DEF,16,39,10,12,22,3,1,141.0,8.8,97.0%


In [21]:
df1['Team'] = df1['Team'].str.slice(0, -6)
df1.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Rank,Team,Pos,Games,Sack,INT,FR,FF,Def_TD,Special_Team_TD,Fantasy_Points,Fantasy_Points_Per_Game,Pct_Owned
0,1,Los Angeles Rams,DEF,16,53,14,8,12,4,0,151.0,9.4,99.0%
1,2,Pittsburgh Steelers,DEF,17,56,18,9,12,3,0,148.0,8.7,96.0%
2,3,Indianapolis Colts,DEF,16,40,15,10,11,4,2,146.0,9.1,90.0%
3,4,Miami Dolphins,DEF,16,41,18,10,14,2,1,141.0,8.8,50.0%
4,5,Baltimore Ravens,DEF,16,39,10,12,22,3,1,141.0,8.8,97.0%


In [22]:
ints = ['Games','Sack','INT','FR','FF','Def_TD','Special_Team_TD']
floats = ['Fantasy_Points','Fantasy_Points_Per_Game',]
df1['Games'] = df1['Games'].astype('int64')

for i in ints:
    df1[i] = df1[i].astype('int64')

for i in floats:
    df1[i] = df1[i].astype('float64')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [23]:
df1.dtypes

Rank                        object
Team                        object
Pos                         object
Games                        int64
Sack                         int64
INT                          int64
FR                           int64
FF                           int64
Def_TD                       int64
Special_Team_TD              int64
Fantasy_Points             float64
Fantasy_Points_Per_Game    float64
Pct_Owned                   object
dtype: object

In [24]:
df1

Unnamed: 0,Rank,Team,Pos,Games,Sack,INT,FR,FF,Def_TD,Special_Team_TD,Fantasy_Points,Fantasy_Points_Per_Game,Pct_Owned
0,1,Los Angeles Rams,DEF,16,53,14,8,12,4,0,151.0,9.4,99.0%
1,2,Pittsburgh Steelers,DEF,17,56,18,9,12,3,0,148.0,8.7,96.0%
2,3,Indianapolis Colts,DEF,16,40,15,10,11,4,2,146.0,9.1,90.0%
3,4,Miami Dolphins,DEF,16,41,18,10,14,2,1,141.0,8.8,50.0%
4,5,Baltimore Ravens,DEF,16,39,10,12,22,3,1,141.0,8.8,97.0%
5,6,Washington Football Team,DEF,16,47,16,7,13,3,0,127.0,7.9,96.0%
6,7,New Orleans Saints,DEF,16,45,18,8,6,1,0,124.0,7.8,62.0%
7,8,Buffalo Bills,DEF,16,38,15,11,14,3,1,118.0,7.4,79.0%
8,9,Tampa Bay Buccaneers,DEF,16,48,15,10,15,1,0,117.0,7.3,96.0%
9,10,Arizona Cardinals,DEF,16,48,11,10,12,0,0,108.0,6.8,12.0%


In [25]:
df1.to_csv('Defstats.csv',index=False)

In [4]:
# URL of page
url = 'https://www.cbssports.com/fantasy/football/draft/averages/both/h2h/DST/'
# Open URL and pass to BeautifulSoup
html = urlopen(url)
stats_page = BeautifulSoup(html)


In [5]:
# Collect table headers
column_headers = stats_page.findAll('tr')[0]
column_headers = [i.getText() for i in column_headers.findAll('th')]

print(column_headers)

['\n                \n                                    Rank\n                \n                            ', '\n                \n                                    Player\n                \n                            ', '\n                \n                                    Trend\n                \n                            ', '\n                \n                                    Avg Pos\n                \n                            ', '\n                \n                                    Hi/Lo\n                \n                            ', '\n                \n                                    Pct\n                \n                            ']


In [6]:
# Collect table rows
rows = stats_page.findAll('tr')[1:]
# Get stats from each row
def_stats = []
for i in range(len(rows)):
    def_stats.append([col.getText() for col in rows[i].findAll('td')])
    #rb_stats.strip('\n')

In [7]:
df1 = pd.DataFrame(def_stats, columns=column_headers)
df1

Unnamed: 0,\n \n Rank\n \n,\n \n Player\n \n,\n \n Trend\n \n,\n \n Avg Pos\n \n,\n \n Hi/Lo\n \n,\n \n Pct\n \n
0,\n 82\n,\n\n\n.\n\n DST\n ...,\n\n 3\n\n,\n 87.39\n,\n 42/138\n,\n 94.1\n
1,\n 96\n,\n\n\n.\n\n DST\n ...,\n\n -2\n\n,\n 102.30\n,\n 57/153\n,\n 94.0\n
2,\n 103\n,\n\n\n.\n\n DST\n ...,\n\n 4\n\n,\n 107.10\n,\n 61/155\n,\n 93.5\n
3,\n 116\n,\n\n\n.\n\n DST\n ...,\n\n 1\n\n,\n 115.62\n,\n 74/159\n,\n 92.9\n
4,\n 123\n,\n\n\n.\n\n DST\n ...,\n\n -5\n\n,\n 119.28\n,\n 72/167\n,\n 91.7\n
5,\n 124\n,\n\n\n.\n\n DST\n ...,\n —\n,\n 122.12\n,\n 71/178\n,\n 86.9\n
6,\n 134\n,\n\n\n.\n\n DST\n ...,\n\n -6\n\n,\n 129.74\n,\n 76/180\n,\n 84.6\n
7,\n 149\n,\n\n\n.\n\n DST\n ...,\n\n -1\n\n,\n 135.62\n,\n 85/185\n,\n 80.1\n
8,\n 152\n,\n\n\n.\n\n DST\n ...,\n\n 1\n\n,\n 136.47\n,\n 72/192\n,\n 61.8\n
9,\n 159\n,\n\n\n.\n\n DST\n ...,\n\n -24\n\n,\n 140.87\n,\n 84/191\n,\n 73.2\n


In [8]:
df1 = df1.replace(r'\n',' ', regex=True) 

In [9]:
df1

Unnamed: 0,\n \n Rank\n \n,\n \n Player\n \n,\n \n Trend\n \n,\n \n Avg Pos\n \n,\n \n Hi/Lo\n \n,\n \n Pct\n \n
0,82,. DST ...,3,87.39,42/138,94.1
1,96,. DST ...,-2,102.3,57/153,94.0
2,103,. DST ...,4,107.1,61/155,93.5
3,116,. DST ...,1,115.62,74/159,92.9
4,123,. DST ...,-5,119.28,72/167,91.7
5,124,. DST ...,—,122.12,71/178,86.9
6,134,. DST ...,-6,129.74,76/180,84.6
7,149,. DST ...,-1,135.62,85/185,80.1
8,152,. DST ...,1,136.47,72/192,61.8
9,159,. DST ...,-24,140.87,84/191,73.2


In [10]:
df1.to_csv('defadpstats.csv',index=False)