In [1]:
# Import scraping modules
from urllib.request import urlopen
from bs4 import BeautifulSoup


# Import data manipulation modules
import pandas as pd
import numpy as np
# Import data visualization modules
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# URL of page
url = 'https://www.fantasypros.com/nfl/stats/k.php?scoring=HALF&ownership=y'
# Open URL and pass to BeautifulSoup
html = urlopen(url)
stats_page = BeautifulSoup(html)


In [4]:
# Collect table headers
column_headers = stats_page.findAll('tr')[0]
column_headers = [i.getText() for i in column_headers.findAll('th')]

print(column_headers)

['Rank', 'Player', 'FG', 'FGA', 'PCT', 'LG', '1-19', '20-29', '30-39', '40-49', '50+', 'XPT', 'XPA', 'G', ' FPTS', 'FPTS/G', 'OWN']


In [12]:
# Collect table rows
rows = stats_page.findAll('tr')[1:]
# Get stats from each row
k_stats = []
for i in range(len(rows)):
  k_stats.append([col.getText() for col in rows[i].findAll('td')])

In [13]:
df = pd.DataFrame(k_stats, columns=column_headers)
df

Unnamed: 0,Rank,Player,FG,FGA,PCT,LG,1-19,20-29,30-39,40-49,50+,XPT,XPA,G,FPTS,FPTS/G,OWN
0,1,Jason Sanders (MIA),36,39,92.3,56,1,8,7,12,8,36,36,16,172.0,10.8,75.0%
1,2,Younghoe Koo (ATL),37,39,94.9,54,0,10,11,8,8,33,36,15,168.0,11.2,97.0%
2,3,Tyler Bass (BUF),28,34,82.4,58,1,9,6,8,4,57,59,16,157.0,9.8,80.0%
3,4,Daniel Carlson (LV),33,35,94.3,54,0,18,8,3,4,45,47,16,155.0,9.7,17.0%
4,5,Greg Zuerlein (DAL),34,41,82.9,59,1,5,13,12,3,33,36,16,153.0,9.6,84.0%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,78,Garrett Lindholm (ATL),0,0,0,0,0,0,0,0,0,0,0,0,0,0,
78,79,Connor Hughes (DAL),0,0,0,0,0,0,0,0,0,0,0,0,0,0,
79,80,Clint Stitser (WAS),0,0,0,0,0,0,0,0,0,0,0,0,0,0,
80,81,Brian Johnson (CHI),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0%


In [15]:
df['Team'] = df['Player'].str[-6:]
position = 'K'
df['Pos'] = position
df.head()

Unnamed: 0,Rank,Player,FG,FGA,PCT,LG,1-19,20-29,30-39,40-49,50+,XPT,XPA,G,FPTS,FPTS/G,OWN,Team,Pos
0,1,Jason Sanders (MIA),36,39,92.3,56,1,8,7,12,8,36,36,16,172.0,10.8,75.0%,(MIA),K
1,2,Younghoe Koo (ATL),37,39,94.9,54,0,10,11,8,8,33,36,15,168.0,11.2,97.0%,(ATL),K
2,3,Tyler Bass (BUF),28,34,82.4,58,1,9,6,8,4,57,59,16,157.0,9.8,80.0%,(BUF),K
3,4,Daniel Carlson (LV),33,35,94.3,54,0,18,8,3,4,45,47,16,155.0,9.7,17.0%,(LV),K
4,5,Greg Zuerlein (DAL),34,41,82.9,59,1,5,13,12,3,33,36,16,153.0,9.6,84.0%,(DAL),K


In [16]:
new_columns = df.columns.values
new_columns[-3] = 'Pct_Owned'
new_columns[-5] = 'Fantasy_Points'
new_columns[-4] = 'Fantasy_Points_Per_Game'
new_columns[-6] = 'Games'

df.columns = new_columns

df.head()


Unnamed: 0,Rank,Player,FG,FGA,PCT,LG,1-19,20-29,30-39,40-49,50+,XPT,XPA,Games,Fantasy_Points,Fantasy_Points_Per_Game,Pct_Owned,Team,Pos
0,1,Jason Sanders (MIA),36,39,92.3,56,1,8,7,12,8,36,36,16,172.0,10.8,75.0%,(MIA),K
1,2,Younghoe Koo (ATL),37,39,94.9,54,0,10,11,8,8,33,36,15,168.0,11.2,97.0%,(ATL),K
2,3,Tyler Bass (BUF),28,34,82.4,58,1,9,6,8,4,57,59,16,157.0,9.8,80.0%,(BUF),K
3,4,Daniel Carlson (LV),33,35,94.3,54,0,18,8,3,4,45,47,16,155.0,9.7,17.0%,(LV),K
4,5,Greg Zuerlein (DAL),34,41,82.9,59,1,5,13,12,3,33,36,16,153.0,9.6,84.0%,(DAL),K


In [18]:
# Create data subset for radar chart
categories = ['Games','FG','FGA','PCT','LG','1-19','20-29','30-39','40-49','50+','XPT','XPA','Fantasy_Points','Fantasy_Points_Per_Game','Pct_Owned']
df1 = df[['Rank', 'Player','Team','Pos'] + categories]
df1.head()

Unnamed: 0,Rank,Player,Team,Pos,Games,FG,FGA,PCT,LG,1-19,20-29,30-39,40-49,50+,XPT,XPA,Fantasy_Points,Fantasy_Points_Per_Game,Pct_Owned
0,1,Jason Sanders (MIA),(MIA),K,16,36,39,92.3,56,1,8,7,12,8,36,36,172.0,10.8,75.0%
1,2,Younghoe Koo (ATL),(ATL),K,15,37,39,94.9,54,0,10,11,8,8,33,36,168.0,11.2,97.0%
2,3,Tyler Bass (BUF),(BUF),K,16,28,34,82.4,58,1,9,6,8,4,57,59,157.0,9.8,80.0%
3,4,Daniel Carlson (LV),(LV),K,16,33,35,94.3,54,0,18,8,3,4,45,47,155.0,9.7,17.0%
4,5,Greg Zuerlein (DAL),(DAL),K,16,34,41,82.9,59,1,5,13,12,3,33,36,153.0,9.6,84.0%


In [19]:
df1['Player'] = df1['Player'].str.slice(0, -6)
df1.head()


Unnamed: 0,Rank,Player,Team,Pos,Games,FG,FGA,PCT,LG,1-19,20-29,30-39,40-49,50+,XPT,XPA,Fantasy_Points,Fantasy_Points_Per_Game,Pct_Owned
0,1,Jason Sanders,(MIA),K,16,36,39,92.3,56,1,8,7,12,8,36,36,172.0,10.8,75.0%
1,2,Younghoe Koo,(ATL),K,15,37,39,94.9,54,0,10,11,8,8,33,36,168.0,11.2,97.0%
2,3,Tyler Bass,(BUF),K,16,28,34,82.4,58,1,9,6,8,4,57,59,157.0,9.8,80.0%
3,4,Daniel Carlson,(LV),K,16,33,35,94.3,54,0,18,8,3,4,45,47,155.0,9.7,17.0%
4,5,Greg Zuerlein,(DAL),K,16,34,41,82.9,59,1,5,13,12,3,33,36,153.0,9.6,84.0%


In [22]:
ints = ['Games','FG','FGA','LG','1-19','20-29','30-39','40-49','50+','XPT','XPA']
floats = ['PCT','Fantasy_Points','Fantasy_Points_Per_Game',]
df1['Games'] = df1['Games'].astype('int64')

for i in ints:
    df1[i] = df1[i].astype('int64')

for i in floats:
    df1[i] = df1[i].astype('float64')

In [23]:
df1.dtypes

Rank                        object
Player                      object
Team                        object
Pos                         object
Games                        int64
FG                           int64
FGA                          int64
PCT                        float64
LG                           int64
1-19                         int64
20-29                        int64
30-39                        int64
40-49                        int64
50+                          int64
XPT                          int64
XPA                          int64
Fantasy_Points             float64
Fantasy_Points_Per_Game    float64
Pct_Owned                   object
dtype: object

In [24]:
df1

Unnamed: 0,Rank,Player,Team,Pos,Games,FG,FGA,PCT,LG,1-19,20-29,30-39,40-49,50+,XPT,XPA,Fantasy_Points,Fantasy_Points_Per_Game,Pct_Owned
0,1,Jason Sanders,(MIA),K,16,36,39,92.3,56,1,8,7,12,8,36,36,172.0,10.8,75.0%
1,2,Younghoe Koo,(ATL),K,15,37,39,94.9,54,0,10,11,8,8,33,36,168.0,11.2,97.0%
2,3,Tyler Bass,(BUF),K,16,28,34,82.4,58,1,9,6,8,4,57,59,157.0,9.8,80.0%
3,4,Daniel Carlson,(LV),K,16,33,35,94.3,54,0,18,8,3,4,45,47,155.0,9.7,17.0%
4,5,Greg Zuerlein,(DAL),K,16,34,41,82.9,59,1,5,13,12,3,33,36,153.0,9.6,84.0%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,78,Garrett Lindholm,(ATL),K,0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,
78,79,Connor Hughes,(DAL),K,0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,
79,80,Clint Stitser,(WAS),K,0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,
80,81,Brian Johnson,(CHI),K,0,0,0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0%


In [25]:
df1.to_csv('Kstats.csv',index=False)

In [2]:
# URL of page
url = 'https://www.cbssports.com/fantasy/football/draft/averages/both/h2h/K/'
# Open URL and pass to BeautifulSoup
html = urlopen(url)
stats_page = BeautifulSoup(html)


In [3]:
# Collect table headers
column_headers = stats_page.findAll('tr')[0]
column_headers = [i.getText() for i in column_headers.findAll('th')]

print(column_headers)

['\n                \n                                    Rank\n                \n                            ', '\n                \n                                    Player\n                \n                            ', '\n                \n                                    Trend\n                \n                            ', '\n                \n                                    Avg Pos\n                \n                            ', '\n                \n                                    Hi/Lo\n                \n                            ', '\n                \n                                    Pct\n                \n                            ']


In [4]:
# Collect table rows
rows = stats_page.findAll('tr')[1:]
# Get stats from each row
k_stats = []
for i in range(len(rows)):
    k_stats.append([col.getText() for col in rows[i].findAll('td')])
    #rb_stats.strip('\n')


In [5]:
df1 = pd.DataFrame(k_stats, columns=column_headers)
df1

Unnamed: 0,\n \n Rank\n \n,\n \n Player\n \n,\n \n Trend\n \n,\n \n Avg Pos\n \n,\n \n Hi/Lo\n \n,\n \n Pct\n \n
0,\n 111\n,\n\n\nJ. Tucker\n\n K\n...,\n\n 12\n\n,\n 114.53\n,\n 68/162\n,\n 95.6\n
1,\n 120\n,\n\n\nH. Butker\n\n K\n...,\n\n -8\n\n,\n 117.42\n,\n 72/163\n,\n 95.7\n
2,\n 135\n,\n\n\nG. Zuerlein\n\n K...,\n\n 2\n\n,\n 130.44\n,\n 73/183\n,\n 94.3\n
3,\n 156\n,\n\n\nY. Koo\n\n K\n ...,\n\n 1\n\n,\n 138.92\n,\n 90/184\n,\n 94.6\n
4,\n 162\n,\n\n\nT. Bass\n\n K\n ...,\n\n 1\n\n,\n 142.04\n,\n 92/188\n,\n 94.2\n
5,\n 166\n,\n\n\nR. Succop\n\n K\n...,\n\n -11\n\n,\n 145.38\n,\n 93/194\n,\n 91.0\n
6,\n 175\n,\n\n\nJ. Sanders\n\n K\...,\n\n 2\n\n,\n 149.15\n,\n 100/194\n,\n 91.0\n
7,\n 187\n,\n\n\nR. Blankenship\n\n ...,\n\n 5\n\n,\n 154.02\n,\n 105/198\n,\n 88.3\n
8,\n 201\n,\n\n\nB. McManus\n\n K\...,\n\n 23\n\n,\n 162.23\n,\n 112/210\n,\n 61.1\n
9,\n 202\n,\n\n\nM. Crosby\n\n K\n...,\n\n 7\n\n,\n 162.28\n,\n 109/213\n,\n 56.3\n


In [6]:
df1 = df1.replace(r'\n',' ', regex=True) 

In [7]:
df1

Unnamed: 0,\n \n Rank\n \n,\n \n Player\n \n,\n \n Trend\n \n,\n \n Avg Pos\n \n,\n \n Hi/Lo\n \n,\n \n Pct\n \n
0,111,J. Tucker K ...,12,114.53,68/162,95.6
1,120,H. Butker K ...,-8,117.42,72/163,95.7
2,135,G. Zuerlein K ...,2,130.44,73/183,94.3
3,156,Y. Koo K ...,1,138.92,90/184,94.6
4,162,T. Bass K ...,1,142.04,92/188,94.2
5,166,R. Succop K ...,-11,145.38,93/194,91.0
6,175,J. Sanders K ...,2,149.15,100/194,91.0
7,187,R. Blankenship K ...,5,154.02,105/198,88.3
8,201,B. McManus K ...,23,162.23,112/210,61.1
9,202,M. Crosby K ...,7,162.28,109/213,56.3


In [8]:
df1.to_csv('Kadpstats.csv',index=False)