# D/ST Rankings
In standard fantasy football leagues, Defense/Special Teams (D/ST) scoring is primarily impacted by the following statistical categories:

- Points Allowed
- Yards Allowed
- Sacks
- Interceptions
- Fumble Recoveries

In this notebook I explore ways to automatically scrape historical data for the statistics above in an effort to predict defensive preformance in a given week. 

In [1]:
import requests
from bs4 import BeautifulSoup
from Levenshtein import distance
from data.team_abbreviations import team_abbreviations
from scipy.stats import poisson, t
import numpy as np

In [2]:
from collections import OrderedDict
from operator import itemgetter   

## Model Variables

In [3]:
# NFL week number
week_number = 5

# Home team advantage for predicted sacks
home_sack_adv = 1.1

# QB weight for predicted interceptions
qb_weight = 2

# Fumble recovery/game rate (assume constant)
fumble_rate = 0.6

## Scrape Data

### Game Schedule

In [4]:
nfl_schedule_url = "http://www.nfl.com/schedules/2018/REG{0}"
r = requests.get(nfl_schedule_url.format(week_number))
soup = BeautifulSoup(r.content,'html.parser')

In [5]:
games = []
for game in soup.find_all('div',class_='list-matchup-row-team')[1:]:
    away = game.find('span', class_='team-name away ').text
    home = game.find('span', class_='team-name home ').text
    games.append({'home': home, 'away':away})

In [6]:
def matchup_exists(teams):
    for game in games:
        if teams[0] == game['home'] or teams[0] == game['away']:
            if teams[1] == game['home'] or teams[1] == game['away']:
                return True
            else:
                return False

In [7]:
games

[{'home': 'Patriots', 'away': 'Colts'},
 {'home': 'Bills', 'away': 'Titans'},
 {'home': 'Steelers', 'away': 'Falcons'},
 {'home': 'Jets', 'away': 'Broncos'},
 {'home': 'Chiefs', 'away': 'Jaguars'},
 {'home': 'Lions', 'away': 'Packers'},
 {'home': 'Browns', 'away': 'Ravens'},
 {'home': 'Panthers', 'away': 'Giants'},
 {'home': 'Bengals', 'away': 'Dolphins'},
 {'home': 'Chargers', 'away': 'Raiders'},
 {'home': '49ers', 'away': 'Cardinals'},
 {'home': 'Eagles', 'away': 'Vikings'},
 {'home': 'Seahawks', 'away': 'Rams'},
 {'home': 'Texans', 'away': 'Cowboys'},
 {'home': 'Saints', 'away': 'Redskins'}]

### Vegas Scoring Predictions

In [8]:
espn_lines_url = "http://www.espn.com/nfl/lines"
r = requests.get(espn_lines_url)
soup = BeautifulSoup(r.content,'html.parser')

In [9]:
sportsline_link = "https://www.sportsline.com/nfl/picksheet/"
r = requests.get(sportsline_link)
soup = BeautifulSoup(r.content,'html.parser')

In [10]:
pred_dst_pa = {}
for row in soup.find_all('a',class_='row data-row'):
    away_abrv = row.find('div', {'itemprop': 'awayTeam'}).text.lower().replace('\n','').replace(' ','')
    home_abrv = (row.find('span', {'itemprop': 'homeTeam'}).text.lower().replace(' ',''))
    away_team = team_abbreviations[away_abrv]
    home_team = team_abbreviations[home_abrv]
    if matchup_exists([away_team, home_team]):
        try:
            over_under_text = row.find('div', class_='col two').find('div', class_='top').text
            over_under = float(over_under_text.split('/U ')[1].replace('\n',''))
            if 'PK' in row.find('div', class_='col two').find('div', class_='bottom').text:
                home_adv = 0
                away_adv = 0
            else:
                home_adv = float(row.find('div', class_='col two').find('div', class_='bottom').text)
            pred_dst_pa[home_team] = (over_under + home_adv)/2
            pred_dst_pa[away_team] = (over_under - home_adv)/2

        except:
#             print(away_team, home_team,' ----- Odds not availible')
            pass

In [11]:
# pred_dst_pa['Titans'] = 21.1
# pred_dst_pa['Texans'] = 21.0

### Yards Allowed

#### Game-by-game

In [12]:
teams = []
for abrv, team in team_abbreviations.items():
    if team not in teams:
        teams.append(team)

In [13]:
pfr_gameyds_link = "https://www.pro-football-reference.com/play-index/tgl_finder.cgi?request=1&match=game&year_min=2017&year_max=2018&game_type=R&game_num_min=0&game_num_max=99&week_num_min=0&week_num_max=99&temperature_gtlt=lt&c5val=1.0&order_by=tot_yds&offset={0}"

In [14]:
yd_history = {'def': {t:[] for t in teams},
            'off': {t:[] for t in teams}}

starting_offset = 0
while True:
    r = requests.get(pfr_gameyds_link.format(starting_offset))
    soup = BeautifulSoup(r.content, 'html.parser')
    if soup.find('table') == None:
        break
    else:
        for row in soup.find('table').find_all('tr')[2:]:
            try:
                off_abr = row.find('td', {'data-stat': 'team'}).text.lower()
                off_name = team_abbreviations[off_abr]
                def_abr = row.find('td', {'data-stat': 'opp'}).text.lower()
                def_name = team_abbreviations[def_abr]
                yds = int(row.find('td', {'data-stat': 'tot_yds'}).text)
                yd_history['off'][off_name].append(yds)
                yd_history['def'][def_name].append(yds)
            except:
                pass
    starting_offset+=100

In [15]:
yd_avgs = {off_def: {team: np.average(yds) for (team, yds) in yd_history[off_def].items()} for (off_def, game_yds) in yd_history.items()}

In [16]:
yd_stds = {off_def: {team: np.std(yds) for (team, yds) in yd_history[off_def].items()} for (off_def, game_yds) in yd_history.items()}

In [17]:
yd_stds = {off_def: {team: np.size(yds) for (team, yds) in yd_history[off_def].items()} for (off_def, game_yds) in yd_history.items()}

In [18]:
pred_dst_yds_avg = {}
pred_dst_yds_std = {}

for game in games:
    home = game['home']
    away = game['away']
    pred_dst_yds_avg[home] = (yd_avgs['def'][home] + yd_avgs['off'][away])/2
    pred_dst_yds_avg[away] = (yd_avgs['def'][away] + yd_avgs['off'][home])/2
    pred_dst_yds_std[home] = (yd_stds['def'][home] + yd_stds['off'][away])/2
    pred_dst_yds_std[away] = (yd_stds['def'][away] + yd_stds['off'][home])/2

### Sacks

In [19]:
pfr_sack_links = {'def': "https://www.pro-football-reference.com/years/2017/opp.htm",
                 'off': "https://www.pro-football-reference.com/years/2017/index.htm"}

In [20]:
sack_avgs = {'def': {},
            'off': {}}

for def_or_off, link in pfr_sack_links.items():
    r = requests.get(link)
    soup = BeautifulSoup(r.content,'html.parser')
    temp_soup = str(soup.find('div',{'id':'all_passing'})).split('<!--\n')[1].split('\n-->')[0]
    soup = BeautifulSoup(temp_soup,'html.parser')
    
    for team_row in soup.find_all('tr')[1:-3]:
        team_name = team_row.find('td', {'data-stat':'team'}).text.split(' ')[-1]
        team_sacks = int(team_row.find('td', {'data-stat':'pass_sacked'}).text)
        num_games = int(team_row.find('td', {'data-stat':'g'}).text)
        avg_sacks = team_sacks/num_games
        sack_avgs[def_or_off][team_name] = avg_sacks

In [21]:
pred_dst_sacks = {}
for game in games:
    home = game['home']
    away = game['away']
    pred_dst_sacks[home] = (home_sack_adv*sack_avgs['def'][home] + sack_avgs['off'][away])/(home_sack_adv + 1)
    pred_dst_sacks[away] = (sack_avgs['def'][away] + home_sack_adv*sack_avgs['off'][home])/(home_sack_adv + 1)

### Interceptions

#### Defensive interceptions

In [22]:
pfr_int_link = "https://www.pro-football-reference.com/years/2017/opp.htm"
r = requests.get(pfr_int_link)
soup = BeautifulSoup(r.content,'html.parser')
temp_soup = str(soup.find('div',{'id':'all_passing'})).split('<!--\n')[1].split('\n-->')[0]
soup = BeautifulSoup(temp_soup,'html.parser')

In [23]:
dst_int_rates = {}
for team_row in soup.find_all('tr')[1:-3]:
    team_name = team_row.find('td', {'data-stat':'team'}).text.split(' ')[-1]
    team_int = int(team_row.find('td', {'data-stat':'pass_int'}).text)
    num_games = int(team_row.find('td', {'data-stat':'g'}).text)
    avg_int = team_int/num_games
    dst_int_rates[team_name] = avg_int

#### Starting QBs

In [24]:
starting_qb_link = "http://www.ourlads.com/nfldepthcharts/depthchartpos/QB"
r = requests.get(starting_qb_link)
soup = BeautifulSoup(r.content,'html.parser')

In [25]:
starting_qbs = {}
for qb_row in soup.find('table').find_all('tr')[3:]:
    if qb_row.get('class')[0] in ['row-dc-wht', 'row-dc-grey']:
        team_abr = qb_row.find('img').get('src').split('thumb_')[1].split('.')[0].lower()
        team = team_abbreviations[team_abr]
        qb_name_list = qb_row.find_all('td')[3].text.replace(',','').lower().split(' ')[:-1]
        qb_name_list.reverse()
        starting_qbs[team] = ' '.join([name.capitalize() for name in qb_name_list])

#### Scrape QB ints

In [26]:
qb_int_link = "https://www.pro-football-reference.com/play-index/psl_finder.cgi?request=1&match=combined&year_min=2010&year_max=2018&season_start=1&season_end=-1&is_active=Y&pos%5B%5D=qb&draft_year_min=1936&draft_year_max=2018&draft_slot_min=1&draft_slot_max=500&draft_pick_in_round=pick_overall&conference=any&draft_pos%5B%5D=qb&draft_pos%5B%5D=rb&draft_pos%5B%5D=wr&draft_pos%5B%5D=te&draft_pos%5B%5D=e&draft_pos%5B%5D=t&draft_pos%5B%5D=g&draft_pos%5B%5D=c&draft_pos%5B%5D=ol&draft_pos%5B%5D=dt&draft_pos%5B%5D=de&draft_pos%5B%5D=dl&draft_pos%5B%5D=ilb&draft_pos%5B%5D=olb&draft_pos%5B%5D=lb&draft_pos%5B%5D=cb&draft_pos%5B%5D=s&draft_pos%5B%5D=db&draft_pos%5B%5D=k&draft_pos%5B%5D=p&c5val=1.0&order_by=pass_int"
r = requests.get(qb_int_link)
soup = BeautifulSoup(r.content,'html.parser')

In [27]:
qb_int_rates = {}
for row in soup.find('table').find_all('tr')[2:]:
    try:
        qb_name = row.find_all('td')[0].text
        qb_ints = int(row.find_all('td')[13].text)
        qb_games = int(row.find_all('td')[6].text)
        if qb_games > 5:
            qb_int_rates[qb_name] = qb_ints/qb_games
    except:
        pass  

In [28]:
off_int_rates = {}
for team, qb_name in starting_qbs.items():
    closest_match = None
    for temp_name, int_rate in qb_int_rates.items():
        dist = distance(qb_name, temp_name)
        if dist == 0:
            off_int_rates[team] = qb_int_rates[qb_name]

            break
        elif closest_match == None or dist < closest_match[1]:
            closest_match = [temp_name, dist]
    
    if team not in off_int_rates.keys():
        if closest_match[1] <= 3:
            off_int_rates[team] = qb_int_rates[closest_match[0]]

In [29]:
pred_dst_ints = {}
for game in games:
    home = game['home']
    away = game['away']
    
    if away in off_int_rates.keys():
        pred_dst_ints[home] = (dst_int_rates[home] + qb_weight*off_int_rates[away])/(1 + qb_weight)
    else:
        pred_dst_ints[home] = dst_int_rates[home]
        
    if home in off_int_rates.keys():
        pred_dst_ints[away] = (dst_int_rates[away] + qb_weight*off_int_rates[home])/(1 + qb_weight)
    else:
        pred_dst_ints[away] = dst_int_rates[away]

## Convert from predicted statistics to fantasy points

In [30]:
pred_ffpts = {team:0 for team in pred_dst_pa.keys()}

### Points Against

In [31]:
for team, pa in pred_dst_pa.items():
    pt_scoring_intervals = [0, 6, 13, 17, 27, 34, 45]
    pt_scoring = [5, 4, 3, 1, 0, -1, -3, -5]
    cdf = poisson.cdf(pt_scoring_intervals, mu=pa)
    pt_probs = []

    for i in range(0, len(pt_scoring_intervals)):
        if i == 0:
            pt_probs.append(cdf[i])
        else:
            pt_probs.append(cdf[i]-cdf[i-1])
    pt_probs.append(1 - cdf[len(cdf)-1])
    
    pred_score_pts = 0
    for i in range(0, len(pt_probs)):
        pred_score_pts += pt_probs[i]*pt_scoring[i]
    pred_ffpts[team] += pred_score_pts

### Yards Allowed

In [32]:
for team, yd_avg in pred_dst_yds_avg.items():
    yd_std = pred_dst_yds_std[team]
    yd_scoring_intervals = [99, 199, 299, 349, 399, 449, 499, 549]
    yd_scoring = [5, 3, 2, 0, -1, -3, -5, -6, -7]
    cdf = t.cdf(yd_scoring_intervals, loc=yd_avg, scale=yd_std, df=17)
    yd_probs = []

    for i in range(0, len(yd_scoring_intervals)):
        if i == 0:
            yd_probs.append(cdf[i])
        else:
            yd_probs.append(cdf[i]-cdf[i-1])
    yd_probs.append(1 - cdf[len(cdf)-1])
    
    pred_yd_pts = 0
    for i in range(0, len(yd_probs)):
        pred_yd_pts += yd_probs[i]*yd_scoring[i]
    pred_ffpts[team] += pred_yd_pts

### Sacks

In [33]:
for team, sacks in pred_dst_sacks.items():
    pred_ffpts[team] += 1 * pred_dst_sacks[team]

### Interceptions

In [34]:
for team, ints in pred_dst_ints.items():
    pred_ffpts[team] += 2 * pred_dst_ints[team]

### Fumble Recoveries
Fumbles are a very random occurance and are difficult to predict. I will assume that all teams will lose fumbles at roughly the average rate across the league (0.6 lost fumbles/game)

In [35]:
for team, pts in pred_ffpts.items():
    pred_ffpts[team] += 2 * fumble_rate

## Ranking

In [36]:
OrderedDict(sorted(pred_ffpts.items(), key=itemgetter(1), reverse=True))

OrderedDict([('Ravens', 7.18905833530289),
             ('Titans', 6.848851877588678),
             ('Panthers', 6.542944387007517),
             ('Patriots', 6.27259421982103),
             ('Jaguars', 6.163051351505042),
             ('Broncos', 5.891583601158774),
             ('Cowboys', 5.801426302692557),
             ('49ers', 5.747069375664649),
             ('Rams', 5.6173512233157785),
             ('Cardinals', 5.283366527450448),
             ('Bengals', 5.252287006973084),
             ('Eagles', 5.219682276458696),
             ('Bills', 5.006700576726671),
             ('Jets', 4.972795227136481),
             ('Saints', 4.911750645754587),
             ('Vikings', 4.805482005837138),
             ('Lions', 4.749975187733019),
             ('Packers', 4.723003226566771),
             ('Chargers', 4.567348915969121),
             ('Dolphins', 4.3364141193198735),
             ('Steelers', 4.27291721596265),
             ('Texans', 4.2500828419635),
             ('Browns',