# D/ST Rankings
In standard fantasy football leagues, Defense/Special Teams (D/ST) scoring is primarily impacted by the following statistical categories:

- Points Allowed
- Yards Allowed
- Sacks
- Interceptions

In [10]:
import requests
from bs4 import BeautifulSoup
from Levenshtein import distance
from data.team_abbreviations import team_abbreviations
from scipy.stats import poisson, t
import numpy as np

In [11]:
from collections import OrderedDict
from operator import itemgetter   

## Model Variables

In [12]:
# NFL week number; substituted into the schedule URL and the odds-mismatch warning
week_number = 2

# Home team advantage for predicted sacks: weight applied to the home team's
# sack averages when they are blended with the away team's (1.0 gives a plain average)
home_adv = 1.1

# QB weight for predicted interceptions: weight of the opposing starting QB's
# interception rate relative to the defense's own rate, which has weight 1
qb_weight = 2

# Fumble recovery/game rate (assume constant); the same value is credited to every team
fumble_rate = 0.6

## Scrape Data

### Game Schedule

In [5]:
# Download the NFL.com schedule page for `week_number` and parse it;
# `soup` is consumed by the matchup-extraction step that follows.
nfl_schedule_url = "http://www.nfl.com/schedules/2018/REG{0}"
r = requests.get(nfl_schedule_url.format(week_number))
soup = BeautifulSoup(r.content,'html.parser')

In [6]:
# One {'home', 'away'} dict per schedule row; the first two matching rows
# are skipped by the [2:] slice.
matchup_rows = soup.find_all('div', class_='list-matchup-row-team')[2:]
games = [
    {
        'home': matchup_row.find('span', class_='team-name home ').text,
        'away': matchup_row.find('span', class_='team-name away ').text,
    }
    for matchup_row in matchup_rows
]

In [7]:
def matchup_exists(teams, schedule=None):
    """Return True if the two given teams play each other in the schedule.

    Args:
        teams: Sequence of team names scraped for a single matchup. Exactly
            two distinct names are expected.
        schedule: Optional list of ``{'home': ..., 'away': ...}`` dicts to
            search. Defaults to the module-level ``games`` list.

    Returns:
        bool: True when some scheduled game pairs exactly these two teams;
        False otherwise, including when ``teams`` does not hold two names
        or neither team appears in the schedule.
    """
    if schedule is None:
        schedule = games
    if len(teams) != 2:
        return False
    # Compare as sets so the home/away order of the scraped names is irrelevant.
    wanted = set(teams)
    return any({game['home'], game['away']} == wanted for game in schedule)

### Vegas Scoring Predictions

In [8]:
# Download and parse the ESPN NFL lines page.
# NOTE(review): nothing reads this `soup` before the next request in the
# file reassigns it, so this fetch currently has no effect on the results.
espn_lines_url = "http://www.espn.com/nfl/lines"
r = requests.get(espn_lines_url)
soup = BeautifulSoup(r.content,'html.parser')

In [19]:
# Download and parse the SportsLine NFL pick sheet; `soup` is inspected by
# the team-label printout that follows.
sportsline_link = "https://www.sportsline.com/nfl/picksheet/"
r = requests.get(sportsline_link)
soup = BeautifulSoup(r.content,'html.parser')

In [31]:
# Print each listing's away label, then its home label, lower-cased with
# spaces removed (newlines are also stripped from the away label).
for listing in soup.find_all('a', class_='row data-row'):
    away_label = listing.find('div', {'itemprop': 'awayTeam'}).text
    print(away_label.lower().replace('\n', '').replace(' ', ''))
    home_label = listing.find('span', {'itemprop': 'homeTeam'}).text
    print(home_label.lower().replace(' ', ''))

nyj
det
lar
oak
bal
cin
ind
was
car
atlanta
min
gb
lac
buffalo
houston
ten
kc
pit
miami
nyj
phi
tb
cle
no
arizona
lar
detroit
sf
ne
jac
oakland
denver
nyg
dallas
seattle
chicago


In [17]:
# Download and parse the SuperBook NFL game-lines page used for the
# points-allowed predictions.
# NOTE(review): the recorded output of this request is an SSLError
# (certificate verify failed); when it raises, `soup` still holds the
# previously parsed page.
sportsbook_url = "http://www.superbook.com/sbk/sportsbook4/nfl-betting/nfl-nfl-game-lines.sbk"
r = requests.get(sportsbook_url)
soup = BeautifulSoup(r.content,'html.parser')

SSLError: HTTPSConnectionPool(host='www.superbook.com', port=443): Max retries exceeded with url: /sbk/sportsbook4/nfl-betting/nfl-nfl-game-lines.sbk (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'ssl3_get_server_certificate', 'certificate verify failed')])")))

In [None]:
# Predicted points allowed for each D/ST, keyed by the last word of the
# scraped team name.
#
# For every team row the first market is read as the game total (over/under)
# and the second as that team's signed point spread; the estimate is
# (total + spread) / 2.
# NOTE(review): this equals the opponent's implied score only if the markets
# appear in that order on the page -- confirm against the live layout.
pred_dst_pa = {}
for game in soup.find_all('div', class_='col-sm-12 eventbox'):
    matchup_teams = []
    for team in game.find_all('div', class_='eventrow clearfix'):
        team_name = team.find('span', class_='team').text.split(' ')[-1]
        markets = team.find_all('div', class_='market')
        over_under = float(markets[0].text.split(' ')[1].split('(')[0])
        adv = markets[1].text.split('(')[0].strip()
        try:
            # float() parses the leading '+'/'-' sign directly.
            spread = float(adv)
        except ValueError:
            # A non-numeric label (e.g. a pick'em marker) carries no spread:
            # warn and treat the line as even instead of reusing a value
            # computed for another team.
            print("WARNING, could not parse spread {0!r} for {1}; assuming 0".format(adv, team_name))
            spread = 0.0
        pred_dst_pa[team_name] = (over_under + spread)/2
        matchup_teams.append(team_name)

    # A box without exactly two team rows cannot be a valid matchup either.
    if len(matchup_teams) != 2 or not matchup_exists(matchup_teams):
        print("WARNING, scraped betting odds do not match week {0} matchups!".format(week_number))

### Yards Allowed

#### Game-by-game

In [None]:
# Unique team names in first-seen order (several abbreviations can map to
# the same team, so duplicates are collapsed).
teams = list(dict.fromkeys(team_abbreviations.values()))

In [None]:
# Pro-Football-Reference team game finder query (2017-2018 regular season,
# ordered by total yards); `{0}` is filled with the paging offset.
pfr_gameyds_link = "https://www.pro-football-reference.com/play-index/tgl_finder.cgi?request=1&match=game&year_min=2017&year_max=2018&game_type=R&game_num_min=0&game_num_max=99&week_num_min=0&week_num_max=99&temperature_gtlt=lt&c5val=1.0&order_by=tot_yds&offset={0}"

In [None]:
# Per-team lists of single-game total yards:
#   yd_history['off'][team] -> yards gained by that team's offense
#   yd_history['def'][team] -> yards allowed by that team's defense
yd_history = {side: {team_name: [] for team_name in teams}
              for side in ('def', 'off')}

# Page through the results 100 rows at a time.
starting_offset = 0
while True:
    r = requests.get(pfr_gameyds_link.format(starting_offset))
    soup = BeautifulSoup(r.content, 'html.parser')
    table = soup.find('table')
    if table is None:
        # No results table: we have run past the last page.
        break

    rows_parsed = 0
    for row in table.find_all('tr')[2:]:
        try:
            off_abr = row.find('td', {'data-stat': 'team'}).text.lower()
            off_name = team_abbreviations[off_abr]
            def_abr = row.find('td', {'data-stat': 'opp'}).text.lower()
            def_name = team_abbreviations[def_abr]
            yds = int(row.find('td', {'data-stat': 'tot_yds'}).text)
        except (AttributeError, KeyError, ValueError):
            # Rows lacking the expected cells (AttributeError), naming an
            # unknown abbreviation (KeyError) or holding a non-numeric
            # yardage (ValueError) are skipped on purpose.
            continue
        yd_history['off'][off_name].append(yds)
        yd_history['def'][def_name].append(yds)
        rows_parsed += 1

    if rows_parsed == 0:
        # A page with a table but no usable rows would otherwise loop forever.
        break
    starting_offset += 100

In [None]:
# Mean single-game yardage per team, for each side ('def' / 'off').
yd_avgs = {
    side: {team_name: np.average(game_totals) for team_name, game_totals in by_team.items()}
    for side, by_team in yd_history.items()
}

In [None]:
# Standard deviation of single-game yardage per team, for each side.
yd_stds = {
    side: {team_name: np.std(game_totals) for team_name, game_totals in by_team.items()}
    for side, by_team in yd_history.items()
}

In [None]:
# Number of games recorded per team, for each side. Stored under its own
# name so the standard deviations held in `yd_stds` are not overwritten by
# sample sizes before the yardage predictions read them.
yd_counts = {
    side: {team_name: np.size(game_totals) for team_name, game_totals in by_team.items()}
    for side, by_team in yd_history.items()
}

In [None]:
# Predicted yards allowed (mean and spread) for each defense: the plain
# average of that defense's own figure and the opposing offense's figure.
pred_dst_yds_avg = {}
pred_dst_yds_std = {}

for game in games:
    home, away = game['home'], game['away']
    # Each side's defense faces the other side's offense.
    for defense, offense in ((home, away), (away, home)):
        pred_dst_yds_avg[defense] = (yd_avgs['def'][defense] + yd_avgs['off'][offense])/2
        pred_dst_yds_std[defense] = (yd_stds['def'][defense] + yd_stds['off'][offense])/2

### Sacks

In [None]:
# Pro-Football-Reference 2017 season pages scraped for sack totals:
# 'def' is the opponent-stats page, 'off' is the season index page.
pfr_sack_links = {'def': "https://www.pro-football-reference.com/years/2017/opp.htm",
                 'off': "https://www.pro-football-reference.com/years/2017/index.htm"}

In [None]:
# Sacks per game for every team, keyed by side ('def' / 'off') and then by
# the last word of the team name.
sack_avgs = {side: {} for side in ('def', 'off')}

for side, page_url in pfr_sack_links.items():
    r = requests.get(page_url)
    soup = BeautifulSoup(r.content, 'html.parser')
    # The passing table sits inside an HTML comment within the
    # 'all_passing' div, so cut the comment body out and parse it separately.
    wrapper_html = str(soup.find('div', {'id': 'all_passing'}))
    commented_table = wrapper_html.split('<!--\n')[1].split('\n-->')[0]
    soup = BeautifulSoup(commented_table, 'html.parser')

    # The first row and the last three rows are excluded by the slice.
    for team_row in soup.find_all('tr')[1:-3]:
        nickname = team_row.find('td', {'data-stat': 'team'}).text.split(' ')[-1]
        sack_total = int(team_row.find('td', {'data-stat': 'pass_sacked'}).text)
        games_played = int(team_row.find('td', {'data-stat': 'g'}).text)
        sack_avgs[side][nickname] = sack_total/games_played

In [None]:
# Predicted sacks for each defense: a weighted average of the defense's own
# sack rate and the opposing offense's rate, with the home team's figure
# carrying weight `home_adv` and the away team's figure weight 1.
pred_dst_sacks = {}
weight_total = home_adv + 1
for game in games:
    home, away = game['home'], game['away']
    home_def = sack_avgs['def'][home]
    away_off = sack_avgs['off'][away]
    pred_dst_sacks[home] = (home_adv*home_def + away_off)/weight_total
    away_def = sack_avgs['def'][away]
    home_off = sack_avgs['off'][home]
    pred_dst_sacks[away] = (away_def + home_adv*home_off)/weight_total

### Interceptions

#### Defensive interceptions

In [None]:
# Download the 2017 opponent-stats page and re-parse its passing table.
pfr_int_link = "https://www.pro-football-reference.com/years/2017/opp.htm"
r = requests.get(pfr_int_link)
soup = BeautifulSoup(r.content,'html.parser')
# The table markup sits inside an HTML comment within the 'all_passing' div:
# take the text between '<!--\n' and '\n-->' and parse that on its own.
temp_soup = str(soup.find('div',{'id':'all_passing'})).split('<!--\n')[1].split('\n-->')[0]
soup = BeautifulSoup(temp_soup,'html.parser')

In [None]:
# Interceptions per game for each defense, keyed by the last word of the
# team name. The first row and the last three rows are excluded by the slice.
dst_int_rates = {}
for team_row in soup.find_all('tr')[1:-3]:
    nickname = team_row.find('td', {'data-stat': 'team'}).text.split(' ')[-1]
    interceptions = int(team_row.find('td', {'data-stat': 'pass_int'}).text)
    games_played = int(team_row.find('td', {'data-stat': 'g'}).text)
    dst_int_rates[nickname] = interceptions/games_played

#### Starting QBs

In [None]:
# Download and parse the Ourlads quarterback depth-chart page; `soup` is
# consumed by the starting-QB extraction that follows.
starting_qb_link = "http://www.ourlads.com/nfldepthcharts/depthchartpos/QB"
r = requests.get(starting_qb_link)
soup = BeautifulSoup(r.content,'html.parser')

In [None]:
# Map each team to a starting-QB name taken from the depth-chart table.
# Only rows whose first CSS class marks them as depth-chart rows are used.
starting_qbs = {}
depth_chart_row_classes = ('row-dc-wht', 'row-dc-grey')
for qb_row in soup.find('table').find_all('tr')[3:]:
    # A <tr> with no class attribute yields None here; treat it as "no
    # classes" so it is skipped instead of raising on None[0].
    row_classes = qb_row.get('class') or []
    if not row_classes or row_classes[0] not in depth_chart_row_classes:
        continue

    # The team abbreviation is embedded in the logo file name after 'thumb_'.
    team_abr = qb_row.find('img').get('src').split('thumb_')[1].split('.')[0].lower()
    team = team_abbreviations[team_abr]
    # Fourth cell: strip commas, drop the trailing token, then reverse the
    # remaining words and capitalize each one.
    qb_name_list = qb_row.find_all('td')[3].text.replace(',','').lower().split(' ')[:-1]
    qb_name_list.reverse()
    starting_qbs[team] = ' '.join(name.capitalize() for name in qb_name_list)

#### Scrape QB ints

In [None]:
# Pro-Football-Reference player season finder query (active QBs, 2010-2018
# combined, ordered by interceptions thrown); parsed into `soup` for the
# per-QB interception rates computed next.
qb_int_link = "https://www.pro-football-reference.com/play-index/psl_finder.cgi?request=1&match=combined&year_min=2010&year_max=2018&season_start=1&season_end=-1&is_active=Y&pos%5B%5D=qb&draft_year_min=1936&draft_year_max=2018&draft_slot_min=1&draft_slot_max=500&draft_pick_in_round=pick_overall&conference=any&draft_pos%5B%5D=qb&draft_pos%5B%5D=rb&draft_pos%5B%5D=wr&draft_pos%5B%5D=te&draft_pos%5B%5D=e&draft_pos%5B%5D=t&draft_pos%5B%5D=g&draft_pos%5B%5D=c&draft_pos%5B%5D=ol&draft_pos%5B%5D=dt&draft_pos%5B%5D=de&draft_pos%5B%5D=dl&draft_pos%5B%5D=ilb&draft_pos%5B%5D=olb&draft_pos%5B%5D=lb&draft_pos%5B%5D=cb&draft_pos%5B%5D=s&draft_pos%5B%5D=db&draft_pos%5B%5D=k&draft_pos%5B%5D=p&c5val=1.0&order_by=pass_int"
r = requests.get(qb_int_link)
soup = BeautifulSoup(r.content,'html.parser')

In [None]:
# Interceptions thrown per game for each quarterback with more than five
# games played, keyed by the name in the first table cell.
qb_int_rates = {}
for row in soup.find('table').find_all('tr')[2:]:
    cells = row.find_all('td')
    try:
        qb_name = cells[0].text
        qb_ints = int(cells[13].text)
        qb_games = int(cells[6].text)
    except (IndexError, ValueError):
        # Rows with too few <td> cells (IndexError) or non-numeric values
        # (ValueError) are skipped on purpose.
        continue
    # Small samples are excluded; this also rules out division by zero.
    if qb_games > 5:
        qb_int_rates[qb_name] = qb_ints/qb_games

In [None]:
# Interception rate of each team's starting QB. The QB is looked up by exact
# name first; failing that, the scraped name with the smallest Levenshtein
# distance is used, provided it is within `max_name_distance` edits. Teams
# with no acceptable match are left out.
off_int_rates = {}
max_name_distance = 3
for team, qb_name in starting_qbs.items():
    if qb_name in qb_int_rates:
        off_int_rates[team] = qb_int_rates[qb_name]
        continue
    if not qb_int_rates:
        # Nothing was scraped, so there is no candidate to fall back on.
        continue

    # min() keeps the first candidate on ties, matching dict iteration order.
    closest_name, closest_dist = min(
        ((candidate, distance(qb_name, candidate)) for candidate in qb_int_rates),
        key=lambda pair: pair[1],
    )
    if closest_dist <= max_name_distance:
        off_int_rates[team] = qb_int_rates[closest_name]

In [None]:
# Predicted interceptions for each defense: the defense's own rate blended
# with the opposing starting QB's rate (weight `qb_weight`); when no rate is
# known for that QB, the defense's own rate is used unchanged.
pred_dst_ints = {}
for game in games:
    home, away = game['home'], game['away']
    # Each side's defense faces the other side's quarterback.
    for defense, offense in ((home, away), (away, home)):
        if offense in off_int_rates:
            pred_dst_ints[defense] = (dst_int_rates[defense] + qb_weight*off_int_rates[offense])/(1 + qb_weight)
        else:
            pred_dst_ints[defense] = dst_int_rates[defense]

## Convert from predicted statistics to fantasy points

In [None]:
# Running fantasy-point total per team, starting at zero for every team
# that has a points-allowed prediction.
pred_ffpts = dict.fromkeys(pred_dst_pa, 0)

### Points Against

In [None]:
# Expected fantasy points from points allowed. The predicted total is
# treated as Poisson-distributed; each scoring bucket's probability is the
# difference of the CDF at its upper bounds, and the bucket values are
# averaged with those probabilities.
for team, pa in pred_dst_pa.items():
    # Upper bound of every bucket except the open-ended last one.
    pt_scoring_intervals = [0, 6, 13, 17, 27, 34, 45]
    pt_scoring = [5, 4, 3, 1, 0, -1, -3, -5]
    cdf = poisson.cdf(pt_scoring_intervals, mu=pa)

    # Pair each bucket's lower and upper CDF value (0 below the first
    # bucket, 1 above the last).
    lower_edges = [0.0, *cdf]
    upper_edges = [*cdf, 1]
    pt_probs = [upper - lower for lower, upper in zip(lower_edges, upper_edges)]

    pred_ffpts[team] += sum(prob*pts for prob, pts in zip(pt_probs, pt_scoring))

### Yards Allowed

In [None]:
# Expected fantasy points from yards allowed. The predicted yardage is
# modelled with a Student's t distribution (17 degrees of freedom) located
# at the predicted mean and scaled by the predicted spread; bucket
# probabilities come from differences of the CDF at the bucket bounds.
for team, yd_avg in pred_dst_yds_avg.items():
    yd_std = pred_dst_yds_std[team]
    # Upper bound of every bucket except the open-ended last one.
    yd_scoring_intervals = [99, 199, 299, 349, 399, 449, 499, 549]
    yd_scoring = [5, 3, 2, 0, -1, -3, -5, -6, -7]
    cdf = t.cdf(yd_scoring_intervals, loc=yd_avg, scale=yd_std, df=17)

    # Pair each bucket's lower and upper CDF value (0 below the first
    # bucket, 1 above the last).
    lower_edges = [0.0, *cdf]
    upper_edges = [*cdf, 1]
    yd_probs = [upper - lower for lower, upper in zip(lower_edges, upper_edges)]

    pred_ffpts[team] += sum(prob*pts for prob, pts in zip(yd_probs, yd_scoring))

### Sacks

In [None]:
# Each predicted sack is worth 1 fantasy point.
for team, sacks in pred_dst_sacks.items():
    pred_ffpts[team] += 1 * sacks

### Interceptions

In [None]:
# Each predicted interception is worth 2 fantasy points.
for team, ints in pred_dst_ints.items():
    pred_ffpts[team] += 2 * ints

### Fumble Recoveries

In [None]:
# Every team gets the same fumble-recovery credit: 2 points per recovery
# at the constant `fumble_rate`.
fumble_pts = 2 * fumble_rate
for team in pred_ffpts:
    pred_ffpts[team] += fumble_pts

## Ranking

In [None]:
# Teams ordered from highest to lowest predicted fantasy points.
ranked_teams = sorted(pred_ffpts.items(), key=lambda entry: entry[1], reverse=True)
OrderedDict(ranked_teams)