# Empirical Expected Points Model

**Authors:**  
Iain Muir, iam9ez  
Hriday Singh,  
Connor Smith,

*Date: September 9th, 2021*

## Table of Contents

* 0. Import Libraries
* 1. Load Data
* 2. Transform Data
    * 2.1 Remove Garbage Time
    * 2.2 Split Score to Home/Away
    * 2.3 Cast Datetime Variables
    * 2.4 Group Point Periods
* 3. Expected Points Model

### 0. Import Libraries

In [2]:
import pandas as pd
import numpy as np

### 1.0 Load Data

In [3]:
# NOTE(review): machine-specific absolute path -- the notebook only runs where
# this file exists at exactly this location.
PATH = '/Users/iainmuir/Desktop/4Y 1S/STAT 4800/Dataset/2019 PFF All Plays.csv'

# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. The truncated warning previously printed under
# this cell looks like pandas' mixed-type DtypeWarning, which recommends
# exactly this setting.
DATA = pd.read_csv(PATH, low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
DATA.shape

(167027, 177)

In [5]:
DATA.head()

Unnamed: 0,pff_PLAYID,pff_GAMEID,pff_GAMEDATE,pff_GAMESEASON,pff_WEEK,pff_GSISGAMEKEY,pff_GSISPLAYID,pff_QUARTER,pff_DOWN,pff_CLOCK,...,pff_STSAFETIES,pff_TACKLE,pff_TACKLEASSIST,pff_TEALIGNMENT,pff_TOUCHDOWN,pff_UNBLOCKEDPRESSURE,pff_VISE,pff_WRALIGNMENT,pff_PLAYCLOCK,pff_RUNPASSOPTION
0,3401248,16800,1/13/20,2019,FC,76953,0,1,0,15:00,...,,,,,,,,,,0
1,3401250,16800,1/13/20,2019,FC,76953,0,1,1,15:00,...,,LAST D18,,R,,,,LWR; SLoWR^; SLiWR,3.0,0
2,3401251,16800,1/13/20,2019,FC,76953,0,1,2,14:35,...,,LAST D01,,,,,,LWR^; SRiWR; SRoWR; RWR^,23.0,0
3,3401252,16800,1/13/20,2019,FC,76953,0,1,1,14:19,...,,LAST D06,,R,,,,LWR^; SRWR; RWR^,17.0,0
4,3401253,16800,1/13/20,2019,FC,76953,0,1,1,13:58,...,,LAST D05,,R,,,,SRiWR; SRoWR^; RWR,25.0,1


In [6]:
DATA.describe()

Unnamed: 0,pff_PLAYID,pff_GAMEID,pff_GAMESEASON,pff_GSISGAMEKEY,pff_GSISPLAYID,pff_QUARTER,pff_DOWN,pff_BLITZDOG,pff_CATCHABLE,pff_DEEPPASS,...,pff_PASSDEPTH,pff_PASSWIDTH,pff_PLAYENDFIELDPOSITION,pff_PUMPFAKE,pff_QBMOVEDOFFSPOT,pff_QBRESET,pff_RUNCONCEPT3,pff_SCOREDIFFERENTIAL,pff_PLAYCLOCK,pff_RUNPASSOPTION
count,167027.0,167027.0,167027.0,167027.0,167027.0,167027.0,167027.0,167027.0,70988.0,167027.0,...,67106.0,58114.0,167027.0,167027.0,70988.0,70988.0,0.0,167027.0,137584.0,167027.0
mean,3187349.0,15459.272088,2019.0,74972.614553,2011.392799,2.488831,1.789022,0.125944,0.560081,0.05832,...,8.061038,27.19491,1.464524,0.015578,0.205316,0.017989,,-1.210014,12.988996,0.174451
std,121110.7,462.439957,0.0,1267.438124,1182.639051,1.11417,1.131771,0.331787,0.496381,0.234348,...,11.000766,16.630412,30.528227,0.123838,0.403935,0.132912,,16.022196,7.199358,0.379498
min,2982310.0,14901.0,2019.0,73049.0,0.0,1.0,0.0,0.0,0.0,0.0,...,-21.0,0.0,-49.0,0.0,0.0,0.0,,-79.0,0.0,0.0
25%,3083844.0,15135.0,2019.0,73810.0,994.0,2.0,1.0,0.0,0.0,0.0,...,0.0,12.0,-29.0,0.0,0.0,0.0,,-10.0,8.0,0.0
50%,3186109.0,15368.0,2019.0,74990.0,1995.0,2.0,2.0,0.0,1.0,0.0,...,5.0,28.0,4.0,0.0,0.0,0.0,,0.0,12.0,0.0
75%,3286893.0,15605.0,2019.0,76239.0,2991.0,3.0,3.0,0.0,1.0,0.0,...,13.0,43.0,27.0,0.0,0.0,0.0,,7.0,18.0,0.0
max,3402800.0,16800.0,2019.0,76953.0,5246.0,5.0,4.0,1.0,1.0,1.0,...,59.0,53.0,50.0,1.0,1.0,1.0,,79.0,40.0,1.0


In [7]:
DATA.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 167027 entries, 0 to 167026
Columns: 177 entries, pff_PLAYID to pff_RUNPASSOPTION
dtypes: float64(29), int64(31), object(117)
memory usage: 225.6+ MB


In [8]:
# Work on a copy so the frame loaded into DATA stays untouched by the transforms below.
d = DATA.copy()

### 2.0 Transform Data

#### 2.1 Remove Garbage Time

In [9]:
# Keep only the rows whose pff_GARBAGETIME flag equals 0.
not_garbage_time = d['pff_GARBAGETIME'].eq(0)
d = d.loc[not_garbage_time]

In [10]:
# Keep only the plays recorded in quarter 1 or quarter 3.
d = d.loc[d['pff_QUARTER'].isin([1, 3])]

In [11]:
# Renumber the rows 0..n-1 after filtering; drop=True discards the old index.
d = d.reset_index(drop=True)

In [12]:
d.shape

(79912, 177)

#### 2.2 Split Score to Home/Away

In [13]:
def split_score(row):
    """
    Split the string in row['pff_SCORE'] into two integer scores.

    The text before the '.' becomes the first value and the text after it the
    second. A value with no '.' yields 0 for the second score. A one-character
    second part is multiplied by 10 (so '7.1' -> (7, 10)) -- presumably to
    restore a trailing zero dropped when the score was stored as a number;
    confirm against the data dictionary.

    Parameters
    ----------
    row : mapping
        Anything indexable by 'pff_SCORE' whose value is a string.

    Returns
    -------
    tuple of (int, int)
        The two scores; the caller labels them homeScore and awayScore.
    """
    raw = row['pff_SCORE']

    # No separator: only one score is present.
    if '.' not in raw:
        return int(raw), 0

    first_part, second_part = raw.split('.')
    second = int(second_part)
    if len(second_part) == 1:
        second *= 10

    return int(first_part), second

In [14]:
%%time

# split_score expects a string, so cast the column first.
d['pff_SCORE'] = d['pff_SCORE'].astype(str)

# One-column frame so that apply(axis=1) hands split_score a row it can index
# by 'pff_SCORE'; result_type='expand' spreads the returned tuple over two columns.
s = d['pff_SCORE'].to_frame()
scores = s.apply(split_score, axis=1, result_type='expand')

CPU times: user 20.9 s, sys: 331 ms, total: 21.3 s
Wall time: 23.5 s


In [15]:
# Label the two expanded columns in the order split_score returns them.
scores.columns = ['homeScore', 'awayScore']

In [16]:
# Attach the parsed scores to the play-by-play frame (aligned on the row index).
d['homeScore'] = scores['homeScore']
d['awayScore'] = scores['awayScore']

#### 2.3 Cast Datetime Variables

In [17]:
# Parse the game date strings into pandas datetimes.
game_dates = pd.to_datetime(d['pff_GAMEDATE'])
d['pff_GAMEDATE'] = game_dates

In [18]:
# The clock values shown by DATA.head() ('15:00', '14:35', ...) are a
# minutes:seconds game clock. Without an explicit format pandas reads them as
# hours:minutes, so '14:35' would become 14:35:00 instead of 00:14:35.
d['pff_CLOCK'] = pd.to_datetime(d['pff_CLOCK'], format='%M:%S').dt.time

In [19]:
d.shape

(79912, 179)

### 3.0 Expected Points Model

In [233]:
# Notebook-level log of simulated point values, filled by EPA().
points = list()

# Largest yards-to-go value in the data. Series.max() skips NaN, whereas the
# built-in max() over a Series gives an order-dependent answer when NaN is present.
MAX = d['pff_DISTANCE'].max()

# Seed used for the random sampling of historical plays.
SEED = 42

# Half-width (yards) of the yards-to-go window searched for each distance bucket.
YTG_DEV = {
    'long': 4,
    'medium': 3,
    'short': 2,
    'inches': 1
}
# Smallest yards-to-go value admitted into the window for each distance bucket.
YTG_MIN = {
    'long': 10,
    'medium': 6,
    'short': 3,
    'inches': 1
}
POSITION_DEV = 5 # yards

In [238]:
def EPA(down, ytg, position, own):
    """
    Estimate the expected points of a game state by repeated simulation.

    Calls run_play() SIMULATIONS times with the same state and averages the
    point values it returns. Only this call's simulations enter the mean;
    simulations for which run_play() returns None are left out of it.

    Parameters
    ----------
    down : int
        Current down.
    ytg : int
        Yards to go.
    position : int
        Unsigned yard line; run_play() applies the sign according to `own`.
    own : bool
        Passed through to run_play(), which negates `position` when it is truthy.

    Returns
    -------
    float
        Mean simulated points, or NaN when no simulation produced a value.
    """
    SIMULATIONS = 1000
    outcomes = [run_play(down, ytg, position, own) for _ in range(SIMULATIONS)]

    # Keep feeding the notebook-level log, but do not average over it: it also
    # holds the results of every earlier call, which previously contaminated
    # the estimate for the current state.
    points.extend(outcomes)

    # run_play() returns None when no comparable play exists; a None in the
    # array would make .mean() raise.
    valid = [value for value in outcomes if value is not None]
    if not valid:
        return np.nan

    return np.array(valid).mean()

In [239]:
# Random generator shared by every draw in run_play(). It is created lazily on
# the first call and seeded once with SEED: results stay reproducible, but
# successive draws differ. Passing SEED itself to every .sample() call made
# each draw return the same row.
_PLAY_RNG = None


def run_play(down, ytg, position, own):
    """
    Draw one historical play resembling the given state and return its points.

    Comparable plays are the rows of `d` with the same down, a yards-to-go
    value within the bucket's deviation (YTG_DEV, floored at YTG_MIN) and a
    field position within POSITION_DEV yards of the signed position.

    Outcome detection is still a placeholder: every sampled play is scored as
    a touchdown (6 points) plus a randomly drawn extra-point attempt.

    Parameters
    ----------
    down : int
        Current down.
    ytg : int
        Yards to go.
    position : int
        Unsigned yard line.
    own : bool
        When truthy, `position` is negated before matching pff_FIELDPOSITION.

    Returns
    -------
    int or None
        Points scored, or None when no comparable play exists.
    """
    global _PLAY_RNG
    if _PLAY_RNG is None:
        _PLAY_RNG = np.random.RandomState(SEED)

    # Bucket by yards to go. Plain comparisons replace `ytg in range(10, MAX)`,
    # which sent any ytg >= MAX to the 'inches' bucket.
    if ytg >= 10:
        dist_ = 'long'
    elif ytg >= 6:
        dist_ = 'medium'
    elif ytg >= 3:
        dist_ = 'short'
    else:
        dist_ = 'inches'
    ytg_dev = YTG_DEV[dist_]
    ytg_min = YTG_MIN[dist_]

    # Signed position: negative when `own` is truthy.
    position *= -1 if own else 1
    fp_right = position - POSITION_DEV
    fp_left = position + POSITION_DEV

    # Redzone -- shrink deviation to 3 yards
    if 0 < position < 20:
        fp_right += 2
        fp_left -= 2

    # The window must not cross zero. Previously the upper bound was forced to
    # -1 even for positive positions, so e.g. position 3 searched [-2, -1] and
    # excluded itself.
    if position > 0:
        fp_right = max(fp_right, 1)
    else:
        fp_left = min(fp_left, -1)
    # NOTE(review): if pff_FIELDPOSITION jumps from -49 to 50 at midfield (as the
    # range of pff_PLAYENDFIELDPOSITION suggests), windows near midfield miss
    # plays on the other side of the 50 -- confirm.

    samples = d.loc[
        (d['pff_DOWN'] == down) &
        (d['pff_DISTANCE'] >= max(ytg_min, ytg - ytg_dev)) &
        (d['pff_DISTANCE'] <= ytg + ytg_dev) &
        (d['pff_FIELDPOSITION'] >= fp_right) &
        (d['pff_FIELDPOSITION'] <= fp_left)
    ]
    if len(samples) == 0:
        return None

    # TODO: `sample` is not inspected yet; the outcome flags below should be
    # derived from it.
    sample = samples.sample(n=1, random_state=_PLAY_RNG)

    # Define conditions for a TD, FG
    # TODO: placeholders carried over from the original `if True:` / `elif True:`.
    is_touchdown = True
    is_field_goal = True

    if is_touchdown:
        scored = 6

        # Rows with down, distance and field position all 0 are used as the
        # pool of extra-point attempts.
        xps = d.loc[
            (d['pff_DOWN'] == 0) &
            (d['pff_DISTANCE'] == 0) &
            (d['pff_FIELDPOSITION'] == 0)
        ]
        # Sampling from an empty frame raises, so skip the attempt in that case.
        if len(xps) > 0:
            xp = xps.sample(n=1, random_state=_PLAY_RNG)
            # `'MADE' in <Series>` tests the index labels, not the values, so
            # read the single cell and test its text instead.
            result = str(xp['pff_KICKRESULT'].iloc[0])
            if 'MADE' in result:
                scored += 1
        return scored

    if is_field_goal:
        return 3

    # No score: the drive should continue from the state after the sampled play.
    # The original recursed with undefined names (`y`, `p`) and the wrong number
    # of arguments; fail explicitly until the next state can be derived.
    raise NotImplementedError(
        'Continuing a drive requires the post-play state of the sampled play'
    )

In [None]:
# Read the game state interactively.
# NOTE(review): the last prompt asks for home/away, but run_play() uses `own`
# to decide the sign of the field position -- confirm which meaning is intended.
down = int(input('Down: ').strip())
ytg = int(input('Yards to Go: ').strip())
position = int(input('Field Position: ').strip())
own = input('Possession (home/away): ').strip() == 'home'

In [None]:
# Bundle the inputs under the keyword names EPA() expects.
STATE = dict(down=down, ytg=ytg, position=position, own=own)

In [None]:
# Unpack STATE into EPA's keyword arguments and keep the averaged result.
epa = EPA(**STATE)

In [None]:
# Report the state and its estimate; the position is prefixed with 'Own '
# when the `own` flag is set.
print(f'''
    Expected Points Model:
    S (Down: {STATE["down"]}, YTG: {STATE["ytg"]}, Position: {("Own " if STATE["own"] else "") + str(STATE["position"])}) == {epa} EP
''')