# Empirical Expected Points Model

**Authors:**  
Iain Muir, iam9ez  
Hriday Singh,  
Connor Smith,

*Date: September 9th, 2021*

## Table of Contents

### 0.1 Import Libraries

In [2]:
import pandas as pd
import numpy as np
import statistics

### 1.0 Load Data

In [3]:
PATH = '/Users/iainmuir/Desktop/4Y 1S/STAT 4800/Dataset/2019 PFF All Plays.csv'
DATA = pd.read_csv(PATH)

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
DATA.shape

(167027, 177)

In [5]:
DATA.head()

Unnamed: 0,pff_PLAYID,pff_GAMEID,pff_GAMEDATE,pff_GAMESEASON,pff_WEEK,pff_GSISGAMEKEY,pff_GSISPLAYID,pff_QUARTER,pff_DOWN,pff_CLOCK,...,pff_STSAFETIES,pff_TACKLE,pff_TACKLEASSIST,pff_TEALIGNMENT,pff_TOUCHDOWN,pff_UNBLOCKEDPRESSURE,pff_VISE,pff_WRALIGNMENT,pff_PLAYCLOCK,pff_RUNPASSOPTION
0,3401248,16800,1/13/20,2019,FC,76953,0,1,0,15:00,...,,,,,,,,,,0
1,3401250,16800,1/13/20,2019,FC,76953,0,1,1,15:00,...,,LAST D18,,R,,,,LWR; SLoWR^; SLiWR,3.0,0
2,3401251,16800,1/13/20,2019,FC,76953,0,1,2,14:35,...,,LAST D01,,,,,,LWR^; SRiWR; SRoWR; RWR^,23.0,0
3,3401252,16800,1/13/20,2019,FC,76953,0,1,1,14:19,...,,LAST D06,,R,,,,LWR^; SRWR; RWR^,17.0,0
4,3401253,16800,1/13/20,2019,FC,76953,0,1,1,13:58,...,,LAST D05,,R,,,,SRiWR; SRoWR^; RWR,25.0,1


In [6]:
DATA.describe()

Unnamed: 0,pff_PLAYID,pff_GAMEID,pff_GAMESEASON,pff_GSISGAMEKEY,pff_GSISPLAYID,pff_QUARTER,pff_DOWN,pff_BLITZDOG,pff_CATCHABLE,pff_DEEPPASS,...,pff_PASSDEPTH,pff_PASSWIDTH,pff_PLAYENDFIELDPOSITION,pff_PUMPFAKE,pff_QBMOVEDOFFSPOT,pff_QBRESET,pff_RUNCONCEPT3,pff_SCOREDIFFERENTIAL,pff_PLAYCLOCK,pff_RUNPASSOPTION
count,167027.0,167027.0,167027.0,167027.0,167027.0,167027.0,167027.0,167027.0,70988.0,167027.0,...,67106.0,58114.0,167027.0,167027.0,70988.0,70988.0,0.0,167027.0,137584.0,167027.0
mean,3187349.0,15459.272088,2019.0,74972.614553,2011.392799,2.488831,1.789022,0.125944,0.560081,0.05832,...,8.061038,27.19491,1.464524,0.015578,0.205316,0.017989,,-1.210014,12.988996,0.174451
std,121110.7,462.439957,0.0,1267.438124,1182.639051,1.11417,1.131771,0.331787,0.496381,0.234348,...,11.000766,16.630412,30.528227,0.123838,0.403935,0.132912,,16.022196,7.199358,0.379498
min,2982310.0,14901.0,2019.0,73049.0,0.0,1.0,0.0,0.0,0.0,0.0,...,-21.0,0.0,-49.0,0.0,0.0,0.0,,-79.0,0.0,0.0
25%,3083844.0,15135.0,2019.0,73810.0,994.0,2.0,1.0,0.0,0.0,0.0,...,0.0,12.0,-29.0,0.0,0.0,0.0,,-10.0,8.0,0.0
50%,3186109.0,15368.0,2019.0,74990.0,1995.0,2.0,2.0,0.0,1.0,0.0,...,5.0,28.0,4.0,0.0,0.0,0.0,,0.0,12.0,0.0
75%,3286893.0,15605.0,2019.0,76239.0,2991.0,3.0,3.0,0.0,1.0,0.0,...,13.0,43.0,27.0,0.0,0.0,0.0,,7.0,18.0,0.0
max,3402800.0,16800.0,2019.0,76953.0,5246.0,5.0,4.0,1.0,1.0,1.0,...,59.0,53.0,50.0,1.0,1.0,1.0,,79.0,40.0,1.0


In [7]:
DATA.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 167027 entries, 0 to 167026
Columns: 177 entries, pff_PLAYID to pff_RUNPASSOPTION
dtypes: float64(29), int64(31), object(117)
memory usage: 225.6+ MB


In [8]:
d = DATA.copy()

### 2.0 Transform Data

#### 2.1 Remove Garbage Time

In [9]:
d = d.loc[d['pff_GARBAGETIME'] == 0]

In [10]:
d = d.loc[(d['pff_QUARTER'] == 1) | (d['pff_QUARTER'] == 3)]

In [11]:
d = d.reset_index(drop=True)

In [12]:
d.shape

(79912, 177)

#### 2.2 Split Score to Home/Away

In [13]:
def split_score(row):
    """
    
    """
    score = row['pff_SCORE']
    h, a = score.split('.')
    h = float('.' + h) * (10 * (10 if len(h) == 2 else 1))
    a = float('.' + a) * (10 * (10 if len(a) == 2 else 1))
    
    return int(h), int(a)

In [14]:
%%time

d['pff_SCORE'] = d['pff_SCORE'].astype(str)
s = pd.DataFrame(d['pff_SCORE'])
scores = s.apply(
    lambda r: split_score(r),
    axis=1,
    result_type='expand'
)

In [15]:
scores.columns = ['homeScore', 'awayScore']

In [16]:
d['homeScore'], d['awayScore'] = scores['homeScore'], scores['awayScore']

#### 2.3 Cast Datetime Variables

In [17]:
d['pff_GAMEDATE'] = pd.to_datetime(d['pff_GAMEDATE'])

In [18]:
d['pff_CLOCK'] = pd.to_datetime(d['pff_CLOCK']).dt.time

#### 2.4 Group Point Periods

In [115]:
def group_point_periods(row):
    """
    
    """
    global drive, last_id, increment
    
    id_, order, type_ = row['pff_GAMEID'], row['pff_SORTORDER'], row['pff_SPECIALTEAMSTYPE']

    if increment:
        drive += 1
        increment = False
        
    if id_ != last_id:
        drive = 1
    last_id = id_

    increment = True if type_ == 'KICKOFF' and order != 1 else False
        
    return drive

In [116]:
drive = 1
last_id = None
increment = False

In [118]:
%%time

games = pd.DataFrame(
    d[['pff_GAMEID', 'pff_SORTORDER', 'pff_SPECIALTEAMSTYPE']]
)
d['pointPeriod'] = games.apply(
    lambda r: group_point_periods(r),
    axis=1
)

CPU times: user 4.34 s, sys: 25.6 ms, total: 4.37 s
Wall time: 4.49 s


In [89]:
def assign_points():
    pass

In [119]:
periods = d.groupby(
    ['pff_GAMEID', 'pointPeriod']
)

In [170]:
d.loc[(d['pff_GAMEID'] == 16556) & (d['pointPeriod'] == 1)]

Unnamed: 0,pff_PLAYID,pff_GAMEID,pff_GAMEDATE,pff_GAMESEASON,pff_WEEK,pff_GSISGAMEKEY,pff_GSISPLAYID,pff_QUARTER,pff_DOWN,pff_CLOCK,...,pff_TOUCHDOWN,pff_UNBLOCKEDPRESSURE,pff_VISE,pff_WRALIGNMENT,pff_PLAYCLOCK,pff_RUNPASSOPTION,homeScore,awayScore,pointPeriod,periodScore
466,3368447,16556,2019-12-21,2019,BG,76924,35,1,0,15:00:00,...,,,,,13.0,0,0,0,1,7.0
467,3368449,16556,2019-12-21,2019,BG,76924,49,1,1,15:00:00,...,,,,LWR^(-18.5); SRWR(+9.5); RWR(+17.5),,0,0,0,1,7.0
468,3368452,16556,2019-12-21,2019,BG,76924,71,1,2,14:56:00,...,,,,LWR^(-16.5); SRWR(+8.5); RWR^(+18.5),19.0,1,0,0,1,7.0
469,3368454,16556,2019-12-21,2019,BG,76924,95,1,3,14:22:00,...,,,,LWR(-16.5); SLWR^(-7.5); RWR^(+19.5),13.0,0,0,0,1,7.0
470,3368459,16556,2019-12-21,2019,BG,76924,116,1,1,14:17:00,...,,,,SRWR(+9.5); RWR^(+12.5),13.0,0,0,0,1,7.0
471,3368465,16556,2019-12-21,2019,BG,76924,138,1,2,13:44:00,...,,,,LWR(-13.5); RWR^(+13.5),16.0,0,0,0,1,7.0
472,3368466,16556,2019-12-21,2019,BG,76924,160,1,3,13:13:00,...,,,,LWR^(-19.5),16.0,0,0,0,1,10.0
473,3368471,16556,2019-12-21,2019,BG,76924,184,1,1,12:43:00,...,CASS 45,,,LWR^(-18.5); RWR(+13.5),13.0,0,0,0,1,10.0
474,3368473,16556,2019-12-21,2019,BG,76924,204,1,0,12:34:00,...,,,,,12.0,0,0,6,1,10.0
475,3368476,16556,2019-12-21,2019,BG,76924,220,1,0,12:34:00,...,,,,,12.0,0,0,7,1,10.0


In [171]:
%%time

point_periods = list()
for name, group in periods:
    g = group.reset_index()
    num = len(g)
    
#     try:
    home = g['homeScore'].iloc[-1] - g['homeScore'].iloc[0]
    away = g['awayScore'].iloc[-1] - g['awayScore'].iloc[0]
#     except IndexError:
#         point_periods.extend([None]*num)
#         continue
    
    diff = home - away
    if diff == 0:
        print(len(g))
    points = np.ones(num) * diff
    
    point_periods.extend(points)

2
1
7
10
12
4
1
10
19
32
15
12
19
12
10
9
10
19
20
32
32
6
3
1
1
24
4
12
19
15
3
1
12
3
22
18
21
7
9
16
13
1
27
11
5
3
14
11
11
12
15
9
5
4
10
6
43
26
9
18
30
4
9
2
3
17
2
9
26
18
4
9
19
10
4
8
5
4
15
8
18
21
1
5
37
36
8
2
6
19
8
29
11
11
17
18
29
1
8
15
2
1
10
3
22
20
6
32
4
8
30
19
10
4
24
2
32
3
2
21
1
5
4
9
12
1
3
3
2
9
9
1
6
24
18
2
1
14
4
3
2
5
9
29
19
20
8
11
6
5
5
22
7
11
11
11
27
11
8
6
9
2
2
1
2
20
1
14
3
6
14
23
4
7
13
17
1
42
2
22
6
12
34
1
4
4
8
14
4
32
24
5
25
39
28
12
4
5
25
3
6
5
34
4
1
22
16
16
49
2
1
2
5
16
19
1
10
14
14
5
4
9
6
2
5
6
7
2
11
14
1
36
1
8
3
1
20
13
6
3
6
3
11
2
25
2
31
4
6
47
11
23
1
10
24
11
17
32
3
2
8
1
8
17
18
14
8
1
10
1
23
11
3
29
48
35
1
3
13
7
3
33
1
15
21
5
10
8
4
1
11
3
26
7
5
26
12
11
1
3
4
6
11
17
1
4
7
11
3
6
16
40
3
3
35
2
16
23
6
12
41
21
13
4
39
30
16
6
40
4
8
13
2
14
1
3
7
4
20
22
8
23
15
20
3
8
8
13
1
16
8
33
14
34
24
1
3
16
1
6
16
11
32
2
42
6
22
12
17
2
3
13
3
13
4
4
17
14
6
20
7
22
9
16
6
2
6
32
13
41
12
19
7
16
36
24
1
26
8
1
31
5


In [164]:
d['periodScore'] = point_periods

In [165]:
d.loc[d['periodScore'] > 8]

Unnamed: 0,pff_PLAYID,pff_GAMEID,pff_GAMEDATE,pff_GAMESEASON,pff_WEEK,pff_GSISGAMEKEY,pff_GSISPLAYID,pff_QUARTER,pff_DOWN,pff_CLOCK,...,pff_TOUCHDOWN,pff_UNBLOCKEDPRESSURE,pff_VISE,pff_WRALIGNMENT,pff_PLAYCLOCK,pff_RUNPASSOPTION,homeScore,awayScore,pointPeriod,periodScore
472,3368466,16556,2019-12-21,2019,BG,76924,160,1,3,13:13:00,...,,,,LWR^(-19.5),16.0,0,0,0,1,10.0
473,3368471,16556,2019-12-21,2019,BG,76924,184,1,1,12:43:00,...,CASS 45,,,LWR^(-18.5); RWR(+13.5),13.0,0,0,0,1,10.0
474,3368473,16556,2019-12-21,2019,BG,76924,204,1,0,12:34:00,...,,,,,12.0,0,0,6,1,10.0
475,3368476,16556,2019-12-21,2019,BG,76924,220,1,0,12:34:00,...,,,,,12.0,0,0,7,1,10.0
476,3368482,16556,2019-12-21,2019,BG,76924,234,1,1,12:34:00,...,,,,LWR(-11.5); SLoWR(-4.5); SLiWR^(-2.5),11.0,0,0,7,2,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79859,2984603,14902,2019-08-24,2019,0,73051,2225,3,2,13:31:00,...,,,,LWR^(-19.5); SRiWR^(+5.5); SRoWR(+10.5); RWR(+...,24.0,1,28,21,4,11.0
79860,2984607,14902,2019-08-24,2019,0,73051,2247,3,3,13:01:00,...,,,,LWR^(-16.5); SRiWR^(+4.5); SRoWR(+8.5); RWR(+1...,15.0,0,28,21,4,11.0
79861,2984612,14902,2019-08-24,2019,0,73051,2269,3,4,12:50:00,...,,,,,12.0,0,28,21,4,11.0
79862,2984617,14902,2019-08-24,2019,0,73051,2292,3,4,12:49:00,...,,,HIUN D04 (VL); HIUN D23 (VR),,21.0,0,28,21,4,11.0


### 3.0 Expected Points Model

In [142]:
points = list()
MAX = max(d['pff_DISTANCE'])
SEED = 42

YTG_DEV = {
    'long': 4,
    'medium': 3,
    'short': 2,
    'inches': 1
}
YTG_MIN = {
    'long': 10,
    'medium': 6,
    'short': 3,
    'inches': 1
}
POSITION_DEV = 5 # yards

In [159]:
def EP(down, ytg, position, own):
    """
    
    """
    dist_ = 'long' if ytg in range(10, MAX) else \
            'medium' if ytg in range(6, 10) else \
            'short' if ytg in range(3, 6) else \
            'inches'
    ytg_dev = YTG_DEV[dist_]
    ytg_min = YTG_MIN[dist_]
    
    position *= -1 if own else 1
    fp_right = position - POSITION_DEV
    fp_left = position + POSITION_DEV
    
    # Off the field left
    if fp_left > 0 and fp_right < 0:
        fp_left = -1
    # Redzone -- shrink deviation to 3 yards
    elif 0 < position < 20:
        fp_right += 2
        fp_left -= 2
    
    samples = d.loc[
        (d['pff_DOWN'] == down) &
        (d['pff_DISTANCE'] >= max(ytg_min, ytg - ytg_dev)) &
        (d['pff_DISTANCE'] <= ytg + ytg_dev) &
        (d['pff_FIELDPOSITION'] >= fp_right) &
        (d['pff_FIELDPOSITION'] <= fp_left)
    ]

    sample_ep = samples['periodScore'].mean()
    
    return round(sample_ep, 2)

In [154]:
STATE = {
    'down': 1,
    'ytg': 10,
    'position': 2,
    'own': False
}

In [160]:
ep = EP(**STATE)

In [161]:
print('''
    Expected Points Model:
    S (Down: {}, YTG: {}, Position: {}) == {} EP
'''.format(
    STATE['down'],
    STATE['ytg'],
    ('Own ' if STATE['own'] else '') + str(STATE['position']),
    ep
))


    Expected Points Model:
    S (Down: 1, YTG: 10, Position: 2) == 0.29 EP



### 4.0 Expected Points Added Model

In [None]:
def EPA(down, ytg, position, own):
    """
    
    """
    global points
    
    ytg_dev = 4 if ytg in range(10, MAX) else
              3 if ytg in range(6, 10) else
              2 if ytg in range(3, 6) else
              1
                
    SIMULATIONS = 1000
    for i in range(SIMULATIONS):
        point = run_play(down, ytg, position, own)
        points.append(point)
    
    return statistics.mean(points)

In [None]:
def run_play(down, ytg, position, own):
    dist_ = 'long' if ytg in range(10, MAX) else
            'medium' if ytg in range(6, 10) else
            'short' if ytg in range(3, 6) else
            'inches'
    ytg_dev = YTG_DEV[dist_]
    ytg_min = YTG_MIN[dist_]
    
    position *= -1 if own else 1
    fp_right = position - POSITION_DEV
    fp_left = position + POSITION_DEV
    
    # Off the field left
    if fp_left > 0 and fp_right < 0:
        fp_left = -1
    # Redzone -- shrink deviation to 3 yards
    elif 0 < position < 20:
        fp_right += 2
        fp_left -= 2
    
    samples = d.loc[
        (d['pff_DOWN'] == down) &
        (d['pff_DISTANCE'] >= max(ytg_min, ytg - ytg_dev)) &
        (d['pff_DISTANCE'] <= ytg + ytg_dev) &
        (d['pff_FIELDPOSITION'] >= fp_right) &
        (d['pff_FIELDPOSITION'] <= fp_left)
    ]
    if len(samples) == 0:
        return None
    else:
        sample = samples.sample(n=1, random_state=SEED)
        
        # Define conditions for a TD, FG
        if True:
            score = True
            scored = 6
            
            xps = d.loc[
                (d['pff_DOWN'] == 0) &
                (d['pff_DISTANCE'] == 0) &
                (d['pff_FIELDPOSITION'] == 0)
            ]
            xp = xps.sample(n=1, random_state=SEED)
            result = xp['pff_KICKRESULT']
            scored += 1 if 'MADE' in result else 0
        elif True:
            score = True
            scored = 3
        else:
            score = False
    
    if score:
        return scored
    else:
        return run_play(d, y, p)