# Simple NFL Strength of Performance Analysis

This notebook performs a simple strength of performance analysis. The objective is to quantify how well a team has performed relative to the strength of its opponents. We do this by estimating a team's expected margin of victory/loss against an average team at a neutral site - we call this metric the team's SOP.

Consider a game between Team A and Team B where team A is at home.

* Team A's SOP = $4.1$

* Team B's SOP = $-1.5$

* Assume the average NFL homefield advantage is $+1.3$

We expect Team A to win by a margin of $4.1 - (-1.5) + 1.3 = 6.9$ points on average. Suppose Team A actually wins by $3$ points; our error for that game is then $6.9 - 3 = 3.9$ points.

To determine each team's SOP, we simply minimize this error (its root mean square) across every game.

In [49]:
# load in the necessary packages
import nfl_data_py
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from IPython.display import display, HTML

In [2]:
def compute_home_advan(scores: pd.DataFrame) -> float:
    """
    Return the average home field advantage for the given scores data,
    i.e. the mean home score minus the mean away score
    """
    avg_home_points = np.mean(scores["home_score"])
    avg_away_points = np.mean(scores["away_score"])
    return avg_home_points - avg_away_points

In [8]:
def get_scores(season: int) -> pd.DataFrame:
    """
    Load the schedule for the given season and return its team and score columns

    Rows with a missing value in any of the four kept columns are dropped
    (presumably games that have not been played yet - verify against the data).
    """
    wanted_columns = ["home_team", "away_team", "home_score", "away_score"]
    schedule = nfl_data_py.import_schedules([season])
    return schedule[wanted_columns].dropna()

In [5]:
def init_sop(X: tuple[float, ...], scores: pd.DataFrame) -> pd.DataFrame:
    """
    Initialize the SOP dataframe

    Pairs every team found in the scores data with one SOP value. Teams are
    collected from both the home and the away column and sorted, and X is
    expected to hold one value per team in that same sorted order.

    Returns a dataframe with the columns "team" and "sop".

    Raises ValueError if the number of values in X does not match the number
    of teams.
    """
    # look at both columns: a team that has only played away so far would
    # otherwise be left out of the table
    teams = sorted(set(scores["home_team"]) | set(scores["away_team"]))
    values = list(X)

    if len(values) != len(teams):
        raise ValueError(
            f"expected {len(teams)} SOP values (one per team), got {len(values)}"
        )

    return pd.DataFrame({"team": teams, "sop": values})

In [None]:
def sop_error(sop: pd.DataFrame, scores: pd.DataFrame, home_advan: float) -> float:
    """
    Compute the root-mean-square error between the expected and the real score
    differentials for the given SOP, scores, and home field advantage
    """

    # label the SOP values once for each side of a game
    home_side = sop.rename(columns={"team": "home_team", "sop": "home_sop"})
    away_side = sop.rename(columns={"team": "away_team", "sop": "away_sop"})

    # attach both teams' SOP to every game (games without a matching team drop out)
    games = scores.merge(home_side, on="home_team").merge(away_side, on="away_team")

    # expected minus real home margin for each game
    expected_margin = games["home_sop"] - games["away_sop"] + home_advan
    actual_margin = games["home_score"] - games["away_score"]
    residual = expected_margin - actual_margin

    # root of the mean squared residual
    mean_squared = np.mean(np.square(residual))
    return np.sqrt(mean_squared)

In [35]:
def objective(X: tuple[float], scores: pd.DataFrame, home_advan: float) -> float:
    """
    Objective function for the minimization: the SOP error obtained when the
    candidate values X are used as the teams' SOP
    """
    return sop_error(init_sop(X, scores), scores, home_advan)

In [36]:
def compute_sop(season: int) -> pd.DataFrame:
    """
    Estimate the SOP of every team for the given season

    Loads the season's scores, computes the home field advantage from them,
    and then searches for the per-team SOP values that minimize the objective,
    starting from an SOP of zero for every team.

    Returns a dataframe with the columns "team" and "sop".

    Raises ValueError if the season has no scored games.
    """
    # get the scores data
    scores = get_scores(season)

    # one unknown per team that appears in the data; counting them avoids
    # assuming that every data set contains exactly 32 teams
    n_teams = len(set(scores["home_team"]) | set(scores["away_team"]))
    if n_teams == 0:
        raise ValueError(f"no scored games found for season {season}")

    # compute the home field advantage
    home_advan = compute_home_advan(scores)

    # minimize the error
    solution = minimize(
        objective, np.zeros(n_teams), args=(scores, home_advan), tol=1e-6
    )

    # return the SOP table
    return init_sop(solution.x, scores)

Now let's run the algorithm on a few seasons.

In [None]:
# seasons to analyze, in the order their tables are laid out
seasons = (2024, 2023, 2022, 2008, 2007)

# fit every season and rank its teams from highest to lowest SOP
ranked = {}
for season in seasons:
    sop_table = compute_sop(season)
    ranked[season] = sop_table.sort_values(by="sop", ascending=False).style.hide(
        axis="index"
    )

# keep the per-season names defined in case other cells refer to them
sop2024, sop2023, sop2022, sop2008, sop2007 = (ranked[season] for season in seasons)

# convert dataframes to HTML: one titled panel per season
# (the index is already hidden on the Styler, so to_html needs no index option)
panels = "\n".join(
    f"""    <div style="width: 48%; display: flex; flex-direction: column; align-items: center;">
        <h3>{season}</h3>
        {ranked[season].to_html()}
    </div>"""
    for season in seasons
)
html = f"""
<div style="display: flex; justify-content: space-between;">
{panels}
</div>
"""

# display all dataframes side by side
display(HTML(html))

team,sop
DET,14.634425
BUF,9.464874
GB,7.784117
MIN,7.18215
PHI,6.275717
BAL,6.222153
PIT,4.32549
TB,4.180493
LAC,4.175214
KC,3.96644

team,sop
BAL,12.380044
SF,10.299989
DAL,7.418777
KC,6.303398
BUF,6.296673
DET,5.093401
MIA,3.52102
GB,3.499999
NO,2.740861
LA,2.684864

team,sop
BUF,9.208579
PHI,8.462653
CIN,8.044024
SF,7.266307
DAL,7.000276
KC,6.679477
BAL,3.097545
DET,2.099778
MIA,1.954941
NE,1.807962

team,sop
PIT,10.349663
BAL,10.293259
PHI,8.821211
TEN,8.554753
NYG,7.983826
IND,6.037208
SD,4.772166
NE,4.186565
CAR,3.794677
NO,3.505897

team,sop
NE,18.457604
IND,11.421139
SD,9.480065
GB,9.390769
DAL,8.982306
JAX,7.134558
NYG,5.8871
PHI,5.34306
PIT,4.879325
MIN,4.065947
