# Notebook for calculating conditional probability tables (CPTs)

In [1]:
import pandas as pd
import numpy as np

### Filtering dataset to only keep the relevant metrics

In [2]:
# Load the full dataset from a path relative to the notebook's working directory.
# The "Thresholds" section below describes how its metrics were converted to binary.
unfiltered = pd.read_csv("Data/Binary_Dataset.csv")

In [3]:
# Columns kept for the CPT calculations: the metrics used by the network plus
# the "Playoffs" column. The order matches the original selection so the
# filtered frame's column layout is unchanged.
columns = [
    "defense_ave_epa_pass",
    "DSRS",
    "offense_ave_yards_gained_pass",
    "OSRS",
    "offense_ave_wpa_pass",
    "defense_ave_yards_gained_pass",
    "Avg_Points_Allowed",
    "Avg_Points_Scored",
    "SRS",
    "MoV",
    "PD",
    "W-L%",
    "Playoffs",
]

In [4]:
# Keep only the selected columns; the probability cells below all read from `df`.
df = unfiltered[columns]

## Thresholds

As a refresher, each metric was converted to a binary value using a fixed threshold. The thresholds used for each metric are:

- **defense_ave_epa_pass:** A rate below -0.02 is strong for a defense.
- **DSRS:** Above 0.1 for an above average team.
- **offense_ave_yards_gained_pass:** More than 6.6 passing yards per play.
- **OSRS:** Above average team has an OSRS above 2.
- **offense_ave_wpa_pass:** Above 0.002 is above average.
- **defense_ave_yards_gained_pass:** A defense that allows more than 5.9 passing yards per play is considered weak.
- **Avg_Points_Allowed:** A strong team allows less than 21 ppg on average.
- **Avg_Points_Scored:** A strong team scores more than 24 ppg on average.
- **SRS:** An above average team has an SRS above 0.
- **MoV:** A strong team wins by more than 6.5 points.
- **PD:** A strong team has a point differential of more than 50.
- **W-L%:** A strong team wins more than 59% of the time.

__Additional notes__ (each metric is encoded as 1 when the condition listed holds)
- defense_ave_epa_pass > -0.02 (value of 0 means stronger defense)
- DSRS > 0.1
- offense_ave_yards_gained_pass > 6.6
- OSRS > 2
- offense_ave_wpa_pass > 0.002
- defense_ave_yards_gained_pass > 5.9 (value of 0 means stronger defense)
- Avg_Points_Allowed >= 21 (value of 0 means stronger defense)
- Avg_Points_Scored >= 24
- SRS > 0
- MoV >= 7
- PD > 50
- W-L% > 0.59

## Calculating probabilities for first level of network

In [5]:
# Number of rows in the filtered dataset; denominator for every prior below.
length = len(df)


def _share_flagged(column):
    """Return the fraction of rows in ``df`` whose ``column`` value equals 1."""
    return np.sum(df[column] == 1) / length


# Pass-defense EPA: a value of 1 indicates a weak defense.
defense_ave_epa_pass_rate_prob = _share_flagged("defense_ave_epa_pass")
DSRS_prob = _share_flagged("DSRS")
offense_ave_yards_gained_pass_prob = _share_flagged("offense_ave_yards_gained_pass")
OSRS_prob = _share_flagged("OSRS")
offense_ave_wpa_pass_prob = _share_flagged("offense_ave_wpa_pass")
# Average passing yards allowed per play: a value of 1 indicates a weak defense.
defense_ave_yards_gained_pass_prob = _share_flagged("defense_ave_yards_gained_pass")
# Margin of victory.
MoV_prob = _share_flagged("MoV")

In [6]:
# Report the first-level probabilities, one per line.
print(
    f"Defense Average EPA Pass Rate: {defense_ave_epa_pass_rate_prob}",
    f"DSRS: {DSRS_prob}",
    f"Average Passing Yards per Play (Offensive): {offense_ave_yards_gained_pass_prob}",
    f"OSRS: {OSRS_prob}",
    f"Offensive Average WPA Pass Rate: {offense_ave_wpa_pass_prob}",
    f"Average Allowed Passing Yards Per Play: {defense_ave_yards_gained_pass_prob}",
    f"Margin of Victory: {MoV_prob}",
    sep="\n",
)

Defense Average EPA Pass Rate: 0.6209912536443148
DSRS: 0.4912536443148688
Average Passing Yards per Play (Offensive): 0.28717201166180756
OSRS: 0.3075801749271137
Offensive Average WPA Pass Rate: 0.29008746355685133
Average Allowed Passing Yards Per Play: 0.6545189504373178
Margin of Victory: 0.15451895043731778


## Calculating CPTs (Second Level)

### Average Points Allowed

In [7]:
# CPT for Avg_Points_Allowed given its parents (defense_ave_epa_pass, DSRS).
# Keys are (defense_ave_epa_pass state, DSRS state); "T" selects rows where the
# column equals 1 and "F" selects rows where it equals 0.
state_value = {"T": 1, "F": 0}

conditions = {
    (epa_state, dsrs_state): (
        (df["defense_ave_epa_pass"] == state_value[epa_state])
        & (df["DSRS"] == state_value[dsrs_state])
    )
    for epa_state in ("T", "F")
    for dsrs_state in ("T", "F")
}

Avg_Points_Allowed_Dic = {}

for parent_states, row_mask in conditions.items():
    child_flags = df.loc[row_mask, "Avg_Points_Allowed"]
    n_rows = len(child_flags)
    # Counts rows where Avg_Points_Allowed == 0 (per the notes above, 0 means
    # the stronger defense).
    # NOTE(review): every other CPT cell counts `== 1`, and the Point Difference
    # cell treats Avg_Points_Allowed == 1 as its "T" state -- confirm this
    # orientation is intended before combining the tables.
    n_hits = np.sum(child_flags == 0)
    # A parent combination with no rows is recorded as 0 rather than left undefined.
    Avg_Points_Allowed_Dic[parent_states] = n_hits / n_rows if n_rows > 0 else 0

print(Avg_Points_Allowed_Dic)

{('T', 'T'): 0.5590551181102362, ('T', 'F'): 0.03678929765886288, ('F', 'T'): 0.8904761904761904, ('F', 'F'): 0.28}


### Average Points Scored

In [8]:
# CPT for Avg_Points_Scored given its parents:
#   P(Avg_Points_Scored = 1 | offense_ave_yards_gained_pass, OSRS)
# Keys are (offense_ave_yards_gained_pass state, OSRS state); "T" selects rows
# where the column equals 1 and "F" selects rows where it equals 0.
state_value = {"T": 1, "F": 0}

conditions = {
    (yards_state, osrs_state): (
        (df["offense_ave_yards_gained_pass"] == state_value[yards_state])
        & (df["OSRS"] == state_value[osrs_state])
    )
    for yards_state in ("T", "F")
    for osrs_state in ("T", "F")
}

Avg_Points_Scored_Dic = {}

for parent_states, row_mask in conditions.items():
    child_flags = df.loc[row_mask, "Avg_Points_Scored"]
    n_rows = len(child_flags)
    n_hits = np.sum(child_flags == 1)
    # A parent combination with no rows is recorded as 0 rather than left undefined.
    Avg_Points_Scored_Dic[parent_states] = n_hits / n_rows if n_rows > 0 else 0

print(Avg_Points_Scored_Dic)
    

{('T', 'T'): 0.9370629370629371, ('T', 'F'): 0.4074074074074074, ('F', 'T'): 0.7205882352941176, ('F', 'F'): 0.057007125890736345}


### SRS

In [9]:
# CPT for SRS given its parents:
#   P(SRS = 1 | offense_ave_wpa_pass, defense_ave_yards_gained_pass)
# Keys are (offense_ave_wpa_pass state, defense_ave_yards_gained_pass state);
# "T" selects rows where the column equals 1 and "F" rows where it equals 0.
state_value = {"T": 1, "F": 0}

conditions = {
    (wpa_state, yards_allowed_state): (
        (df["offense_ave_wpa_pass"] == state_value[wpa_state])
        & (df["defense_ave_yards_gained_pass"] == state_value[yards_allowed_state])
    )
    for wpa_state in ("T", "F")
    for yards_allowed_state in ("T", "F")
}

SRS_Dic = {}

for parent_states, row_mask in conditions.items():
    child_flags = df.loc[row_mask, "SRS"]
    n_rows = len(child_flags)
    n_hits = np.sum(child_flags == 1)
    # A parent combination with no rows is recorded as 0 rather than left undefined.
    SRS_Dic[parent_states] = n_hits / n_rows if n_rows > 0 else 0

print(SRS_Dic)

{('T', 'T'): 0.7318840579710145, ('T', 'F'): 0.9672131147540983, ('F', 'T'): 0.2315112540192926, ('F', 'F'): 0.6818181818181818}


## Calculating CPTs for Third Level of Network

### Point Differential

In [10]:
# CPT for PD given its parents:
#   P(PD = 1 | Avg_Points_Allowed, Avg_Points_Scored)
# Keys are (Avg_Points_Allowed state, Avg_Points_Scored state); "T" selects rows
# where the column equals 1 and "F" selects rows where it equals 0.
state_value = {"T": 1, "F": 0}

conditions = {
    (allowed_state, scored_state): (
        (df["Avg_Points_Allowed"] == state_value[allowed_state])
        & (df["Avg_Points_Scored"] == state_value[scored_state])
    )
    for allowed_state in ("T", "F")
    for scored_state in ("T", "F")
}

PD_Dic = {}

for parent_states, row_mask in conditions.items():
    child_flags = df.loc[row_mask, "PD"]
    n_rows = len(child_flags)
    n_hits = np.sum(child_flags == 1)
    # A parent combination with no rows is recorded as 0 rather than left undefined.
    PD_Dic[parent_states] = n_hits / n_rows if n_rows > 0 else 0

print(PD_Dic)

{('T', 'T'): 0.5042735042735043, ('T', 'F'): 0.0, ('F', 'T'): 1.0, ('F', 'F'): 0.3157894736842105}


### Win-Loss Percentage

In [12]:
# CPT for W-L% given its parents:
#   P(W-L% = 1 | SRS, MoV)
# Keys are (SRS state, MoV state); "T" selects rows where the column equals 1
# and "F" selects rows where it equals 0.
state_value = {"T": 1, "F": 0}

conditions = {
    (srs_state, mov_state): (
        (df["SRS"] == state_value[srs_state])
        & (df["MoV"] == state_value[mov_state])
    )
    for srs_state in ("T", "F")
    for mov_state in ("T", "F")
}

WL_Dic = {}

for parent_states, row_mask in conditions.items():
    child_flags = df.loc[row_mask, "W-L%"]
    n_rows = len(child_flags)
    n_hits = np.sum(child_flags == 1)
    # A parent combination with no rows is recorded as the integer 0 rather than
    # left undefined, so a bare 0 in the printed table means "no such rows".
    WL_Dic[parent_states] = n_hits / n_rows if n_rows > 0 else 0

print(WL_Dic)

{('T', 'T'): 0.9622641509433962, ('T', 'F'): 0.508130081300813, ('F', 'T'): 0, ('F', 'F'): 0.038922155688622756}


### Playoff Variable

In [13]:
# CPT for Playoffs given its parents:
#   P(Playoffs = 1 | PD, W-L%)
# Keys are (PD state, W-L% state); "T" selects rows where the column equals 1
# and "F" selects rows where it equals 0.
state_value = {"T": 1, "F": 0}

conditions = {
    (pd_state, wl_state): (
        (df["PD"] == state_value[pd_state])
        & (df["W-L%"] == state_value[wl_state])
    )
    for pd_state in ("T", "F")
    for wl_state in ("T", "F")
}

Playoff_Dic = {}

for parent_states, row_mask in conditions.items():
    child_flags = df.loc[row_mask, "Playoffs"]
    n_rows = len(child_flags)
    n_hits = np.sum(child_flags == 1)
    # A parent combination with no rows is recorded as 0 rather than left undefined.
    Playoff_Dic[parent_states] = n_hits / n_rows if n_rows > 0 else 0

print(Playoff_Dic)

{('T', 'T'): 0.9587628865979382, ('T', 'F'): 0.3225806451612903, ('F', 'T'): 0.9130434782608695, ('F', 'F'): 0.0819277108433735}


_Note: All of these values will be stored in an Excel file called CPTs.xlsx._