## Team Stat Calculations
Author: Akshay Easwaran <akeaswaran@me.com>

---
Based on work by ESPN's Bill Connelly, Football Outsiders, and Football Study Hall

Data from CollegeFootballData.com

Useful Resources for Stat Definitions:
- Football Study Hall website: https://www.footballstudyhall.com/
- Football Study Hall book: https://www.amazon.com/Study-Hall-College-Football-Stories/dp/1484989961
- Football Outsiders: https://www.footballoutsiders.com/info/glossary
- Example Advanced Box Score: https://twitter.com/ESPN_BillC/status/1176572006969597954/photo/1

**How to use this Notebook**

1. Dump a team's play by play data, game data, and drive data from CollegeFootballData.com.
2. Change the file names in the next cell to match where your data files are located.
3. Change the away and home team names appropriately.
4. Hit Cell > Run All Cells.

In [248]:
import requests
import pandas as pd
import json
import html
import os.path

selected_team = 'Clemson'
selected_year = 2020

def verify_division(num1, num2):
    return 0 if num2 == 0 else (num1 / num2)

def retrieveCfbData(endpoint, year):
    file_path = f"data/{endpoint if (endpoint != 'plays') else 'pbp'}/{year}.csv"
    if (os.path.exists(file_path)):
        return file_path
#     res = requests.get(f"https://api.collegefootballdata.com/{endpoint}?seasonType=regular&year=2019&team={html.escape(team)}")
#     content = res.json()
#     with open(file_path, 'w') as f:
#         json.dump(content, f)
#     return json.dumps(content)
    

base_pbp = pd.read_csv(retrieveCfbData('plays', selected_year))
pbp_data = base_pbp[(base_pbp.offense == selected_team) | (base_pbp.defense == selected_team)]
# pbp_data.info()

drive_base = pd.read_csv(retrieveCfbData('drives', selected_year))
drive_base = drive_base[(drive_base.offense == selected_team) | (drive_base.defense == selected_team)]

games = pd.read_csv(retrieveCfbData('games', selected_year))
games = games[(games.home_team == selected_team) | (games.away_team == selected_team)]

drive_base.drop(['offense_conference','start_time.minutes','start_time.seconds','end_time.minutes','end_time.seconds','defense_conference','elapsed.seconds','elapsed.minutes',"drive_number","start_yards_to_goal","end_yards_to_goal"], axis = 1, inplace=True) 
base_drives = pd.merge(drive_base, games[['id','away_team','home_team']], left_on='game_id', right_on='id', how='right')
base_drives.rename(columns={'id_x':'drive_id'}, inplace=True)
base_drives.drop(['id_y'], axis = 1, inplace=True)

base_drives.loc[
    base_drives.defense == base_drives.away_team, ['offense']
] = base_drives.home_team

base_drives.loc[
    base_drives.defense == base_drives.home_team, ['offense']
] = base_drives.away_team

base_drives.loc[
    base_drives.offense == base_drives.away_team, ['start_yardline']
] = 100 - base_drives.start_yardline
base_drives.loc[
    base_drives.offense == base_drives.away_team, ['end_yardline']
] = 100 - base_drives.end_yardline

base_drives.dropna(inplace=True)
print(f"Clean Drives: {len(base_drives)}")


total_points = games[(games.home_team == selected_team)].home_points.sum() + games[(games.away_team == selected_team)].away_points.sum()
opp_total_points = games[(games.home_team != selected_team)].home_points.sum() + games[(games.away_team != selected_team)].away_points.sum()


Clean Drives: 103


In [249]:
# # Data Cleaning
# # Fix the bad yard line markers for away teams
# drives = pd.merge(base_drives, games[['id','away_team','home_team']], left_on='game_id', right_on='id', how='right')
# drives.rename(columns={'id_x':'drive_id'}, inplace=True)
# drives.drop(['id_y'], axis = 1, inplace=True)
# drives.dropna(inplace=True)
# drives.loc[
#     drives.offense == drives.away_team, ['start_yardline']
# ] = 100 - drives.start_yardline
# drives.loc[
#     drives.offense == drives.away_team, ['end_yardline']
# ] = 100 - drives.end_yardline

# print("Total Drives:", len(drives))
# drives

In [250]:
pbp_data = pbp_data[
    (pbp_data.down != 0)
]

pbp_data.loc[
    pbp_data.offense == pbp_data.home, ['yard_line']
] = 100 - pbp_data.yard_line

import numpy as np

pbp_data.distance = pbp_data.distance.astype(float)

bad_types = ["Interception","Pass Interception Return","Interception Return Touchdown",'Fumble Recovery (Opponent)','Sack','Fumble Return Touchdown']

def is_successful_vector(play):
    if (play.play_type in bad_types):
        return False 
    if ((play.down == 1) & (play.yards_gained >= (0.5 * play.distance))):
        return True
    elif ((play.down == 2)) & (play.yards_gained >= (0.7 * play.distance)):
        return True
    elif ((play.down == 3) & (play.yards_gained >= play.distance)):
        return True
    elif ((play.down == 4) & (play.yards_gained >= play.distance)):
        return True
    else:
        return False
    
pbp_data['play_successful'] = pbp_data.apply(lambda x: is_successful_vector(x), axis=1)
pbp_data['play_explosive'] = pbp_data.apply(lambda x: x.yards_gained >= 15, axis=1)

# Drop some unnecessary columns
# pbp_data.drop(['offense_conference','defense_conference','clock'], axis = 1, inplace=True)

In [251]:
def generate_fourth_down_conv_stats(team, offense):
    fourth_down = pbp_data[(pbp_data.down == 4) & (pbp_data.offense == selected_team)] if offense else pbp_data[(pbp_data.down == 4) & (pbp_data.defense == selected_team)]
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} 4th Down Conversion Stats for {team}")
    print("Total Fourth Downs:", len(fourth_down))
    punts = fourth_down[(fourth_down.play_type.str.lower().str.contains('punt')) | (fourth_down.play_text.str.lower().str.contains('punt'))]
    print("Total Punts:", len(punts))
    conv_attempts = fourth_down[~(fourth_down.play_type.str.lower().str.contains('punt')) & ~(fourth_down.play_text.str.lower().str.contains('punt')) & ~(fourth_down.play_type.isin(["Defensive 2pt Conversion","Blocked Field Goal","Blocked Punt","Missed Field Goal Return","Blocked Punt Touchdown","Missed Field Goal Return Touchdown","Extra Point Missed","Extra Point Good","Timeout","End of Half","End of Game","Uncategorized","Penalty","Kickoff","Kickoff Return (Offense)","Kickoff Return Touchdown","Punt", "Field Goal Good","Field Goal Missed","Safety"]))]
    print("Total Conversion Attempts:", len(conv_attempts))
#     print("Avg Conv Att Yard Line:", verify_division(sum(conv_attempts.yard_line),len(conv_attempts)))
    print("Avg Conv Att To Go:", verify_division(sum(conv_attempts.distance),len(conv_attempts)))
    print("Avg Conv Att Gain:", verify_division(sum(conv_attempts.yards_gained),len(conv_attempts)))
    print("Go Rate:", verify_division(len(conv_attempts),len(fourth_down)))
    print("ConvAtt/Punt:", verify_division(len(conv_attempts),len(punts)))
    
generate_fourth_down_conv_stats(selected_team, True)
print("\n---\n")
generate_fourth_down_conv_stats(selected_team, False)

Offensive 4th Down Conversion Stats for Clemson
Total Fourth Downs: 34
Total Punts: 14
Total Conversion Attempts: 3
Avg Conv Att To Go: 4.0
Avg Conv Att Gain: 3.6666666666666665
Go Rate: 0.08823529411764706
ConvAtt/Punt: 0.21428571428571427

---

Defensive 4th Down Conversion Stats for Clemson
Total Fourth Downs: 54
Total Punts: 28
Total Conversion Attempts: 9
Avg Conv Att To Go: 2.2222222222222223
Avg Conv Att Gain: 7.777777777777778
Go Rate: 0.16666666666666666
ConvAtt/Punt: 0.32142857142857145


In [252]:
st_types = ["Blocked Field Goal","Blocked Punt","Missed Field Goal Return","Blocked Punt Touchdown","Missed Field Goal Return Touchdown","Extra Point Missed","Extra Point Good","Kickoff","Kickoff Return (Offense)","Kickoff Return Touchdown","Punt", "Field Goal Good","Field Goal Missed"]
def generate_special_teams_stats(team, offense):
    poss = "Kicking Team" if offense else "Receiving Team"
    st_plays = pbp_data[(pbp_data.play_type.isin(st_types)) & (pbp_data.offense == team)] if offense else pbp_data[(pbp_data.play_type.isin(st_types)) & (pbp_data.defense == team)].copy()
    print(f"{poss} Special Teams Stats for {team}")
    fg_plays = st_plays[st_plays.play_type.str.contains("Field Goal")]
    fg_made = fg_plays[fg_plays.play_type.str.contains("Good")]
    print(f"FG Efficiency: {verify_division(len(fg_made), len(fg_plays))}")
    print(f"Avg FG Made Distance:",(fg_made.yard_line.mean()) + 17)
    print(f"Avg FG Att Distance:",(fg_plays.yard_line.mean()) + 17)
    
    kickoff_plays = st_plays[st_plays.play_type.str.contains("Kickoff") & ~(st_plays.play_text.str.contains("on-side"))]
    tmp = pd.DataFrame(data=kickoff_plays.play_text.str.extract('kickoff for (\d+) yds', expand=True).astype(float))
    kickoff_distance = pd.DataFrame()
    kickoff_distance["Index"] = tmp.index
    kickoff_distance["Yardline"] = kickoff_distance.apply(lambda x: 50 - (kickoff_plays.yard_line[x.Index] % 50),axis=1)
    kickoff_distance["PlayText"] = kickoff_distance.apply(lambda x: kickoff_plays.play_text[x.Index],axis=1)
    kickoff_distance["Distance"] = tmp.values
    kickoff_distance["Return"] = kickoff_distance.apply(lambda x: kickoff_plays.yards_gained[x.Index],axis=1)
    kickoff_distance["Net"] = kickoff_distance.Distance - kickoff_distance.Return
    
    print(f"Kickoff SR%: {verify_division(len(kickoff_distance[kickoff_distance.Net >= 40]), len(kickoff_distance))}")
    print(f"Avg Kickoff Distance:",kickoff_distance.Distance.mean())
    print(f"Kickoff Return SR% Allowed: {verify_division(len(kickoff_distance[kickoff_distance.Return >= 24]), len(kickoff_distance))}")
    print(f"Avg Kickoff Return Distance Allowed:",kickoff_distance.Return.mean())
    print(f"Natural Touchback Rate:", verify_division(len(kickoff_distance[(kickoff_distance.Yardline + kickoff_distance.Distance >= 100) & (kickoff_distance.Return == 0)]), len(kickoff_distance)))
    print(f"Touchback Zone Rate (any kicks beyond the 25):", verify_division(len(kickoff_distance[(kickoff_distance.Yardline + kickoff_distance.Distance >= 75) & (kickoff_distance.Return == 0)]), len(kickoff_distance)))
      
    punt_plays = st_plays[st_plays.play_type.str.contains("Punt")]
    pt_tmp = pd.DataFrame(data=punt_plays.play_text.str.extract('punt for (\d+) yds', expand=True).astype(float))
    punt_distance = pd.DataFrame()
    punt_distance["Index"] = pt_tmp.index
    punt_distance["Distance"] = pt_tmp.values
    punt_distance["Return"] = punt_distance.apply(lambda x: punt_plays.yards_gained[x.Index],axis=1)
    punt_distance["Net"] = punt_distance.Distance - punt_distance.Return
    print(f"Avg Punt Distance:",punt_distance.Distance.mean())
    print(f"Avg Punt Return Distance Allowed:",punt_distance.Return.mean())
    print(f"Avg Punt Net:",punt_distance.Net.mean())
    
    
generate_special_teams_stats(selected_team, True)
print("\n---\n")
generate_special_teams_stats(selected_team, False)

Kicking Team Special Teams Stats for Clemson
FG Efficiency: 0.625
Avg FG Made Distance: 42.2
Avg FG Att Distance: 42.5
Kickoff SR%: 0.9666666666666667
Avg Kickoff Distance: 64.5
Kickoff Return SR% Allowed: 0.03333333333333333
Avg Kickoff Return Distance Allowed: 2.6333333333333333
Natural Touchback Rate: 0.8
Touchback Zone Rate (any kicks beyond the 25): 0.8666666666666667
Avg Punt Distance: 44.714285714285715
Avg Punt Return Distance Allowed: 2.7142857142857144
Avg Punt Net: 42.0

---

Receiving Team Special Teams Stats for Clemson
FG Efficiency: 0.5714285714285714
Avg FG Made Distance: 36.75
Avg FG Att Distance: 38.14285714285714
Kickoff SR%: 0.8181818181818182
Avg Kickoff Distance: 61.09090909090909
Kickoff Return SR% Allowed: 0.18181818181818182
Avg Kickoff Return Distance Allowed: 11.545454545454545
Natural Touchback Rate: 0.45454545454545453
Touchback Zone Rate (any kicks beyond the 25): 0.7272727272727273
Avg Punt Distance: 43.370370370370374
Avg Punt Return Distance Allowed: 5.

In [253]:
penalty_plays = pbp_data[(pbp_data.play_type == "Penalty") | (pbp_data.play_text.str.contains("Penalty"))]

def generate_penalty_stats(player, penalty_name):
    return penalty_plays[(penalty_plays.play_text.str.contains(player)) & (penalty_plays.play_text.str.contains(penalty_name))]

generate_penalty_stats("Swilling, T", "pass interference")

Unnamed: 0,id,offense,offense_conference,defense,defense_conference,home,away,offense_score,defense_score,game_id,...,yards_to_goal,down,distance,scoring,yards_gained,play_type,play_text,ppa,play_successful,play_explosive


In [254]:
 
# Ignore some types of plays cause they're special teams and weird
ignore_types = ["Defensive 2pt Conversion","Blocked Field Goal","Blocked Punt","Missed Field Goal Return","Blocked Punt Touchdown","Missed Field Goal Return Touchdown","Extra Point Missed","Extra Point Good","Timeout","End of Half","End of Game","Uncategorized","Penalty","Kickoff","Kickoff Return (Offense)","Kickoff Return Touchdown","Punt", "Field Goal Good","Field Goal Missed","Safety"]
pbp_data = pbp_data[~(pbp_data.play_type.isin(ignore_types))]
base_pbp_data = pbp_data.copy()

# Eliminate garbage time plays
garbage_time_data = pbp_data[
    (pbp_data.down == 0)
    | ((abs(pbp_data.offense_score - pbp_data.defense_score) >= 38) & (pbp_data.period == 2))
    | ((abs(pbp_data.offense_score - pbp_data.defense_score) >= 28) & (pbp_data.period == 3))
    | ((abs(pbp_data.offense_score - pbp_data.defense_score) >= 22) & (pbp_data.period == 4))
]

print("Total Plays:", len(base_pbp_data))
print("Garbage Time Plays:", len(garbage_time_data))
print("% of plays in garbage time:", len(garbage_time_data)/len(pbp_data))
pbp_data.drop(garbage_time_data.index, inplace=True)
print("Non-Garbage Time Plays:", len(pbp_data))

# print("Total clean plays:", len(pbp_data))

Total Plays: 562
Garbage Time Plays: 179
% of plays in garbage time: 0.3185053380782918
Non-Garbage Time Plays: 383


In [255]:
pass_types = ["Pass Reception","Pass Incompletion","Passing Touchdown","Interception","Pass Interception Return","Interception Return Touchdown"]
rush_types = ["Rush","Rushing Touchdown",'Fumble Recovery (Opponent)','Sack','Fumble Return Touchdown']

print("Offensive Success Rates")
print(selected_team)
print("Overall:",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_successful == True)]) / len(base_pbp_data[(base_pbp_data.offense == selected_team)]))
print("Passing:",len(pbp_data[(pbp_data.offense == selected_team) & (pbp_data.play_successful == True) & (pbp_data.play_type.isin(pass_types))]) / len(pbp_data[(pbp_data.offense == selected_team) & (pbp_data.play_type.isin(pass_types))]))
print("Rushing:",len(pbp_data[(pbp_data.offense == selected_team) & (pbp_data.play_successful == True) & (pbp_data.play_type.isin(rush_types))]) / len(pbp_data[(pbp_data.offense == selected_team) & (pbp_data.play_type.isin(rush_types))]))
print("")
print("Success Rates Allowed")
print(selected_team)
print("Overall:",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_successful == True)]) / len(base_pbp_data[(base_pbp_data.defense == selected_team)]))
print("Passing:",len(pbp_data[(pbp_data.defense == selected_team) & (pbp_data.play_successful == True) & (pbp_data.play_type.isin(pass_types))]) / len(pbp_data[(pbp_data.defense == selected_team) & (pbp_data.play_type.isin(pass_types))]))
print("Rushing:",len(pbp_data[(pbp_data.defense == selected_team) & (pbp_data.play_successful == True) & (pbp_data.play_type.isin(rush_types))]) / len(pbp_data[(pbp_data.defense == selected_team) & (pbp_data.play_type.isin(rush_types))]))

Offensive Success Rates
Clemson
Overall: 0.4697986577181208
Passing: 0.5175438596491229
Rushing: 0.5238095238095238

Success Rates Allowed
Clemson
Overall: 0.2916666666666667
Passing: 0.2875
Rushing: 0.2891566265060241


In [256]:
downs = [1, 2, 3, 4]
print("Success Rates on Specific Downs (Off/Allowed)")
print(selected_team)
for d in downs:
    intermed = base_pbp_data[
        (base_pbp_data.down == d)
    ]
    print(f"Down {d}: {len(intermed[(intermed.play_successful == True) & (intermed.offense == selected_team)]) / len(intermed[(intermed.offense == selected_team)])} / {len(intermed[(intermed.play_successful == True) & (intermed.defense == selected_team)]) / len(intermed[(intermed.defense == selected_team)])}")

    print("")
    
print("Success Rate by Q")
for i in range(1, base_pbp_data.period.nunique()+1):
    print(f"Q{i}:", len(base_pbp_data[(base_pbp_data.period == i) & (base_pbp_data.offense == selected_team) & (base_pbp_data.play_successful == True)]) / len(base_pbp_data[(base_pbp_data.period == i) & (base_pbp_data.offense == selected_team)]),f" / ", len(base_pbp_data[(base_pbp_data.period == i) & (base_pbp_data.defense == selected_team) & (base_pbp_data.play_successful == True)]) / len(base_pbp_data[(base_pbp_data.period == i) & (base_pbp_data.defense == selected_team)]))


Success Rates on Specific Downs (Off/Allowed)
Clemson
Down 1: 0.48148148148148145 / 0.27184466019417475

Down 2: 0.42424242424242425 / 0.28735632183908044

Down 3: 0.5245901639344263 / 0.27692307692307694

Down 4: 0.3333333333333333 / 0.6666666666666666

Success Rate by Q
Q1: 0.5714285714285714  /  0.24489795918367346
Q2: 0.4725274725274725  /  0.2459016393442623
Q3: 0.4492753623188406  /  0.2875
Q4: 0.3333333333333333  /  0.36486486486486486


In [257]:
# Explosiveness rates
# Defined as rate of 15+ yards gains

print("Explosiveness Plays (Rates)")
print(selected_team)
print("Overall:",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_explosive == True)]),"/", len(base_pbp_data[(base_pbp_data.offense == selected_team)]),"(",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_explosive == True)]) / len(base_pbp_data[(base_pbp_data.offense == selected_team)]),")")
print("Passing:",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(pass_types))]),"/", len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_type.isin(pass_types))]),"(",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(pass_types))]) / len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_type.isin(pass_types))]),")")
print("Rushing:",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(rush_types))]),"/", len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_type.isin(rush_types))]),"(",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(rush_types))]) / len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_type.isin(rush_types))]),")")
print("")
print("Explosiveness Plays Allowed (Rates)")
print(selected_team)
print("Overall:",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_explosive == True)]),"/", len(base_pbp_data[(base_pbp_data.defense == selected_team)]),"(",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_explosive == True)]) / len(base_pbp_data[(base_pbp_data.defense == selected_team)]),")")
print("Passing:",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(pass_types))]),"/", len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_type.isin(pass_types))]),"(",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(pass_types))]) / len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_type.isin(pass_types))]),")")
print("Rushing:",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(rush_types))]),"/", len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_type.isin(rush_types))]),"(",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(rush_types))]) / len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_type.isin(rush_types))]),")")

Explosiveness Plays (Rates)
Clemson
Overall: 49 / 298 ( 0.1644295302013423 )
Passing: 34 / 146 ( 0.2328767123287671 )
Rushing: 15 / 150 ( 0.1 )

Explosiveness Plays Allowed (Rates)
Clemson
Overall: 27 / 264 ( 0.10227272727272728 )
Passing: 18 / 120 ( 0.15 )
Rushing: 9 / 143 ( 0.06293706293706294 )


In [258]:
# Havoc plays
havoc_plays = base_pbp_data[
    (((base_pbp_data.play_type == 'Pass Incompletion')
    & (base_pbp_data.play_text.str.contains('broken up', regex=False)))
    | (base_pbp_data.play_type == 'Fumble Recovery (Opponent)')
    | (base_pbp_data.play_type == 'Sack')
    | (base_pbp_data.play_type.str.contains('Interception', regex=False))
    | (base_pbp_data.yards_gained < 0))
    & (base_pbp_data.play_type != 'Penalty')
]

print(selected_team)
print("Havoc Caused Rate: ", len(havoc_plays[
    havoc_plays.defense == selected_team
]), "/", len(base_pbp_data[
    base_pbp_data.defense == selected_team
]), "(",len(havoc_plays[
    havoc_plays.defense == selected_team
]) / len(base_pbp_data[
    base_pbp_data.defense == selected_team
]),")")
print("Havoc Suffered Rate: ", len(havoc_plays[
    havoc_plays.offense == selected_team
]), "/", len(base_pbp_data[
    base_pbp_data.offense == selected_team
]), "(",len(havoc_plays[
    havoc_plays.offense == selected_team
]) / len(base_pbp_data[
    base_pbp_data.offense == selected_team
]),")")

Clemson
Havoc Caused Rate:  49 / 264 ( 0.1856060606060606 )
Havoc Suffered Rate:  29 / 298 ( 0.09731543624161074 )


In [259]:
selected_team_drives = base_drives[
    base_drives.offense == selected_team
]
print(selected_team)
print("Drives:", len(selected_team_drives))
print("Yards:",sum(selected_team_drives.yards))
print("Plays:",sum(selected_team_drives.plays))
print("Avg Starting Field Position:", selected_team_drives.start_yardline.mean())
print("Yards per Play:", sum(selected_team_drives.yards) / sum(selected_team_drives.plays))
print("Plays per Drive:", sum(selected_team_drives.plays) / len(selected_team_drives))
print("Yards per Drive:", sum(selected_team_drives.yards) / len(selected_team_drives))
print("Points per Drive: ", total_points / len(selected_team_drives))

print("")

opp_team_drives = base_drives[
    base_drives.defense == selected_team
]

print("Opponents")
print("Drives:", len(opp_team_drives))
print("Yards:",sum(opp_team_drives.yards))
print("Plays:",sum(opp_team_drives.plays))
print("Avg Starting Field Position:", opp_team_drives.start_yardline.mean())
print("Yards per Play:", sum(opp_team_drives.yards) / sum(opp_team_drives.plays))
print("Plays per Drive:", sum(opp_team_drives.plays) / len(opp_team_drives))
print("Yards per Drive:", sum(opp_team_drives.yards) / len(opp_team_drives))
print("Points per Drive: ", opp_total_points / len(opp_team_drives))

Clemson
Drives: 52
Yards: 1988.0
Plays: 307.0
Avg Starting Field Position: 31.98076923076923
Yards per Play: 6.47557003257329
Plays per Drive: 5.903846153846154
Yards per Drive: 38.23076923076923
Points per Drive:  3.25

Opponents
Drives: 51
Yards: 1146.0
Plays: 272.0
Avg Starting Field Position: 27.019607843137255
Yards per Play: 4.213235294117647
Plays per Drive: 5.333333333333333
Yards per Drive: 22.470588235294116
Points per Drive:  1.0392156862745099


In [260]:
# Measuring success rate for a single player
# pbp_data[
#     (pbp_data.play_text.str.contains("Quentin Harris"))
#     & (pbp_data.play_type.isin(pass_types))
#     & (~pbp_data.play_type.str.contains("Sack"))
# ].play_successful.value_counts(normalize=True)

In [261]:
# Standard vs Passing Downs success rates
# Success rate on standard downs == leverage rate

standard_downs = pbp_data[
    (pbp_data.down == 1)
    | ((pbp_data.down == 2) & (pbp_data.distance <= 7))
    | ((pbp_data.down == 3) & (pbp_data.distance <= 4))
    | ((pbp_data.down == 4) & (pbp_data.distance <= 4)) 
]

passing_downs = pbp_data[
    ((pbp_data.down == 2) & (pbp_data.distance >= 8))
    | ((pbp_data.down == 3) & (pbp_data.distance >= 5))
    | ((pbp_data.down == 4) & (pbp_data.distance >= 5)) 
]

In [262]:
print(selected_team,"Success Rate on Std Downs:",len(standard_downs[
    (standard_downs.offense == selected_team)
    & (standard_downs.play_successful == True)
]) / len(standard_downs[
    (standard_downs.offense == selected_team)
]))
print("Opp Success Rate on Std Downs:",len(standard_downs[
    (standard_downs.defense == selected_team)
    & (standard_downs.play_successful == True)
]) / len(standard_downs[
    (standard_downs.defense == selected_team)
]))

print(selected_team,"Success Rate on Passing Downs:",len(passing_downs[
    (passing_downs.offense == selected_team)
    & (passing_downs.play_successful == True)
]) / len(passing_downs[
    (passing_downs.offense == selected_team)
]))

print("Opp Success Rate on Passing Downs:",len(passing_downs[
    (passing_downs.defense == selected_team)
    & (passing_downs.play_successful == True)
]) / len(passing_downs[
    (passing_downs.defense == selected_team)
]))

Clemson Success Rate on Std Downs: 0.5855263157894737
Opp Success Rate on Std Downs: 0.3333333333333333
Clemson Success Rate on Passing Downs: 0.373134328358209
Opp Success Rate on Passing Downs: 0.22972972972972974


In [263]:
pass_plays = base_pbp_data[
    base_pbp_data.play_type.isin(pass_types)
]
rush_plays = base_pbp_data[
    base_pbp_data.play_type.isin(rush_types)
]

In [264]:
# Stuff Rate
stuffed_plays = rush_plays[
    (rush_plays.yards_gained <= 0)
    & (rush_plays.play_type != 'Sack')
]

In [265]:
selected_team_stuffs = stuffed_plays[
    stuffed_plays.defense == selected_team
]
print(selected_team,"Defensive Stuff Rate: ",len(selected_team_stuffs),"/",len(rush_plays[rush_plays.defense == selected_team]),"(",len(selected_team_stuffs)/len(rush_plays[rush_plays.defense == selected_team]),")")
print(selected_team,"Defensive Stuffs Suffered Rate: ",len(stuffed_plays[
    stuffed_plays.offense == selected_team
]),"/",len(rush_plays[rush_plays.offense == selected_team]),"(",len(stuffed_plays[
    stuffed_plays.offense == selected_team
])/len(rush_plays[rush_plays.offense == selected_team]),")")


Clemson Defensive Stuff Rate:  31 / 143 ( 0.21678321678321677 )
Clemson Defensive Stuffs Suffered Rate:  21 / 150 ( 0.14 )


In [266]:
# Line Yards -- not adjusted for down/distance/opponent/shotgun
def adjust_strength_for_ol(yards_gained):
    if (yards_gained < 0):
        return yards_gained * 1.25
    elif ((yards_gained >= 0) & (yards_gained <= 3)):
        return yards_gained * 1.0
    elif ((yards_gained >= 4) & (yards_gained <= 6)):
        return yards_gained * 0.5
    else:
        return 0

rush_plays['line_yards'] = rush_plays.apply(lambda x: adjust_strength_for_ol(x.yards_gained), axis=1)
rush_plays['highlight_yards'] = rush_plays.apply(lambda x: x.yards_gained - adjust_strength_for_ol(x.yards_gained), axis=1)

# Opportunity Rate
rush_opps = rush_plays[
    (rush_plays.yards_gained >= 4)
]
print(selected_team,"Rush Opp Rate: ",len(rush_opps[rush_opps.offense == selected_team]),"/",len(rush_plays[rush_plays.offense == selected_team]),"(",len(rush_opps[rush_opps.offense == selected_team])/len(rush_plays[rush_plays.offense == selected_team]),")")
print(selected_team,"Rush Opp Allowed Rate: ",len(rush_opps[rush_opps.defense == selected_team]),"/",len(rush_plays[rush_plays.defense == selected_team]),"(",len(rush_opps[rush_opps.defense == selected_team])/len(rush_plays[rush_plays.defense == selected_team]),")")

print(selected_team,"Line Yards per Carry:",sum(rush_plays[(rush_plays.offense == selected_team) & (rush_plays.yards_gained <= 6)].line_yards) / len(rush_plays[(rush_plays.offense == selected_team) & (rush_plays.yards_gained <= 6)]))
print(selected_team,"Highlight Yards per Carry:",sum(rush_plays[rush_plays.offense == selected_team].highlight_yards) / len(rush_plays[rush_plays.offense == selected_team]))
print(selected_team,"Yards per Carry:",sum(rush_plays[rush_plays.offense == selected_team].yards_gained) / len(rush_plays[rush_plays.offense == selected_team]))
print(selected_team,"Highlight Yards per Rush:",sum(rush_opps[rush_opps.offense == selected_team].highlight_yards) / len(rush_opps[rush_opps.offense == selected_team]))

print("")

print("Opp Line Yards per Carry:",sum(rush_plays[(rush_plays.defense == selected_team) & (rush_plays.yards_gained <= 6)].line_yards) / len(rush_plays[(rush_plays.defense == selected_team) & (rush_plays.yards_gained <= 6)]))
print("Opp Highlight Yards per Carry:",sum(rush_plays[rush_plays.defense == selected_team].highlight_yards) / len(rush_plays[rush_plays.defense == selected_team]))
print("Opp Yards per Carry:",sum(rush_plays[rush_plays.defense == selected_team].yards_gained) / len(rush_plays[rush_plays.defense == selected_team]))
print("Opp Highlight Yards per Rush:",sum(rush_opps[rush_opps.defense == selected_team].highlight_yards) / len(rush_opps[rush_opps.defense == selected_team]))

Clemson Rush Opp Rate:  77 / 150 ( 0.5133333333333333 )
Clemson Rush Opp Allowed Rate:  51 / 143 ( 0.35664335664335667 )
Clemson Line Yards per Carry: 0.7106481481481481
Clemson Highlight Yards per Carry: 4.6883333333333335
Clemson Yards per Carry: 5.2
Clemson Highlight Yards per Rush: 8.857142857142858

Opp Line Yards per Carry: -0.33405172413793105
Opp Highlight Yards per Carry: 3.194055944055944
Opp Yards per Carry: 2.923076923076923
Opp Highlight Yards per Rush: 8.284313725490197


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


In [267]:
# Expected and Adjusted Turnover Margins

adj_turnover_plays = base_pbp_data[
    (base_pbp_data.play_type.str.contains('Interception', regex=False))
    | ((base_pbp_data.play_type == 'Pass Incompletion')
    & (base_pbp_data.play_text.str.contains('broken up', regex=False)))
    | (base_pbp_data.play_type.str.contains('Fumble', regex=False))
]

adj_fum = 0.5 * len(adj_turnover_plays[
    (adj_turnover_plays.play_type.str.contains('Fumble', regex=False))
])

# selected_team Adj Turnovers
selected_team_tos = adj_turnover_plays[
    (adj_turnover_plays.offense == selected_team)
    | (adj_turnover_plays.defense == selected_team)
]
print(selected_team)
selected_team_ints_def = len(selected_team_tos[
   (selected_team_tos.play_type.str.contains('Interception', regex=False))
    & (selected_team_tos.defense == selected_team)
])

selected_team_ints_off = len(selected_team_tos[
   (selected_team_tos.play_type.str.contains('Interception', regex=False))
    & (selected_team_tos.offense == selected_team)
])

selected_team_pds = len(selected_team_tos[
    (((selected_team_tos.play_type == 'Pass Incompletion') & (selected_team_tos.play_text.str.contains('broken up', regex=False))) 
     | (selected_team_tos.play_type.str.contains('Interception', regex=False)))
    & (selected_team_tos.offense == selected_team)
])

selected_team_fum_rec = selected_team_tos[(selected_team_tos.play_type == 'Fumble Recovery (Opponent)') & (selected_team_tos.defense == selected_team)]
selected_team_fum_lost = selected_team_tos[(selected_team_tos.play_type == 'Fumble Recovery (Opponent)') & (selected_team_tos.offense == selected_team)]

print("Def INTs:",selected_team_ints_def)
print("Off INTs:",selected_team_ints_off)
print("Off PDs:",selected_team_pds)
print("Exp INTs:",0.22 * (selected_team_pds + selected_team_ints_off))
print("Fum Recovered:",len(selected_team_fum_rec))
print("Fum Lost:",len(selected_team_fum_lost))
print("Exp Fum:",adj_fum)
print("Actual TO:",selected_team_ints_off + len(selected_team_fum_lost))
print("Actual TO Margin:",(selected_team_ints_def + len(selected_team_fum_rec) - selected_team_ints_off) - len(selected_team_fum_lost))
selected_team_exp_to = (0.22 * (selected_team_pds + selected_team_ints_off)) + adj_fum
print("Exp TO:",selected_team_exp_to)

Clemson
Def INTs: 5
Off INTs: 1
Off PDs: 3
Exp INTs: 0.88
Fum Recovered: 1
Fum Lost: 2
Exp Fum: 3.0
Actual TO: 3
Actual TO Margin: 3
Exp TO: 3.88


In [268]:
to_luck = (selected_team_ints_off + len(selected_team_fum_lost) - selected_team_exp_to)
print("TO Luck for",selected_team,":",to_luck*5.0)
print("TO Luck/gm for",selected_team,":",(to_luck*5.0 / len(games.notna())))

TO Luck for Clemson : -4.3999999999999995
TO Luck/gm for Clemson : -0.39999999999999997


In [269]:
# Scoring Opportunities
# Definition: roughly, any time you get inside the opponent's 40, you should probably score

scoring_opps = base_drives[
    ((base_drives.start_yardline + base_drives.yards) >= 60)
]

# away team's scoring opps
print("Scoring Opportunities (IE: Drives inside Opponent's 40)")
print(selected_team)
selected_team_scoring_opps = scoring_opps[
    scoring_opps.offense == selected_team
]
print("Total:",len(selected_team_scoring_opps))
print("Scored:",len(selected_team_scoring_opps[selected_team_scoring_opps.scoring == True]))
print("Opp Efficiency:",len(selected_team_scoring_opps[selected_team_scoring_opps.scoring == True]) / len(selected_team_scoring_opps))
print("Opps/Drive:",len(selected_team_scoring_opps) / len(base_drives[base_drives.offense == selected_team]))
print("Points/Opp:",(len(selected_team_scoring_opps[selected_team_scoring_opps.drive_result == 'TD']) * 7 + len(selected_team_scoring_opps[selected_team_scoring_opps.drive_result == 'FG'] * 3)) / len(selected_team_scoring_opps))

print("")
# Opponents' scoring opps
print("Opponents")
def_scoring_opps = scoring_opps[
    scoring_opps.defense == selected_team
]
print("Total:",len(def_scoring_opps))
print("Scored:",len(def_scoring_opps[def_scoring_opps.scoring == True]))
print("Opp Efficiency:",len(def_scoring_opps[def_scoring_opps.scoring == True]) / len(def_scoring_opps))
print("Opps/Drive:",len(def_scoring_opps) / len(base_drives[base_drives.defense == selected_team]))
print("Points/Opp:",(len(def_scoring_opps[def_scoring_opps.drive_result == 'TD']) * 7 + len(def_scoring_opps[def_scoring_opps.drive_result == 'FG'] * 3)) / len(def_scoring_opps))

Scoring Opportunities (IE: Drives inside Opponent's 40)
Clemson
Total: 32
Scored: 26
Opp Efficiency: 0.8125
Opps/Drive: 0.6153846153846154
Points/Opp: 4.71875

Opponents
Total: 18
Scored: 9
Opp Efficiency: 0.5
Opps/Drive: 0.35294117647058826
Points/Opp: 2.1666666666666665


In [270]:
def calculate_success_in_scoring_opps(opps, team):
    opp_ids = opps.drive_id.unique()
    success = 0
    total = 0
    for opp_id in opp_ids:
        opp_set = base_pbp_data[(base_pbp_data.drive_id == opp_id)]
        opp_s_rate = verify_division(len(opp_set[opp_set.play_successful == True]), len(opp_set))
#         print(f"{team} SR% in opp {opp_id}: {opp_s_rate}")
        success += len(opp_set[opp_set.play_successful == True])
        total += len(opp_set)
    s_rate = 0 if total == 0 else (success / total)
    print(f"{team} total SR% in scoring opps: {s_rate}")

calculate_success_in_scoring_opps(selected_team_scoring_opps, selected_team)
calculate_success_in_scoring_opps(def_scoring_opps, "Opponents")

Clemson total SR% in scoring opps: 0.5688073394495413
Opponents total SR% in scoring opps: 0.4153846153846154


In [271]:
###### Explosiveness rates
# Defined as rate of 15+ yards gains
def generate_exp_rate_stats(team, offense):
    team_plays = base_pbp_data[(base_pbp_data.offense == team)] if offense else base_pbp_data[(base_pbp_data.defense == team)]
    exp_plays = team_plays[(team_plays.play_explosive == True)]
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} Exp Rate Stats for {team}")
    print(f"Total Exp Plays: {len(exp_plays)}")
    print(f"Overall Exp %: {verify_division(len(exp_plays),len(team_plays))}")
    print(f"Pass Exp %: {verify_division(len(exp_plays[(exp_plays.play_type.isin(pass_types))]),len(team_plays[(team_plays.play_type.isin(pass_types))]))}")
    print(f"Pass Exp % (Std Downs): {verify_division(len(standard_downs[(standard_downs.offense == team) & (standard_downs.play_explosive == True) & (standard_downs.play_type.isin(pass_types))]),len(standard_downs[(standard_downs.offense == team) & (standard_downs.play_type.isin(pass_types))]))}")
    print(f"Pass Exp % (Pas Downs): {verify_division(len(passing_downs[(passing_downs.offense == team) & (passing_downs.play_explosive == True) & (passing_downs.play_type.isin(pass_types))]),len(passing_downs[(passing_downs.offense == team) & (passing_downs.play_type.isin(pass_types))]))}")
    print(f"Rush Exp %: {verify_division(len(exp_plays[(exp_plays.play_type.isin(rush_types))]),len(team_plays[(team_plays.play_type.isin(rush_types))]))}")

print("Explosiveness Rates")
generate_exp_rate_stats(selected_team, True)
print("\n---\n")
generate_exp_rate_stats(selected_team, False)

Explosiveness Rates
Offensive Exp Rate Stats for Clemson
Total Exp Plays: 49
Overall Exp %: 0.1644295302013423
Pass Exp %: 0.2328767123287671
Pass Exp % (Std Downs): 0.27692307692307694
Pass Exp % (Pas Downs): 0.22448979591836735
Rush Exp %: 0.1

---

Defensive Exp Rate Stats for Clemson
Total Exp Plays: 27
Overall Exp %: 0.10227272727272728
Pass Exp %: 0.15
Pass Exp % (Std Downs): 0.27692307692307694
Pass Exp % (Pas Downs): 0.22448979591836735
Rush Exp %: 0.06293706293706294


In [272]:
###### Stop Rate
## Defined as defensive drives that end in turnovers, punts, or turnovers on downs
import re
stop_types = ['TURNOVER ON DOWNS','PUNT','FUMBLE','INT', 'SAFETY']
def generate_stop_rate_stats(team, offense):
    team_drives = base_drives[(base_drives.offense == team)] if offense else base_drives[(base_drives.defense == team)]
    stop_drives = team_drives[(team_drives.drive_result.str.contains("|".join(stop_types)) == True)]
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} Stop Stats for {team}")
    print(f"Total: {len(stop_drives)}")
    print(f"Stop Rate %: {verify_division(len(stop_drives),len(team_drives))}")
    print(f"Stops/ScOpp: {verify_division(len(stop_drives),len(team_drives[((team_drives.start_yardline + team_drives.yards) >= 60)]))}")
print("Defensive Stop Rates")
print("---")
generate_stop_rate_stats(selected_team, False)
print("\n---\n")
generate_stop_rate_stats(selected_team, True)

Defensive Stop Rates
---
Defensive Stop Stats for Clemson
Total: 33
Stop Rate %: 0.6470588235294118
Stops/ScOpp: 1.8333333333333333

---

Offensive Stop Stats for Clemson
Total: 18
Stop Rate %: 0.34615384615384615
Stops/ScOpp: 0.5625


In [273]:
# conv_down_plays = base_pbp_data[base_pbp_data.down <= 4]
def generate_conv_down_stats(team, offense):
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} Conv Down Stats for {team}")
    team_off_plays = base_pbp_data[base_pbp_data.offense == team] if offense else base_pbp_data[base_pbp_data.defense == team]
    team_conv_downs = team_off_plays[(team_off_plays.down >= 3) & ~(team_off_plays.play_type.str.lower().str.contains('punt')) & ~(team_off_plays.play_text.str.lower().str.contains('punt'))]
    succ_conv = team_conv_downs[team_conv_downs.yards_gained >= team_conv_downs.distance]
    team_drives = base_drives[base_drives.offense == team] if offense else base_drives[base_drives.defense == team]
    print(f"Conv Downs: {len(team_conv_downs)}")
    print(f"Yds/CV: {verify_division(sum(team_conv_downs.distance),len(team_conv_downs))}")
    print(f"Conv Down %: {verify_division(len(team_conv_downs),sum(team_drives.plays))}")
    print(f"Conv/Drive: {verify_division(len(team_conv_downs),len(team_drives))}")
    print(f"Succ Conv: {len(succ_conv)}")
    print(f"Succ Conv %: {verify_division(len(succ_conv),len(team_conv_downs))}")
    print(f"Yds/SC: {verify_division(sum(succ_conv.distance),len(succ_conv))}")
    print(f"SC/Drive: {verify_division(len(succ_conv),len(team_drives))}")
    
generate_conv_down_stats(selected_team, True)
print("\n---\n")
generate_conv_down_stats(selected_team, False)
    

Offensive Conv Down Stats for Clemson
Conv Downs: 64
Yds/CV: 7.390625
Conv Down %: 0.20846905537459284
Conv/Drive: 1.2307692307692308
Succ Conv: 34
Succ Conv %: 0.53125
Yds/SC: 6.352941176470588
SC/Drive: 0.6538461538461539

---

Defensive Conv Down Stats for Clemson
Conv Downs: 74
Yds/CV: 7.743243243243243
Conv Down %: 0.27205882352941174
Conv/Drive: 1.4509803921568627
Succ Conv: 25
Succ Conv %: 0.33783783783783783
Yds/SC: 6.44
SC/Drive: 0.49019607843137253


In [274]:
# Rushing Stats
rush_carries = rush_plays[
    (~rush_plays.play_type.isin(['Fumble Recovery (Opponent)','Fumble Return Touchdown']))
]

print("Aggregate Rushing Stats")
def generate_rush_stats(team, offense):
    selected_team_carries = rush_carries[(rush_carries.offense == team)] if offense else rush_carries[(rush_carries.defense == team)]
    selected_team_rush_opps = selected_team_carries[selected_team_carries.yards_gained >= 4]
    selected_team_rush_stuffs = selected_team_carries[selected_team_carries.yards_gained <= 0]
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} Rush Stats for {team}")
    print("Carries: ",len(selected_team_carries))
    print("Yards: ", sum(selected_team_carries.yards_gained))
    print("Yards Per Carry: ", verify_division(sum(selected_team_carries.yards_gained), len(selected_team_carries)))
    print("Line Yards Per Carry: ", verify_division(sum(selected_team_carries[(selected_team_carries.yards_gained <= 6)].line_yards), len(selected_team_carries[(selected_team_carries.yards_gained <= 6)])))
    print("Rush Success Rate: ", verify_division(len(selected_team_carries[selected_team_carries.play_successful == True]), len(selected_team_carries)))
    print("Rush Opp Rate: ", verify_division(len(selected_team_rush_opps), len(selected_team_carries)))
    print("Hlt Yards Per Opp: ", verify_division(sum(selected_team_rush_opps.highlight_yards), len(selected_team_rush_opps)))
    print("Stuff Rate: ", verify_division(len(selected_team_rush_stuffs), len(selected_team_carries)))
    print("Rush Exp Rate: ", verify_division(len(selected_team_carries[selected_team_carries.play_explosive == True]), len(selected_team_carries)))
    
generate_rush_stats(selected_team, True)
print("\n---\n")
generate_rush_stats(selected_team, False)

Aggregate Rushing Stats
Offensive Rush Stats for Clemson
Carries:  148
Yards:  757
Yards Per Carry:  5.114864864864865
Line Yards Per Carry:  0.7106481481481481
Rush Success Rate:  0.4864864864864865
Rush Opp Rate:  0.5067567567567568
Hlt Yards Per Opp:  8.786666666666667
Stuff Rate:  0.17567567567567569
Rush Exp Rate:  0.0945945945945946

---

Defensive Rush Stats for Clemson
Carries:  142
Yards:  401
Yards Per Carry:  2.823943661971831
Line Yards Per Carry:  -0.33405172413793105
Rush Success Rate:  0.2535211267605634
Rush Opp Rate:  0.352112676056338
Hlt Yards Per Opp:  8.11
Stuff Rate:  0.34507042253521125
Rush Exp Rate:  0.056338028169014086


In [275]:
def calculate_havoc_stats(team, offense):
    selected_team_plays = base_pbp_data[(base_pbp_data.offense == team)] if offense else base_pbp_data[(base_pbp_data.defense == team)]
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} Havoc Stats for {team}")
    selected_team_tfl = selected_team_plays[(selected_team_plays.yards_gained < 0) & ~(selected_team_plays.play_text.str.lower().str.contains('penalty'))]
    selected_team_sacks = selected_team_plays[(selected_team_plays.play_type.str.contains('Sack'))]
    selected_team_pbu = selected_team_plays[((selected_team_plays.play_type == 'Pass Incompletion')
    & (selected_team_plays.play_text.str.contains('broken up', regex=False))) ]
    selected_team_ff = selected_team_plays[(selected_team_plays.play_type == 'Fumble Recovery (Opponent)')]
    print(f"TFL: {len(selected_team_tfl)}")
    print(f"Sacks: {len(selected_team_sacks)}")
    print(f"PBU: {len(selected_team_pbu)}")
    print(f"FF: {len(selected_team_ff)}")
    print(f"Havoc Rate: {verify_division(len(selected_team_tfl) + len(selected_team_pbu) + len(selected_team_ff), len(selected_team_plays))}")
    
calculate_havoc_stats(selected_team, False)
print("\n---\n")
calculate_havoc_stats(selected_team, True)

Defensive Havoc Stats for Clemson
TFL: 38
Sacks: 18
PBU: 5
FF: 1
Havoc Rate: 0.16666666666666666

---

Offensive Havoc Stats for Clemson
TFL: 22
Sacks: 5
PBU: 2
FF: 2
Havoc Rate: 0.087248322147651


In [276]:
##### 
def generate_passing_lines(downs, team, offense):
    team_downs = downs[downs.offense == team] if offense else downs[downs.defense == team]
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} Passing Line for {team}")
    team_passes = team_downs[(team_downs.play_type == 'Pass Reception')
    | (team_downs.play_type == 'Passing Touchdown')
    | (team_downs.play_type == 'Pass Incompletion')
    | (team_downs.play_type.str.contains('Interception'))]
    team_completions = team_passes[(team_passes.play_type == "Pass Reception") | (team_passes.play_type == "Passing Touchdown")]
    team_sacks = team_downs[team_downs.play_type.str.lower().str.contains('sack')]
    print("Comp:",len(team_completions))
    print("Att:",len(team_passes))
    print("Yards:",sum(team_completions.yards_gained))
    print("INT:",len(team_passes[team_passes.play_type.str.contains('Interception')]))
    print("Sacks:",len(team_sacks))
    print("Sack Yds:",sum(team_sacks.yards_gained))
    print("Pass Exp%:", verify_division(len(team_passes[team_passes.play_explosive == True]), len(team_passes)))
    
print("Overall")
generate_passing_lines(base_pbp_data, selected_team, True)
print("\n---\n")
generate_passing_lines(base_pbp_data, selected_team, False)
print("")

print("Standard Downs")
generate_passing_lines(standard_downs, selected_team, True)
print("\n---\n")
generate_passing_lines(standard_downs, selected_team, False)
print("")

print("Passing Downs")
generate_passing_lines(passing_downs, selected_team, True)
print("\n---\n")
generate_passing_lines(passing_downs, selected_team, False)

Overall
Offensive Passing Line for Clemson
Comp: 100
Att: 146
Yards: 1225
INT: 1
Sacks: 5
Sack Yds: -40
Pass Exp%: 0.2328767123287671

---

Defensive Passing Line for Clemson
Comp: 58
Att: 120
Yards: 775
INT: 5
Sacks: 18
Sack Yds: -100
Pass Exp%: 0.15

Standard Downs
Offensive Passing Line for Clemson
Comp: 49
Att: 65
Yards: 571
INT: 0
Sacks: 2
Sack Yds: -9
Pass Exp%: 0.27692307692307694

---

Defensive Passing Line for Clemson
Comp: 15
Att: 38
Yards: 130
INT: 2
Sacks: 3
Sack Yds: -13
Pass Exp%: 0.07894736842105263

Passing Downs
Offensive Passing Line for Clemson
Comp: 31
Att: 49
Yards: 437
INT: 0
Sacks: 2
Sack Yds: -22
Pass Exp%: 0.22448979591836735

---

Defensive Passing Line for Clemson
Comp: 24
Att: 42
Yards: 339
INT: 2
Sacks: 7
Sack Yds: -44
Pass Exp%: 0.19047619047619047


In [277]:
roster = pd.read_json(f"https://api.collegefootballdata.com/player/search?searchTerm={html.escape(' ').replace(' ', '%20')}&team={html.escape(selected_team).replace(' ', '%20')}")
# roster

def generate_position_usage(plays, position, team):
    players = roster[roster.position == position]
    initials = []
    for indx, p in players.iterrows():
        initials.append(f"{p['firstName'][0]}\. {p['lastName']}")
    initl_join = "|".join(initials)
    regex_join = "|".join(players.name)
    comb_join = f"{initl_join}|{regex_join}"
    usg = plays[(plays.play_text.str.contains(comb_join, regex=True))]
    return usg
    
def calculate_havoc_portion(position, team):
    havoc_total = base_pbp_data[
        (base_pbp_data.defense == team)
        &
        (((base_pbp_data.play_type == 'Pass Incompletion')
        & (base_pbp_data.play_text.str.contains('broken up', regex=False)))
        | (base_pbp_data.play_type == 'Fumble Recovery (Opponent)')
        | (base_pbp_data.play_type == 'Sack')
        | (base_pbp_data.play_type.str.contains('Interception', regex=False))
        | (base_pbp_data.yards_gained < 0))
        & (base_pbp_data.play_type != 'Penalty')
    ]
    pos_usg = generate_position_usage(havoc_total, position, team)
    print(f"Havoc Portion for {team}'s {position} Unit: {verify_division(len(pos_usg),len(havoc_total))} ({len(pos_usg)}/{len(havoc_total)})")
    
calculate_havoc_portion('DL',selected_team)
calculate_havoc_portion('LB',selected_team)
calculate_havoc_portion('DB',selected_team)

Havoc Portion for Clemson's DL Unit: 1.0 (49/49)
Havoc Portion for Clemson's LB Unit: 0.1836734693877551 (9/49)
Havoc Portion for Clemson's DB Unit: 0.0 (0/49)


In [278]:
def generate_play_breakdowns(base_plays,team, offense):
    plays = base_plays[(base_plays.offense == team)] if offense else base_plays[(base_plays.defense == team)]
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} Play Breakdown for {team}")
    print(f"Run %:",(100 * verify_division(len(plays[plays.play_type.isin(rush_types)]), len(plays))))
    print(f"Run SR%:",(100 * verify_division(len(plays[(plays.play_type.isin(rush_types)) & (plays.play_successful == True)]), len(plays[(plays.play_type.isin(rush_types))]))))
    print(f"Pass %:",(100 * verify_division(len(plays[plays.play_type.isin(pass_types)]), len(plays))))
    print(f"Pass SR%:",(100 * verify_division(len(plays[(plays.play_type.isin(pass_types)) & (plays.play_successful == True)]), len(plays[(plays.play_type.isin(pass_types))]))))
    print(f"Other %:",(100 * verify_division(len(plays[(~plays.play_type.isin(pass_types)) & (~plays.play_type.isin(rush_types))]), len(plays))))

print("Overall")
generate_play_breakdowns(base_pbp_data, selected_team, True)
print("\n")
generate_play_breakdowns(base_pbp_data, selected_team, False)
    
print("\n---\nStandard Downs")
generate_play_breakdowns(standard_downs, selected_team, True)
print("")
generate_play_breakdowns(standard_downs, selected_team, False)    
    
print("\n---\nPassing Downs")
generate_play_breakdowns(passing_downs, selected_team, True)
print("\n")
generate_play_breakdowns(passing_downs, selected_team, False)


print("\n---\n1st Half")
# generate_play_breakdowns(base_pbp_data[(base_pbp_data.period == 1) | (base_pbp_data.period == 2)], selected_team, True)
# print("\n")
generate_play_breakdowns(pbp_data[(pbp_data.period == 1) | (pbp_data.period == 2)], selected_team, False)

print("\n---\n2nd Half")
# generate_play_breakdowns(base_pbp_data[(base_pbp_data.period == 3) | (base_pbp_data.period == 4)], selected_team, True)
# print("\n")
generate_play_breakdowns(pbp_data[(pbp_data.period == 3) | (pbp_data.period == 4)], selected_team, False)


Overall
Offensive Play Breakdown for Clemson
Run %: 50.33557046979866
Run SR%: 48.0
Pass %: 48.99328859060403
Pass SR%: 46.57534246575342
Other %: 0.6711409395973155


Defensive Play Breakdown for Clemson
Run %: 54.166666666666664
Run SR%: 25.174825174825177
Pass %: 45.45454545454545
Pass SR%: 34.166666666666664
Other %: 0.3787878787878788

---
Standard Downs
Offensive Play Breakdown for Clemson
Run %: 57.23684210526315
Run SR%: 58.620689655172406
Pass %: 42.76315789473684
Pass SR%: 58.46153846153847
Other %: 0.0

Defensive Play Breakdown for Clemson
Run %: 56.666666666666664
Run SR%: 37.254901960784316
Pass %: 42.22222222222222
Pass SR%: 28.947368421052634
Other %: 1.1111111111111112

---
Passing Downs
Offensive Play Breakdown for Clemson
Run %: 26.865671641791046
Run SR%: 22.22222222222222
Pass %: 73.13432835820896
Pass SR%: 42.857142857142854
Other %: 0.0


Defensive Play Breakdown for Clemson
Run %: 43.24324324324324
Run SR%: 15.625
Pass %: 56.75675675675676
Pass SR%: 28.5714285714

In [279]:
for i in range(1,5):
    print(f"---\nPlay Calling on Down {i}")
    generate_play_breakdowns(base_pbp_data[base_pbp_data.down == i], selected_team, True)
    print("\n")
    generate_play_breakdowns(base_pbp_data[base_pbp_data.down == i], selected_team, False)
    print("\n")

---
Play Calling on Down 1
Offensive Play Breakdown for Clemson
Run %: 57.03703703703704
Run SR%: 49.35064935064935
Pass %: 42.96296296296296
Pass SR%: 46.55172413793103
Other %: 0.0


Defensive Play Breakdown for Clemson
Run %: 55.33980582524271
Run SR%: 21.052631578947366
Pass %: 43.689320388349515
Pass SR%: 35.55555555555556
Other %: 0.9708737864077669


---
Play Calling on Down 2
Offensive Play Breakdown for Clemson
Run %: 43.43434343434344
Run SR%: 41.86046511627907
Pass %: 56.56565656565656
Pass SR%: 42.857142857142854
Other %: 0.0


Defensive Play Breakdown for Clemson
Run %: 47.12643678160919
Run SR%: 21.951219512195124
Pass %: 52.87356321839081
Pass SR%: 34.78260869565217
Other %: 0.0


---
Play Calling on Down 3
Offensive Play Breakdown for Clemson
Run %: 47.540983606557376
Run SR%: 51.724137931034484
Pass %: 50.81967213114754
Pass SR%: 54.83870967741935
Other %: 1.639344262295082


Defensive Play Breakdown for Clemson
Run %: 56.92307692307692
Run SR%: 27.027027027027028
Pass

In [280]:
yard_ranges = [[0,3], [4,7], [8,100]]
for yards in yard_ranges:
    print(f"---\nPlay Calling when Yards to Go between {yards}")
    generate_play_breakdowns(base_pbp_data[(base_pbp_data.distance.between(yards[0],yards[1]))], selected_team, True)
    print("\n")
    generate_play_breakdowns(base_pbp_data[(base_pbp_data.distance.between(yards[0],yards[1]))], selected_team, False)
    print("\n")

---
Play Calling when Yards to Go between [0, 3]
Offensive Play Breakdown for Clemson
Run %: 73.91304347826086
Run SR%: 70.58823529411765
Pass %: 21.73913043478261
Pass SR%: 50.0
Other %: 4.3478260869565215


Defensive Play Breakdown for Clemson
Run %: 69.6969696969697
Run SR%: 52.17391304347826
Pass %: 30.303030303030305
Pass SR%: 60.0
Other %: 0.0


---
Play Calling when Yards to Go between [4, 7]
Offensive Play Breakdown for Clemson
Run %: 45.28301886792453
Run SR%: 45.83333333333333
Pass %: 54.71698113207547
Pass SR%: 58.620689655172406
Other %: 0.0


Defensive Play Breakdown for Clemson
Run %: 53.125
Run SR%: 29.411764705882355
Pass %: 46.875
Pass SR%: 26.666666666666668
Other %: 0.0


---
Play Calling when Yards to Go between [8, 100]
Offensive Play Breakdown for Clemson
Run %: 46.231155778894475
Run SR%: 40.21739130434783
Pass %: 53.768844221105525
Pass SR%: 42.99065420560748
Other %: 0.0


Defensive Play Breakdown for Clemson
Run %: 51.75879396984925
Run SR%: 18.446601941747574

In [281]:
ep_data = pd.read_csv('results/ep.csv', sep=",", encoding='iso-8859-1')
def generate_epa_comparison(base_plays, team, offense):
    plays = base_plays[base_plays.offense == team] if offense else base_plays[base_plays.defense  == team]
    drvs = base_drives[(base_drives.offense == team) & (base_drives.drive_id.isin(plays.drive_id))] if offense else base_drives[(base_drives.defense  == team) & (base_drives.drive_id.isin(plays.drive_id))]
    sample_pbp = plays.copy()
    sample_pbp['epa'] = sample_pbp.apply(lambda x: ep_data.iloc[(min(x.yard_line + x.yards_gained, 100))].ep - ep_data.iloc[(x.yard_line)].ep, axis=1)
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} EPA Breakdown for {team}")
    print(f"Total EPA: {sample_pbp.epa.sum()}")
    print(f"EPA/Drive: {sample_pbp.epa.sum() / len(drvs)}")
    print(f"Avg EPA: {sample_pbp.epa.mean()}")
    print(f"Median EPA: {sample_pbp.epa.median()}")
    print(f"Avg Rush EPA: {sample_pbp[(sample_pbp.play_type.isin(rush_types))].epa.mean()}")
    print(f"Mdn Rush EPA: {sample_pbp[(sample_pbp.play_type.isin(rush_types))].epa.median()}")
    print(f"Avg Pass EPA: {sample_pbp[(sample_pbp.play_type.isin(pass_types))].epa.mean()}")
    print(f"Mdn Pass EPA: {sample_pbp[(sample_pbp.play_type.isin(pass_types))].epa.median()}")
    print(f"Total IsoPPP: {sample_pbp[sample_pbp.play_successful == True].epa.sum()}")
    print(f"Avg IsoPPP: {sample_pbp[sample_pbp.play_successful == True].epa.mean()}")
    print(f"Median IsoPPP: {sample_pbp[sample_pbp.play_successful == True].epa.median()}")
    print(f"Avg Rush IsoPPP: {sample_pbp[(sample_pbp.play_type.isin(rush_types)) & (sample_pbp.play_successful == True)].epa.mean()}")
    print(f"Mdn Rush IsoPPP: {sample_pbp[(sample_pbp.play_type.isin(rush_types)) & (sample_pbp.play_successful == True)].epa.median()}")
    print(f"Avg Pass IsoPPP: {sample_pbp[(sample_pbp.play_type.isin(pass_types)) & (sample_pbp.play_successful == True)].epa.mean()}")
    print(f"Mdn Pass IsoPPP: {sample_pbp[(sample_pbp.play_type.isin(pass_types)) & (sample_pbp.play_successful == True)].epa.median()}")
    
    
print("Overall")
generate_epa_comparison(base_pbp_data, selected_team, True)
print("\n")
generate_epa_comparison(base_pbp_data, selected_team, False)
    
print("\n---\nStandard Downs")
generate_epa_comparison(standard_downs, selected_team, True)
print("")
generate_epa_comparison(standard_downs, selected_team, False)    
    
print("\n---\nPassing Downs")
generate_epa_comparison(passing_downs, selected_team, True)
print("\n")
generate_epa_comparison(passing_downs, selected_team, False)

Overall
Offensive EPA Breakdown for Clemson
Total EPA: 102.8632677226635
EPA/Drive: 1.9781397638973748
Avg EPA: 0.34517875074719295
Median EPA: 0.15978730430554977
Avg Rush EPA: 0.2725529557725266
Mdn Rush EPA: 0.15872928226651806
Avg Pass EPA: 0.42551621861145883
Mdn Pass EPA: 0.1728632533558221
Total IsoPPP: 90.23095512701705
Avg IsoPPP: 0.6445068223358366
Median IsoPPP: 0.4966330732076014
Avg Rush IsoPPP: 0.4307967122531595
Mdn Rush IsoPPP: 0.3259575296587086
Avg Pass IsoPPP: 0.8707881153645528
Mdn Pass IsoPPP: 0.6930601984689522


Defensive EPA Breakdown for Clemson
Total EPA: 66.28476356138574
EPA/Drive: 1.2997012463016813
Avg EPA: 0.251078649853734
Median EPA: 0.06170515667248733
Avg Rush EPA: 0.1580631464571448
Mdn Rush EPA: 0.07594670703727946
Avg Pass EPA: 0.3645190282773674
Mdn Pass EPA: 0.0
Total IsoPPP: 58.02862775004001
Avg IsoPPP: 0.7536185422083121
Median IsoPPP: 0.5700642453540996
Avg Rush IsoPPP: 0.5530119016066182
Mdn Rush IsoPPP: 0.4245369376612722
Avg Pass IsoPPP: 0

In [282]:
def generate_ppa_comparison(base_plays, team, offense):
    plays = base_plays[base_plays.offense == team] if offense else base_plays[base_plays.defense  == team]
    drvs = base_drives[(base_drives.offense == team) & (base_drives.drive_id.isin(plays.drive_id))] if offense else base_drives[(base_drives.defense  == team) & (base_drives.drive_id.isin(plays.drive_id))]
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} CFBData.com PPA Breakdown for {team}")
    print(f"Total PPA: {plays.ppa.sum()}")
    print(f"PPA/Drive: {plays.ppa.sum() / len(drvs)}")
    print(f"Avg PPA: {plays.ppa.mean()}")
    print(f"Median PPA: {plays.ppa.median()}")
    print(f"Avg Rush PPA: {plays[(plays.play_type.isin(rush_types))].ppa.mean()}")
    print(f"Mdn Rush PPA: {plays[(plays.play_type.isin(rush_types))].ppa.median()}")
    print(f"Avg Pass PPA: {plays[(plays.play_type.isin(pass_types))].ppa.mean()}")
    print(f"Mdn Pass PPA: {plays[(plays.play_type.isin(pass_types))].ppa.median()}")
    
        
print("Overall")
generate_ppa_comparison(base_pbp_data, selected_team, True)
print("\n")
generate_ppa_comparison(base_pbp_data, selected_team, False)
    
print("\n---\nStandard Downs")
generate_ppa_comparison(standard_downs, selected_team, True)
print("")
generate_ppa_comparison(standard_downs, selected_team, False)    
    
print("\n---\nPassing Downs")
generate_ppa_comparison(passing_downs, selected_team, True)
print("\n")
generate_ppa_comparison(passing_downs, selected_team, False)

Overall
Offensive CFBData.com PPA Breakdown for Clemson
Total PPA: 97.92401133280147
PPA/Drive: 1.8831540640923359
Avg PPA: 0.33421164277406623
Median PPA: -0.061871630862047
Avg Rush PPA: 0.19078453805472892
Mdn Rush PPA: -0.0558269791892986
Avg Pass PPA: 0.5014710257238408
Mdn Pass PPA: -0.0606605607881895


Defensive CFBData.com PPA Breakdown for Clemson
Total PPA: -2.004163468944533
PPA/Drive: -0.03929732292048104
Avg PPA: -0.00770832103440203
Median PPA: -0.2847274108109399
Avg Rush PPA: -0.1334448448123522
Mdn Rush PPA: -0.3057652594509188
Avg Pass PPA: 0.14455132758990097
Mdn Pass PPA: -0.2343610470075812

---
Standard Downs
Offensive CFBData.com PPA Breakdown for Clemson
Total PPA: 52.75255042840358
PPA/Drive: 1.3882250112737784
Avg PPA: 0.35168366952269065
Median PPA: 0.0894443857413222
Avg Rush PPA: 0.29708839805543324
Mdn Rush PPA: 0.0801976735266375
Avg Pass PPA: 0.4230774860567963
Mdn Pass PPA: 0.12588335328053502

Defensive CFBData.com PPA Breakdown for Clemson
Total PPA:

In [283]:
base_pbp_data[(base_pbp_data.play_type.isin(rush_types))].head(25)
# Pre-2017
# Offense
# (QB) pass incomplete to (WR)
# (QB) pass complete to (WR)
# (QB) pass incomplete to (WR), broken up by (defender)
# (RB) run for # yds

# 2017+
# Offense
# get list of seniors/redshirt juniors for team
# get list of top 11 offensive players in terms of PPA or usage for year; sum PPA or usage
    # of players, find graduates/draftees and sum PPA or usage
    # subtract draftee PPA or usage from total and get % of left over PPA or usage

# Pre and post 2017
# Defense
# measure attrition by number of havoc plays missing


# OR 

# 2012-2017
# Offense/Defense
# Sum PPA for team
# Calculate seniors/draftees' plays involved and sum PPA (roster info spotty around 2014, year == 4)
# subtract draftee PPA or usage from total and get % of left over PPA or usage

Unnamed: 0,id,offense,offense_conference,defense,defense_conference,home,away,offense_score,defense_score,game_id,...,yards_to_goal,down,distance,scoring,yards_gained,play_type,play_text,ppa,play_successful,play_explosive
1840,401234604101878402,Clemson,ACC,Miami,ACC,Clemson,Miami,0,0,401234604,...,43,1,10.0,False,0,Rush,Travis Etienne run for no gain to the MiaFl 43,-0.939169,False,False
1842,401234604101896101,Clemson,ACC,Miami,ACC,Clemson,Miami,0,0,401234604,...,39,3,6.0,False,2,Rush,Trevor Lawrence run for 2 yds to the MiaFl 37,-0.623262,False,False
1844,401234604101898901,Clemson,ACC,Miami,ACC,Clemson,Miami,0,0,401234604,...,32,1,10.0,False,8,Rush,Travis Etienne run for 8 yds to the MiaFl 24,0.530754,True,False
1847,401234604101907901,Miami,ACC,Clemson,ACC,Clemson,Miami,0,7,401234604,...,75,1,10.0,False,-5,Sack,D'Eriq King sacked by Baylon Spector for a los...,-1.067074,False,False
1854,401234604101926901,Miami,ACC,Clemson,ACC,Clemson,Miami,0,7,401234604,...,47,3,10.0,False,-8,Sack,D'Eriq King sacked by Myles Murphy and Baylon ...,-1.439201,False,False
1856,401234604101935101,Clemson,ACC,Miami,ACC,Clemson,Miami,7,0,401234604,...,89,1,10.0,False,5,Rush,Travis Etienne run for 5 yds to the Clem 16,0.071184,True,False
1860,401234604101954401,Clemson,ACC,Miami,ACC,Clemson,Miami,7,0,401234604,...,76,1,10.0,False,10,Rush,Lyn-J Dixon run for 10 yds to the Clem 34 for ...,,True,False
1861,401234604101957001,Clemson,ACC,Miami,ACC,Clemson,Miami,7,0,401234604,...,66,1,10.0,False,14,Rush,D.J. Uiagalelei run for 14 yds to the Clem 48 ...,1.328715,True,False
1863,401234604101965701,Clemson,ACC,Miami,ACC,Clemson,Miami,7,0,401234604,...,52,2,10.0,False,1,Rush,Lyn-J Dixon run for 1 yd to the Clem 49,-0.80295,False,False
1867,401234604101978801,Clemson,ACC,Miami,ACC,Clemson,Miami,7,0,401234604,...,34,1,10.0,False,5,Rush,Travis Etienne run for 5 yds to the MiaFl 29,0.08068,True,False
