## Team Stat Calculations
Author: Akshay Easwaran <akeaswaran@me.com>

---
Based on work by ESPN's Bill Connelly, Football Outsiders, and Football Study Hall

Data from CollegeFootballData.com

Useful Resources for Stat Definitions:
- Football Study Hall website: https://www.footballstudyhall.com/
- Football Study Hall book: https://www.amazon.com/Study-Hall-College-Football-Stories/dp/1484989961
- Football Outsiders: https://www.footballoutsiders.com/info/glossary
- Example Advanced Box Score: https://twitter.com/ESPN_BillC/status/1176572006969597954/photo/1

**How to use this Notebook**

1. Dump a team's play by play data, game data, and drive data from CollegeFootballData.com.
2. Change the file names in the next cell to match where your data files are located.
3. Change the away and home team names appropriately.
4. Hit Cell > Run All Cells.

In [81]:
import requests
import pandas as pd
import json
import html
import os.path

selected_team = 'Georgia Tech'
total_points = 200
opp_total_points = 389

def verify_division(num1, num2):
    return 0 if num2 == 0 else (num1 / num2)

def retrieveCfbData(endpoint, team):
    file_path = f"data/{endpoint if (endpoint != 'plays') else 'pbp'}/{endpoint[:-1] if (endpoint != 'plays') else 'pbp'}-data-{team.lower().replace(' ','-')}.json"
    if (os.path.exists(file_path)):
        return file_path
    res = requests.get(f"https://api.collegefootballdata.com/{endpoint}?seasonType=regular&year=2019&team={html.escape(team)}")
    content = res.json()
    with open(file_path, 'w') as f:
        json.dump(content, f)
    return json.dumps(content)
    

pbp_data = pd.read_json(retrieveCfbData('plays',selected_team))
# pbp_data.info()

base_drives = pd.read_json(retrieveCfbData('drives',selected_team))
base_drives.drop(['offense_conference','start_time','end_time','defense_conference','elapsed','start_period','end_period'], axis = 1, inplace=True) 
base_drives = base_drives[
    ~base_drives.drive_result.isin(['END OF HALF','END OF GAME','Uncategorized'])
]

games = pd.read_json(retrieveCfbData('games',selected_team))

In [82]:
# Data Cleaning
# Fix the bad yard line markers for away teams
drives = pd.merge(base_drives, games[['id','away_team','home_team']], left_on='game_id', right_on='id', how='right')
drives.rename(columns={'id_x':'drive_id'}, inplace=True)
drives.drop(['id_y'], axis = 1, inplace=True)
drives.dropna(inplace=True)
drives.loc[
    drives.offense == drives.away_team, ['start_yardline']
] = 100 - drives.start_yardline
drives.loc[
    drives.offense == drives.away_team, ['end_yardline']
] = 100 - drives.end_yardline

print("Total Drives:", len(drives))
# drives

Total Drives: 275


In [83]:
pbp_data = pbp_data[
    (pbp_data.down != 0)
]

import numpy as np

pbp_data.distance = pbp_data.distance.astype(float)

bad_types = ["Interception","Pass Interception Return","Interception Return Touchdown",'Fumble Recovery (Opponent)','Sack','Fumble Return Touchdown']

def is_successful_vector(play):
    if (play.play_type in bad_types):
        return False 
    if ((play.down == 1) & (play.yards_gained >= (0.5 * play.distance))):
        return True
    elif ((play.down == 2)) & (play.yards_gained >= (0.7 * play.distance)):
        return True
    elif ((play.down == 3) & (play.yards_gained >= play.distance)):
        return True
    elif ((play.down == 4) & (play.yards_gained >= play.distance)):
        return True
    else:
        return False
    
pbp_data['play_successful'] = pbp_data.apply(lambda x: is_successful_vector(x), axis=1)
pbp_data['play_explosive'] = pbp_data.apply(lambda x: x.yards_gained >= 15, axis=1)

# Drop some unnecessary columns
pbp_data.drop(['offense_conference','defense_conference','clock'], axis = 1, inplace=True) 
# Ignore some types of plays cause they're special teams and weird
ignore_types = ["Defensive 2pt Conversion","Blocked Field Goal","Blocked Punt","Missed Field Goal Return","Blocked Punt Touchdown","Missed Field Goal Return Touchdown","Extra Point Missed","Extra Point Good","Timeout","End of Half","End of Game","Uncategorized","Penalty","Kickoff","Kickoff Return (Offense)","Kickoff Return Touchdown","Punt", "Field Goal Good","Field Goal Missed","Safety"]
pbp_data = pbp_data[~(pbp_data.play_type.isin(ignore_types))]
base_pbp_data = pbp_data.copy()

# Eliminate garbage time plays
garbage_time_data = pbp_data[
    (pbp_data.down == 0)
    | ((abs(pbp_data.offense_score - pbp_data.defense_score) >= 38) & (pbp_data.period == 2))
    | ((abs(pbp_data.offense_score - pbp_data.defense_score) >= 28) & (pbp_data.period == 3))
    | ((abs(pbp_data.offense_score - pbp_data.defense_score) >= 22) & (pbp_data.period == 4))
]

print("Total Plays:", len(base_pbp_data))
print("Garbage Time Plays:", len(garbage_time_data))
print("% of plays in garbage time:", len(garbage_time_data)/len(pbp_data))
pbp_data.drop(garbage_time_data.index, inplace=True)
print("Non-Garbage Time Plays:", len(pbp_data))

# print("Total clean plays:", len(pbp_data))

Total Plays: 1586
Garbage Time Plays: 209
% of plays in garbage time: 0.1317780580075662
Non-Garbage Time Plays: 1377


In [123]:
pass_types = ["Pass Reception","Pass Incompletion","Passing Touchdown","Interception","Pass Interception Return","Interception Return Touchdown"]
rush_types = ["Rush","Rushing Touchdown",'Fumble Recovery (Opponent)','Sack','Fumble Return Touchdown']

print("Offensive Success Rates")
print(selected_team)
print("Overall:",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_successful == True)]) / len(base_pbp_data[(base_pbp_data.offense == selected_team)]))
print("Passing:",len(pbp_data[(pbp_data.offense == selected_team) & (pbp_data.play_successful == True) & (pbp_data.play_type.isin(pass_types))]) / len(pbp_data[(pbp_data.offense == selected_team) & (pbp_data.play_type.isin(pass_types))]))
print("Rushing:",len(pbp_data[(pbp_data.offense == selected_team) & (pbp_data.play_successful == True) & (pbp_data.play_type.isin(rush_types))]) / len(pbp_data[(pbp_data.offense == selected_team) & (pbp_data.play_type.isin(rush_types))]))
print("")
print("Success Rates Allowed")
print(selected_team)
print("Overall:",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_successful == True)]) / len(base_pbp_data[(base_pbp_data.defense == selected_team)]))
print("Passing:",len(pbp_data[(pbp_data.defense == selected_team) & (pbp_data.play_successful == True) & (pbp_data.play_type.isin(pass_types))]) / len(pbp_data[(pbp_data.defense == selected_team) & (pbp_data.play_type.isin(pass_types))]))
print("Rushing:",len(pbp_data[(pbp_data.defense == selected_team) & (pbp_data.play_successful == True) & (pbp_data.play_type.isin(rush_types))]) / len(pbp_data[(pbp_data.defense == selected_team) & (pbp_data.play_type.isin(rush_types))]))

Offensive Success Rates
Georgia Tech
Overall: 0.3542538354253835
Passing: 0.3183856502242152
Rushing: 0.37142857142857144

Success Rates Allowed
Georgia Tech
Overall: 0.42692750287687
Passing: 0.4115755627009646
Rushing: 0.4337078651685393


In [85]:
downs = [1, 2, 3, 4]
print("Success Rates on Specific Downs (Off/Allowed)")
print(selected_team)
for d in downs:
    intermed = base_pbp_data[
        (base_pbp_data.down == d)
    ]
    print(f"Down {d}: {len(intermed[(intermed.play_successful == True) & (intermed.offense == selected_team)]) / len(intermed[(intermed.offense == selected_team)])} / {len(intermed[(intermed.play_successful == True) & (intermed.defense == selected_team)]) / len(intermed[(intermed.defense == selected_team)])}")

    print("")
    
print("Success Rate by Q")
for i in range(1, base_pbp_data.period.nunique()+1):
    print(f"Q{i}:", len(base_pbp_data[(base_pbp_data.period == i) & (base_pbp_data.offense == selected_team) & (base_pbp_data.play_successful == True)]) / len(base_pbp_data[(base_pbp_data.period == i) & (base_pbp_data.offense == selected_team)]),f" / ", len(base_pbp_data[(base_pbp_data.period == i) & (base_pbp_data.defense == selected_team) & (base_pbp_data.play_successful == True)]) / len(base_pbp_data[(base_pbp_data.period == i) & (base_pbp_data.defense == selected_team)]))


Success Rates on Specific Downs (Off/Allowed)
Georgia Tech
Down 1: 0.3848684210526316 / 0.4166666666666667

Down 2: 0.33766233766233766 / 0.40625

Down 3: 0.3090909090909091 / 0.4550561797752809

Down 4: 0.47058823529411764 / 0.6842105263157895

Success Rate by Q
Q1: 0.3107344632768362  /  0.46017699115044247
Q2: 0.36065573770491804  /  0.4204081632653061
Q3: 0.32941176470588235  /  0.42574257425742573
Q4: 0.4166666666666667  /  0.41397849462365593
Q5: 0.2857142857142857  /  0.1


In [86]:
# Explosiveness rates
# Defined as rate of 15+ yards gains

print("Explosiveness Plays (Rates)")
print(selected_team)
print("Overall:",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_explosive == True)]),"/", len(base_pbp_data[(base_pbp_data.offense == selected_team)]),"(",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_explosive == True)]) / len(base_pbp_data[(base_pbp_data.offense == selected_team)]),")")
print("Passing:",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(pass_types))]),"/", len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_type.isin(pass_types))]),"(",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(pass_types))]) / len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_type.isin(pass_types))]),")")
print("Rushing:",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(rush_types))]),"/", len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_type.isin(rush_types))]),"(",len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(rush_types))]) / len(base_pbp_data[(base_pbp_data.offense == selected_team) & (base_pbp_data.play_type.isin(rush_types))]),")")
print("")
print("Explosiveness Plays Allowed (Rates)")
print(selected_team)
print("Overall:",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_explosive == True)]),"/", len(base_pbp_data[(base_pbp_data.defense == selected_team)]),"(",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_explosive == True)]) / len(base_pbp_data[(base_pbp_data.defense == selected_team)]),")")
print("Passing:",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(pass_types))]),"/", len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_type.isin(pass_types))]),"(",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(pass_types))]) / len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_type.isin(pass_types))]),")")
print("Rushing:",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(rush_types))]),"/", len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_type.isin(rush_types))]),"(",len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_explosive == True) & (base_pbp_data.play_type.isin(rush_types))]) / len(base_pbp_data[(base_pbp_data.defense == selected_team) & (base_pbp_data.play_type.isin(rush_types))]),")")

Explosiveness Plays (Rates)
Georgia Tech
Overall: 79 / 717 ( 0.1101813110181311 )
Passing: 42 / 268 ( 0.15671641791044777 )
Rushing: 37 / 442 ( 0.083710407239819 )

Explosiveness Plays Allowed (Rates)
Georgia Tech
Overall: 95 / 869 ( 0.1093210586881473 )
Passing: 64 / 330 ( 0.19393939393939394 )
Rushing: 31 / 533 ( 0.058161350844277676 )


In [87]:
# Havoc plays
havoc_plays = base_pbp_data[
    (((base_pbp_data.play_type == 'Pass Incompletion')
    & (base_pbp_data.play_text.str.contains('broken up', regex=False)))
    | (base_pbp_data.play_type == 'Fumble Recovery (Opponent)')
    | (base_pbp_data.play_type == 'Sack')
    | (base_pbp_data.play_type.str.contains('Interception', regex=False))
    | (base_pbp_data.yards_gained < 0))
    & (base_pbp_data.play_type != 'Penalty')
]

print(selected_team)
print("Havoc Caused Rate: ", len(havoc_plays[
    havoc_plays.defense == selected_team
]), "/", len(base_pbp_data[
    base_pbp_data.defense == selected_team
]), "(",len(havoc_plays[
    havoc_plays.defense == selected_team
]) / len(base_pbp_data[
    base_pbp_data.defense == selected_team
]),")")
print("Havoc Suffered Rate: ", len(havoc_plays[
    havoc_plays.offense == selected_team
]), "/", len(base_pbp_data[
    base_pbp_data.offense == selected_team
]), "(",len(havoc_plays[
    havoc_plays.offense == selected_team
]) / len(base_pbp_data[
    base_pbp_data.offense == selected_team
]),")")

Georgia Tech
Havoc Caused Rate:  111 / 869 ( 0.1277330264672037 )
Havoc Suffered Rate:  119 / 717 ( 0.16596931659693165 )


In [88]:
selected_team_drives = drives[
    drives.offense == selected_team
]
print(selected_team)
print("Drives:", len(selected_team_drives))
print("Yards:",sum(selected_team_drives.yards))
print("Plays:",sum(selected_team_drives.plays))
print("Avg Starting Field Position:", selected_team_drives.start_yardline.mean())
print("Yards per Play:", sum(selected_team_drives.yards) / sum(selected_team_drives.plays))
print("Plays per Drive:", sum(selected_team_drives.plays) / len(selected_team_drives))
print("Yards per Drive:", sum(selected_team_drives.yards) / len(selected_team_drives))
print("Points per Drive: ", total_points / len(selected_team_drives))

print("")

opp_team_drives = drives[
    drives.defense == selected_team
]

print("Opponents")
print("Drives:", len(opp_team_drives))
print("Yards:",sum(opp_team_drives.yards))
print("Plays:",sum(opp_team_drives.plays))
print("Avg Starting Field Position:", opp_team_drives.start_yardline.mean())
print("Yards per Play:", sum(opp_team_drives.yards) / sum(opp_team_drives.plays))
print("Plays per Drive:", sum(opp_team_drives.plays) / len(opp_team_drives))
print("Yards per Drive:", sum(opp_team_drives.yards) / len(opp_team_drives))
print("Points per Drive: ", opp_total_points / len(opp_team_drives))

Georgia Tech
Drives: 140
Yards: 3355
Plays: 691
Avg Starting Field Position: 26.714285714285715
Yards per Play: 4.855282199710564
Plays per Drive: 4.935714285714286
Yards per Drive: 23.964285714285715
Points per Drive:  1.4285714285714286

Opponents
Drives: 135
Yards: 4908
Plays: 843
Avg Starting Field Position: 33.2962962962963
Yards per Play: 5.822064056939502
Plays per Drive: 6.2444444444444445
Yards per Drive: 36.355555555555554
Points per Drive:  2.8814814814814813


In [89]:
# Measuring success rate for a single player
# pbp_data[
#     (pbp_data.play_text.str.contains("Quentin Harris"))
#     & (pbp_data.play_type.isin(pass_types))
#     & (~pbp_data.play_type.str.contains("Sack"))
# ].play_successful.value_counts(normalize=True)

In [90]:
# Standard vs Passing Downs success rates
# Success rate on standard downs == leverage rate

standard_downs = pbp_data[
    (pbp_data.down == 1)
    | ((pbp_data.down == 2) & (pbp_data.distance <= 7))
    | ((pbp_data.down == 3) & (pbp_data.distance <= 4))
    | ((pbp_data.down == 4) & (pbp_data.distance <= 4)) 
]

passing_downs = pbp_data[
    ((pbp_data.down == 2) & (pbp_data.distance >= 8))
    | ((pbp_data.down == 3) & (pbp_data.distance >= 5))
    | ((pbp_data.down == 4) & (pbp_data.distance >= 5)) 
]

In [91]:
print(selected_team,"Success Rate on Std Downs:",len(standard_downs[
    (standard_downs.offense == selected_team)
    & (standard_downs.play_successful == True)
]) / len(standard_downs[
    (standard_downs.offense == selected_team)
]))
print("Opp Success Rate on Std Downs:",len(standard_downs[
    (standard_downs.defense == selected_team)
    & (standard_downs.play_successful == True)
]) / len(standard_downs[
    (standard_downs.defense == selected_team)
]))

print(selected_team,"Success Rate on Passing Downs:",len(passing_downs[
    (passing_downs.offense == selected_team)
    & (passing_downs.play_successful == True)
]) / len(passing_downs[
    (passing_downs.offense == selected_team)
]))

print("Opp Success Rate on Passing Downs:",len(passing_downs[
    (passing_downs.defense == selected_team)
    & (passing_downs.play_successful == True)
]) / len(passing_downs[
    (passing_downs.defense == selected_team)
]))

Georgia Tech Success Rate on Std Downs: 0.4138817480719794
Opp Success Rate on Std Downs: 0.4537037037037037
Georgia Tech Success Rate on Passing Downs: 0.23893805309734514
Opp Success Rate on Passing Downs: 0.34234234234234234


In [92]:
pass_plays = base_pbp_data[
    base_pbp_data.play_type.isin(pass_types)
]
rush_plays = base_pbp_data[
    base_pbp_data.play_type.isin(rush_types)
]

In [93]:
# Stuff Rate
stuffed_plays = rush_plays[
    (rush_plays.yards_gained <= 0)
    & (rush_plays.play_type != 'Sack')
]

In [94]:
selected_team_stuffs = stuffed_plays[
    stuffed_plays.defense == selected_team
]
print(selected_team,"Defensive Stuff Rate: ",len(selected_team_stuffs),"/",len(rush_plays[rush_plays.defense == selected_team]),"(",len(selected_team_stuffs)/len(rush_plays[rush_plays.defense == selected_team]),")")
print(selected_team,"Defensive Stuffs Suffered Rate: ",len(stuffed_plays[
    stuffed_plays.offense == selected_team
]),"/",len(rush_plays[rush_plays.offense == selected_team]),"(",len(stuffed_plays[
    stuffed_plays.offense == selected_team
])/len(rush_plays[rush_plays.offense == selected_team]),")")


Georgia Tech Defensive Stuff Rate:  77 / 533 ( 0.14446529080675422 )
Georgia Tech Defensive Stuffs Suffered Rate:  92 / 442 ( 0.2081447963800905 )


In [95]:
# Line Yards -- not adjusted for down/distance/opponent/shotgun
def adjust_strength_for_ol(yards_gained):
    if (yards_gained < 0):
        return yards_gained * 1.25
    elif ((yards_gained >= 0) & (yards_gained <= 3)):
        return yards_gained * 1.0
    elif ((yards_gained >= 4) & (yards_gained <= 6)):
        return yards_gained * 0.5
    else:
        return 0

rush_plays['line_yards'] = rush_plays.apply(lambda x: adjust_strength_for_ol(x.yards_gained), axis=1)
rush_plays['highlight_yards'] = rush_plays.apply(lambda x: x.yards_gained - adjust_strength_for_ol(x.yards_gained), axis=1)

# Opportunity Rate
rush_opps = rush_plays[
    (rush_plays.yards_gained >= 4)
]
print(selected_team,"Rush Opp Rate: ",len(rush_opps[rush_opps.offense == selected_team]),"/",len(rush_plays[rush_plays.offense == selected_team]),"(",len(rush_opps[rush_opps.offense == selected_team])/len(rush_plays[rush_plays.offense == selected_team]),")")
print(selected_team,"Rush Opp Allowed Rate: ",len(rush_opps[rush_opps.defense == selected_team]),"/",len(rush_plays[rush_plays.defense == selected_team]),"(",len(rush_opps[rush_opps.defense == selected_team])/len(rush_plays[rush_plays.defense == selected_team]),")")

print(selected_team,"Line Yards per Carry:",sum(rush_plays[rush_plays.offense == selected_team].line_yards) / len(rush_plays[rush_plays.offense == selected_team]))
print(selected_team,"Highlight Yards per Carry:",sum(rush_plays[rush_plays.offense == selected_team].highlight_yards) / len(rush_plays[rush_plays.offense == selected_team]))
print(selected_team,"Yards per Carry:",sum(rush_plays[rush_plays.offense == selected_team].yards_gained) / len(rush_plays[rush_plays.offense == selected_team]))
print(selected_team,"Highlight Yards per Rush:",sum(rush_opps[rush_opps.offense == selected_team].highlight_yards) / len(rush_opps[rush_opps.offense == selected_team]))

print("")

print("Opp Line Yards per Carry:",sum(rush_plays[rush_plays.defense == selected_team].line_yards) / len(rush_plays[rush_plays.defense == selected_team]))
print("Opp Highlight Yards per Carry:",sum(rush_plays[rush_plays.defense == selected_team].highlight_yards) / len(rush_plays[rush_plays.defense == selected_team]))
print("Opp Yards per Carry:",sum(rush_plays[rush_plays.defense == selected_team].yards_gained) / len(rush_plays[rush_plays.defense == selected_team]))
print("Opp Highlight Yards per Rush:",sum(rush_opps[rush_opps.defense == selected_team].highlight_yards) / len(rush_opps[rush_opps.defense == selected_team]))

Georgia Tech Rush Opp Rate:  207 / 442 ( 0.4683257918552036 )
Georgia Tech Rush Opp Allowed Rate:  255 / 533 ( 0.47842401500938087 )
Georgia Tech Line Yards per Carry: 0.18438914027149322
Georgia Tech Highlight Yards per Carry: 4.478506787330317
Georgia Tech Yards per Carry: 4.66289592760181
Georgia Tech Highlight Yards per Rush: 9.227053140096618

Opp Line Yards per Carry: 0.9202626641651032
Opp Highlight Yards per Carry: 4.083489681050657
Opp Yards per Carry: 5.0037523452157595
Opp Highlight Yards per Rush: 8.362745098039216


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]


In [96]:
# Expected and Adjusted Turnover Margins

adj_turnover_plays = base_pbp_data[
    (base_pbp_data.play_type.str.contains('Interception', regex=False))
    | ((base_pbp_data.play_type == 'Pass Incompletion')
    & (base_pbp_data.play_text.str.contains('broken up', regex=False)))
    | (base_pbp_data.play_type.str.contains('Fumble', regex=False))
]

adj_fum = 0.5 * len(adj_turnover_plays[
    (adj_turnover_plays.play_type.str.contains('Fumble', regex=False))
])

# selected_team Adj Turnovers
selected_team_tos = adj_turnover_plays[
    (adj_turnover_plays.offense == selected_team)
    | (adj_turnover_plays.defense == selected_team)
]
print(selected_team)
selected_team_ints_def = len(selected_team_tos[
   (selected_team_tos.play_type.str.contains('Interception', regex=False))
    & (selected_team_tos.defense == selected_team)
])

selected_team_ints_off = len(selected_team_tos[
   (selected_team_tos.play_type.str.contains('Interception', regex=False))
    & (selected_team_tos.offense == selected_team)
])

selected_team_pds = len(selected_team_tos[
    (((selected_team_tos.play_type == 'Pass Incompletion') & (selected_team_tos.play_text.str.contains('broken up', regex=False))) 
     | (selected_team_tos.play_type.str.contains('Interception', regex=False)))
    & (selected_team_tos.offense == selected_team)
])

selected_team_fum_rec = selected_team_tos[(selected_team_tos.play_type == 'Fumble Recovery (Opponent)') & (selected_team_tos.defense == selected_team)]
selected_team_fum_lost = selected_team_tos[(selected_team_tos.play_type == 'Fumble Recovery (Opponent)') & (selected_team_tos.offense == selected_team)]

print("Def INTs:",selected_team_ints_def)
print("Off INTs:",selected_team_ints_off)
print("Off PDs:",selected_team_pds)
print("Exp INTs:",0.22 * (selected_team_pds + selected_team_ints_off))
print("Fum Recovered:",len(selected_team_fum_rec))
print("Fum Lost:",len(selected_team_fum_lost))
print("Exp Fum:",adj_fum)
print("Actual TO:",selected_team_ints_off + len(selected_team_fum_lost))
print("Actual TO Margin:",(selected_team_ints_def + len(selected_team_fum_rec) - selected_team_ints_off) - len(selected_team_fum_lost))
selected_team_exp_to = (0.22 * (selected_team_pds + selected_team_ints_off)) + adj_fum
print("Exp TO:",selected_team_exp_to)

Georgia Tech
Def INTs: 7
Off INTs: 10
Off PDs: 25
Exp INTs: 7.7
Fum Recovered: 5
Fum Lost: 8
Exp Fum: 13.0
Actual TO: 18
Actual TO Margin: -6
Exp TO: 20.7


In [97]:
to_luck = (selected_team_ints_off + len(selected_team_fum_lost) - selected_team_exp_to)
print("TO Luck for",selected_team,":",to_luck*5.0)
print("TO Luck/gm for",selected_team,":",(to_luck*5.0 / len(games.notna())))

TO Luck for Georgia Tech : -13.499999999999996
TO Luck/gm for Georgia Tech : -1.1249999999999998


In [98]:
# Scoring Opportunities
# Definition: roughly, any time you get inside the opponent's 40, you should probably score

scoring_opps = drives[
    ((drives.start_yardline + drives.yards) >= 60)
]

# away team's scoring opps
print("Scoring Opportunities (IE: Drives inside Opponent's 40)")
print(selected_team)
selected_team_scoring_opps = scoring_opps[
    scoring_opps.offense == selected_team
]
print("Total:",len(selected_team_scoring_opps))
print("Scored:",len(selected_team_scoring_opps[selected_team_scoring_opps.scoring == True]))
print("Opp Efficiency:",len(selected_team_scoring_opps[selected_team_scoring_opps.scoring == True]) / len(selected_team_scoring_opps))
print("Opps/Drive:",len(selected_team_scoring_opps) / len(drives[drives.offense == selected_team]))
print("Points/Opp:",(len(selected_team_scoring_opps[selected_team_scoring_opps.drive_result == 'TD']) * 7 + len(selected_team_scoring_opps[selected_team_scoring_opps.drive_result == 'FG'] * 3)) / len(selected_team_scoring_opps))

print("")
# Opponents' scoring opps
print("Opponents")
def_scoring_opps = scoring_opps[
    scoring_opps.defense == selected_team
]
print("Total:",len(def_scoring_opps))
print("Scored:",len(def_scoring_opps[def_scoring_opps.scoring == True]))
print("Opp Efficiency:",len(def_scoring_opps[def_scoring_opps.scoring == True]) / len(def_scoring_opps))
print("Opps/Drive:",len(def_scoring_opps) / len(drives[drives.defense == selected_team]))
print("Points/Opp:",(len(def_scoring_opps[def_scoring_opps.drive_result == 'TD']) * 7 + len(def_scoring_opps[def_scoring_opps.drive_result == 'FG'] * 3)) / len(def_scoring_opps))

Scoring Opportunities (IE: Drives inside Opponent's 40)
Georgia Tech
Total: 51
Scored: 28
Opp Efficiency: 0.5490196078431373
Opps/Drive: 0.36428571428571427
Points/Opp: 3.4901960784313726

Opponents
Total: 81
Scored: 61
Opp Efficiency: 0.7530864197530864
Opps/Drive: 0.6
Points/Opp: 4.08641975308642


In [103]:
def calculate_success_in_scoring_opps(opps, team):
    opp_ids = opps.drive_id.unique()
    success = 0
    total = 0
    for opp_id in opp_ids:
        opp_set = base_pbp_data[(base_pbp_data.drive_id == opp_id)]
        opp_s_rate = verify_division(len(opp_set[opp_set.play_successful == True]), len(opp_set))
#         print(f"{team} SR% in opp {opp_id}: {opp_s_rate}")
        success += len(opp_set[opp_set.play_successful == True])
        total += len(opp_set)
    s_rate = 0 if total == 0 else (success / total)
    print(f"{team} total SR% in scoring opps: {s_rate}")

calculate_success_in_scoring_opps(selected_team_scoring_opps, selected_team)
calculate_success_in_scoring_opps(def_scoring_opps, "Opponents")

Georgia Tech total SR% in scoring opps: 0.5116959064327485
Opponents total SR% in scoring opps: 0.5051369863013698


In [107]:
###### Explosiveness rates
# Defined as rate of 15+ yards gains
def generate_exp_rate_stats(team, offense):
    team_plays = base_pbp_data[(base_pbp_data.offense == team)] if offense else base_pbp_data[(base_pbp_data.defense == team)]
    exp_plays = team_plays[(team_plays.play_explosive == True)]
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} Exp Rate Stats for {team}")
    print(f"Total Exp Plays: {len(exp_plays)}")
    print(f"Overall Exp %: {verify_division(len(exp_plays),len(team_plays))}")
    print(f"Pass Exp %: {verify_division(len(exp_plays[(exp_plays.play_type.isin(pass_types))]),len(team_plays[(team_plays.play_type.isin(pass_types))]))}")
    print(f"Pass Exp % (Std Downs): {verify_division(len(standard_downs[(standard_downs.offense == team) & (standard_downs.play_explosive == True) & (standard_downs.play_type.isin(pass_types))]),len(standard_downs[(standard_downs.offense == team) & (standard_downs.play_type.isin(pass_types))]))}")
    print(f"Pass Exp % (Pas Downs): {verify_division(len(passing_downs[(passing_downs.offense == team) & (passing_downs.play_explosive == True) & (passing_downs.play_type.isin(pass_types))]),len(passing_downs[(passing_downs.offense == team) & (passing_downs.play_type.isin(pass_types))]))}")
    print(f"Rush Exp %: {verify_division(len(exp_plays[(exp_plays.play_type.isin(rush_types))]),len(team_plays[(team_plays.play_type.isin(rush_types))]))}")

print("Explosiveness Rates")
generate_exp_rate_stats(selected_team, True)
print("\n---\n")
generate_exp_rate_stats(selected_team, False)

Explosiveness Rates
Offensive Exp Rate Stats for Georgia Tech
Total Exp Plays: 79
Overall Exp %: 0.1101813110181311
Pass Exp %: 0.15671641791044777
Pass Exp % (Std Downs): 0.1532258064516129
Pass Exp % (Pas Downs): 0.16161616161616163
Rush Exp %: 0.083710407239819

---

Defensive Exp Rate Stats for Georgia Tech
Total Exp Plays: 95
Overall Exp %: 0.1093210586881473
Pass Exp %: 0.19393939393939394
Pass Exp % (Std Downs): 0.1532258064516129
Pass Exp % (Pas Downs): 0.16161616161616163
Rush Exp %: 0.058161350844277676


In [108]:
###### Stop Rate
## Defined as defensive drives that end in turnovers, punts, or turnovers on downs
import re
stop_types = ['TURNOVER ON DOWNS','PUNT','FUMBLE','INT', 'SAFETY']
def generate_stop_rate_stats(team, offense):
    team_drives = drives[(drives.offense == team)] if offense else drives[(drives.defense == team)]
    stop_drives = team_drives[(team_drives.drive_result.str.contains("|".join(stop_types)) == True)]
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} Stop Stats for {team}")
    print(f"Total: {len(stop_drives)}")
    print(f"Stop Rate %: {verify_division(len(stop_drives),len(team_drives))}")
    print(f"Stops/ScOpp: {verify_division(len(stop_drives),len(team_drives[((team_drives.start_yardline + team_drives.yards) >= 60)]))}")
print("Defensive Stop Rates")
print("---")
generate_stop_rate_stats(selected_team, False)
print("\n---\n")
generate_stop_rate_stats(selected_team, True)

Defensive Stop Rates
---
Defensive Stop Stats for Georgia Tech
Total: 59
Stop Rate %: 0.43703703703703706
Stops/ScOpp: 0.7283950617283951

---

Offensive Stop Stats for Georgia Tech
Total: 99
Stop Rate %: 0.7071428571428572
Stops/ScOpp: 1.9411764705882353


In [110]:
# conv_down_plays = base_pbp_data[base_pbp_data.down <= 4]
def generate_conv_down_stats(team, offense):
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} Conv Down Stats for {team}")
    team_off_plays = base_pbp_data[base_pbp_data.offense == team] if offense else base_pbp_data[base_pbp_data.defense == team]
    team_conv_downs = team_off_plays[team_off_plays.down >= 3]
    succ_conv = team_conv_downs[team_conv_downs.yards_gained >= team_conv_downs.distance]
    team_drives = drives[drives.offense == team] if offense else drives[drives.defense == team]
    print(f"Conv Downs: {len(team_conv_downs)}")
    print(f"Yds/CV: {verify_division(sum(team_conv_downs.distance),len(team_conv_downs))}")
    print(f"Conv Down %: {verify_division(len(team_conv_downs),sum(team_drives.plays))}")
    print(f"Conv/Drive: {verify_division(len(team_conv_downs),len(team_drives))}")
    print(f"Succ Conv: {len(succ_conv)}")
    print(f"Succ Conv %: {verify_division(len(succ_conv),len(team_conv_downs))}")
    print(f"Yds/SC: {verify_division(sum(succ_conv.distance),len(succ_conv))}")
    print(f"SC/Drive: {verify_division(len(succ_conv),len(team_drives))}")
    
generate_conv_down_stats(selected_team, True)
print("\n---\n")
generate_conv_down_stats(selected_team, False)
    

Offensive Conv Down Stats for Georgia Tech
Conv Downs: 182
Yds/CV: 7.302197802197802
Conv Down %: 0.2633863965267728
Conv/Drive: 1.3
Succ Conv: 61
Succ Conv %: 0.33516483516483514
Yds/SC: 5.180327868852459
SC/Drive: 0.4357142857142857

---

Defensive Conv Down Stats for Georgia Tech
Conv Downs: 197
Yds/CV: 6.350253807106599
Conv Down %: 0.23368920521945433
Conv/Drive: 1.4592592592592593
Succ Conv: 97
Succ Conv %: 0.49238578680203043
Yds/SC: 4.721649484536083
SC/Drive: 0.7185185185185186


In [113]:
# Rushing Stats
rush_carries = rush_plays[
    (~rush_plays.play_type.isin(['Fumble Recovery (Opponent)','Fumble Return Touchdown']))
]

print("Aggregate Rushing Stats")
def generate_rush_stats(team, offense):
    selected_team_carries = rush_carries[(rush_carries.offense == team)] if offense else rush_carries[(rush_carries.defense == team)]
    selected_team_rush_opps = selected_team_carries[selected_team_carries.yards_gained >= 4]
    selected_team_rush_stuffs = selected_team_carries[selected_team_carries.yards_gained <= 0]
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} Rush Stats for {team}")
    print("Carries: ",len(selected_team_carries))
    print("Yards: ", sum(selected_team_carries.yards_gained))
    print("Yards Per Carry: ", verify_division(sum(selected_team_carries.yards_gained), len(selected_team_carries)))
    print("Line Yards Per Carry: ", verify_division(sum(selected_team_carries.line_yards), len(selected_team_carries)))
    print("Rush Success Rate: ", verify_division(len(selected_team_carries[selected_team_carries.play_successful == True]), len(selected_team_carries)))
    print("Rush Opp Rate: ", verify_division(len(selected_team_rush_opps), len(selected_team_carries)))
    print("Hlt Yards Per Opp: ", verify_division(sum(selected_team_rush_opps.highlight_yards), len(selected_team_rush_opps)))
    print("Stuff Rate: ", verify_division(len(selected_team_rush_stuffs), len(selected_team_carries)))
    print("Rush Exp Rate: ", verify_division(len(selected_team_carries[selected_team_carries.play_explosive == True]), len(selected_team_carries)))
    
generate_rush_stats(selected_team, True)
print("\n---\n")
generate_rush_stats(selected_team, False)

Aggregate Rushing Stats
Offensive Rush Stats for Georgia Tech
Carries:  434
Yards:  1915
Yards Per Carry:  4.412442396313364
Line Yards Per Carry:  0.23099078341013826
Rush Success Rate:  0.3847926267281106
Rush Opp Rate:  0.4700460829493088
Hlt Yards Per Opp:  8.573529411764707
Stuff Rate:  0.2534562211981567
Rush Exp Rate:  0.08064516129032258

---

Defensive Rush Stats for Georgia Tech
Carries:  528
Yards:  2638
Yards Per Carry:  4.996212121212121
Line Yards Per Carry:  0.9185606060606061
Rush Success Rate:  0.44507575757575757
Rush Opp Rate:  0.4772727272727273
Hlt Yards Per Opp:  8.369047619047619
Stuff Rate:  0.17045454545454544
Rush Exp Rate:  0.058712121212121215


In [122]:
def calculate_havoc_stats(team, offense):
    selected_team_plays = base_pbp_data[(base_pbp_data.offense == team)] if offense else base_pbp_data[(base_pbp_data.defense == team)]
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} Havoc Stats for {team}")
    selected_team_tfl = selected_team_plays[(selected_team_plays.yards_gained < 0) & ~(selected_team_plays.play_text.str.lower().str.contains('penalty'))]
    selected_team_sacks = selected_team_plays[(selected_team_plays.play_type.str.contains('Sack'))]
    selected_team_pbu = selected_team_plays[((selected_team_plays.play_type == 'Pass Incompletion')
    & (selected_team_plays.play_text.str.contains('broken up', regex=False))) ]
    selected_team_ff = selected_team_plays[(selected_team_plays.play_type == 'Fumble Recovery (Opponent)')]
    print(f"TFL: {len(selected_team_tfl)}")
    print(f"Sacks: {len(selected_team_sacks)}")
    print(f"PBU: {len(selected_team_pbu)}")
    print(f"FF: {len(selected_team_ff)}")
    print(f"Havoc Rate: {verify_division(len(selected_team_tfl) + len(selected_team_pbu) + len(selected_team_ff), len(selected_team_plays))}")
    
calculate_havoc_stats(selected_team, False)
print("\n---\n")
calculate_havoc_stats(selected_team, True)

Defensive Havoc Stats for Georgia Tech
TFL: 71
Sacks: 15
PBU: 26
FF: 5
Havoc Rate: 0.11737629459148446

---

Offensive Havoc Stats for Georgia Tech
TFL: 89
Sacks: 23
PBU: 15
FF: 8
Havoc Rate: 0.15620641562064155


In [None]:
##### 
def generate_passing_lines(downs, team, offense):
    team_downs = downs[downs.offense == team] if offense else downs[downs.defense == team]
    poss = "Offensive" if offense else "Defensive"
    print(f"{poss} Passing Line for {team}")
    team_passes = team_downs[(team_downs.play_type == 'Pass Reception')
    | (team_downs.play_type == 'Passing Touchdown')
    | (team_downs.play_type == 'Pass Incompletion')
    | (team_downs.play_type.str.contains('Interception'))]
    team_completions = team_passes[(team_passes.play_type == "Pass Reception") | (team_passes.play_type == "Passing Touchdown")]
    team_sacks = team_downs[team_downs.play_type.str.lower().str.contains('sack')]
    print("Comp:",len(team_completions))
    print("Att:",len(team_passes))
    print("Yards:",sum(team_completions.yards_gained))
    print("INT:",len(team_passes[team_passes.play_type.str.contains('Interception')]))
    print("Sacks:",len(team_sacks))
    print("Sack Yds:",sum(team_sacks.yards_gained))
    print("Pass Exp%:", verify_division(len(team_passes[team_passes.play_explosive == True]), len(team_passes)))
    
print("Overall")
generate_passing_lines(base_pbp_data, selected_team, True)
print("\n---\n")
generate_passing_lines(base_pbp_data, selected_team, False)
print("")

print("Standard Downs")
generate_passing_lines(standard_downs, selected_team, True)
print("\n---\n")
generate_passing_lines(standard_downs, selected_team, False)
print("")

print("Passing Downs")
generate_passing_lines(passing_downs, selected_team, True)
print("\n---\n")
generate_passing_lines(passing_downs, selected_team, False)

In [156]:
roster = pd.read_json("https://api.collegefootballdata.com/player/search?searchTerm=%20&team=georgia%20tech")
# roster

def generate_position_usage(plays, position, team):
    players = roster[roster.position == position]
    initials = []
    for indx, p in players.iterrows():
        initials.append(f"{p['firstName'][0]}\. {p['lastName']}")
    initl_join = "|".join(initials)
    regex_join = "|".join(players.name)
    comb_join = f"{initl_join}|{regex_join}"
    usg = plays[(plays.play_text.str.contains(comb_join, regex=True))]
    return usg
    
def calculate_havoc_portion(position, team):
    havoc_total = base_pbp_data[
        (base_pbp_data.defense == team)
        &
        (((base_pbp_data.play_type == 'Pass Incompletion')
        & (base_pbp_data.play_text.str.contains('broken up', regex=False)))
        | (base_pbp_data.play_type == 'Fumble Recovery (Opponent)')
        | (base_pbp_data.play_type == 'Sack')
        | (base_pbp_data.play_type.str.contains('Interception', regex=False))
        | (base_pbp_data.yards_gained < 0))
        & (base_pbp_data.play_type != 'Penalty')
    ]
    pos_usg = generate_position_usage(havoc_total, position, team)
    print(f"Havoc Portion for {team}'s {position} Unit: {verify_division(len(pos_usg),len(havoc_total))} ({len(pos_usg)}/{len(havoc_total)})")
    
calculate_havoc_portion('DL','Georgia Tech')
calculate_havoc_portion('LB','Georgia Tech')
calculate_havoc_portion('DB','Georgia Tech')

Havoc Portion for Georgia Tech's DL Unit: 0.13513513513513514 (15/111)
Havoc Portion for Georgia Tech's LB Unit: 0.15315315315315314 (17/111)
Havoc Portion for Georgia Tech's DB Unit: 0.10810810810810811 (12/111)
