In [81]:
import requests
import pandas as pd
import json
import html
import os.path

# Team under analysis: passed to retrieveCfbData as the team to fetch and
# compared against the offense/defense columns in process_half_stats.
selected_team = 'Georgia Tech'
# NOTE(review): not referenced in any of the visible cells; presumably a toggle
# for filtering garbage-time drives -- confirm it is actually used further down.
consider_garbage_time = False

def retrieveCfbData(endpoint, team, year):
    """Return the path of a local JSON cache of regular-season data for one team/year.

    The cache lives at ``data/<endpoint>/<singular>-data-<year>-<team-slug>.json``
    (``plays`` is stored under ``data/pbp/pbp-data-...``). If the file already
    exists it is reused; otherwise the data is downloaded from the
    collegefootballdata.com API and written to that path first.

    Parameters:
        endpoint: API endpoint name, e.g. ``'drives'`` or ``'plays'``.
        team: Team name exactly as the API expects it, e.g. ``'Georgia Tech'``.
        year: Season year.

    Returns:
        The cache file path (str). A path is returned on both cache hit and
        cache miss, so callers can always hand the result to ``pd.read_json``.

    Raises:
        requests.HTTPError: if the API answers with an error status. Nothing
            is written to the cache in that case.
    """
    is_plays = (endpoint == 'plays')
    folder = 'pbp' if is_plays else endpoint
    # Drop the trailing "s" of the endpoint name for the file prefix (drives -> drive).
    prefix = 'pbp' if is_plays else endpoint[:-1]
    file_path = f"data/{folder}/{prefix}-data-{year}-{team.lower().replace(' ','-')}.json"
    if os.path.exists(file_path):
        return file_path

    # Let requests URL-encode the query string. HTML-escaping the team name
    # turned "Texas A&M" into "Texas A&amp;M", which splits the query.
    res = requests.get(
        f"https://api.collegefootballdata.com/{endpoint}",
        params={"seasonType": "regular", "year": year, "team": team},
    )
    # Fail loudly rather than caching an error payload as if it were data.
    res.raise_for_status()
    content = res.json()

    # The cache directory may not exist on a fresh checkout.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    with open(file_path, 'w') as f:
        json.dump(content, f)
    return file_path

# Load one drives frame per season (2008 through 2019) and tag it with its year.
season_frames = []
for i in range(2008, 2020):
    df = pd.read_json(retrieveCfbData('drives',selected_team, i))
    df['year'] = i
    season_frames.append(df)

# DataFrame.append was removed in pandas 2.0 (and re-copied the whole frame on
# every iteration); build the combined frame with a single concat instead.
base_drives = pd.concat(season_frames)

# Drop columns the analysis below does not use, then discard drives whose
# result is 'Uncategorized'.
base_drives = base_drives.drop(
    columns=['offense_conference','start_time','end_time','defense_conference','elapsed','end_period']
)
base_drives = base_drives[
    ~base_drives.drive_result.isin(['Uncategorized'])
]

# base_drives

In [82]:
import numpy as np

# drive_result values that process_half_stats counts as turnovers.
turnover_types = ["INT", "FUMBLE"]

def calculate_score(result):
    """Map a drive-result label to the points credited for that drive.

    Any label containing "TD" is worth 7, exactly "FG" is worth 3, exactly
    "SAFETY" is worth 2, and every other label is worth 0.

    NOTE(review): the "TD" check is a substring match, so any label that merely
    contains "TD" also scores 7 -- confirm that is intended for every label
    present in the data.
    """
    # Substring match first: touchdown labels may carry extra words.
    if result.find("TD") >= 0:
        return 7
    # The remaining scoring results must match exactly.
    exact_points = {"FG": 3, "SAFETY": 2}
    return exact_points.get(result, 0)

def process_half_stats(dataset, title, is_defense, sort_teams_alphabetically):
    """Print drive-level totals and per-drive averages for ``selected_team``.

    The report has an overall section (total points, total yards, points and
    yards per drive, plus turnover figures when ``is_defense`` is true)
    followed by a "By Opponent" section with yards per drive (YPD) and points
    per drive (PPD) for each opponent.

    Parameters:
        dataset: DataFrame of drives with ``offense``, ``defense``,
            ``drive_result`` and ``yards`` columns.
        title: Label printed in the "Stats for ..." header line.
        is_defense: If true, report on drives where ``selected_team`` is the
            defense (i.e. opponents' drives); otherwise on drives where it is
            the offense.
        sort_teams_alphabetically: If true, list opponents in sorted order;
            otherwise in order of first appearance in ``dataset``.

    Returns:
        None. Everything is written to stdout.

    Relies on the module-level names ``selected_team``, ``turnover_types`` and
    ``calculate_score``.
    """
    print(f"Stats for {title}")

    # Column holding the selected team's side, and the column naming the opponent.
    team_col, opponent_col = ("defense", "offense") if is_defense else ("offense", "defense")
    drives_set = dataset[dataset[team_col] == selected_team]

    drive_count = len(drives_set)
    if drive_count == 0:
        # The per-drive averages below would divide by zero; report and stop.
        print("No drives found.")
        return

    points = drives_set.drive_result.map(calculate_score).sum()
    yards = drives_set.yards.sum()
    turnovers = int(drives_set.drive_result.isin(turnover_types).sum())
    print("Total Points:", points)
    print("Total Yards:", yards)

    if is_defense:
        print("Total Turnovers:", turnovers)

    print("Points per Drive:", (points / drive_count))
    print("Yards per Drive:", (yards / drive_count))

    if is_defense:
        print("Turnovers per Drive:", (turnovers / drive_count))

    print()

    print("By Opponent:")
    team_set = drives_set[opponent_col].unique()
    if sort_teams_alphabetically:
        # sorted() works whether unique() returned a NumPy or an extension array.
        team_set = sorted(team_set)
    for opponent in team_set:
        game_level = drives_set[drives_set[opponent_col] == opponent]
        game_points = game_level.drive_result.map(calculate_score).sum()
        game_yards = game_level.yards.sum()
        print(f"{opponent}")
        print("    YPD:", game_yards / len(game_level))
        print("    PPD:", game_points / len(game_level))

In [83]:
# Geoff -- 2019 season only (the "CGC" reports below)
is_2019 = base_drives.year == 2019
cur_year_drives = base_drives.loc[is_2019]

# Drives starting in periods 1-2 are first-half drives; period 3 onward is second half.
first_half = cur_year_drives.loc[cur_year_drives.start_period < 3]
second_half = cur_year_drives.loc[cur_year_drives.start_period > 2]

# (dataset, title, is_defense, sort_teams_alphabetically) for each report, in print order.
report_runs = [
    (first_half, "2019 First halves", True, False),
    (second_half, "2019 Second halves", True, False),
    (cur_year_drives, "2019-Pres CGC Defenses", True, True),
    (cur_year_drives, "2019-Pres CGC Offenses", False, True),
]
# Divider printed before the 2nd, 3rd and 4th report respectively.
dividers = ["\n----\n", "\n----\n\n", "\n----\n"]

for idx, run_args in enumerate(report_runs):
    if idx > 0:
        print(dividers[idx - 1])
    process_half_stats(*run_args)

Stats for 2019 First halves
Total Points: 176
Total Yards: 2079
Total Turnovers: 10
Points per Drive: 2.933333333333333
Yards per Drive: 34.65
Turnovers per Drive: 0.16666666666666666

By Opponent:
Clemson
    YPD: 30.1
    PPD: 2.8
South Florida
    YPD: 18.5
    PPD: 0.5
The Citadel
    YPD: 37.6
    PPD: 2.8
Temple
    YPD: 36.333333333333336
    PPD: 2.3333333333333335
North Carolina
    YPD: 44.0
    PPD: 2.8333333333333335
Duke
    YPD: 47.166666666666664
    PPD: 5.166666666666667
Miami
    YPD: 24.428571428571427
    PPD: 4.0
Pittsburgh
    YPD: 36.375
    PPD: 2.125
Virginia
    YPD: 42.0
    PPD: 4.0

----

Stats for 2019 Second halves
Total Points: 84
Total Yards: 1523
Total Turnovers: 1
Points per Drive: 1.68
Yards per Drive: 30.46
Turnovers per Drive: 0.02

By Opponent:
Clemson
    YPD: 48.833333333333336
    PPD: 4.0
South Florida
    YPD: 29.8
    PPD: 1.4
The Citadel
    YPD: 35.4
    PPD: 2.6
Temple
    YPD: 18.4
    PPD: 0.6
North Carolina
    YPD: 40.285714285714285


In [84]:
# CPJ -- every loaded season except 2019
# NOTE: this rebinds base_drives without its 2019 rows, so re-running the 2019
# cell after this one will find no 2019 drives.
not_2019 = base_drives.year != 2019
base_drives = base_drives.loc[not_2019]

# Defensive report first, then offensive, separated by a divider.
for idx, (report_title, on_defense) in enumerate([
    ("2008-2018 CPJ Defenses", True),
    ("2008-2018 CPJ Offenses", False),
]):
    if idx > 0:
        print("\n----\n")
    process_half_stats(base_drives, report_title, on_defense, True)

Stats for 2008-2018 CPJ Defenses
Total Points: 3190
Total Yards: 48345
Total Turnovers: 207
Points per Drive: 2.0593931568754034
Yards per Drive: 31.21045836023241
Turnovers per Drive: 0.13363460296965785

By Opponent:
Alabama A&M
    YPD: 20.75
    PPD: 0.5833333333333334
Alcorn State
    YPD: 14.538461538461538
    PPD: 0.5384615384615384
BYU
    YPD: 31.115384615384617
    PPD: 2.6923076923076925
Boston College
    YPD: 25.97142857142857
    PPD: 1.0
Bowling Green
    YPD: 36.18181818181818
    PPD: 2.1818181818181817
Clemson
    YPD: 33.19205298013245
    PPD: 2.2980132450331126
Duke
    YPD: 28.68217054263566
    PPD: 2.046511627906977
Elon
    YPD: 21.5
    PPD: 1.1666666666666667
Florida State
    YPD: 37.42307692307692
    PPD: 2.7115384615384617
Gardner-Webb
    YPD: 15.714285714285714
    PPD: 0.5
Georgia
    YPD: 38.775862068965516
    PPD: 2.8448275862068964
Georgia Southern
    YPD: 42.76190476190476
    PPD: 2.9523809523809526
Jacksonville State
    YPD: 22.22222222222222