In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# nflsavant

In [3]:
files = []
for file in os.listdir('../data'):
    if 'nfl' in file:
        print(file)
        files.append(
            pd.read_csv(f'../data/{file}', header = 0, usecols = range(45), low_memory=False)
        )

nflsavant_2019.csv
nflsavant_2018.csv
nflsavant_2020.csv
nflsavant_2013.csv
nflsavant_2015.csv
nflsavant_2014.csv
nflsavant_2016.csv
nflsavant_2017.csv


In [8]:
files[2]['YardLineDirection']

0        OPP
1        OPP
2        OPP
3        OPP
4        OPP
        ... 
46184    OPP
46185    OPP
46186    OPP
46187    OPP
46188    OPP
Name: YardLineDirection, Length: 46189, dtype: object

In [3]:
def savant_clean(file_list):
    
    # I don't have boundary which sucks
    
    cleaned_files = []
    
    for file in file_list:
        # 2013 has sporatic miss-inputs
        # Change to numeric & turn coerce strings to NA
        file['PASS'] = pd.to_numeric(file.IsPass, errors = 'coerce')
        file['RUN'] = pd.to_numeric(file.IsRush, errors = 'coerce')
         
        
        # Run Clean
        file['RUN_LEFT'] = file.RushDirection.str.contains('LEFT').replace({True:1, False:0})
        file['RUN_RIGHT'] = file.RushDirection.str.contains('RIGHT').replace({True:1, False:0})
        file['RUN_MIDDLE'] = file.RushDirection.str.contains('CENTER').replace({True:1, False:0})
        file['RUN_DIR'] = np.where(file['RUN_LEFT'] == 1, 0,
                                    np.where(file['RUN_MIDDLE'] == 1, 1,
                                              np.where(file['RUN_RIGHT'] == 1, 2,
                                                       -99)
                                            )
                                   )
        # Pass Clean
        file['PASS_SHORT'] = file.PassType.str.contains('SHORT').replace({True:1, False:0})
        file['PASS_DEEP'] = file.PassType.str.contains('DEEP').replace({True:1, False:0})

        file['PASS_LEFT'] = file.PassType.str.contains('LEFT').replace({True:1, False:0})
        file['PASS_RIGHT'] = file.PassType.str.contains('RIGHT').replace({True:1, False:0})
        file['PASS_MIDDLE'] = file.PassType.str.contains('MIDDLE').replace({True:1, False:0})
        
        file['PASS_DIR'] = np.where(file['PASS_LEFT'] == 1, 0,
                                    np.where(file['PASS_RIGHT'] == 1, 1,
                                              np.where(file['PASS_MIDDLE'] == 1, 2,
                                                       -99)
                                            )
                                   )

        # Formation Clean
        file['FORMATION_SHOTGUN'] = file.Formation.str.contains('SHOTGUN').replace({True:1, False:0})
        file['FORMATION_UNDER_CENTER'] = file.Formation.str.contains('CENTER').replace({True:1, False:0})
        
        # Date Clean
        file['MONTH'] = pd.to_datetime(file['GameDate']).dt.month
        file['DAY'] = pd.to_datetime(file['GameDate']).dt.day
        
        # Yardline Clean
        file['YARD'] = np.where(
            
            file['YardLineDirection'] == 'OWN', 
            file['YardLineFixed'] * -1, 
            
            np.where(file['YardLineDirection'] == 'OPP',
                     file['YardLineFixed'], 
                     -99
                    )
        )
        cleaned_file = file[
            [
                'GameId',
                'OffenseTeam','DefenseTeam',
                'Quarter', 'Minute','Second',
                'YARD', 'Down','ToGo',
                'SeasonYear','MONTH','DAY',
                'PASS','RUN',
                'RUN_DIR','RUN_LEFT', 'RUN_RIGHT', 'RUN_MIDDLE',
                'PASS_DIR', 'PASS_LEFT', 'PASS_RIGHT', 'PASS_MIDDLE',
                'PASS_SHORT','PASS_DEEP',
                'FORMATION_SHOTGUN','FORMATION_UNDER_CENTER',
                'Description'
            ]
        ]
        cleaned_files.append(cleaned_file)
    
    big_file = pd.concat(cleaned_files)
    big_file = big_file.query('(PASS == 1) | (RUN == 1)').reset_index(drop=True).sort_values('SeasonYear')
    return big_file

In [4]:
nfl_data = savant_clean(files)

In [None]:
nfl_data.columns

Unnamed: 0,GameId,OffenseTeam,DefenseTeam,Quarter,Minute,Second,YARD,Down,ToGo,SeasonYear,...,RUN_MIDDLE,PASS_DIR,PASS_LEFT,PASS_RIGHT,PASS_MIDDLE,PASS_SHORT,PASS_DEEP,FORMATION_SHOTGUN,FORMATION_UNDER_CENTER,Description
97874,2013091504,WAS,GB,1,3,22,-10.0,3,7,2013.0,...,,0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,(3:22) (SHOTGUN) 10-R.GRIFFIN PASS INCOMPLETE ...
105771,2013102000,ATL,TB,2,14,33,19.0,2,5,2013.0,...,0.0,-99,,,,,,1.0,0.0,(14:33) (SHOTGUN) 44-J.SNELLING LEFT TACKLE TO...
105770,2013102000,ATL,TB,2,15,0,24.0,1,10,2013.0,...,0.0,-99,,,,,,0.0,1.0,(15:00) 44-J.SNELLING LEFT GUARD TO TB 19 FOR ...
105769,2013102000,ATL,TB,1,0,17,44.0,1,10,2013.0,...,,2,0.0,0.0,1.0,1.0,0.0,1.0,0.0,(:17) (SHOTGUN) 2-M.RYAN PASS SHORT MIDDLE TO ...
105768,2013102000,ATL,TB,1,0,49,-30.0,2,11,2013.0,...,,0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,(:49) (SHOTGUN) 2-M.RYAN PASS SHORT LEFT TO 83...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71869,2020110112,DEN,LAC,2,6,51,-30.0,3,12,2020.0,...,,1,0.0,1.0,0.0,1.0,0.0,1.0,0.0,(6:51) (SHOTGUN) 3-D.LOCK PASS SHORT RIGHT TO ...
71868,2020110112,DEN,LAC,2,7,32,-32.0,2,10,2020.0,...,0.0,-99,,,,,,0.0,1.0,(7:32) 25-M.GORDON LEFT TACKLE TO DEN 30 FOR -...
71867,2020110112,DEN,LAC,2,7,56,-32.0,2,10,2020.0,...,,1,0.0,1.0,0.0,1.0,0.0,1.0,0.0,(7:56) (SHOTGUN) 3-D.LOCK PASS SHORT RIGHT TO ...
71880,2020110801,BUF,SEA,2,1,3,-25.0,2,10,2020.0,...,,1,0.0,1.0,0.0,1.0,0.0,1.0,0.0,(1:03) (SHOTGUN) 17-J.ALLEN PASS SHORT RIGHT T...


In [10]:
# nfl_data.head(20)

# nflfastR

In [6]:
# this is data from 

# need to assess the level of missing for the variables I desire
#Enter desired years of data
YEARS = [2017]

data = pd.DataFrame()

for i in YEARS:  
    #low_memory=False eliminates a warning
    i_data = pd.read_csv('https://github.com/nflverse/nflfastR-data/blob/master/data/' \
                         'play_by_play_' + str(i) + '.csv.gz?raw=True',
                         compression='gzip', low_memory=False)

    #sort=True eliminates a warning and alphabetically sorts columns
    data = data.append(i_data, sort=True)

#Give each row a unique index
data.reset_index(drop=True, inplace=True)

In [56]:

list(data)

# string with team abbr
data['side_of_field']

# has Team abbr then yardline
# combine with possession team for +/- yard
# data['split_loc'] = data['yrdln'].str.find(' ')

# data['yrdln'].str.find(' ').values

# Nope looks like side of field is equal to the 
subset = data.dropna(subset = ['yrdln','side_of_field'])
subset.loc[~(subset['yrdln'].str.split().str[0] == subset['side_of_field'])][['side_of_field','yrdln']].query('side_of_field != "50"')

# data['yrdln'].apply(lambda row: row.split())

# run vars
# run_location	String indicator for location of run: left, middle, or right.
# run_gap	String indicator for line gap of run: end, guard, or tackle


# pass_length	String indicator for pass length: short or deep.
# pass_location	String indicator for pass location: left, middle, or right.

Unnamed: 0,side_of_field,yrdln


# sports data stuff 

In [8]:
import zipfile
from zipfile import io
# archive = zipfile.ZipFile('images.zip', 'r')
# imgdata = archive.read('img_01.png')

In [46]:
archive = zipfile.ZipFile('../data/CFB_PBP_Data.zip', 'r')
# archive = archive.read('CFB_PBP_Data.zip')
# bytes_io = io.BytesIO(archive)
dir(archive)
archive.filelist
archive.extract('CFB_PBP_Data/Data Dictionary.txt', path ='../data/')

'../data/CFB_PBP_Data/Data Dictionary.txt'

In [43]:
# out =open('../data/CFB_PBP_Data/2014/2014_CFB_Data.csv', 'r', encoding="ISO-8859-1")
out =open('../data/CFB_PBP_Data/2014/2014_CFB_Data.txt', 'r')

In [44]:
out

<_io.TextIOWrapper name='../data/CFB_PBP_Data/2014/2014_CFB_Data.txt' mode='r' encoding='UTF-8'>

In [24]:
out = pd.read_csv(out)

In [40]:
list(out)
out['playtype'][9]

5

# cfb stats

In [49]:
archive = zipfile.ZipFile('../data/cfbstats.com-2005-1.5.0.zip', 'r')
# archive = archive.read('CFB_PBP_Data.zip')
# bytes_io = io.BytesIO(archive)
dir(archive)
archive.filelist
archive.extract('drive.csv', path ='../data/')

'../data/drive.csv'

In [31]:
out =open('../data/drive.csv', 'r')

# sportsradar

In [39]:
import requests

## NCAAFB

In [89]:
access_level = 'trial'
version = 'v7'
language_code = 'en'
game_id = '00ef0f71-2ad2-4789-8f53-b4bfcc713d3b'
format_ = 'json'
api_key = 'ch4eqj94ajgbqjtr6nj7g4nf'


url_base = f"https://api.sportradar.us/ncaafb/{access_level}/{version}/{language_code}/games/{game_id}/pbp.{format_}?api_key={api_key}"

# response = requests.get(url_base)
# response
url_base

'https://api.sportradar.us/ncaafb/trial/v7/en/games/00ef0f71-2ad2-4789-8f53-b4bfcc713d3b/pbp.json?api_key=ch4eqj94ajgbqjtr6nj7g4nf'

In [72]:
pbp = requests.get(url_base).json()

In [74]:
pbp.keys()

# if wanted to do at scale should probably create a flatten() for this shyt

dict_keys(['id', 'status', 'scheduled', 'attendance', 'entry_mode', 'clock', 'quarter', 'coverage', 'game_type', 'weather', 'summary', 'periods', '_comment'])

In [88]:
pd.DataFrame(pbp['periods'][1]['pbp'][0]['events'])

Unnamed: 0,type,id,sequence,clock,home_points,away_points,play_type,wall_clock,description,alt_description,...,run_pass_option,created_at,updated_at,start_situation,end_situation,statistics,details,scoring_play,scoring_description,score
0,play,bc01270c-f881-45fd-86e6-3ec969a5020d,1561404000000.0,15:00,3,0,rush,2019-06-24T19:12:00+00:00,5-A.McAllister to FOR 19 for 2 yards (44-M.Rob...,A.McAllister to FOR 19 for 2 yards (M.Roberts).,...,False,2019-06-24T19:12:00+00:00,2021-07-12T18:59:25+00:00,"{'clock': '15:00', 'down': 2, 'yfd': 2, 'posse...","{'clock': '15:00', 'down': 1, 'yfd': 10, 'poss...","[{'stat_type': 'rush', 'attempt': 1, 'yards': ...","[{'category': 'rush', 'sequence': 1, 'start_lo...",,,
1,play,5552e6f0-3902-48e8-8c05-7b3e0451f956,1561404000000.0,14:20,3,0,pass,2019-06-24T19:12:12+00:00,17-C.Reynolds incomplete. Intended for 9-V.Tuc...,C.Reynolds incomplete. Intended for V.Tucker.,...,False,2019-06-24T19:12:12+00:00,2021-07-12T18:59:26+00:00,"{'clock': '14:20', 'down': 1, 'yfd': 10, 'poss...","{'clock': '14:20', 'down': 2, 'yfd': 10, 'poss...","[{'stat_type': 'pass', 'attempt': 1, 'complete...","[{'category': 'pass', 'sequence': 1, 'start_lo...",,,
2,play,61462dfe-3809-4dda-ac0a-88d7d660d3a1,1561404000000.0,14:12,3,0,pass,2019-06-24T19:12:30+00:00,17-C.Reynolds sacked at FOR 30 for -11 yards (...,C.Reynolds sacked at FOR 30 for -11 yards (M.R...,...,False,2019-06-24T19:12:30+00:00,2021-07-12T18:59:26+00:00,"{'clock': '14:12', 'down': 2, 'yfd': 10, 'poss...","{'clock': '14:12', 'down': 3, 'yfd': 21, 'poss...","[{'stat_type': 'pass', 'attempt': 0, 'complete...","[{'category': 'pass', 'sequence': 1, 'start_lo...",,,
3,play,0d02cdf1-f78a-45ec-8f3c-f1c1d398ac98,1561404000000.0,13:28,3,0,rush,2019-06-24T19:12:52+00:00,17-C.Reynolds pushed ob at FOR 22 for 8 yards ...,C.Reynolds pushed ob at FOR 22 for 8 yards (L....,...,False,2019-06-24T19:12:52+00:00,2021-07-12T18:59:27+00:00,"{'clock': '13:28', 'down': 3, 'yfd': 21, 'poss...","{'clock': '13:28', 'down': 4, 'yfd': 13, 'poss...","[{'stat_type': 'rush', 'attempt': 1, 'yards': ...","[{'category': 'rush', 'sequence': 1, 'start_lo...",,,
4,play,fce2c9ae-0a8f-4d99-a650-324c50632e20,1561404000000.0,12:55,6,0,field_goal,2019-06-24T19:13:15+00:00,11-J.Cruz 39 yards Field Goal is Good.,J.Cruz 39 yards Field Goal is Good.,...,False,2019-06-24T19:13:15+00:00,2021-07-12T18:59:27+00:00,"{'clock': '12:55', 'down': 4, 'yfd': 13, 'poss...","{'clock': '12:55', 'down': 0, 'yfd': 0, 'posse...","[{'stat_type': 'field_goal', 'attempt': 1, 'at...","[{'category': 'kick_off', 'sequence': 2, 'star...",True,11-J.Cruz 39 yards Field Goal is Good.,"{'sequence': 20245, 'clock': '12:55', 'points'..."


In [75]:
pbp['periods']

[{'period_type': 'quarter',
  'id': '39d98740-e14f-43a1-8402-d0fa61ecb25b',
  'number': 1,
  'sequence': 1,
  'scoring': {'home': {'id': '30abfee7-57a7-4984-9a1c-55cb28970da3',
    'name': '49ers',
    'market': 'Charlotte',
    'alias': 'CHA',
    'points': 3},
   'away': {'id': 'f4593f19-1082-4238-b519-09faadbeca37',
    'name': 'Rams',
    'market': 'Fordham',
    'alias': 'FOR',
    'points': 0}},
  'coin_toss': {'home': {'outcome': 'lost'},
   'away': {'outcome': 'won', 'decision': 'kick'}},
  'pbp': [{'type': 'drive',
    'id': '7111b600-404d-4660-bafb-9344855497d3',
    'sequence': 10000,
    'start_reason': 'UNKNOWN',
    'end_reason': 'UNKNOWN',
    'play_count': 3,
    'duration': '1:52',
    'first_downs': 0,
    'gain': -25,
    'penalty_yards': 0,
    'created_at': '2021-07-12T18:59:00Z',
    'updated_at': '2021-07-12T19:01:18Z',
    'events': [{'type': 'play',
      'id': '3de9e31f-ae65-46de-8556-99bdc3d8be46',
      'sequence': 1561401348492.0,
      'clock': '15:00',
  

In [60]:
## to retrieve gameids

version = 'v7'
language_code = 'en'
format_ = 'json'
api_key = 'ch4eqj94ajgbqjtr6nj7g4nf'
year = 2018
ncaafb_season = 'REG'


url_base = f"https://api.sportradar.us/ncaafb/trial/v7/{language_code}/games/{year}/{ncaafb_season}/schedule.{format_}?api_key={api_key}"

In [61]:
out = requests.get(url_base).json()



In [63]:
weeks = out['weeks']

In [70]:
weeks[0]

{'id': '43104428-6531-410b-a5f9-161453a510c8',
 'sequence': 1,
 'title': '1',
 'games': [{'id': '00ef0f71-2ad2-4789-8f53-b4bfcc713d3b',
   'status': 'closed',
   'scheduled': '2018-09-01T22:00:00+00:00',
   'attendance': 9240,
   'entry_mode': 'INGEST',
   'coverage': 'full',
   'game_type': 'regular',
   'venue': {'id': '5189127d-ea58-4917-84b8-8456f3abb8b2',
    'name': 'Jerry Richardson Stadium',
    'city': 'Charlotte',
    'state': 'NC',
    'country': 'USA',
    'zip': '28262',
    'address': '9201 University City Boulevard',
    'capacity': 15314,
    'surface': 'artificial',
    'roof_type': 'outdoor'},
   'home': {'id': '30abfee7-57a7-4984-9a1c-55cb28970da3',
    'name': 'Charlotte 49ers',
    'alias': 'CHA'},
   'away': {'id': 'f4593f19-1082-4238-b519-09faadbeca37',
    'name': 'Fordham Rams',
    'alias': 'FOR'},
   'broadcast': {'network': 'ESPN+', 'internet': 'WatchESPN'},
   'weather': {'condition': 'Partly Cloudy',
    'humidity': 90,
    'temp': 83,
    'wind': {'speed'

In [248]:
import tensorflow as tf

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization