# 02 - API CALL AND EDA FOR ENTIRE SEASON
- A cleaner, more efficient, and reproducible version of the API call and DF aggregation notebook
- Meant to be applied to all seasons

# FETCH DATA

In [2]:
import numpy as np
import pandas as pd
import warnings
import json
import http.client
import time
import ast

# Column and row display
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_seq_items', None)
pd.reset_option('display.max_colwidth', None)


# Notebook cell width display
from IPython.display import display, HTML
display(HTML("<style>:root { --jp-notebook-max-width: 98% !important; }</style>"))

# Float appearance, Pandas and NumPy
pd.set_option('display.float_format', '{:.0f}'.format)
np.set_printoptions(suppress=True, precision=0)

# Supress warnings
warnings.filterwarnings('ignore')

In [3]:
import sys
sys.path.append('../')
import functions as fn

In [7]:
game_ids = ['bf60c8fc-35cd-4749-a29e-4f48d66da57c', '138ba6ee-966d-40b6-8017-5b018c4a31d1', '3a30461a-f6cc-493d-bff3-5c4a447ff812', '5e90e42b-7203-4ae8-a506-43e5dc3140e9', '7af18bf0-4be8-4532-bd9c-845b88e9f684', 'b206fcec-b10d-4bc2-a7fe-3c69c532f8fc', 'b7bbcbe9-fa76-4175-8ced-b87b3076aca8',
'c4a110f2-847b-4710-9a54-7be66fdb3c99', 'ce4b9426-af23-4681-8bf7-800c7e020d4e', 'df15f570-b51c-4ece-a42b-aaa1e130290a', '39c07cba-900b-49dc-ade1-e34c815c202e', '5d7038d9-82cc-4464-a20a-04952454f928', '71ce533f-3932-4927-85af-2f0d01f5686d', 'ab13a972-6ea4-4059-ab5c-51ead63d9abd',
'c60eb416-db18-42aa-b936-f25fe21e65e0', 'd981ce4e-3139-44d4-b6a5-f16fd3e8ca6e', 'de17900b-51bc-4280-8f09-d143bf6cdeb1', '3db2a367-2c28-4c98-9edc-87c3a711952b', '4fe9b8f1-09a9-49a0-a4c7-812aac3d8c21', '7ffcc5a2-e9a5-413d-b692-e46818eb840a', '8024a131-2d3d-4ee1-bc56-816a55f248e0',
'90622f34-5635-42ed-bc72-ba3b5aeb98a1', 'b1429091-b0af-406e-9f09-5968c049e0ae', 'd5119ada-211c-4cf5-bc2a-aeb3589e3fd5', '3f1d5b42-2e10-4cb1-8db9-41a6bb2ab21f', 'f3d27d24-7b63-4314-aec7-c65405c82724', 'd144122e-0963-42f5-85c5-651cb8c1b123', 'e27da541-573a-4c86-bf53-a25a6d85c73b',
'e6e9bd0b-6803-492a-ba93-5d6dfb38c199', 'a1cfb866-3432-46f6-a354-385db0a6e7fb', 'feac652a-72a3-4c1e-a43f-7e2a1035c1dc', '4dc9220a-bc27-4e0a-838e-4860f4bc4da8', 'cf7291ef-cb13-4d5d-882e-0221330aafeb', '2eb9ef80-c7fc-47f0-ba9e-359ffd5208c2', '5a49b508-acbe-4b5d-912d-a50b4f8c6366',
'8b8f8d1e-68b3-4b87-8f3b-d427bf790911', '8badc9e6-483a-498d-8f92-a8f5b02de67f', '9cd300f9-b564-4beb-bc7f-687543a70c7f', 'b3f234aa-0768-488b-930c-4171378cdb97', 'b9a40f08-db99-4cb0-856a-2434f41df26f', 'beb9fe5d-a6aa-425f-8dab-ef5f11e59d11', 'f33e2f4e-0647-4962-97c9-bfac791ff951',
'8ebfa3c4-dc9c-4bf6-a51b-91d4c5b43a1f', '381afd4d-bfe3-4c01-be10-ea85e2e57c18', '8dbee8ee-a3a4-45f3-9425-239669ca778e', 'db55a870-a775-46d3-b338-c73b29596991', '02c1b1e9-e2dd-475f-84cf-6b1c9a2cbc51', 'd4910570-5897-4653-9dfe-552e8121624e', '1530637b-1218-4c7a-8cfe-5a9655a450ef',
'33a593a0-9bc5-49ca-837d-475bc7c90cec', '04d1dfe9-f1d1-430c-b3d2-a440d8d6a59b', '1fd76dfe-636c-46d5-92ca-630da9b25556', '2014bd73-669f-46ca-a507-6f64a47228cc', '43339bf6-5c08-4551-886b-25b5578be23b', '5425a946-fbf7-4735-be0a-dea6b9500bdf', '57727ff1-a524-4323-8e8b-1d7d6f3dc633',
'67c5b2b3-02fe-49ce-9e63-c1f606ed2c06', 'c051a5cd-d887-49f8-8f5a-b0bebd794f88', 'f88ed995-0a58-42c7-8b57-89c5c4640e4f', '6fb3effa-3ec0-42fd-bedf-64e969cdfca8', '347e3fc0-bd45-458d-a5ff-b978e360b63b', '9d2b7cff-f479-48b4-abd6-df2f0e23a5f4', '75225665-e0f5-4449-ad9c-e8cefb3d1b07',
'b295c097-316e-493f-b8a6-ed82346e8dff', 'f19f51de-5185-460f-bdc5-ec5887276fa4', '37d85a48-62c1-4c27-a05b-759fabd1b0c1', '0c925851-577d-4e7c-9722-cf6b58e41f1e', '13025660-aa95-4885-b33e-3a3554d4c4c3', '50250a4a-e3be-4ba5-a53b-a8894b2d6d5e', '5ccb1b6a-1dd1-4ae0-aa3a-f530546488c6',
'859ef7f3-39fd-45d5-8769-d7255837d2ce', 'b6c9bee6-1ec7-4282-96cb-ad1b59439f80', 'cb54b23b-4192-4eda-8431-794d21dd58ab', 'cde58dfc-e789-4ff2-bda4-a8ece588e1d7', 'eb541067-920e-4052-945f-4fb4142341a6', 'e86d674e-a47c-4038-adac-5a6b659dd2c8', '8ee9c37b-4fd8-4954-9d78-f84d1efd700f',
'bf8318c0-e876-4a47-9cb4-d3b85725d8f6', '449e9f9f-d2be-40df-ac2c-7e0caf6b1269', 'befce21a-d8dd-416d-bad2-e104279f8823', 'b18e5b30-2ced-4697-b86c-398965fb44ad', '0005ea17-2fc9-4b63-9a52-8bc893b4b481', '28026bf8-209d-470c-98c3-dce2a16141b5', '5d16b9f5-4e23-43f2-8803-4929046b3128',
'72f82240-0035-4f1f-8854-68f5b7498c79', '81e7ef41-0a62-496f-b224-6aa1957df6aa', 'a62ebedf-c6fd-47e6-b511-ce5e998ac898', 'ac2edcfb-f5e4-4fbd-ae49-fe424174db0c', 'f4fb34ba-f863-48af-95f1-c317562087ed', '6a5922a9-23e8-4c38-b3c4-604d0f97b1b6', 'fc458b69-5e18-4564-af26-8bc3b58756b8',
'e53d49fb-8a14-4fb7-b6aa-9fa9466a7b4d', '78419085-6fc4-4b44-b6c7-f3dd1db63a9c', '1a57777c-7c57-43fa-bf08-66bffc204fd1', '780f2605-5066-4cb2-98cd-75d5634ad77a', '4ec0c067-e9a7-4fb0-ab34-8ae97db6a646', '50c5b405-8f86-427f-a338-a8f2f4c5a629', '77fe4f5c-7dea-4d79-ae8f-6fdc1a34b205',
'897b13fd-bf0a-4744-b2a5-424a09a6b37c', 'a7501f59-9d2e-4f94-96c6-8b4d9ab97408', 'bafd2523-3b0a-49c7-92a6-4c83b2a85dc1', 'e4f775bf-9908-45dd-bd65-871cc37ca375', '3baa86f1-4618-4e06-939a-4e741a7ab8ec', 'ad106c3a-0f28-4de4-8594-324d22b25bae', '553ad2a4-48be-4ff7-92ce-8e5dc7a7329b',
'69102143-6d12-450e-aaa5-8ca26c4e5c9f', '056b4eb7-64ce-4858-bae7-f3701d8eb753', '20ef56e1-8c82-4f96-84b0-3ce79f1a1a31', '430e5d4d-d4e8-4c54-a93f-ddb1a97cd7d9', 'de5a763a-8dd3-4d4e-84e6-41b9d5917c29', '02211cd0-5ad9-4b1d-818c-20817f18484f', '236c72f3-e4a8-4f4b-a469-8a057a1d32db',
'32c7d6d0-5a74-4f14-af43-1337acc4d8e4', '67f337fc-6465-4dd9-8bdd-2a154b70534b', 'c88b342c-fe51-48d2-992e-a4e21a7de71a', 'd5e3b9a0-9f28-439e-bd89-2b87eee439b6', 'ed19500a-c5bf-4f38-89e1-291cb7e20ec9', '40a19cc9-bc8c-4ac4-b588-28d8ee87d29a', '35aedbae-936a-47ab-8113-17c69082a9eb',
'37e27f71-bc52-436d-88d1-d55efe0771fd', 'c1f26d78-9b4c-421c-9787-093471339913', '58f33c2e-85e9-4210-af03-17a576e2ab9b', 'f5f0db8e-7900-4d38-8d94-245a43054141', '9e4f6e8a-2f76-4432-a741-6f26ad6b53ab', '4c107e07-511a-4ec2-9172-af0e381690a0', '74a9daed-6d9a-4a34-b7fd-0537fe991647',
'83cd16ed-aca2-4d13-bff8-bb16225bedb1', 'a4da928d-acf8-48d8-a35b-e0cd247aaabc', 'c9f431dd-637f-48ca-b01b-76510249d4a4', 'cdf0116f-cd16-456e-9b7c-33975736b93e', 'f4acdc1b-4dbe-42d0-a6c3-617558c40684', 'f6dd0415-2dbd-4e98-9b1b-bca1b8074a05', 'fe7fa4d7-f4d6-4e24-85bd-de084acbe3a9',
'77a33d6b-1e3f-416f-8890-fe38f535bee9', 'e1e21176-49f5-4145-be0f-f428b5c11926', 'dbd63c38-1d79-4b19-9db4-b167cadc70d0', 'f7db0b50-cd05-436a-8214-a52969d21462', '44cdfc9a-758f-43b5-8925-8ca783edf65e', '1653b8ff-7a81-4a38-b992-93c4bfd1b3d9', '3e54518d-23f3-4a70-a78a-bf5911bffbe5',
'9384d4ea-e36d-4055-ac1d-6936d963030f', '97f8be0f-64b0-4658-b2f4-c7302a49bdb0', 'ce6ea983-ad12-4d89-be0a-a0c87d37717b', 'd3500c07-dfce-4893-834d-d54be6ff0767', 'f1aae2b9-8ecd-431a-ab18-20e1ed251b27', '2c2ded8a-a51a-4a92-b238-6d97092c2d7a', '9e63e019-88a3-49ca-9c78-e50488651085',
'e496f932-62b9-4a68-bdb5-a05456e0dd83', 'acb5d131-4758-4593-86cf-13b761f29040', 'e20c1ab3-e723-4156-a9de-a14b7ffbe317', 'b7796b52-a95f-456c-9f8a-e5b9826770eb', '5b67e186-7af6-4380-99b7-d4f6aa289ded', '61787d91-5ad7-438d-9b44-bfb4acecaefa', '72fc63d4-6c4d-4ea2-8559-ca5e15f6a129',
'89f6dd2d-aa44-4ad1-adff-643bf9d69c55', '9330ff2e-d460-445e-a708-c999830425ae', 'c7659740-7f30-4a07-a8a3-2dcfddec8be2', 'd01d35c3-ad0b-4687-9e0d-23edb7475ac3', 'e452c2fa-1975-4236-b0c0-bb09c6793a79', '80854172-e672-4cf5-817d-75f326ef93c1', '833598fd-1eb5-4f57-a041-5505da5d943d',
'ab83b7bf-6353-4b3f-b687-393f25f03959', 'b587f407-d807-4367-af81-ca0d03741b7c', '5988c183-f838-4ff1-80a0-8563d48b51a4', 'd762d5d8-927b-49c8-9701-3dae3caa210e', '0570e6ae-b774-45d7-92ff-254de62e70ba', '20e9e73e-9e8c-40cd-9b5c-471fcd224441', '0e8c04c3-b9b9-4a17-ae21-b4572407ba50',
'37c0ea8c-bd70-428c-a8b6-fa9a58eca788', '3eb4c32d-482d-44ab-92a3-88bdf1d50850', '6e4ce9b2-f7ea-400a-89b1-17fcc078048e', '76aba25a-91f2-4d85-8c74-e7a211def3fa', 'b37b5ed5-d66d-41ae-b680-6ae2e6103486', 'bf00aa76-84ce-4553-ba58-f2303f003af4', '379bbe84-0685-4965-af48-73920a50f4bb',
'759d9794-2ea4-4c71-aa91-3ea9efbc9284', '17494e1a-01ce-4b63-8835-24824bf5dd48', 'afb0e513-ab69-4ca1-b1ae-a8ab3fc490a5', 'd954b72e-c480-4776-b64b-75a0a781a50e', 'ec9e43b0-c1d0-4ff8-aab6-9195721e39d3', 'b9a405a3-d53d-483d-8f38-9f10263a0492', '29723007-8261-470a-a7c7-5713075ab27f',
'49fe6388-f9bf-4974-be9b-5a18fbcc0d57', '4a17d98f-a876-4214-b7d1-ab987578c562', '72687ef0-fd87-4e79-a590-82332bff157f', '7673d7a8-151c-493a-9975-25a9d1573f3a', 'bd0eed55-b5b7-4e35-a20f-9cbff8c323eb', 'ceba190d-a811-4b74-a3a3-4bfd4daa3a6a', 'de11a0bc-5f2a-4c6c-b2ac-ead3971c4345',
'17d283c8-bc55-4f8c-891b-3827137a1166', 'dbfb3d69-d093-4456-89dc-64e487d2f57a', '61a7ad62-a168-4279-8077-93ffc53e2656', '7fb75b32-9705-43ca-b7f8-68edf0a093e7', 'd844b5a7-6fb5-42c3-9ded-db10817c477f', '0f58f1bf-21aa-48e4-aaa0-351234764612', 'c6573861-4766-48ed-aae3-53c0aca6e9ea',
'178083b4-c7e3-4754-9d53-a7cf5689932e', '3e067ff5-2a99-4171-a33d-27ab8aec59ae', '439090e4-7429-464e-bd9e-d2fc20f6ba6d', '6e51ac72-a8cd-40d6-b638-6f12536ed243', '9cc461c7-4cc9-4eb5-86db-1a9f6bb9f9aa', '9d2e248e-a120-4ddb-9bbe-4428c2663ebe', 'c6e031ee-56f7-43bb-bd8f-fc92b1cc0b62',
'5fb5d3b5-fac3-496e-b686-d6c8d33dfb7d', '6157d7ea-159a-42e1-83d5-17f3e2c95928', '7a1bf5cb-3a14-49bf-b7e8-8aee0ae5e20d', '906eeb30-3279-4467-93be-55ec1f32fa36', '440180d8-8dc2-4481-b4a1-dc5dfae8f764', 'b32abb73-8268-4f40-aeba-3acf3448bbf1', '9b309a87-6279-450f-852c-fae053566b4c',
'3d3fafda-7d7c-4ecd-b39a-bebd15511465', '931c22e0-7c1b-405b-a1b5-42b5c718c457', '3ca6a8c4-9865-4fcf-a95d-e8c98904272d', '5fcb929c-7e70-48a8-a5a6-78498eea2f6b', '9256a55f-7380-4adf-b546-8cc730553d7b', 'b5d4cc73-6cf6-4257-a3c2-a0fc122f9075', 'de98e9c5-c5e8-4a25-b8d5-ed2051c452f1',
'ea517d29-5b31-49b5-8dcb-ab65db0d2b7b', '7d1ec13f-44e5-4525-9a18-586d0f4f5396', 'ea189422-afff-4730-98d1-e791dd3122b9', '52414295-a998-4b66-80a5-2fdd5c66b264', '96df77c4-50bd-4a7a-a74a-0cca534cd12e', '7c39576e-a060-40d8-8a32-e1a86b5ee7c0', '09d5ac59-8fe4-4239-8bcf-3d77284fa471',
'bbb1588e-dd86-4621-b618-011d0278135e', '17b3df26-4163-4234-83be-700cb0ab5f93', '58ea9307-f76e-45e4-9f31-432e320331c2', '9d0e87dd-5bc0-47a4-9c0a-7a426b5605e5', 'a3a9a355-7d17-45a4-a7b1-d0ac460e4877', 'beaee843-99ce-47c1-b32f-9a4055951698', 'ca33baaa-05ac-4bc9-b1da-4330689ce690',
'e28ece47-84ed-4f83-a5f5-6bfd6cf58396', 'f98ba7c3-d9cf-470d-a1b9-8557e148ece4', '2b86a156-8ba0-4c37-969e-fa14372dd263', '95fb687b-d407-4eb1-907f-af18e41eda0a', '1348ec41-0920-450b-b809-440581016fac', 'b9ceeb26-c2c5-45ac-8a5f-f01639818e4c', '409f6cec-7369-48fc-b707-9a85621fe059',
'87097385-eb47-4932-89a0-656c35c0d285', '9f0a94c7-439a-44f3-979a-ca66855ce34a', 'ef0c58f6-54f6-4889-b880-8a503494d138', '1be368b9-7379-4d76-b954-9d3e7eefc1c1', '2f1ee220-e31b-4427-8686-b65f7f151776', '812e3626-aea7-4907-b726-3b2dc719667c', '86b4dcc4-cc87-43b7-98e1-ea82b24dd400',
'ae1134d1-7e07-4197-8e9b-9fff27ff6519', 'bb9a5164-71d2-4efb-9d3e-13c5a4b7df04', 'beedd537-287a-4699-82b6-58387becf9ad', 'cbf9656b-66ec-4568-b697-93f0f723949b', 'ce9870af-6d45-403a-bf73-2cf6d3382c52', '397e9db0-eb0d-44ec-a579-848b32b116f2', 'efa47ae7-e65b-4ae2-bc09-4400d8ec093e',
'7d5ce394-4034-4ce3-b500-baff56fe0546', '836f3e95-a59b-4ec2-8461-932548ca3acd', 'f96a8e3f-f7d4-4b8d-8a5d-9c5c0bf1d0e0', '2d05d64f-24c6-4d1a-a682-e909f7fcccf9', '521ea2e6-3f9b-4e15-b94c-cb76fa6ca785', '189c02f5-7832-41a3-b6ec-dfa9d244e095', '5301e285-9d4b-4991-8f64-059bfd6bad13',
'7913a1bc-a010-474c-922a-22b0d552fb9c', '9163d9a1-945f-4b0f-9746-20eaf6a73de3', '9cfe315e-1497-41cf-b39c-deff6371f471', 'b3e960cd-6f6f-4da2-897c-ec30a9285e65', 'd714479c-d97d-4aad-9c71-0833ed049647', 'e6b71fe7-ecb3-4931-ad91-907dee42c561', '0b690459-804a-4f62-9625-077b0c3e21da',
'9038a474-aafe-4c67-94b2-72c9e387b206', 'acb4aaf3-8850-4b35-abc6-d0f8f6824905', 'b4adc1bb-a794-42ee-bd33-09646ea596e7', 'fdc4acd7-da9d-4dd0-b1ab-206432a5f781', '3ca61a46-2cef-4962-a481-f7ae6d7751eb']

# '7c0c0abf-f566-4a1f-a482-b74a991e1663' - Canceled due to Damar Hamlin's cardiac arrest

In [512]:
%%time
# game_ids = ['bf60c8fc-35cd-4749-a29e-4f48d66da57c', '138ba6ee-966d-40b6-8017-5b018c4a31d1']
api_key = 'insert_your_api_key_here'
season_all_games = fn.fetch_data(game_ids, api_key)

CPU times: user 45.4 ms, sys: 1.41 ms, total: 46.8 ms
Wall time: 5.38 s


In [514]:
type(season_all_games)

list

JSON normalize to convert to dataframe

In [515]:
season_all_games = pd.json_normalize(season_all_games, errors = 'ignore')
type(season_all_games)

pandas.core.frame.DataFrame

Save to CSV

In [518]:
season_all_games.to_csv('../working_exports/season_all_games.csv')

# DATA UNNESTING AND DF CREATION

In [4]:
season_all_games = pd.read_csv('../working_exports/season_all_games.csv')

In [5]:
season_all_games.shape

(271, 66)

The shape comports with our expectation. 271 rows, each representing a regular season game in the 2022 season.
- 32 teams x 17 games played by each team / 2 teams playing per game = 272
  - Minus 1 game that was canceled due to Damar Hamlin's cardiac arrest = 271

- Reading in the CSV to a dataframe will not resolve the `periods` column formatting issue
- You still have to use `.apply(ast.literal_eval)` to format it properly as a dataframe column

## Games data

In [6]:
%%time
games = season_all_games
games['periods'] = games['periods'].apply(ast.literal_eval)
games_periods_exploded = games.explode('periods')

CPU times: user 1min 5s, sys: 4.68 s, total: 1min 10s
Wall time: 1min 10s


In [7]:
games_periods_exploded.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'id', 'status', 'scheduled', 'attendance',
       'entry_mode', 'clock', 'quarter', 'sr_id', 'game_type',
       'conference_game', 'duration', 'periods', '_comment',
       'weather.condition', 'weather.humidity', 'weather.temp',
       'weather.wind.speed', 'weather.wind.direction', 'summary.season.id',
       'summary.season.year', 'summary.season.type', 'summary.season.name',
       'summary.week.id', 'summary.week.sequence', 'summary.week.title',
       'summary.venue.id', 'summary.venue.name', 'summary.venue.city',
       'summary.venue.state', 'summary.venue.country', 'summary.venue.zip',
       'summary.venue.address', 'summary.venue.capacity',
       'summary.venue.surface', 'summary.venue.roof_type',
       'summary.venue.sr_id', 'summary.venue.location.lat',
       'summary.venue.location.lng', 'summary.home.id', 'summary.home.name',
       'summary.home.market', 'summary.home.alias', 'summary.home.sr_id',
       'summary.home.used_timeou

In [8]:
drop_cols = ['status', 'scheduled', 'attendance', 'entry_mode', 'clock', 'quarter', 'conference_game', 'duration', '_comment', 'weather.wind.direction', 'summary.season.id', 
       'summary.season.name', 'summary.week.id', 'summary.week.title', 'summary.venue.id', 'summary.venue.name', 'summary.venue.city', 'summary.venue.state', 'summary.venue.country',
       'summary.venue.zip', 'summary.venue.address', 'summary.venue.capacity', 'summary.venue.sr_id', 'summary.venue.location.lat', 'summary.venue.location.lng', 'summary.home.id', 'summary.home.name',
       'summary.home.market', 'summary.home.sr_id', 'summary.home.used_timeouts', 'summary.home.remaining_timeouts', 'summary.home.points', 'summary.home.used_challenges',
       'summary.home.remaining_challenges', 'summary.home.record.wins', 'summary.home.record.losses', 'summary.home.record.ties', 'summary.away.id', 'summary.away.name', 'summary.away.market',
       'summary.away.sr_id', 'summary.away.used_timeouts', 'summary.away.remaining_timeouts', 'summary.away.points', 'summary.away.used_challenges', 'summary.away.remaining_challenges', 'summary.away.record.wins', 'summary.away.record.losses', 'summary.away.record.ties']

rename_cols = {"id": "game_id"}

games_periods_exploded = fn.drop_rename_cols(games_periods_exploded, drop_cols, rename_cols)

## Periods data

In [9]:
periods = pd.json_normalize(games_periods_exploded['periods'])
# periods.head()

In [10]:
drop_cols = ['id', 'sequence', 'scoring.home.name', 'scoring.home.market', 'scoring.home.points', 'scoring.away.name', 'scoring.away.market', 'scoring.away.points', 'coin_toss.home.outcome',
       'coin_toss.home.decision', 'coin_toss.home.direction', 'coin_toss.away.outcome', 'coin_toss.away.decision', 'coin_toss.away.direction']

periods = fn.drop_rename_cols(periods, drop_cols, rename_cols)

### Concatenate with periods DF

In [11]:
games_periods_exploded_periods = fn.concat_df(games_periods_exploded, periods)
# games_periods_exploded_periods.head()

In [12]:
games_periods_exploded_periods.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'game_id', 'sr_id', 'game_type',
       'periods', 'weather.condition', 'weather.humidity', 'weather.temp',
       'weather.wind.speed', 'summary.season.year', 'summary.season.type',
       'summary.week.sequence', 'summary.venue.surface',
       'summary.venue.roof_type', 'summary.home.alias', 'summary.away.alias',
       'period_type', 'number', 'pbp', 'scoring.home.id', 'scoring.home.alias',
       'scoring.home.sr_id', 'scoring.away.id', 'scoring.away.alias',
       'scoring.away.sr_id'],
      dtype='object')

## PBP data

In [13]:
games_periods_exploded_periods_pbp_exploded = games_periods_exploded_periods.explode('pbp')
pbp = pd.json_normalize(games_periods_exploded_periods_pbp_exploded['pbp'])
# pbp.head()

In [14]:
drop_cols = ['pbp']

games_periods_exploded_periods_pbp_exploded = fn.drop_rename_cols(games_periods_exploded_periods_pbp_exploded, drop_cols, rename_cols)

In [15]:
drop_cols = ['home_points', 'away_points', 'play_type', 'fake_punt', 'fake_field_goal', 'screen_pass', 'play_action', 'run_pass_option', 'statistics', 'details', 'start_situation.clock', 'start_situation.down', 'start_situation.yfd', 'start_situation.possession.id', 'start_situation.possession.name',
              'start_situation.possession.market', 'start_situation.possession.alias', 'start_situation.possession.sr_id', 'start_situation.location.id', 'start_situation.location.name', 'start_situation.location.market', 'start_situation.location.alias', 'start_situation.location.sr_id', 'start_situation.location.yardline',
              'end_situation.clock', 'end_situation.down', 'end_situation.yfd', 'end_situation.possession.id', 'end_situation.possession.name', 'end_situation.possession.market', 'end_situation.possession.alias', 'end_situation.possession.sr_id', 'end_situation.location.id', 'end_situation.location.name',
              'end_situation.location.market', 'end_situation.location.alias', 'end_situation.location.sr_id', 'end_situation.location.yardline', 'event_type', 'description', 'type', 'inside_20', 'created_at', 'sequence', 'start_reason', 'end_reason', 'play_count', 'duration', 'first_downs', 'gain', 'penalty_yards', 
              'scoring_drive', 'created_at', 'updated_at', 'start_clock', 'end_clock', 'first_drive_yardline', 'last_drive_yardline', 'net_yards', 'pat_successful', 'pat_points_attempted', 'offensive_team.points', 'offensive_team.id', 'defensive_team.points', 'defensive_team.id', 'clock', 'wall_clock', 'scoring_play', 
              'scoring_description', 'hash_mark', 'deleted']

rename_cols = {"id": "event_id"}

pbp = fn.drop_rename_cols(pbp, drop_cols, rename_cols)

### Concatenate with PBP dataframe

In [16]:
games_periods_exploded_periods_pbp_exploded_pbp = fn.concat_df(games_periods_exploded_periods_pbp_exploded, pbp)
# games_periods_exploded_periods_pbp_exploded_pbp.head()

In [17]:
games_periods_exploded_periods_pbp_exploded_pbp.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'game_id', 'sr_id', 'game_type',
       'periods', 'weather.condition', 'weather.humidity', 'weather.temp',
       'weather.wind.speed', 'summary.season.year', 'summary.season.type',
       'summary.week.sequence', 'summary.venue.surface',
       'summary.venue.roof_type', 'summary.home.alias', 'summary.away.alias',
       'period_type', 'number', 'scoring.home.id', 'scoring.home.alias',
       'scoring.home.sr_id', 'scoring.away.id', 'scoring.away.alias',
       'scoring.away.sr_id', 'event_id', 'team_sequence', 'events'],
      dtype='object')

## Events data

In [18]:
games_periods_exploded_periods_pbp_exploded_pbp_events_exploded = games_periods_exploded_periods_pbp_exploded_pbp.explode('events')

# Convert events column to its owned flattened dataframe
events_flattened = pd.json_normalize(games_periods_exploded_periods_pbp_exploded_pbp_events_exploded['events'])

In [19]:
games_periods_exploded_periods_pbp_exploded_pbp_events_exploded.shape, events_flattened.shape

((51314, 28), (51314, 71))

In [20]:
drop_cols = ['events']

games_periods_exploded_periods_pbp_exploded_pbp_events_exploded = fn.drop_rename_cols(games_periods_exploded_periods_pbp_exploded_pbp_events_exploded, drop_cols, rename_cols)

In [21]:
drop_cols = ['event_type', 'blitz', 'goaltogo', 'sequence', 'created_at', 'updated_at', 'start_situation.possession.id', 'start_situation.possession.name', 'start_situation.possession.market', 'start_situation.location.id', 'start_situation.location.name', 'start_situation.location.market', 'score.sequence', 'score.clock', 
              'score.points', 'score.home_points', 'score.away_points', 'score.points-after-play.id', 'score.points-after-play.sequence', 'score.points-after-play.type', 'event_type', 'running_lane', 'end_situation.location.name',	'end_situation.location.market', 'end_situation.possession.name',
              'end_situation.possession.market', 'end_situation.location.id', 'end_situation.location.sr_id', 'start_situation.possession.sr_id', 'start_situation.location.sr_id', 'end_situation.possession.id', 'end_situation.possession.sr_id', 'details', 'deleted']

rename_cols = {"id": "play_id"}

events_flattened = fn.drop_rename_cols(events_flattened, drop_cols, rename_cols)

### Concatenate with events dataframe

In [22]:
games_periods_exploded_periods_pbp_exploded_pbp_events_exploded_events = fn.concat_df(games_periods_exploded_periods_pbp_exploded_pbp_events_exploded, events_flattened)
# games_periods_exploded_periods_pbp_exploded_pbp_events_exploded_events.head()

## Statistics data

In [23]:
games_periods_exploded_periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded = games_periods_exploded_periods_pbp_exploded_pbp_events_exploded_events.explode('statistics')

# Convert events column to its owned flattened dataframe
statistics_flattened = pd.json_normalize(games_periods_exploded_periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded['statistics'])

In [24]:
drop_cols = ['statistics']
games_periods_exploded_periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded = fn.drop_rename_cols(games_periods_exploded_periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded, drop_cols, rename_cols)

In [25]:
drop_cols = ['missed_tackles', 'def_comp', 'hang_time', 'faircatch', 'nullified', 'sack', 'sack_yards', 'ast_sack', 'tlost', 'tlost_yards', 'ast_tlost', 'fumble', 'forced', 'own_rec', 'own_rec_yards', 'squib_kick', 'onside_attempt', 'onside_success', 'play_category', 
                'forced_fumble', 'out_of_bounds', 'category', 'team.name', 'team.market', 'touchback', 'net_yards', 'kneel_down', 'scramble', 'ast_tackle', 'down', 'made', 'penalty', 'qb_hit', 'missed', 'return', 'pass_defended']

statistics_flattened = fn.drop_rename_cols(statistics_flattened, drop_cols, rename_cols)

### Concatenate with statistics dataframe

In [26]:
games_periods_exploded_periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded = fn.concat_df(games_periods_exploded_periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded, statistics_flattened)
games_periods_exploded_periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,game_id,sr_id,game_type,periods,weather.condition,weather.humidity,weather.temp,weather.wind.speed,summary.season.year,summary.season.type,summary.week.sequence,summary.venue.surface,summary.venue.roof_type,summary.home.alias,summary.away.alias,period_type,number,scoring.home.id,scoring.home.alias,scoring.home.sr_id,scoring.away.id,scoring.away.alias,scoring.away.sr_id,event_id,team_sequence,type,play_id,clock,home_points,away_points,play_type,wall_clock,description,fake_punt,fake_field_goal,screen_pass,hash_mark,play_action,run_pass_option,start_situation.clock,start_situation.down,start_situation.yfd,start_situation.possession.alias,start_situation.location.alias,start_situation.location.yardline,end_situation.clock,end_situation.down,end_situation.yfd,end_situation.possession.alias,end_situation.location.alias,end_situation.location.yardline,players_rushed,men_in_box,play_direction,left_tightends,right_tightends,pocket_location,qb_at_snap,huddle,pass_route,scoring_play,scoring_description,stat_type,attempt,yards,player.id,player.name,player.jersey,player.position,player.sr_id,team.id,team.alias,team.sr_id,complete,att_yards,firstdown,inside_20,goaltogo,blitz,hurry,knockdown,pocket_time,on_target_throw,batted_pass,target,reception,yards_after_catch,dropped,catchable,tackle,def_target,broken_tackles,yards_after_contact,touchdown,endzone,interception,incompletion_type,int_yards,opp_rec,opp_rec_yards,lost,blocked,block,int_touchdown,opp_rec_td,safety,own_rec_td,downed
0,0,0,bf60c8fc-35cd-4749-a29e-4f48d66da57c,sr:match:33622875,regular,"{'period_type': 'quarter', 'id': '1ce4819a-9e7...",Cloudy,48,88,8,2022,REG,1,artificial,outdoor,LA,BUF,quarter,1,2eff2a03-54d4-46ba-890e-2bc3925548f3,LA,sr:competitor:4387,768c92aa-75ff-4a43-bcc0-f2798c2e1724,BUF,sr:competitor:4376,b8a4b06c-7a04-427a-84af-c06984e0286e,1,play,fb5c7670-2fcf-11ed-b415-9ba7d2295705,15:00,0,0,kickoff,2022-09-09T00:23:36+00:00,M.Gay kicks 65 yards from LA 35 to the BUF End...,False,False,False,Middle,False,False,15:00,0,0,LA,LA,35,15:00,1,10,BUF,BUF,25,,,,,,,,,,,,kick,1.0,65.0,2b90e091-ef78-4753-93eb-0acf3632c206,Matt Gay,8.0,K,sr:player:1262704,2eff2a03-54d4-46ba-890e-2bc3925548f3,LA,sr:competitor:4387,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0,0,bf60c8fc-35cd-4749-a29e-4f48d66da57c,sr:match:33622875,regular,"{'period_type': 'quarter', 'id': '1ce4819a-9e7...",Cloudy,48,88,8,2022,REG,1,artificial,outdoor,LA,BUF,quarter,1,2eff2a03-54d4-46ba-890e-2bc3925548f3,LA,sr:competitor:4387,768c92aa-75ff-4a43-bcc0-f2798c2e1724,BUF,sr:competitor:4376,b8a4b06c-7a04-427a-84af-c06984e0286e,1,play,fb5c7670-2fcf-11ed-b415-9ba7d2295705,15:00,0,0,kickoff,2022-09-09T00:23:36+00:00,M.Gay kicks 65 yards from LA 35 to the BUF End...,False,False,False,Middle,False,False,15:00,0,0,LA,LA,35,15:00,1,10,BUF,BUF,25,,,,,,,,,,,,return,,,,,,,,768c92aa-75ff-4a43-bcc0-f2798c2e1724,BUF,sr:competitor:4376,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0,0,bf60c8fc-35cd-4749-a29e-4f48d66da57c,sr:match:33622875,regular,"{'period_type': 'quarter', 'id': '1ce4819a-9e7...",Cloudy,48,88,8,2022,REG,1,artificial,outdoor,LA,BUF,quarter,1,2eff2a03-54d4-46ba-890e-2bc3925548f3,LA,sr:competitor:4387,768c92aa-75ff-4a43-bcc0-f2798c2e1724,BUF,sr:competitor:4376,b8a4b06c-7a04-427a-84af-c06984e0286e,1,play,a8b90270-2fd5-11ed-b415-9ba7d2295705,15:00,0,0,pass,2022-09-09T00:24:21+00:00,J.Allen pass short right complete to BUF 30. C...,False,False,False,Right Hash,False,False,15:00,1,10,BUF,BUF,25,14:30,2,4,BUF,BUF,31,4.0,6.0,Right,0.0,1.0,Middle,Shotgun,Huddle,In,,,pass,1.0,6.0,3069db07-aa43-4503-ab11-2ae5c0002721,Josh Allen,17.0,QB,sr:player:1208608,768c92aa-75ff-4a43-bcc0-f2798c2e1724,BUF,sr:competitor:4376,1.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,
3,0,0,bf60c8fc-35cd-4749-a29e-4f48d66da57c,sr:match:33622875,regular,"{'period_type': 'quarter', 'id': '1ce4819a-9e7...",Cloudy,48,88,8,2022,REG,1,artificial,outdoor,LA,BUF,quarter,1,2eff2a03-54d4-46ba-890e-2bc3925548f3,LA,sr:competitor:4387,768c92aa-75ff-4a43-bcc0-f2798c2e1724,BUF,sr:competitor:4376,b8a4b06c-7a04-427a-84af-c06984e0286e,1,play,a8b90270-2fd5-11ed-b415-9ba7d2295705,15:00,0,0,pass,2022-09-09T00:24:21+00:00,J.Allen pass short right complete to BUF 30. C...,False,False,False,Right Hash,False,False,15:00,1,10,BUF,BUF,25,14:30,2,4,BUF,BUF,31,4.0,6.0,Right,0.0,1.0,Middle,Shotgun,Huddle,In,,,receive,,6.0,a1c40664-b265-4083-aad2-54b4c734f2c5,Stefon Diggs,14.0,WR,sr:player:835197,768c92aa-75ff-4a43-bcc0-f2798c2e1724,BUF,sr:competitor:4376,,,0.0,0.0,0.0,,,,,,,1.0,1.0,1.0,0.0,0.0,,,,,,,,,,,,,,,,,,,
4,0,0,bf60c8fc-35cd-4749-a29e-4f48d66da57c,sr:match:33622875,regular,"{'period_type': 'quarter', 'id': '1ce4819a-9e7...",Cloudy,48,88,8,2022,REG,1,artificial,outdoor,LA,BUF,quarter,1,2eff2a03-54d4-46ba-890e-2bc3925548f3,LA,sr:competitor:4387,768c92aa-75ff-4a43-bcc0-f2798c2e1724,BUF,sr:competitor:4376,b8a4b06c-7a04-427a-84af-c06984e0286e,1,play,a8b90270-2fd5-11ed-b415-9ba7d2295705,15:00,0,0,pass,2022-09-09T00:24:21+00:00,J.Allen pass short right complete to BUF 30. C...,False,False,False,Right Hash,False,False,15:00,1,10,BUF,BUF,25,14:30,2,4,BUF,BUF,31,4.0,6.0,Right,0.0,1.0,Middle,Shotgun,Huddle,In,,,defense,,,ca53fda9-d20a-4bc7-b8dc-deef28355399,Jalen Ramsey,5.0,DB,sr:player:986891,2eff2a03-54d4-46ba-890e-2bc3925548f3,LA,sr:competitor:4387,,,,,,0.0,0.0,0.0,,,0.0,,,,,,1.0,1.0,,,,,,,,,,,,,,,,,


In [27]:
sportradar = games_periods_exploded_periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded

## Merge dataframes

In [28]:
# Max column width so we can read play descriptions
pd.set_option('display.max_colwidth', None)

In [29]:
# Check for duplicate columns
column_names = list(sportradar.columns)
duplicate_columns = set([x for x in column_names if column_names.count(x) > 1])
duplicate_columns_list = list(duplicate_columns)
duplicate_columns_list

[]

In [30]:
drop_cols = ['game_type', 'periods', 'interception', 'int_yards', 'opp_rec', 'opp_rec_yards', 'lost', 'blocked', 'block', 'int_touchdown', 'opp_rec_td', 'safety', 'own_rec_td', 'downed']

sportradar = fn.drop_rename_cols(sportradar, drop_cols, rename_cols)

In [31]:
sportradar.shape

(154285, 96)

# CLEANING

### Filter for WR-specific rows

In [32]:
# Filter for padd and receive plays
sportradar = sportradar[
                        (sportradar['stat_type'] == 'pass') 
                        | (sportradar['stat_type'] == 'receive') 
                        | ((sportradar['stat_type'] == 'penalty') & (sportradar['description'].str.contains('defensive pass interference', case = False, na = False)))
                        ]

In [33]:
# Create dummy column for quarterback name
sportradar['alias'] = sportradar.loc[sportradar['stat_type'] == 'pass']['player.name']
sportradar['alias'] = sportradar['alias'].apply(str)

# Create dummy column for defender name
sportradar['name'] = sportradar.loc[sportradar['stat_type'] == 'penalty']['player.name']
sportradar['name'] = sportradar['name'].apply(str)

In [34]:
# Extract passer data
passer_data = sportradar[sportradar['stat_type'] == 'pass'].copy()

# Rename passer columns
passer_columns = {
    'complete': 'qb_complete', 
    'att_yards': 'qb_att_yards', 
    'blitz': 'qb_blitz',
    'hurry': 'qb_hurry',
    'knockdown': 'qb_knockdown',
    'pocket_time': 'qb_pocket_time',
    'on_target_throw': 'qb_on_target_throw',
    'batted_pass': 'qb_batted_pass',
    'incompletion_type': 'qb_incompletion_type',
    'alias': 'qb_name'
}
passer_data.rename(columns=passer_columns, inplace=True)

In [35]:
# Extract defender data
defender_data = sportradar[sportradar['stat_type'] == 'penalty'].copy()

# Rename defender columns (assuming 'name' represents defender's name)
defender_data.rename(columns={'name': 'defender_name'}, inplace=True)

In [36]:
# Extract receiver data and create a separate dataframe for it
sportradar_wr = sportradar[sportradar['stat_type'] == 'receive'].copy()

# Merge receiver data with passer data
# Can merge on the `play_id` field
sportradar_wr = sportradar_wr.merge(passer_data[['play_id', 'qb_complete', 'qb_att_yards', 'qb_blitz', 'qb_hurry', 'qb_knockdown', 'qb_pocket_time', 'qb_on_target_throw', 'qb_batted_pass', 'qb_incompletion_type', 'qb_name'
                                                ]], on='play_id', how='left')

# Merge receiver data with defender data
sportradar_wr = sportradar_wr.merge(defender_data[['play_id', 'defender_name']], on='play_id', how='left')

# Drop old passer-specific columns
sportradar_wr.drop(['complete', 'att_yards', 'blitz', 'hurry', 'knockdown', 'pocket_time', 'on_target_throw', 'batted_pass', 'incompletion_type', 'alias', 'name'], axis=1, inplace=True, errors = 'ignore')

In [37]:
# Change reception nulls to zeros
sportradar_wr['reception'].fillna(0, inplace = True)

### Filtering out rows that contain "no play" in the description
- SR is incorrectly counting pass plays that end with defensive penalties as receiver targets
- We need to delete these to avoid overcounting targets

In [38]:
sportradar_wr = sportradar_wr[~sportradar_wr['description'].str.contains('no play', case=False)]

### Drop unnecessary columns
- Some are redudnant

In [39]:
drop_cols = ['sr_id', 'type', 'play_type', 'stat_type', 'scoring.home.id', 'scoring.away.id', 'player.id', 'team.id', 'scoring.home.alias', 'scoring.away.alias', 'clock', 'wall_clock', 'scoring_description', 'attempt', 'start_situation_possession_alias',
                'end_situation_possession_alias']

sportradar_wr = fn.drop_rename_cols(sportradar_wr, drop_cols, rename_cols)

### Suffix removal

In [40]:
suffixes = ['weather.', 'summary.', 'scoring.']

for s in suffixes:
    sportradar_wr = sportradar_wr.rename(columns = {col: col.replace(s, '') for col in sportradar_wr.columns})

### Change '.' to '_'

In [41]:
sportradar_wr = sportradar_wr.rename(columns = {col: col.replace('.', '_') for col in sportradar_wr.columns})

### Overtime adjustment

In [42]:
sportradar_wr.loc[(sportradar_wr['period_type'] == 'overtime') & (sportradar_wr['number'] == 1), 'number'] = 5

In [43]:
drop_cols = ['period_type']

sportradar_wr = fn.drop_rename_cols(sportradar_wr, drop_cols, rename_cols)

### Rename columns

In [44]:
rename_cols = {'week_sequence': 'week', 'number': 'period', 'start_situation_clock': 'start_play_clock', 'start_situation_down': 'start_play_down', 'start_situation_yfd': 'start_play_yfd', 'start_situation_possession_alias': 'start_play_possession_alias', 'start_situation_location_yardline': 'start_play_yardline', 
              'start_situation_location_alias': 'start_play_field_side', 'end_situation_clock': 'end_play_clock', 'end_situation_down': 'end_play_down', 'end_situation_yfd': 'end_play_yfd', 'end_situation_possession_alias': 'end_play_possession_alias', 'end_situation_location_yardline': 'end_play_yardline', 
              'end_situation_location_alias': 'end_play_field_side', 'qb_blitz': 'blitz', 'qb_hurry': 'hurry', 'qb_knockdown': 'knockdown', 'qb_pocket_time': 'pocket_time', 'qb_complete': 'complete', 'qb_on_target_throw': 'on_target_throw', 'qb_batted_pass': 'batted_pass', 'qb_incompletion_type': 'incompletion_type', 'qb_att_yards': 'att_yards'}

sportradar_wr = fn.drop_rename_cols(sportradar_wr, drop_cols, rename_cols)

### Play clock adjustments

In [45]:
clock_fields = ['start_play_clock', 'end_play_clock']

for c in clock_fields:
    sportradar_wr[c + '_string'] = sportradar_wr[c]
    sportradar_wr[c] = pd.to_timedelta('00:' + sportradar_wr[c])

### Pocket time to string

In [46]:
sportradar_wr['pocket_time_string'] = sportradar_wr['pocket_time'].map('{:,.2f}'.format)

### Effecive yardline adjustment

In [47]:
yardline_fields = ['start_play_yardline', 'end_play_yardline']

for y in yardline_fields:
    first_term = y.split('_')[0]
    sportradar_wr['effective_' + y] = sportradar_wr[y]
    sportradar_wr.loc[sportradar_wr['team_alias'] == sportradar_wr[first_term + '_play_field_side'], 'effective_' + y] = 50 - sportradar_wr[y] + 50

### Re-ordering columns

In [48]:
sportradar_wr = sportradar_wr[['game_id','season_year','season_type','week','home_alias' ,'home_sr_id','away_alias' ,'away_sr_id', 'temp' ,'humidity' ,'wind_speed' ,'venue_surface' ,'venue_roof_type' ,'period' ,'event_id',
                                'team_sequence','play_id','home_points' ,'away_points' ,'description','start_play_clock', 'start_play_clock_string', 'start_play_down' ,'start_play_yfd' ,'start_play_yardline', 'effective_start_play_yardline', 'start_play_field_side', 'inside_20' ,
                                'goaltogo' ,'end_play_clock','end_play_clock_string', 'end_play_down','end_play_yfd', 'end_play_yardline', 'effective_end_play_yardline', 'end_play_field_side', 'firstdown','scoring_play','players_rushed','men_in_box','huddle',
                                'hash_mark' ,'qb_at_snap' ,'left_tightends' ,'right_tightends', 'qb_name', 'pocket_location' ,'play_direction', 'screen_pass' ,'play_action' ,'run_pass_option' ,'pass_route' ,'fake_punt' ,'fake_field_goal',
                                'defender_name', 'player_name' ,'player_jersey','player_position','player_sr_id','team_alias' ,'team_sr_id', 'blitz' ,'hurry' ,'knockdown' ,'pocket_time', 'pocket_time_string', 'on_target_throw' ,'batted_pass',
                                'incompletion_type' ,'target' ,'reception' ,'yards' ,'att_yards' ,'yards_after_catch', 'yards_after_contact' ,'broken_tackles' ,'dropped','catchable', 'touchdown']]

## Fixing receiver features

### Poorly thrown passes
- If the pass was poorly thrown, it should be considered on target or catchable

In [49]:
sportradar_wr.loc[(sportradar_wr['on_target_throw'] == 1) & (sportradar_wr['incompletion_type'] == 'Poorly Thrown'), 'on_target_throw'] = 0
sportradar_wr.loc[(sportradar_wr['catchable'] == 1) & (sportradar_wr['incompletion_type'] == 'Poorly Thrown'), 'catchable'] = 0

### Dropped passes
- If the pass was dropped, we should assume that it was on target and catchable

In [50]:
sportradar_wr.loc[(sportradar_wr['on_target_throw'] == 0) & (sportradar_wr['incompletion_type'] == 'Dropped Pass'), 'on_target_throw'] = 1
sportradar_wr.loc[(sportradar_wr['catchable'] == 0) & (sportradar_wr['incompletion_type'] == 'Dropped Pass'), 'catchable'] = 1

### Defended passes

In [51]:
sportradar_wr.loc[(sportradar_wr['on_target_throw'] == 0) & (sportradar_wr['incompletion_type'] == 'Pass Defended') & (sportradar_wr['batted_pass'] == False), 'on_target_throw'] = 1
sportradar_wr.loc[(sportradar_wr['catchable'] == 0) & (sportradar_wr['incompletion_type'] == 'Pass Defended') & (sportradar_wr['batted_pass'] == False), 'catchable'] = 1

## New receiver columns
- We should create new features that capture the nuances of player style based on:
  - Utilization by route run
  - Physical attributes
  - Reliability in clutch situations
  - Contact tolerance
  - The ability to create separation from a defender

### Catches of poorly thrown passes
- To capture when a receiver bails out a quarterback by managing to catch an off-target pass
  - Demonstrates a receiver's overall "catch radius"
    - A combination of body control, height, arm length, hand size, and ball tracking ability
- Seems to only be recorded when a penalty is called.
  - Not sure if this dataset actually records catches of poorly thrown passes or assumes they don't get caught by default.
  - Still going to keep this feature just in case.

In [52]:
sportradar_wr['difficult_attempt'] = False
sportradar_wr.loc[(sportradar_wr['on_target_throw'] == 0) & (~sportradar_wr['description'].str.contains('penalty', case = False, na = False)), 'difficult_attempt'] = True

sportradar_wr['difficult_catch'] = False
sportradar_wr.loc[(sportradar_wr['on_target_throw'] == 0) & (sportradar_wr['reception'] == 1) & (~sportradar_wr['description'].str.contains('penalty', case = False, na = False)), 'difficult_catch'] = True

### Adverse weather catches
- Can the receiver keep his footing when there's heavy rain? Can he catch a wet ball?
- If OR:
  - Temp is 32 F or less
  - Wind speed is 10 MPH or greater
  - Humidity is 70% or greater

In [53]:
sportradar_wr['weather_attempt'] = False
sportradar_wr.loc[((sportradar_wr['temp'] <= 32) | (sportradar_wr['wind_speed'] >= 10) | (sportradar_wr['humidity'] >= 70 )) & (sportradar_wr['reception'] == 1), 'weather_attempt'] = True

sportradar_wr['weather_catch'] = False
sportradar_wr.loc[((sportradar_wr['temp'] <= 32) | (sportradar_wr['wind_speed'] >= 10) | (sportradar_wr['humidity'] >= 70 )) & (sportradar_wr['reception'] == 1), 'weather_catch'] = True

### QB bail-out catches
- Does the receiver know when the QB is under duress, and does he know when and where to make himself open for a pass to prevent a loss of yardage?
- Catches made when the quarterback is under duress (if OR):
  - If QB scrambled
  - If QB was hurried
  - If QB was blitzed
  - If QB was knocked down

In [54]:
sportradar_wr['qb_bf_attempt'] = False
sportradar_wr.loc[(
                    (sportradar_wr['pocket_location'] == 'Scramble Left') 
                    | (sportradar_wr['pocket_location'] == 'Scramble Right') 
                    | (sportradar_wr['hurry'] == True) 
                    | (sportradar_wr['blitz'] == True) 
                    | (sportradar_wr['knockdown'] == True)
                  ) , 'qb_bf_attempt'] = True

sportradar_wr['qb_bf_catch'] = False
sportradar_wr.loc[(
                    (sportradar_wr['pocket_location'] == 'Scramble Left') 
                    | (sportradar_wr['pocket_location'] == 'Scramble Right') 
                    | (sportradar_wr['hurry'] == True) 
                    | (sportradar_wr['blitz'] == True) 
                    | (sportradar_wr['knockdown'] == True)
                  ) 
                  & (sportradar_wr['reception'] == 1), 'qb_bf_catch'] = True

### Clutch catches
- Does the receiver sustain drives by converting first downs or catching touchdowns near the end of close games? Or does he buckle under the pressure?
- If AND:
  - 4th quarter or overtime AND 4 or fewer minutes left in period
  - Difference of one score (8 or fewer points)
  - Converts first down or is touchdown

In [55]:
sportradar_wr['clutch_catch'] = False
sportradar_wr.loc[(
                    (sportradar_wr['period'] >= 4) & (sportradar_wr['start_play_clock'] < pd.Timedelta(minutes=4)) 
                    & (
                        (
                            (sportradar_wr['team_alias'] == sportradar_wr['home_alias']) & (abs(sportradar_wr['away_points'] - sportradar_wr['home_points']) <= 8)
                        )
                      | (
                            (sportradar_wr['team_alias'] == sportradar_wr['away_alias']) & (abs(sportradar_wr['home_points'] - sportradar_wr['away_points']) <= 8)
                        )
                      )
                    & (
                        (sportradar_wr['att_yards'] >= sportradar_wr['start_play_yfd'])
                      )
                  ) & (sportradar_wr['reception'] == 1), 'clutch_catch'
                ] = True

### Conversion catches
- Does the receiver sustain drives or catch touchdowns when the drive is in jeopardy?
- The offensive team's 3rd down is commonly regarded as the last practical down of a drive
  - Although the offensive team has 4 downs to sustain a drive or score, the 4th down is commonly used to kick the ball in one of two ways:
    - Kicking a field goal for 3 points, provided that the offensive team is within the placekicker's range
    - Punting the ball to move it downfield so that the opposing team starts with worse field position when it receives the punt
  - If the offensive team attempts a pass or run play on 4th down and fails to convert a 1st down or score, the referee declares a "turnover on downs" and the opposing team takes possession of the ball where it was downed on the failed conversion attempt
    - Due to the difficulty, pressure, and cost of converting on 4th down, offensive teams typically use the 4th down to punt or kick a field goal
- If OR:
  - 3rd down or greater and converts 1st down
  - 3rd down or greater and scores TD

In [56]:
sportradar_wr['conversion_attempt'] = False
sportradar_wr.loc[
                    (sportradar_wr['start_play_down'] >= 3) & (sportradar_wr['att_yards'] >= sportradar_wr['start_play_yfd']),
                    'conversion_attempt'
                ] = True

sportradar_wr['conversion_catch'] = False
sportradar_wr.loc[
                    (sportradar_wr['start_play_down'] >= 3) & ((sportradar_wr['firstdown'] == 1) | (sportradar_wr['touchdown'] == 1)) & (sportradar_wr['reception'] == 1),
                    'conversion_catch'
                ] = True

### Red zone touchdown catches
- Touchdown catches from the 20 yardline or closer
- Completing a pass in the red zone is difficult because the play starts from within the 20 yardline, making the 22 players bunched very closely together
  - There is much less room for receivers to create horizontal separation from defenders
  - Taller receivers are favored on redzone pass plays because they can use their height, arm length, and jumping ability to create separation

In [57]:
sportradar_wr['redzone_attempt'] = False
sportradar_wr.loc[(
                    (sportradar_wr['inside_20'] == 1) & (sportradar_wr['att_yards'] >= sportradar_wr['start_play_yfd'])
                  ), 'redzone_attempt'] = True

sportradar_wr['redzone_catch'] = False
sportradar_wr.loc[(
                    (sportradar_wr['inside_20'] == 1) & (sportradar_wr['touchdown'] == 1)
                  )
                   & (sportradar_wr['reception'] == 1), 'redzone_catch'] = True

### Catches by route
- Elite route runners are able to change direction very quickly without telegraphing their intentions to their defenders
- Routes that feature quick direction change, such as the curl, require that the receiver have these traits:
- Rapid acceleration
- Rapid deceleration - The ability to stop movement in one direction with very few steps
  - If a receiver "pitter-patters," the defender knows he intends to stop and change directions
- Flexible ankles
- Hip mobility
- Leverage intuition
  - "Inside leverage" - When a receiver is positioned with his defender closer to the nearest lateral boundary, making the receiver closer to the "inside" of the field
  - "Outside leverage" - When a receiver is positioned closer to the nearest lateral boundary than his defender, making the receiver closer to the "outside" of the field
  - Understanding leverage tells a receiver:
    - When to expect the pass without looking back at the QB
    - What kind of pass the QB will throw
- Not every elite receiver is an elite route runner.
  - Some can run a limited "route tree," running only relatively straight routes that take advantage of their speed.

In [58]:
routes = ['In', 'Slant', 'Corner', 'Flat', 'Curl', 'WR Screen', 'Out', 'Go', 'Cross', 'Post', 'Comeback', 'Underneath Screen']

for r in routes:
    sportradar_wr[r.replace(' ', '_').lower() + '_attempt'] = False
    sportradar_wr.loc[ (sportradar_wr['pass_route'] == r), r.replace(' ', '_').lower() + '_attempt'] = True

for r in routes:
    sportradar_wr[r.replace(' ', '_').lower()+'_catch'] = False
    sportradar_wr.loc[ (sportradar_wr['pass_route'] == r) & (sportradar_wr['reception'] == 1), r.replace(' ', '_').lower()+'_catch'] = True

- Only one play has a null value for route.
- The video (https://youtu.be/Pn3l2AN3o9s?si=C7Iq0f_Uf7oru74F&t=2792) shows that the play resulted in a penalty anyway and should not count as a pass play.
  - The description does not contain "no play," so it escaped the penalty filter we applied to the dataframe.

In [59]:
sportradar_wr = sportradar_wr[~sportradar_wr['pass_route'].isna()]

### Deep catch
- A pass play can result in a large gain, but this can be achieved by a short pass to a receiver who gains many yards after the catch
- This metric is meant to capture receivers who are frequently relied on to catch deep passes - passes that travel 20+ yards through the air before a catch attempt is made

In [60]:
sportradar_wr['deep_attempt'] = False
sportradar_wr.loc[ (sportradar_wr['att_yards'] >= 20), 'deep_attempt'] = True

sportradar_wr['deep_catch'] = False
sportradar_wr.loc[ (sportradar_wr['att_yards'] >= 20) & (sportradar_wr['reception'] == 1), 'deep_catch'] = True

### Large YAC catch
- Some receivers, such as Deebo Samuel, excel at catching short passes and gaining many yards after the catch
  - This requires great vision, lateral agility, and acceleration
- Catches with 10+ yards after catch

In [61]:
sportradar_wr['large_yac_catch'] = False
sportradar_wr.loc[ (sportradar_wr['yards_after_catch'] >= 10) & (sportradar_wr['reception'] == 1), 'large_yac_catch'] = True

### Play action catch
- A play action play is when the quarterback takes the snap from directly "under center" (right behind the center), runs backward, and fakes a hand-off to the running back
  - This play design fakes a run attempt, causing defenders to get "drawn up" leaving the deeper parts of the field more open for a receiver to exploit
- Catches made from play action

In [62]:
sportradar_wr['play_action_attempt'] = False
sportradar_wr.loc[ (sportradar_wr['play_action'] == True), 'play_action_attempt'] = True

sportradar_wr['play_action_catch'] = False
sportradar_wr.loc[ (sportradar_wr['play_action'] == True) & (sportradar_wr['reception'] == 1), 'play_action_catch'] = True

### RPO catch
- A run-pass option is similar to a play action design in that it is a fake designed to deceive defenses, but the quarterback has multiple options.
- Unlike a play action concept, the QB does not turn his back toward the defense. He keeps his eyes downfield at all times. With his eyes downfield, he motions as if he's handing the ball off to the running back. He has 3 options:
  - Complete the hand-off to the running back
  - Keep the ball, thus faking the hand-off, and run
  - Keep the ball, thus faking the hand-off, and pass
- Catches made on a run-pass option

In [63]:
sportradar_wr['rpo_attempt'] = False
sportradar_wr.loc[ (sportradar_wr['run_pass_option'] == True), 'rpo_attempt'] = True

sportradar_wr['rpo_catch'] = False
sportradar_wr.loc[ (sportradar_wr['run_pass_option'] == True) & (sportradar_wr['reception'] == 1), 'rpo_catch'] = True

### Tackle-breaker catch
- Some receives use their size and strength to bully defenders, inviting contact that fatigues defenders over the course of the game
- Tired defenders refer to the dilemma of tackling a physical ball carrier as a "business decision"
  - "Expend more energy and get hurt trying to tackle him, or let him run by me?"
- Catches after which the receiver breaks a tackle

In [64]:
sportradar_wr['tackle_breaker_catch'] = False
sportradar_wr.loc[ (sportradar_wr['broken_tackles'] >= 1) & (sportradar_wr['reception'] == 1), 'tackle_breaker_catch'] = True

### Beast catch
- Similar in intent to the tackle-breaker catch, but focuses on receivers who gain many yards after being contacted by a defender
- Catches that result in 10 or more yards after contact

In [65]:
sportradar_wr['beast_catch'] = False
sportradar_wr.loc[ (sportradar_wr['yards_after_contact'] >= 10) & (sportradar_wr['reception'] == 1), 'beast_catch'] = True

### Hurry-up catch
- Catches made on plays before which there was no huddle
- Measures a receiver's knowledge of the playbook and what their role is without detailed explanation
- Demonstrates receiver's chemistry with QB and whether they can be trusted to be ready at all times
- Crucial in time-limited situations

In [66]:
sportradar_wr['hurry_up_attempt'] = False
sportradar_wr.loc[ (sportradar_wr['huddle'] == 'No Huddle'), 'hurry_up_attempt'] = True

sportradar_wr['hurry_up_catch'] = False
sportradar_wr.loc[ (sportradar_wr['huddle'] == 'No Huddle') & (sportradar_wr['reception'] == 1), 'hurry_up_catch'] = True

### Deep sideline catch
- 20 or more air yards
- Near one of the sidelines
- Gauges receiver's ability to be a vertical threat, maintain spatial awareness, and maintain body control when attempting the catch and staying inbounds

In [67]:
sportradar_wr['deep_sideline_attempt'] = False
sportradar_wr.loc[(
                    (sportradar_wr['play_direction'] == 'Right Sideline') | (sportradar_wr['play_direction'] == 'Left Sideline')
                  )
                  & (sportradar_wr['att_yards'] >= 20)
                  , 'deep_sideline_attempt'] = True

sportradar_wr['deep_sideline_catch'] = False
sportradar_wr.loc[(
                    (sportradar_wr['play_direction'] == 'Right Sideline') | (sportradar_wr['play_direction'] == 'Left Sideline')
                  )
                  & (sportradar_wr['att_yards'] >= 20)
                  & (sportradar_wr['reception'] == 1), 'deep_sideline_catch'] = True

### DPI drawn
- Receivers who frequently draw defensive pass interference calls worry defenders who don't want to surrender a big yardage gain

In [68]:
sportradar_wr['dpi_drawn'] = False
sportradar_wr.loc[sportradar_wr['defender_name'].notnull(), 'dpi_drawn'] = True

### Possession-saver catch
- A "possession receiver" is usually a tall, strong receiver who lacks speed, but can reliably make contested catches for short to intermediate yardage at the line to gain
- By catching passes intended for them at the line to gain, they sustain drives

In [69]:
sportradar_wr['possession_saver_attempt'] = False
sportradar_wr.loc[(sportradar_wr['att_yards'] >= sportradar_wr['start_play_yfd']), 'possession_saver_attempt'] = True

sportradar_wr['possession_saver_catch'] = False
sportradar_wr.loc[(sportradar_wr['att_yards'] >= sportradar_wr['start_play_yfd']) & (sportradar_wr['reception'] == 1), 'possession_saver_catch'] = True

In [70]:
sportradar_wr = sportradar_wr[sportradar_wr['player_position'] == 'WR']
sportradar_wr.shape

(10340, 130)

## Trick plays with multiple rows
- Dropping trick plays in which a receiver also acted as a passer
  - Since the receiver in these plays usually  are rare and are not helpful for analysis of playing style

In [71]:
drop_cols = ['Unnamed: 0']

sportradar_wr = fn.drop_rename_cols(sportradar_wr, drop_cols, rename_cols)

In [72]:
sportradar_wr['play_id'].nunique(), sportradar_wr['play_id'].shape

(10301, (10340,))

In [73]:
sportradar_wr.groupby("play_id").filter(lambda x: len(x) > 1)['play_id'].nunique()

17

In [74]:
trick_plays = sportradar_wr.groupby("play_id").filter(lambda x: len(x) > 1)

In [75]:
trick_plays[trick_plays['pocket_time'].isna()].index

Index([  585,  2360,  2362,  3712,  4435,  4437,  4439,  4928,  4930,  4932,
        6232,  6234,  6888, 10292, 10294, 13132, 13287, 13289, 13380, 13382,
       13478, 16869, 16873, 17690, 17694, 18096, 18293, 18295],
      dtype='int64')

In [76]:
sportradar_wr = sportradar_wr[~sportradar_wr.index.isin([585,  2360,  2362,  3712,  4435,  4437,  4439,  4928,  4930,  4932,
                                                        6232,  6234,  6888, 10292, 10294, 13132, 13287, 13289, 13380, 13382,
                                                       13478, 16869, 16873, 17690, 17694, 18096, 18293, 18295])]

In [77]:
sportradar_wr.shape

(10312, 130)

In [78]:
sportradar_wr.to_csv('../working_exports/sportradar_wr.csv')