In [1]:
import numpy as np
import pandas as pd
import warnings

from sklearn.model_selection import train_test_split

# Column and row display
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_seq_items', None)


# Notebook cell width display
from IPython.display import display, HTML
display(HTML("<style>:root { --jp-notebook-max-width: 98% !important; }</style>"))

# Float appearance, Pandas and NumPy
pd.set_option('display.float_format', '{:.2f}'.format)
np.set_printoptions(suppress=True, precision=0)

# Supress warnings
warnings.filterwarnings('ignore')

# SPORTRADAR API CALL

In [2]:
import http.client
import json

conn = http.client.HTTPSConnection("api.sportradar.us")

# DATAFRAME CREATION

In [4]:
game_ids = ['bf60c8fc-35cd-4749-a29e-4f48d66da57c', '138ba6ee-966d-40b6-8017-5b018c4a31d1', '3a30461a-f6cc-493d-bff3-5c4a447ff812', '5e90e42b-7203-4ae8-a506-43e5dc3140e9', '7af18bf0-4be8-4532-bd9c-845b88e9f684', 'b206fcec-b10d-4bc2-a7fe-3c69c532f8fc', 'b7bbcbe9-fa76-4175-8ced-b87b3076aca8',
'c4a110f2-847b-4710-9a54-7be66fdb3c99', 'ce4b9426-af23-4681-8bf7-800c7e020d4e', 'df15f570-b51c-4ece-a42b-aaa1e130290a', '39c07cba-900b-49dc-ade1-e34c815c202e', '5d7038d9-82cc-4464-a20a-04952454f928', '71ce533f-3932-4927-85af-2f0d01f5686d', 'ab13a972-6ea4-4059-ab5c-51ead63d9abd',
'c60eb416-db18-42aa-b936-f25fe21e65e0', 'd981ce4e-3139-44d4-b6a5-f16fd3e8ca6e', 'de17900b-51bc-4280-8f09-d143bf6cdeb1', '3db2a367-2c28-4c98-9edc-87c3a711952b', '4fe9b8f1-09a9-49a0-a4c7-812aac3d8c21', '7ffcc5a2-e9a5-413d-b692-e46818eb840a', '8024a131-2d3d-4ee1-bc56-816a55f248e0',
'90622f34-5635-42ed-bc72-ba3b5aeb98a1', 'b1429091-b0af-406e-9f09-5968c049e0ae', 'd5119ada-211c-4cf5-bc2a-aeb3589e3fd5', '3f1d5b42-2e10-4cb1-8db9-41a6bb2ab21f', 'f3d27d24-7b63-4314-aec7-c65405c82724', 'd144122e-0963-42f5-85c5-651cb8c1b123', 'e27da541-573a-4c86-bf53-a25a6d85c73b',
'e6e9bd0b-6803-492a-ba93-5d6dfb38c199', 'a1cfb866-3432-46f6-a354-385db0a6e7fb', 'feac652a-72a3-4c1e-a43f-7e2a1035c1dc', '4dc9220a-bc27-4e0a-838e-4860f4bc4da8', 'cf7291ef-cb13-4d5d-882e-0221330aafeb', '2eb9ef80-c7fc-47f0-ba9e-359ffd5208c2', '5a49b508-acbe-4b5d-912d-a50b4f8c6366',
'8b8f8d1e-68b3-4b87-8f3b-d427bf790911', '8badc9e6-483a-498d-8f92-a8f5b02de67f', '9cd300f9-b564-4beb-bc7f-687543a70c7f', 'b3f234aa-0768-488b-930c-4171378cdb97', 'b9a40f08-db99-4cb0-856a-2434f41df26f', 'beb9fe5d-a6aa-425f-8dab-ef5f11e59d11', 'f33e2f4e-0647-4962-97c9-bfac791ff951',
'8ebfa3c4-dc9c-4bf6-a51b-91d4c5b43a1f', '381afd4d-bfe3-4c01-be10-ea85e2e57c18', '8dbee8ee-a3a4-45f3-9425-239669ca778e', 'db55a870-a775-46d3-b338-c73b29596991', '02c1b1e9-e2dd-475f-84cf-6b1c9a2cbc51', 'd4910570-5897-4653-9dfe-552e8121624e', '1530637b-1218-4c7a-8cfe-5a9655a450ef',
'33a593a0-9bc5-49ca-837d-475bc7c90cec', '04d1dfe9-f1d1-430c-b3d2-a440d8d6a59b', '1fd76dfe-636c-46d5-92ca-630da9b25556', '2014bd73-669f-46ca-a507-6f64a47228cc', '43339bf6-5c08-4551-886b-25b5578be23b', '5425a946-fbf7-4735-be0a-dea6b9500bdf', '57727ff1-a524-4323-8e8b-1d7d6f3dc633',
'67c5b2b3-02fe-49ce-9e63-c1f606ed2c06', 'c051a5cd-d887-49f8-8f5a-b0bebd794f88', 'f88ed995-0a58-42c7-8b57-89c5c4640e4f', '6fb3effa-3ec0-42fd-bedf-64e969cdfca8', '347e3fc0-bd45-458d-a5ff-b978e360b63b', '9d2b7cff-f479-48b4-abd6-df2f0e23a5f4', '75225665-e0f5-4449-ad9c-e8cefb3d1b07',
'b295c097-316e-493f-b8a6-ed82346e8dff', 'f19f51de-5185-460f-bdc5-ec5887276fa4', '37d85a48-62c1-4c27-a05b-759fabd1b0c1', '0c925851-577d-4e7c-9722-cf6b58e41f1e', '13025660-aa95-4885-b33e-3a3554d4c4c3', '50250a4a-e3be-4ba5-a53b-a8894b2d6d5e', '5ccb1b6a-1dd1-4ae0-aa3a-f530546488c6',
'859ef7f3-39fd-45d5-8769-d7255837d2ce', 'b6c9bee6-1ec7-4282-96cb-ad1b59439f80', 'cb54b23b-4192-4eda-8431-794d21dd58ab', 'cde58dfc-e789-4ff2-bda4-a8ece588e1d7', 'eb541067-920e-4052-945f-4fb4142341a6', 'e86d674e-a47c-4038-adac-5a6b659dd2c8', '8ee9c37b-4fd8-4954-9d78-f84d1efd700f',
'bf8318c0-e876-4a47-9cb4-d3b85725d8f6', '449e9f9f-d2be-40df-ac2c-7e0caf6b1269', 'befce21a-d8dd-416d-bad2-e104279f8823', 'b18e5b30-2ced-4697-b86c-398965fb44ad', '0005ea17-2fc9-4b63-9a52-8bc893b4b481', '28026bf8-209d-470c-98c3-dce2a16141b5', '5d16b9f5-4e23-43f2-8803-4929046b3128',
'72f82240-0035-4f1f-8854-68f5b7498c79', '81e7ef41-0a62-496f-b224-6aa1957df6aa', 'a62ebedf-c6fd-47e6-b511-ce5e998ac898', 'ac2edcfb-f5e4-4fbd-ae49-fe424174db0c', 'f4fb34ba-f863-48af-95f1-c317562087ed', '6a5922a9-23e8-4c38-b3c4-604d0f97b1b6', 'fc458b69-5e18-4564-af26-8bc3b58756b8',
'e53d49fb-8a14-4fb7-b6aa-9fa9466a7b4d', '78419085-6fc4-4b44-b6c7-f3dd1db63a9c', '1a57777c-7c57-43fa-bf08-66bffc204fd1', '780f2605-5066-4cb2-98cd-75d5634ad77a', '4ec0c067-e9a7-4fb0-ab34-8ae97db6a646', '50c5b405-8f86-427f-a338-a8f2f4c5a629', '77fe4f5c-7dea-4d79-ae8f-6fdc1a34b205',
'897b13fd-bf0a-4744-b2a5-424a09a6b37c', 'a7501f59-9d2e-4f94-96c6-8b4d9ab97408', 'bafd2523-3b0a-49c7-92a6-4c83b2a85dc1', 'e4f775bf-9908-45dd-bd65-871cc37ca375', '3baa86f1-4618-4e06-939a-4e741a7ab8ec', 'ad106c3a-0f28-4de4-8594-324d22b25bae', '553ad2a4-48be-4ff7-92ce-8e5dc7a7329b',
'69102143-6d12-450e-aaa5-8ca26c4e5c9f', '056b4eb7-64ce-4858-bae7-f3701d8eb753', '20ef56e1-8c82-4f96-84b0-3ce79f1a1a31', '430e5d4d-d4e8-4c54-a93f-ddb1a97cd7d9', 'de5a763a-8dd3-4d4e-84e6-41b9d5917c29', '02211cd0-5ad9-4b1d-818c-20817f18484f', '236c72f3-e4a8-4f4b-a469-8a057a1d32db',
'32c7d6d0-5a74-4f14-af43-1337acc4d8e4', '67f337fc-6465-4dd9-8bdd-2a154b70534b', 'c88b342c-fe51-48d2-992e-a4e21a7de71a', 'd5e3b9a0-9f28-439e-bd89-2b87eee439b6', 'ed19500a-c5bf-4f38-89e1-291cb7e20ec9', '40a19cc9-bc8c-4ac4-b588-28d8ee87d29a', '35aedbae-936a-47ab-8113-17c69082a9eb',
'37e27f71-bc52-436d-88d1-d55efe0771fd', 'c1f26d78-9b4c-421c-9787-093471339913', '58f33c2e-85e9-4210-af03-17a576e2ab9b', 'f5f0db8e-7900-4d38-8d94-245a43054141', '9e4f6e8a-2f76-4432-a741-6f26ad6b53ab', '4c107e07-511a-4ec2-9172-af0e381690a0', '74a9daed-6d9a-4a34-b7fd-0537fe991647',
'83cd16ed-aca2-4d13-bff8-bb16225bedb1', 'a4da928d-acf8-48d8-a35b-e0cd247aaabc', 'c9f431dd-637f-48ca-b01b-76510249d4a4', 'cdf0116f-cd16-456e-9b7c-33975736b93e', 'f4acdc1b-4dbe-42d0-a6c3-617558c40684', 'f6dd0415-2dbd-4e98-9b1b-bca1b8074a05', 'fe7fa4d7-f4d6-4e24-85bd-de084acbe3a9',
'77a33d6b-1e3f-416f-8890-fe38f535bee9', 'e1e21176-49f5-4145-be0f-f428b5c11926', 'dbd63c38-1d79-4b19-9db4-b167cadc70d0', 'f7db0b50-cd05-436a-8214-a52969d21462', '44cdfc9a-758f-43b5-8925-8ca783edf65e', '1653b8ff-7a81-4a38-b992-93c4bfd1b3d9', '3e54518d-23f3-4a70-a78a-bf5911bffbe5',
'9384d4ea-e36d-4055-ac1d-6936d963030f', '97f8be0f-64b0-4658-b2f4-c7302a49bdb0', 'ce6ea983-ad12-4d89-be0a-a0c87d37717b', 'd3500c07-dfce-4893-834d-d54be6ff0767', 'f1aae2b9-8ecd-431a-ab18-20e1ed251b27', '2c2ded8a-a51a-4a92-b238-6d97092c2d7a', '9e63e019-88a3-49ca-9c78-e50488651085',
'e496f932-62b9-4a68-bdb5-a05456e0dd83', 'acb5d131-4758-4593-86cf-13b761f29040', 'e20c1ab3-e723-4156-a9de-a14b7ffbe317', 'b7796b52-a95f-456c-9f8a-e5b9826770eb', '5b67e186-7af6-4380-99b7-d4f6aa289ded', '61787d91-5ad7-438d-9b44-bfb4acecaefa', '72fc63d4-6c4d-4ea2-8559-ca5e15f6a129',
'89f6dd2d-aa44-4ad1-adff-643bf9d69c55', '9330ff2e-d460-445e-a708-c999830425ae', 'c7659740-7f30-4a07-a8a3-2dcfddec8be2', 'd01d35c3-ad0b-4687-9e0d-23edb7475ac3', 'e452c2fa-1975-4236-b0c0-bb09c6793a79', '80854172-e672-4cf5-817d-75f326ef93c1', '833598fd-1eb5-4f57-a041-5505da5d943d',
'ab83b7bf-6353-4b3f-b687-393f25f03959', 'b587f407-d807-4367-af81-ca0d03741b7c', '5988c183-f838-4ff1-80a0-8563d48b51a4', 'd762d5d8-927b-49c8-9701-3dae3caa210e', '0570e6ae-b774-45d7-92ff-254de62e70ba', '20e9e73e-9e8c-40cd-9b5c-471fcd224441', '0e8c04c3-b9b9-4a17-ae21-b4572407ba50',
'37c0ea8c-bd70-428c-a8b6-fa9a58eca788', '3eb4c32d-482d-44ab-92a3-88bdf1d50850', '6e4ce9b2-f7ea-400a-89b1-17fcc078048e', '76aba25a-91f2-4d85-8c74-e7a211def3fa', 'b37b5ed5-d66d-41ae-b680-6ae2e6103486', 'bf00aa76-84ce-4553-ba58-f2303f003af4', '379bbe84-0685-4965-af48-73920a50f4bb',
'759d9794-2ea4-4c71-aa91-3ea9efbc9284', '17494e1a-01ce-4b63-8835-24824bf5dd48', 'afb0e513-ab69-4ca1-b1ae-a8ab3fc490a5', 'd954b72e-c480-4776-b64b-75a0a781a50e', 'ec9e43b0-c1d0-4ff8-aab6-9195721e39d3', 'b9a405a3-d53d-483d-8f38-9f10263a0492', '29723007-8261-470a-a7c7-5713075ab27f',
'49fe6388-f9bf-4974-be9b-5a18fbcc0d57', '4a17d98f-a876-4214-b7d1-ab987578c562', '72687ef0-fd87-4e79-a590-82332bff157f', '7673d7a8-151c-493a-9975-25a9d1573f3a', 'bd0eed55-b5b7-4e35-a20f-9cbff8c323eb', 'ceba190d-a811-4b74-a3a3-4bfd4daa3a6a', 'de11a0bc-5f2a-4c6c-b2ac-ead3971c4345',
'17d283c8-bc55-4f8c-891b-3827137a1166', 'dbfb3d69-d093-4456-89dc-64e487d2f57a', '61a7ad62-a168-4279-8077-93ffc53e2656', '7fb75b32-9705-43ca-b7f8-68edf0a093e7', 'd844b5a7-6fb5-42c3-9ded-db10817c477f', '0f58f1bf-21aa-48e4-aaa0-351234764612', 'c6573861-4766-48ed-aae3-53c0aca6e9ea',
'178083b4-c7e3-4754-9d53-a7cf5689932e', '3e067ff5-2a99-4171-a33d-27ab8aec59ae', '439090e4-7429-464e-bd9e-d2fc20f6ba6d', '6e51ac72-a8cd-40d6-b638-6f12536ed243', '9cc461c7-4cc9-4eb5-86db-1a9f6bb9f9aa', '9d2e248e-a120-4ddb-9bbe-4428c2663ebe', 'c6e031ee-56f7-43bb-bd8f-fc92b1cc0b62',
'5fb5d3b5-fac3-496e-b686-d6c8d33dfb7d', '6157d7ea-159a-42e1-83d5-17f3e2c95928', '7a1bf5cb-3a14-49bf-b7e8-8aee0ae5e20d', '906eeb30-3279-4467-93be-55ec1f32fa36', '440180d8-8dc2-4481-b4a1-dc5dfae8f764', 'b32abb73-8268-4f40-aeba-3acf3448bbf1', '9b309a87-6279-450f-852c-fae053566b4c',
'3d3fafda-7d7c-4ecd-b39a-bebd15511465', '931c22e0-7c1b-405b-a1b5-42b5c718c457', '3ca6a8c4-9865-4fcf-a95d-e8c98904272d', '5fcb929c-7e70-48a8-a5a6-78498eea2f6b', '9256a55f-7380-4adf-b546-8cc730553d7b', 'b5d4cc73-6cf6-4257-a3c2-a0fc122f9075', 'de98e9c5-c5e8-4a25-b8d5-ed2051c452f1',
'ea517d29-5b31-49b5-8dcb-ab65db0d2b7b', '7d1ec13f-44e5-4525-9a18-586d0f4f5396', 'ea189422-afff-4730-98d1-e791dd3122b9', '52414295-a998-4b66-80a5-2fdd5c66b264', '96df77c4-50bd-4a7a-a74a-0cca534cd12e', '7c39576e-a060-40d8-8a32-e1a86b5ee7c0', '09d5ac59-8fe4-4239-8bcf-3d77284fa471',
'bbb1588e-dd86-4621-b618-011d0278135e', '17b3df26-4163-4234-83be-700cb0ab5f93', '58ea9307-f76e-45e4-9f31-432e320331c2', '9d0e87dd-5bc0-47a4-9c0a-7a426b5605e5', 'a3a9a355-7d17-45a4-a7b1-d0ac460e4877', 'beaee843-99ce-47c1-b32f-9a4055951698', 'ca33baaa-05ac-4bc9-b1da-4330689ce690',
'e28ece47-84ed-4f83-a5f5-6bfd6cf58396', 'f98ba7c3-d9cf-470d-a1b9-8557e148ece4', '2b86a156-8ba0-4c37-969e-fa14372dd263', '95fb687b-d407-4eb1-907f-af18e41eda0a', '1348ec41-0920-450b-b809-440581016fac', 'b9ceeb26-c2c5-45ac-8a5f-f01639818e4c', '409f6cec-7369-48fc-b707-9a85621fe059',
'87097385-eb47-4932-89a0-656c35c0d285', '9f0a94c7-439a-44f3-979a-ca66855ce34a', 'ef0c58f6-54f6-4889-b880-8a503494d138', '1be368b9-7379-4d76-b954-9d3e7eefc1c1', '2f1ee220-e31b-4427-8686-b65f7f151776', '812e3626-aea7-4907-b726-3b2dc719667c', '86b4dcc4-cc87-43b7-98e1-ea82b24dd400',
'ae1134d1-7e07-4197-8e9b-9fff27ff6519', 'bb9a5164-71d2-4efb-9d3e-13c5a4b7df04', 'beedd537-287a-4699-82b6-58387becf9ad', 'cbf9656b-66ec-4568-b697-93f0f723949b', 'ce9870af-6d45-403a-bf73-2cf6d3382c52', '397e9db0-eb0d-44ec-a579-848b32b116f2', 'efa47ae7-e65b-4ae2-bc09-4400d8ec093e',
'7d5ce394-4034-4ce3-b500-baff56fe0546', '836f3e95-a59b-4ec2-8461-932548ca3acd', 'f96a8e3f-f7d4-4b8d-8a5d-9c5c0bf1d0e0', '2d05d64f-24c6-4d1a-a682-e909f7fcccf9', '521ea2e6-3f9b-4e15-b94c-cb76fa6ca785', '189c02f5-7832-41a3-b6ec-dfa9d244e095', '5301e285-9d4b-4991-8f64-059bfd6bad13',
'7913a1bc-a010-474c-922a-22b0d552fb9c', '9163d9a1-945f-4b0f-9746-20eaf6a73de3', '9cfe315e-1497-41cf-b39c-deff6371f471', 'b3e960cd-6f6f-4da2-897c-ec30a9285e65', 'd714479c-d97d-4aad-9c71-0833ed049647', 'e6b71fe7-ecb3-4931-ad91-907dee42c561', '0b690459-804a-4f62-9625-077b0c3e21da',
'9038a474-aafe-4c67-94b2-72c9e387b206', 'acb4aaf3-8850-4b35-abc6-d0f8f6824905', 'b4adc1bb-a794-42ee-bd33-09646ea596e7', 'fdc4acd7-da9d-4dd0-b1ab-206432a5f781', '3ca61a46-2cef-4962-a481-f7ae6d7751eb']

# '7c0c0abf-f566-4a1f-a482-b74a991e1663' - Canceled due to Damar Hamlin's cardiac arrest

In [5]:
# Define which columns we want int he final dataframe, and in which order
all_columns = ['game_id','season_year','season_type','week','home_alias' ,'home_sr_id','away_alias' ,'away_sr_id', 'temp' ,'humidity' ,'wind_speed' ,'venue_surface' ,'venue_roof_type' ,'period' ,'event_id',
                'team_sequence','play_id','home_points' ,'away_points' ,'description','start_play_clock', 'start_play_clock_string', 'start_play_down' ,'start_play_yfd' ,'start_play_yardline', 'effective_start_play_yardline', 'start_play_field_side', 'inside_20' ,
                'goaltogo' ,'end_play_clock','end_play_clock_string', 'end_play_down','end_play_yfd', 'end_play_yardline', 'effective_end_play_yardline', 'end_play_field_side', 'firstdown','scoring_play','players_rushed','men_in_box','huddle',
                'hash_mark' ,'qb_at_snap' ,'left_tightends' ,'right_tightends', 'qb_name', 'pocket_location' ,'play_direction', 'screen_pass' ,'play_action' ,'run_pass_option' ,'pass_route' ,'fake_punt' ,'fake_field_goal',
                'defender_name', 'player_name' ,'player_jersey','player_position','player_sr_id','team_alias' ,'team_sr_id', 'blitz' ,'hurry' ,'knockdown' ,'pocket_time', 'pocket_time_string', 'on_target_throw' ,'batted_pass',
                'incompletion_type' ,'target' ,'reception' ,'yards' ,'att_yards' ,'yards_after_catch', 'yards_after_contact' ,'broken_tackles' ,'dropped','catchable', 'touchdown']

In [6]:
%%time

consolidated_sr_wr = pd.DataFrame(columns = all_columns)

for game in game_ids:
    url = f"/nfl/official/trial/v7/en/games/{game}/pbp.json?api_key={personal_key_removed}"
    conn.request("GET", url)
    res = conn.getresponse()
    data = res.read()
    json_data = json.loads(data.decode("utf-8"))

    # DICTIONARY UNNESTING
    ## Games
    games = pd.json_normalize(json_data, errors='ignore')
    games = games.drop(columns = ['status', 'scheduled', 'attendance', 'entry_mode', 'clock', 'quarter', 'conference_game', 'duration', 'periods', '_comment', 'weather.wind.direction', 'summary.season.id', 'summary.season.name', 'summary.week.id', 'summary.week.title',
                                  'summary.venue.id', 'summary.venue.name', 'summary.venue.city', 'summary.venue.state', 'summary.venue.country', 'summary.venue.zip', 'summary.venue.address', 'summary.venue.capacity', 'summary.venue.sr_id', 'summary.venue.location.lat', 
                                  'summary.venue.location.lng', 'summary.home.id', 'summary.home.name', 'summary.home.market', 'summary.home.sr_id', 'summary.home.used_timeouts', 'summary.home.remaining_timeouts', 'summary.home.points', 'summary.home.used_challenges',
                                  'summary.home.remaining_challenges', 'summary.home.record.wins', 'summary.home.record.losses', 'summary.home.record.ties', 'summary.away.id', 'summary.away.name', 'summary.away.market', 'summary.away.sr_id', 'summary.away.used_timeouts', 
                                  'summary.away.remaining_timeouts', 'summary.away.points', 'summary.away.used_challenges', 'summary.away.remaining_challenges', 'summary.away.record.wins', 'summary.away.record.losses', 'summary.away.record.ties'], errors = 'ignore')
    games = games.rename(columns={"id": "game_id"})
    
    # Periods
    periods = pd.json_normalize(json_data, record_path=['periods'], errors='ignore')
    periods = periods.drop(columns = ['id', 'sequence', 'scoring.home.name', 'scoring.home.market', 'scoring.home.points', 'scoring.away.name', 'scoring.away.market', 'scoring.away.points', 'coin_toss.home.outcome', 'coin_toss.home.decision', 'coin_toss.home.direction', 
                                      'coin_toss.away.outcome', 'coin_toss.away.decision', 'coin_toss.away.direction'], errors = 'ignore')

    
    # PBP
    periods_pbp_exploded = periods.explode('pbp')
    # Convert pbp column to its own flattened dataframe
    pbp = pd.json_normalize(periods_pbp_exploded['pbp'])
    # Delete unnecessary columns
    pbp = pbp.drop(columns = ['pbp', 'home_points', 'away_points', 'play_type', 'fake_punt', 'fake_field_goal', 'screen_pass', 'play_action', 'run_pass_option', 'statistics', 'details', 'start_situation.clock', 'start_situation.down', 'start_situation.yfd', 'start_situation.possession.id', 'start_situation.possession.name',
                          'start_situation.possession.market', 'start_situation.possession.alias', 'start_situation.possession.sr_id', 'start_situation.location.id', 'start_situation.location.name', 'start_situation.location.market', 'start_situation.location.alias', 'start_situation.location.sr_id', 'start_situation.location.yardline',
                          'end_situation.clock', 'end_situation.down', 'end_situation.yfd', 'end_situation.possession.id', 'end_situation.possession.name', 'end_situation.possession.market', 'end_situation.possession.alias', 'end_situation.possession.sr_id', 'end_situation.location.id', 'end_situation.location.name',
                          'end_situation.location.market', 'end_situation.location.alias', 'end_situation.location.sr_id', 'end_situation.location.yardline', 'event_type', 'description', 'type', 'inside_20', 'created_at', 'sequence', 'start_reason', 'end_reason', 'play_count', 'duration', 'first_downs', 'gain', 'penalty_yards', 
                          'scoring_drive', 'created_at', 'updated_at', 'start_clock', 'end_clock', 'first_drive_yardline', 'last_drive_yardline', 'net_yards', 'pat_successful', 'pat_points_attempted', 'offensive_team.points', 'offensive_team.id', 'defensive_team.points', 'defensive_team.id', 'clock', 'wall_clock', 'scoring_play', 
                          'scoring_description', 'hash_mark'], errors = 'ignore')
    pbp = pbp.rename(columns={"id": "event_id"})
    # Concatenate
    periods_pbp_exploded = periods_pbp_exploded.reset_index(drop = True)
    pbp = pbp.reset_index(drop = True)
    periods_pbp_exploded_pbp = pd.concat([periods_pbp_exploded, pbp], axis=1)

    
    # Events
    periods_pbp_exploded_pbp_events_exploded = periods_pbp_exploded_pbp.explode('events')
    # Convert events column to its owned flattened dataframe
    events_flattened = pd.json_normalize(periods_pbp_exploded_pbp_events_exploded['events'])
    events_flattened = events_flattened.drop(['events', 'event_type', 'blitz', 'goaltogo', 'sequence', 'created_at', 'updated_at', 'start_situation.possession.id', 'start_situation.possession.name', 'start_situation.possession.market', 'start_situation.location.id', 'start_situation.location.name', 'start_situation.location.market', 
                                              'score.sequence', 'score.clock', 'score.points', 'score.home_points', 'score.away_points', 'score.points-after-play.id', 'score.points-after-play.sequence', 'score.points-after-play.type', 'event_type', 'running_lane', 'end_situation.location.name','end_situation.location.market',
                                              'end_situation.possession.name', 'end_situation.possession.market', 'end_situation.location.id', 'end_situation.location.sr_id', 'start_situation.possession.sr_id', 'start_situation.location.sr_id', 'end_situation.possession.id', 'end_situation.possession.sr_id',
                                              'details', 'deleted'], axis = 1, errors = 'ignore')
    # Rename events `id` column to `play_id`
    events_flattened = events_flattened.rename(columns={"id": "play_id"})
    # Concatenate
    periods_pbp_exploded_pbp_events_exploded = periods_pbp_exploded_pbp_events_exploded.reset_index(drop = True)
    events_flattened = events_flattened.reset_index(drop = True)
    periods_pbp_exploded_pbp_events_exploded_events = pd.concat([periods_pbp_exploded_pbp_events_exploded, events_flattened], axis=1)

    
    # Statistics
    periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded = periods_pbp_exploded_pbp_events_exploded_events.explode('statistics')
    # Convert events column to its owned flattened dataframe
    statistics_flattened = pd.json_normalize(periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded['statistics'])
    statistics_flattened = statistics_flattened.drop(columns = ['statistics', 'missed_tackles', 'def_target', 'def_comp', 'tackle', 'hang_time', 'faircatch', 'nullified', 'sack', 'sack_yards', 'ast_sack', 'tlost', 'tlost_yards', 'ast_tlost', 'fumble', 'forced', 'own_rec', 'own_rec_yards', 'squib_kick', 'onside_attempt', 'onside_success', 
                                                                'play_category', 'forced_fumble', 'out_of_bounds', 'category', 'team.name', 'team.market', 'touchback', 'net_yards', 'kneel_down', 'scramble', 'ast_tackle', 'down', 'made', 'penalty', 'qb_hit', 'missed', 'endzone', 'return', 'pass_defended'], axis = 1, errors = 'ignore')
    # Concatenate
    periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded = periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded.reset_index(drop = True)
    statistics_flattened = statistics_flattened.reset_index(drop = True)
    periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded_statistics = pd.concat([periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded, statistics_flattened], axis=1)
    periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded_statistics.head()


    
    # MERGE DATAFRAMES
    games = games.reset_index(drop = True)
    periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded_statistics = periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded_statistics.reset_index(drop = True)
    sportradar = pd.concat([games, periods_pbp_exploded_pbp_events_exploded_events_statistics_exploded_statistics], axis=1)
    
    # Forward fill for game data
    columns_to_fill = ['game_id', 'weather.condition', 'weather.humidity', 'weather.temp', 'weather.wind.speed', 'summary.season.year', 'summary.season.type','summary.week.sequence', 'summary.venue.surface', 'summary.venue.roof_type', 'summary.home.alias', 'summary.away.alias']
    sportradar[columns_to_fill] = sportradar[columns_to_fill].ffill()


    
    # MODIFY DATASET
    # Filter for pass and receive plays
    sportradar = sportradar[
                            (sportradar['stat_type'] == 'pass') 
                            | (sportradar['stat_type'] == 'receive') 
                            | ((sportradar['stat_type'] == 'penalty') & (sportradar['description'].str.contains('defensive pass interference', case = False, na = False)))
                           ]
    # Create column for QB name
    sportradar['alias'] = sportradar.loc[sportradar['stat_type'] == 'pass']['player.name']
    sportradar['alias'] = sportradar['alias'].astype(str)
    # Create dummy column for defender name
    sportradar['name'] = sportradar.loc[sportradar['stat_type'] == 'penalty']['player.name']
    sportradar['name'] = sportradar['name'].apply(str)
    # Cast incompletion_type as string
    sportradar['incompletion_type'] = sportradar['incompletion_type'].astype(str)

    # TRANSFER PASSER- AND DEFENDER-SPECIFIC DATA TO RECEIVER-SPECIFIC ROWS
    # Extract passer data
    passer_data = sportradar[sportradar['stat_type'] == 'pass'].copy()
    # Rename passer columns
    passer_columns = {
        'complete': 'qb_complete', 
        'att_yards': 'qb_att_yards', 
        'blitz': 'qb_blitz',
        'hurry': 'qb_hurry',
        'knockdown': 'qb_knockdown',
        'pocket_time': 'qb_pocket_time',
        'on_target_throw': 'qb_on_target_throw',
        'batted_pass': 'qb_batted_pass',
        'incompletion_type': 'qb_incompletion_type',
        'alias': 'qb_name'
    }
    passer_data.rename(columns=passer_columns, inplace=True)          
    # Extract defender data
    defender_data = sportradar[sportradar['stat_type'] == 'penalty'].copy()
    # Rename defender columns (assuming 'name' represents defender's name)
    defender_data.rename(columns={'name': 'defender_name'}, inplace=True)
    # Extract receiver data
    sportradar_wr = sportradar[sportradar['stat_type'] == 'receive'].copy()
    # Merge receiver data with passer data
    sportradar_wr = sportradar_wr.merge(passer_data[['play_id', 'qb_complete', 'qb_att_yards', 'qb_blitz', 'qb_hurry', 'qb_knockdown', 'qb_pocket_time', 'qb_on_target_throw', 'qb_batted_pass', 'qb_incompletion_type', 'qb_name'
                                                    ]], on='play_id', how='left')
    # Merge receiver data with defender data
    sportradar_wr = sportradar_wr.merge(defender_data[['play_id', 'defender_name']], on='play_id', how='left')
    # Drop old passer-specific columns
    sportradar_wr.drop(['complete', 'att_yards', 'blitz', 'hurry', 'knockdown', 'pocket_time', 'on_target_throw', 'batted_pass', 'incompletion_type', 'alias', 'name'], axis=1, inplace=True, errors = 'ignore')

    # Replace reception nulls with 0
    sportradar_wr['reception'].fillna(0, inplace = True)
    # Delete columns that are not in all game datasets
    sportradar_wr = sportradar_wr.drop(columns = ['game_type','weather.condition', 'lost','opp_rec','opp_rec_yards','interception','int_touchdown','int_yards'], errors = 'ignore')
    # Delete superfluous columns
    sportradar_wr = sportradar_wr.drop(columns = ['type', 'play_type', 'stat_type', 'scoring.home.id', 'scoring.away.id', 'player.id', 'team.id', 'scoring.home.alias', 'scoring.away.alias', 'clock', 'wall_clock', 'scoring_description', 'attempt', 'start_situation_possession_alias',
                                                'end_situation_possession_alias',], errors = 'ignore')
    # Suffix removal
    sportradar_wr = sportradar_wr.rename(columns = {col: col.replace('weather.', '') for col in sportradar_wr.columns})
    sportradar_wr = sportradar_wr.rename(columns = {col: col.replace('summary.', '') for col in sportradar_wr.columns})
    sportradar_wr = sportradar_wr.rename(columns = {col: col.replace('scoring.', '') for col in sportradar_wr.columns})
    sportradar_wr = sportradar_wr.rename(columns = {col: col.replace('.', '_') for col in sportradar_wr.columns})
    # Overtime adjustment
    sportradar_wr.loc[(sportradar_wr['period_type'] == 'overtime') & (sportradar_wr['number'] == 1), 'number'] = 5
    sportradar_wr = sportradar_wr.drop(columns = ['period_type'], errors = 'ignore')
    # Rename columns
    sportradar_wr = sportradar_wr.rename(columns={'week_sequence': 'week', 'number': 'period', 'start_situation_clock': 'start_play_clock', 'start_situation_down': 'start_play_down', 'start_situation_yfd': 'start_play_yfd', 'start_situation_possession_alias': 'start_play_possession_alias', 'start_situation_location_yardline': 'start_play_yardline', 
                                              'start_situation_location_alias': 'start_play_field_side', 'end_situation_clock': 'end_play_clock', 'end_situation_down': 'end_play_down', 'end_situation_yfd': 'end_play_yfd', 'end_situation_possession_alias': 'end_play_possession_alias', 'end_situation_location_yardline': 'end_play_yardline', 
                                              'end_situation_location_alias': 'end_play_field_side', 'qb_blitz': 'blitz', 'qb_hurry': 'hurry', 'qb_knockdown': 'knockdown', 'qb_pocket_time': 'pocket_time', 'qb_complete': 'complete', 'qb_on_target_throw': 'on_target_throw', 'qb_batted_pass': 'batted_pass', 'qb_incompletion_type': 'incompletion_type', 'qb_att_yards': 'att_yards'})
    # For time display in string format
    sportradar_wr['start_play_clock_string'] = sportradar_wr['start_play_clock']
    sportradar_wr['end_play_clock_string'] = sportradar_wr['end_play_clock']
    sportradar_wr['pocket_time_string'] = sportradar_wr['pocket_time'].map('{:,.2f}'.format)
    # Yardline adjustment
    sportradar_wr['effective_start_play_yardline'] = sportradar_wr['start_play_yardline']
    sportradar_wr.loc[(sportradar_wr['team_alias']) == (sportradar_wr['start_play_field_side']), 'effective_start_play_yardline'] = 50 - sportradar_wr['start_play_yardline'] + 50
    sportradar_wr['effective_end_play_yardline'] = sportradar_wr['end_play_yardline']
    sportradar_wr.loc[(sportradar_wr['team_alias']) == (sportradar_wr['end_play_field_side']), 'effective_end_play_yardline'] = 50 - sportradar_wr['end_play_yardline'] + 50
    # To convert to timedelta
    sportradar_wr['start_play_clock'] = pd.to_timedelta('00:' + sportradar_wr['start_play_clock'])
    sportradar_wr['end_play_clock'] = pd.to_timedelta('00:' + sportradar_wr['end_play_clock'])
    # Re-order columns
    sportradar_wr = sportradar_wr.reindex(columns=all_columns)
    # Poorly thrown passes
    sportradar_wr.loc[(sportradar_wr['on_target_throw'] == 1) & (sportradar_wr['incompletion_type'] == 'Poorly Thrown'), 'on_target_throw'] = 0
    sportradar_wr.loc[(sportradar_wr['catchable'] == 1) & (sportradar_wr['incompletion_type'] == 'Poorly Thrown'), 'catchable'] = 0
    # Dropped passes
    sportradar_wr.loc[(sportradar_wr['on_target_throw'] == 0) & (sportradar_wr['incompletion_type'] == 'Dropped Pass'), 'on_target_throw'] = 1
    sportradar_wr.loc[(sportradar_wr['catchable'] == 0) & (sportradar_wr['incompletion_type'] == 'Dropped Pass'), 'catchable'] = 1


    
    # CREATED METRICS
    # Difficult catch
    sportradar_wr['difficult_attempt'] = False
    sportradar_wr.loc[(sportradar_wr['on_target_throw'] == 0) & (~sportradar_wr['description'].str.contains('penalty', case = False, na = False)), 'difficult_attempt'] = True
    sportradar_wr['difficult_catch'] = False
    sportradar_wr.loc[(sportradar_wr['on_target_throw'] == 0) & (sportradar_wr['reception'] == 1) & (~sportradar_wr['description'].str.contains('penalty', case = False, na = False)), 'difficult_catch'] = True
    # Adverse weather catch
    sportradar_wr['weather_attempt'] = False
    sportradar_wr.loc[(sportradar_wr['temp'] <= 32) | (sportradar_wr['wind_speed'] >= 10) | (sportradar_wr['humidity'] >= 70 ), 'weather_attempt'] = True
    sportradar_wr['weather_catch'] = False
    sportradar_wr.loc[((sportradar_wr['temp'] <= 32) | (sportradar_wr['wind_speed'] >= 10) | (sportradar_wr['humidity'] >= 70 )) & (sportradar_wr['reception'] == 1), 'weather_catch'] = True
    # QB bailout catch
    sportradar_wr['qb_bf_attempt'] = False
    sportradar_wr.loc[(
                        (sportradar_wr['pocket_location'] == 'Scramble Left') 
                        | (sportradar_wr['pocket_location'] == 'Scramble Right') 
                        | (sportradar_wr['hurry'] == True) 
                        | (sportradar_wr['blitz'] == True) 
                        | (sportradar_wr['knockdown'] == True)
                      ) , 'qb_bf_attempt'] = True
    sportradar_wr['qb_bf_catch'] = False
    sportradar_wr.loc[(
                        (sportradar_wr['pocket_location'] == 'Scramble Left') 
                        | (sportradar_wr['pocket_location'] == 'Scramble Right') 
                        | (sportradar_wr['hurry'] == True) 
                        | (sportradar_wr['blitz'] == True) 
                        | (sportradar_wr['knockdown'] == True)
                      ) 
                      & (sportradar_wr['reception'] == 1), 'qb_bf_catch'] = True
    # Clutch catch
    sportradar_wr['clutch_catch'] = False
    sportradar_wr.loc[(
                        (sportradar_wr['period'] >= 4) & (sportradar_wr['start_play_clock'] < pd.Timedelta(minutes=4)) 
                        & (
                          ((sportradar_wr['team_alias'] == sportradar_wr['home_alias']) & (abs(sportradar_wr['away_points'] - sportradar_wr['home_points']) <= 8))
                          | 
                          ((sportradar_wr['team_alias'] == sportradar_wr['away_alias']) & (abs(sportradar_wr['home_points'] - sportradar_wr['away_points']) <= 8))
                          )
                        & (
                            (sportradar_wr['firstdown'] == 1) | (sportradar_wr['touchdown'] == 1)
                          )
                      ) & (sportradar_wr['reception'] == 1), 'clutch_catch'
                    ] = True
    # Conversion catch
    sportradar_wr.loc[(
                        (
                            ((sportradar_wr['start_play_down'] >= 3) & (sportradar_wr['firstdown'] == 1)) | ((sportradar_wr['start_play_down'] >= 3) & (sportradar_wr['touchdown'] == 1))
                        )
                      ) & (sportradar_wr['reception'] == 1), 'conversion_catch'] = True
    # Red zone touchdown catch
    sportradar_wr.loc[(
                        (
                            (sportradar_wr['inside_20'] == 1) & (sportradar_wr['touchdown'] == 1)
                        )
                      ) & (sportradar_wr['reception'] == 1), 'redzone_catch'] = True
    # Catches by route
    routes = ['Cross', 'Curl', 'Post', 'Underneath Screen', 'Flat', 'Slant', 'WR Screen', 'Comeback', 'Go', 'In']
    for r in routes:
        sportradar_wr[r.replace(' ', '_').lower() + '_attempt'] = False
        sportradar_wr.loc[ (sportradar_wr['pass_route'] == r), r.replace(' ', '_').lower() + '_attempt'] = True
    for r in routes:
        sportradar_wr[r.replace(' ', '_').lower() + '_catch'] = False
        sportradar_wr.loc[ (sportradar_wr['pass_route'] == r) & (sportradar_wr['reception'] == 1), r.replace(' ', '_').lower() + '_catch'] = True
    # Deep catch
    sportradar_wr['deep_attempt'] = False
    sportradar_wr.loc[ (sportradar_wr['att_yards'] >= 20), 'deep_attempt'] = True
    sportradar_wr['deep_catch'] = False
    sportradar_wr.loc[ (sportradar_wr['att_yards'] >= 20) & (sportradar_wr['reception'] == 1), 'deep_catch'] = True
    # Large YAC catch
    sportradar_wr['large_yac_catch'] = False
    sportradar_wr.loc[ (sportradar_wr['yards_after_catch'] >= 10) & (sportradar_wr['reception'] == 1), 'large_yac_catch'] = True
    # Play action catch
    sportradar_wr['play_action_attempt'] = False
    sportradar_wr.loc[ (sportradar_wr['play_action'] == True), 'play_action_attempt'] = True
    sportradar_wr['play_action_catch'] = False
    sportradar_wr.loc[ (sportradar_wr['play_action'] == True) & (sportradar_wr['reception'] == 1), 'play_action_catch'] = True
    # RPO catch
    sportradar_wr['rpo_attempt'] = False
    sportradar_wr.loc[ (sportradar_wr['run_pass_option'] == True), 'rpo_attempt'] = True
    sportradar_wr['rpo_catch'] = False
    sportradar_wr.loc[ (sportradar_wr['run_pass_option'] == True) & (sportradar_wr['reception'] == 1), 'rpo_catch'] = True
    # Tackle-breaker catch
    sportradar_wr['tackle_breaker_catch'] = False
    sportradar_wr.loc[ (sportradar_wr['broken_tackles'] >= 1) & (sportradar_wr['reception'] == 1), 'tackle_breaker_catch'] = True
    # Beast catch
    sportradar_wr['beast_catch'] = False
    sportradar_wr.loc[ (sportradar_wr['yards_after_contact'] >= 10) & (sportradar_wr['reception'] == 1), 'beast_catch'] = True
    # Hurry-up catch
    sportradar_wr['hurry_up_attempt'] = False
    sportradar_wr.loc[ (sportradar_wr['huddle'] == 'No Huddle'), 'hurry_up_attempt'] = True
    sportradar_wr['hurry_up_catch'] = False
    sportradar_wr.loc[ (sportradar_wr['huddle'] == 'No Huddle') & (sportradar_wr['reception'] == 1), 'hurry_up_catch'] = True
    # Deep sideline catch
    sportradar_wr['deep_sideline_attempt'] = False
    sportradar_wr.loc[(
                        (sportradar_wr['play_direction'] == 'Right Sideline') | (sportradar_wr['play_direction'] == 'Left Sideline')
                      )
                      & (sportradar_wr['att_yards'] >= 20)
                      , 'deep_sideline_attempt'] = True
    sportradar_wr['deep_sideline_catch'] = False
    sportradar_wr.loc[(
                        (sportradar_wr['play_direction'] == 'Right Sideline') | (sportradar_wr['play_direction'] == 'Left Sideline')
                      )
                      & (sportradar_wr['att_yards'] >= 20)
                      & (sportradar_wr['reception'] == 1), 'deep_sideline_catch'] = True
    # DPI drawn
    sportradar_wr['dpi_drawn'] = False
    sportradar_wr.loc[sportradar_wr['defender_name'].notnull(), 'dpi_drawn'] = True
    # Possession saver catch
    sportradar_wr['possession_saver_attempt'] = False
    sportradar_wr.loc[(sportradar_wr['att_yards'] >= sportradar_wr['start_play_yfd']), 'possession_saver_attempt'] = True    
    sportradar_wr['possession_saver_catch'] = False
    sportradar_wr.loc[(sportradar_wr['att_yards'] >= sportradar_wr['start_play_yfd']) & (sportradar_wr['reception'] == 1), 'possession_saver_catch'] = True


    
    # APPEND TO CONSOLIDATED DF
    consolidated_sr_wr = pd.concat([consolidated_sr_wr, sportradar_wr])

CPU times: user 34.2 s, sys: 238 ms, total: 34.4 s
Wall time: 10min 35s


In [7]:
consolidated_sr_wr = consolidated_sr_wr.reset_index(drop = True)

# RESTART POINT

In [8]:
# Max column width so we can read play descriptions
pd.set_option('display.max_colwidth', None)

### Trick plays with multiple rows

In [9]:
consolidated_sr_wr['play_id'].nunique(), consolidated_sr_wr['play_id'].shape

(18231, (18299,))

In [10]:
consolidated_sr_wr.groupby("play_id").filter(lambda x: len(x) > 1)['play_id'].nunique()

18

In [11]:
consolidated_sr_wr.groupby("play_id").filter(lambda x: len(x) > 1)[['game_id', 'play_id', 'period', 'start_play_clock_string', 'qb_name', 'pocket_location', 
                                                            'hurry', 'blitz', 'knockdown', 'players_rushed', 'pocket_time_string', 'pass_route', 'player_name', 
                                                            'att_yards', 'reception', 'yards', 'yards_after_catch', 'on_target_throw', 'description']]

Unnamed: 0,game_id,play_id,period,start_play_clock_string,qb_name,pocket_location,hurry,blitz,knockdown,players_rushed,pocket_time_string,pass_route,player_name,att_yards,reception,yards,yards_after_catch,on_target_throw,description
583,c4a110f2-847b-4710-9a54-7be66fdb3c99,859d8fa0-320e-11ed-a5d2-6f00292b4fe8,4,00:08,Baker Mayfield,Middle,0,0,0,2,,Cross,Christian McCaffrey,,1,1.0,0.0,0,B.Mayfield pass short middle complete to CAR 26. Catch made by C.McCaffrey at CAR 26. Gain of 1 yards. Lateral to D.Moore to CAR 22 for yards. Tackled by M.Garrett at CAR 25.
584,c4a110f2-847b-4710-9a54-7be66fdb3c99,859d8fa0-320e-11ed-a5d2-6f00292b4fe8,4,00:08,Baker Mayfield,Middle,0,0,0,2,1.93,Cross,Christian McCaffrey,1.0,1,1.0,0.0,1,B.Mayfield pass short middle complete to CAR 26. Catch made by C.McCaffrey at CAR 26. Gain of 1 yards. Lateral to D.Moore to CAR 22 for yards. Tackled by M.Garrett at CAR 25.
585,c4a110f2-847b-4710-9a54-7be66fdb3c99,859d8fa0-320e-11ed-a5d2-6f00292b4fe8,4,00:08,Baker Mayfield,Middle,0,0,0,2,,Cross,DJ Moore,,0,-4.0,,0,B.Mayfield pass short middle complete to CAR 26. Catch made by C.McCaffrey at CAR 26. Gain of 1 yards. Lateral to D.Moore to CAR 22 for yards. Tackled by M.Garrett at CAR 25.
586,c4a110f2-847b-4710-9a54-7be66fdb3c99,859d8fa0-320e-11ed-a5d2-6f00292b4fe8,4,00:08,Baker Mayfield,Middle,0,0,0,2,1.93,Cross,DJ Moore,1.0,0,-4.0,,1,B.Mayfield pass short middle complete to CAR 26. Catch made by C.McCaffrey at CAR 26. Gain of 1 yards. Lateral to D.Moore to CAR 22 for yards. Tackled by M.Garrett at CAR 25.
2358,cf7291ef-cb13-4d5d-882e-0221330aafeb,45a01ff0-3aef-11ed-abfe-93196c8dd10a,4,00:09,Mitch Trubisky,Middle,0,0,0,2,,Curl,Najee Harris,,0,-4.0,,0,"M.Trubisky pass short left complete to PIT 18. Catch made by D.Johnson at PIT 18. Gain of 8 yards. Lateral to C.Claypool to PIT 6 for yards. Lateral to N.Harris to PIT 2 for yards. N.Harris FUMBLES. Fumble RECOVERED by CLE-D.Ward at PIT End Zone. D.Ward for yards, TOUCHDOWN."
2359,cf7291ef-cb13-4d5d-882e-0221330aafeb,45a01ff0-3aef-11ed-abfe-93196c8dd10a,4,00:09,Mitch Trubisky,Middle,0,0,0,2,2.82,Curl,Najee Harris,14.0,0,-4.0,,1,"M.Trubisky pass short left complete to PIT 18. Catch made by D.Johnson at PIT 18. Gain of 8 yards. Lateral to C.Claypool to PIT 6 for yards. Lateral to N.Harris to PIT 2 for yards. N.Harris FUMBLES. Fumble RECOVERED by CLE-D.Ward at PIT End Zone. D.Ward for yards, TOUCHDOWN."
2360,cf7291ef-cb13-4d5d-882e-0221330aafeb,45a01ff0-3aef-11ed-abfe-93196c8dd10a,4,00:09,Mitch Trubisky,Middle,0,0,0,2,,Curl,Chase Claypool,,0,-6.0,,0,"M.Trubisky pass short left complete to PIT 18. Catch made by D.Johnson at PIT 18. Gain of 8 yards. Lateral to C.Claypool to PIT 6 for yards. Lateral to N.Harris to PIT 2 for yards. N.Harris FUMBLES. Fumble RECOVERED by CLE-D.Ward at PIT End Zone. D.Ward for yards, TOUCHDOWN."
2361,cf7291ef-cb13-4d5d-882e-0221330aafeb,45a01ff0-3aef-11ed-abfe-93196c8dd10a,4,00:09,Mitch Trubisky,Middle,0,0,0,2,2.82,Curl,Chase Claypool,14.0,0,-6.0,,1,"M.Trubisky pass short left complete to PIT 18. Catch made by D.Johnson at PIT 18. Gain of 8 yards. Lateral to C.Claypool to PIT 6 for yards. Lateral to N.Harris to PIT 2 for yards. N.Harris FUMBLES. Fumble RECOVERED by CLE-D.Ward at PIT End Zone. D.Ward for yards, TOUCHDOWN."
2362,cf7291ef-cb13-4d5d-882e-0221330aafeb,45a01ff0-3aef-11ed-abfe-93196c8dd10a,4,00:09,Mitch Trubisky,Middle,0,0,0,2,,Curl,Diontae Johnson,,1,8.0,-6.0,0,"M.Trubisky pass short left complete to PIT 18. Catch made by D.Johnson at PIT 18. Gain of 8 yards. Lateral to C.Claypool to PIT 6 for yards. Lateral to N.Harris to PIT 2 for yards. N.Harris FUMBLES. Fumble RECOVERED by CLE-D.Ward at PIT End Zone. D.Ward for yards, TOUCHDOWN."
2363,cf7291ef-cb13-4d5d-882e-0221330aafeb,45a01ff0-3aef-11ed-abfe-93196c8dd10a,4,00:09,Mitch Trubisky,Middle,0,0,0,2,2.82,Curl,Diontae Johnson,14.0,1,8.0,-6.0,1,"M.Trubisky pass short left complete to PIT 18. Catch made by D.Johnson at PIT 18. Gain of 8 yards. Lateral to C.Claypool to PIT 6 for yards. Lateral to N.Harris to PIT 2 for yards. N.Harris FUMBLES. Fumble RECOVERED by CLE-D.Ward at PIT End Zone. D.Ward for yards, TOUCHDOWN."


In [12]:
trick_plays = consolidated_sr_wr.groupby("play_id").filter(lambda x: len(x) > 1)
# trick_plays.to_csv('/mnt/c/Data_Science/Personal_Projects/nfl_wr_knn/trick_plays.csv', index=False)

In [13]:
trick_plays[trick_plays['pocket_time_string'] == 'nan'].index

Index([  583,   585,  2358,  2360,  2362,  3712,  3714,  3716,  4433,  4435,
        4437,  4439,  4928,  4930,  4932,  6230,  6232,  6234,  6888,  6890,
        6892, 10292, 10294, 11093, 11095, 13130, 13132, 13287, 13289, 13380,
       13382, 13476, 13478, 16869, 16871, 16873, 17690, 17692, 17694, 18096,
       18293, 18295, 18297],
      dtype='int64')

In [14]:
consolidated_sr_wr = consolidated_sr_wr[~consolidated_sr_wr.index.isin([583, 585, 2358, 2360, 2362,  3712,  3714,  3716,  4433,  4435, 4437,  4439,  4928,  4930,  4932,  6230,  6232,  6234,  6888,  6890, 6892, 10292, 10294, 11093, 11095,
                                                                13130, 13132, 13287, 13289, 13380, 13382, 13476, 13478, 16869, 16871, 16873, 17690, 17692, 17694, 18096, 18293, 18295, 18297])]

In [15]:
consolidated_sr_wr.shape

(18256, 124)

# AGGREGATE METRICS

In [1013]:
pd.set_option('display.max_colwidth', None)
# pd.reset_option('display.max_colwidth')

# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_seq_items', None)

In [1015]:
adot = consolidated_sr_wr.groupby(['player_name'])['att_yards'].mean().reset_index()
adot.rename(columns={'att_yards': 'adot'}, inplace = True)
consolidated_sr_wr = consolidated_sr_wr.merge(adot, on = 'player_name', how = 'left')

avg_yac = consolidated_sr_wr.groupby(['player_name'])['yards_after_catch'].mean().reset_index()
avg_yac.rename(columns={'yards_after_catch': 'avg_yac'}, inplace = True)
consolidated_sr_wr = consolidated_sr_wr.merge(avg_yac, on = 'player_name', how = 'left')

avg_yacon = consolidated_sr_wr.groupby(['player_name'])['yards_after_contact'].mean().reset_index()
avg_yacon.rename(columns={'yards_after_contact': 'avg_yacon'}, inplace = True)
consolidated_sr_wr = consolidated_sr_wr.merge(avg_yacon, on = 'player_name', how = 'left')

In [1016]:
# consolidated_sr_wr[['player_name', 'adot', 'avg_yac', 'avg_yacon']].head()
new_cols = consolidated_sr_wr.groupby(['player_name', 'season_year'])[['adot', 'avg_yac', 'avg_yacon']].min()

In [1017]:
aggregate = consolidated_sr_wr.groupby(['player_name', 'player_position', 'season_year'])[['reception', 'yards', 'att_yards', 'yards_after_catch', 'yards_after_contact', 'broken_tackles', 'catchable', 'touchdown', 'difficult_attempt', 'difficult_catch', 'weather_attempt', 'weather_catch', 'qb_bf_attempt', 'qb_bf_catch', 'clutch_catch', 
                                        'conversion_catch', 'redzone_catch', 'cross_attempt', 'cross_catch', 'curl_attempt', 'curl_catch', 'post_attempt', 'post_catch', 'underneath_screen_attempt', 'underneath_screen_catch', 'flat_attempt', 'flat_catch', 'slant_attempt', 'slant_catch', 'wr_screen_attempt', 
                                        'wr_screen_catch', 'comeback_attempt', 'comeback_catch', 'go_attempt', 'go_catch', 'in_attempt', 'in_catch', 'deep_attempt', 'deep_catch', 'large_yac_catch', 'play_action_attempt', 'play_action_catch', 'rpo_attempt', 'rpo_catch', 'tackle_breaker_catch', 'beast_catch', 
                                        'hurry_up_attempt', 'hurry_up_catch', 'deep_sideline_attempt', 'deep_sideline_catch', 'dpi_drawn', 'possession_saver_attempt', 'possession_saver_catch']].sum().sort_values('yards', ascending = False).reset_index()

In [1018]:
aggregate.head()

Unnamed: 0,player_name,player_position,season_year,reception,yards,att_yards,yards_after_catch,yards_after_contact,broken_tackles,catchable,touchdown,difficult_attempt,difficult_catch,weather_attempt,weather_catch,qb_bf_attempt,qb_bf_catch,clutch_catch,conversion_catch,redzone_catch,cross_attempt,cross_catch,curl_attempt,curl_catch,post_attempt,post_catch,underneath_screen_attempt,underneath_screen_catch,flat_attempt,flat_catch,slant_attempt,slant_catch,wr_screen_attempt,wr_screen_catch,comeback_attempt,comeback_catch,go_attempt,go_catch,in_attempt,in_catch,deep_attempt,deep_catch,large_yac_catch,play_action_attempt,play_action_catch,rpo_attempt,rpo_catch,tackle_breaker_catch,beast_catch,hurry_up_attempt,hurry_up_catch,deep_sideline_attempt,deep_sideline_catch,dpi_drawn,possession_saver_attempt,possession_saver_catch
0,Justin Jefferson,WR,2022.0,130.0,1809.0,1858.0,624.0,132.0,6.0,6.0,8.0,48,1,130,86,66,40,8,27,6,5,4,18,11,12,10,0,0,16,14,12,10,12,11,18,9,21,8,14,12,27,16,22,44,31,4,2,6,1,33,23,20,11,4,106,64
1,Tyreek Hill,WR,2022.0,124.0,1710.0,2107.0,482.0,58.0,5.0,8.0,7.0,38,0,132,88,46,33,3,22,2,16,13,18,15,19,8,0,0,7,7,18,13,14,12,17,16,23,12,9,7,38,20,17,69,50,25,15,4,0,5,5,15,10,4,111,71
2,Davante Adams,WR,2022.0,104.0,1516.0,2129.0,493.0,95.0,9.0,7.0,14.0,64,0,91,52,68,31,4,23,4,6,6,21,12,15,7,0,0,8,8,14,6,7,6,17,6,35,14,20,16,36,14,16,36,23,8,6,8,1,9,5,20,8,4,100,45
3,A.J. Brown,WR,2022.0,94.0,1496.0,1754.0,548.0,192.0,7.0,6.0,11.0,45,0,75,49,56,30,2,17,4,4,4,16,10,2,1,1,1,6,5,35,26,6,5,13,6,22,9,14,8,28,13,18,47,27,43,32,7,4,24,12,21,11,1,84,44
4,Stefon Diggs,WR,2022.0,111.0,1429.0,1729.0,419.0,100.0,2.0,8.0,11.0,36,1,126,81,58,35,5,17,7,5,3,20,17,12,6,2,2,15,13,22,18,9,9,14,8,16,9,10,6,23,12,12,46,37,26,20,2,2,16,12,14,8,3,87,55


In [1019]:
# aggregate = aggregate.merge(consolidated_sr_wr[['adot', 'avg_yac', 'avg_yacon']], on = 'player_name', how = 'left')
aggregate = aggregate.merge(new_cols, on = 'player_name', how = 'inner')

### Aggregation Prep

#### New columns in Sportradar

In [1020]:
# aggregate['catch_rate'] = aggregate['reception'] / aggregate['target'] -- Calculate with PFF receptions. Sportradar receptions are slightly off.
aggregate['cross_pct'] = aggregate['cross_catch'] / aggregate['cross_attempt']
aggregate['curl_pct'] = aggregate['curl_catch'] / aggregate['curl_attempt']
aggregate['post_pct'] = aggregate['post_catch'] / aggregate['post_attempt']
aggregate['underneath_screen_pct'] = aggregate['underneath_screen_catch'] / aggregate['underneath_screen_attempt']
aggregate['flat_pct'] = aggregate['flat_catch'] / aggregate['flat_attempt']
aggregate['slant_pct'] = aggregate['slant_catch'] / aggregate['slant_attempt']
aggregate['wr_screen_pct'] = aggregate['wr_screen_catch'] / aggregate['wr_screen_attempt']
aggregate['comeback_pct'] = aggregate['comeback_catch'] / aggregate['comeback_attempt']
aggregate['go_pct'] = aggregate['go_catch'] / aggregate['go_attempt']
aggregate['in_pct'] = aggregate['in_catch'] / aggregate['in_attempt']
aggregate['deep_pct'] = aggregate['deep_catch'] / aggregate['deep_attempt']
aggregate['play_action_pct'] = aggregate['play_action_catch'] / aggregate['play_action_attempt']
aggregate['rpo_pct'] = aggregate['rpo_catch'] / aggregate['rpo_attempt']
aggregate['hurry_up_pct'] = aggregate['hurry_up_catch'] / aggregate['hurry_up_attempt']
aggregate['deep_sideline_pct'] = aggregate['deep_sideline_catch'] / aggregate['deep_sideline_attempt']
aggregate['possession_saver_pct'] = aggregate['possession_saver_catch'] / aggregate['possession_saver_attempt']

### PFF

In [1021]:
pff_wr_grades_2022 = pd.read_csv('/mnt/c/Data_Science/Personal_Projects/nfl_wr_knn/pff_csv_exports/pff_2022_wr_summary.csv')

#### New PFF columns

In [1022]:
pff_wr_grades_2022['inline_rate'] = pff_wr_grades_2022['inline_snaps'] / pff_wr_grades_2022['pass_plays']
pff_wr_grades_2022['pass_block_rate'] = pff_wr_grades_2022['pass_blocks'] / pff_wr_grades_2022['pass_plays']
pff_wr_grades_2022['route_rate'] = pff_wr_grades_2022['routes'] / pff_wr_grades_2022['pass_plays']
pff_wr_grades_2022['slot_rate'] = pff_wr_grades_2022['slot_snaps'] / pff_wr_grades_2022['pass_plays']
pff_wr_grades_2022['wide_rate'] = pff_wr_grades_2022['wide_snaps'] / pff_wr_grades_2022['pass_plays']
pff_wr_grades_2022['contested_catch_rate'] = pff_wr_grades_2022['contested_receptions'] / pff_wr_grades_2022['contested_targets']

In [1023]:
pff_wr_grades_2022 = pff_wr_grades_2022 [['player', 'player_game_count', 'targets', 'receptions', 'contested_receptions', 'contested_targets', 'contested_catch_rate', 'drop_rate', 'drops', 'fumbles', 'grades_pass_block', 'pass_block_rate', 
                                          'inline_snaps', 'inline_rate', 'pass_blocks', 'pass_plays', 'routes', 'route_rate', 'slot_snaps', 'slot_rate', 'wide_snaps', 'wide_rate']]

In [1024]:
pff_wr_grades_2022 = pff_wr_grades_2022.rename(columns={"player": "player_name"})

#### SR player renaming

In [1025]:
aggregate.loc[aggregate['player_name'] == 'Darrell Henderson Jr.', 'player_name'] = 'Darrell Henderson'
aggregate.loc[aggregate['player_name'] == 'Brian Robinson Jr.', 'player_name'] = 'Brian Robinson'
aggregate.loc[aggregate['player_name'] == 'Tyron Billy-Johnson', 'player_name'] = 'Tyron Johnson'
aggregate.loc[aggregate['player_name'] == 'Ty Montgomery II', 'player_name'] = 'Ty Montgomery'

#### PFF player renaming

In [1026]:
pff_wr_grades_2022.loc[pff_wr_grades_2022['player_name'] == 'D.K. Metcalf', 'player_name'] = 'DK Metcalf'
pff_wr_grades_2022.loc[pff_wr_grades_2022['player_name'] == 'Mecole Hardman Jr.', 'player_name'] = 'Mecole Hardman'
pff_wr_grades_2022.loc[pff_wr_grades_2022['player_name'] == 'David Sills V', 'player_name'] = 'David Sills'
pff_wr_grades_2022.loc[pff_wr_grades_2022['player_name'] == 'James Proche II', 'player_name'] = 'James Proche'
pff_wr_grades_2022.loc[pff_wr_grades_2022['player_name'] == 'D\'Wayne Eskridge', 'player_name'] = 'Dee Eskridge'
pff_wr_grades_2022.loc[pff_wr_grades_2022['player_name'] == 'Tony Jones Jr.', 'player_name'] = 'Tony Jones'
pff_wr_grades_2022.loc[pff_wr_grades_2022['player_name'] == 'Demetric Felton Jr.', 'player_name'] = 'Demetric Felton'

In [1027]:
aggregate = aggregate.merge(pff_wr_grades_2022, how = 'left', on = 'player_name')
aggregate.head()

Unnamed: 0,player_name,player_position,season_year,reception,yards,att_yards,yards_after_catch,yards_after_contact,broken_tackles,catchable,touchdown,difficult_attempt,difficult_catch,weather_attempt,weather_catch,qb_bf_attempt,qb_bf_catch,clutch_catch,conversion_catch,redzone_catch,cross_attempt,cross_catch,curl_attempt,curl_catch,post_attempt,post_catch,underneath_screen_attempt,underneath_screen_catch,flat_attempt,flat_catch,slant_attempt,slant_catch,wr_screen_attempt,wr_screen_catch,comeback_attempt,comeback_catch,go_attempt,go_catch,in_attempt,in_catch,deep_attempt,deep_catch,large_yac_catch,play_action_attempt,play_action_catch,rpo_attempt,rpo_catch,tackle_breaker_catch,beast_catch,hurry_up_attempt,hurry_up_catch,deep_sideline_attempt,deep_sideline_catch,dpi_drawn,possession_saver_attempt,possession_saver_catch,adot,avg_yac,avg_yacon,cross_pct,curl_pct,post_pct,underneath_screen_pct,flat_pct,slant_pct,wr_screen_pct,comeback_pct,go_pct,in_pct,deep_pct,play_action_pct,rpo_pct,hurry_up_pct,deep_sideline_pct,possession_saver_pct,player_game_count,targets,receptions,contested_receptions,contested_targets,contested_catch_rate,drop_rate,drops,fumbles,grades_pass_block,pass_block_rate,inline_snaps,inline_rate,pass_blocks,pass_plays,routes,route_rate,slot_snaps,slot_rate,wide_snaps,wide_rate
0,Justin Jefferson,WR,2022.0,130.0,1809.0,1858.0,624.0,132.0,6.0,6.0,8.0,48,1,130,86,66,40,8,27,6,5,4,18,11,12,10,0,0,16,14,12,10,12,11,18,9,21,8,14,12,27,16,22,44,31,4,2,6,1,33,23,20,11,4,106,64,9.43,4.8,2.28,0.8,0.61,0.83,,0.88,0.83,0.92,0.5,0.38,0.86,0.59,0.7,0.5,0.7,0.55,0.6,17.0,176.0,128.0,22.0,39.0,0.56,5.2,7.0,0.0,,0.0,0.0,0.0,0.0,736.0,690.0,0.94,218.0,0.3,511.0,0.69
1,Tyreek Hill,WR,2022.0,124.0,1710.0,2107.0,482.0,58.0,5.0,8.0,7.0,38,0,132,88,46,33,3,22,2,16,13,18,15,19,8,0,0,7,7,18,13,14,12,17,16,23,12,9,7,38,20,17,69,50,25,15,4,0,5,5,15,10,4,111,71,11.51,3.89,0.95,0.81,0.83,0.42,,1.0,0.72,0.86,0.94,0.52,0.78,0.53,0.72,0.6,1.0,0.67,0.64,17.0,167.0,119.0,13.0,25.0,0.52,3.3,4.0,1.0,,0.0,7.0,0.01,0.0,568.0,534.0,0.94,239.0,0.42,309.0,0.54
2,Davante Adams,WR,2022.0,104.0,1516.0,2129.0,493.0,95.0,9.0,7.0,14.0,64,0,91,52,68,31,4,23,4,6,6,21,12,15,7,0,0,8,8,14,6,7,6,17,6,35,14,20,16,36,14,16,36,23,8,6,8,1,9,5,20,8,4,100,45,11.26,4.74,1.06,1.0,0.57,0.47,,1.0,0.43,0.86,0.35,0.4,0.8,0.39,0.64,0.75,0.56,0.4,0.45,17.0,168.0,100.0,15.0,34.0,0.44,5.7,6.0,1.0,66.1,0.0,0.0,0.0,1.0,657.0,618.0,0.94,197.0,0.3,457.0,0.7
3,A.J. Brown,WR,2022.0,94.0,1496.0,1754.0,548.0,192.0,7.0,6.0,11.0,45,0,75,49,56,30,2,17,4,4,4,16,10,2,1,1,1,6,5,35,26,6,5,13,6,22,9,14,8,28,13,18,47,27,43,32,7,4,24,12,21,11,1,84,44,11.24,5.83,3.92,1.0,0.62,0.5,1.0,0.83,0.74,0.83,0.46,0.41,0.57,0.46,0.57,0.74,0.5,0.52,0.52,17.0,137.0,88.0,15.0,30.0,0.5,6.4,6.0,2.0,,0.0,0.0,0.0,0.0,611.0,578.0,0.95,157.0,0.26,453.0,0.74
4,Stefon Diggs,WR,2022.0,111.0,1429.0,1729.0,419.0,100.0,2.0,8.0,11.0,36,1,126,81,58,35,5,17,7,5,3,20,17,12,6,2,2,15,13,22,18,9,9,14,8,16,9,10,6,23,12,12,46,37,26,20,2,2,16,12,14,8,3,87,55,10.42,3.77,2.56,0.6,0.85,0.5,1.0,0.87,0.82,1.0,0.57,0.56,0.6,0.52,0.8,0.77,0.75,0.57,0.63,16.0,149.0,108.0,12.0,24.0,0.5,7.7,9.0,1.0,,0.0,0.0,0.0,0.0,607.0,573.0,0.94,207.0,0.34,400.0,0.66


In [1028]:
aggregate['catch_rate'] = aggregate['receptions'] / aggregate['targets']
aggregate['yprr'] = aggregate['yards'] / aggregate['routes']

## NFL Combine

In [1029]:
import nfl_data_py as nfl

In [1030]:
combine = nfl.import_combine_data()

In [1033]:
combine.loc[combine['player_name'] == 'D.J. Moore', 'player_name'] = 'DJ Moore'
combine.loc[combine['player_name'] == 'Mark Ingram', 'player_name'] = 'Mark Ingram II'
combine.loc[combine['player_name'] == 'Marvin Jones', 'player_name'] = 'Marvin Jones Jr.'
combine.loc[combine['player_name'] == 'Allen Robinson', 'player_name'] = 'Allen Robinson II'
combine.loc[combine['player_name'] == 'Melvin Gordon', 'player_name'] = 'Melvin Gordon III'
combine.loc[combine['player_name'] == 'Ty Montgomery', 'player_name'] = 'Ty Montgomery II'
combine.loc[combine['player_name'] == 'D.J. Chark', 'player_name'] = 'DJ Chark Jr.'
combine.loc[combine['player_name'] == 'Ray-ray McCloud', 'player_name'] = 'Ray-Ray McCloud III'
combine.loc[combine['player_name'] == 'Trequan Smith', 'player_name'] = 'Tre\'Quan Smith'
combine.loc[combine['player_name'] == 'Jeff Wilson', 'player_name'] = 'Jeff Wilson Jr.'
combine.loc[combine['player_name'] == 'Darrell Henderson', 'player_name'] = 'Darrell Henderson Jr.'
combine.loc[combine['player_name'] == 'Irv Smith', 'player_name'] = 'Irv Smith Jr.'
combine.loc[combine['player_name'] == 'Benny Snell', 'player_name'] = 'Benny Snell Jr.'
combine.loc[combine['player_name'] == 'Gabriel Davis', 'player_name'] = 'Gabe Davis'
combine.loc[combine['player_name'] == 'Anthony McFarland', 'player_name'] = 'Anthony McFarland Jr.'
combine.loc[combine['player_name'] == 'Michael Pittman', 'player_name'] = 'Michael Pittman Jr.'
combine.loc[combine['player_name'] == 'Travis Etienne', 'player_name'] = 'Travis Etienne Jr.'
combine.loc[combine['player_name'] == 'Larry Rountree', 'player_name'] = 'Larry Rountree III'
combine.loc[combine['player_name'] == 'Velus Jones', 'player_name'] = 'Velus Jones Jr.'
combine.loc[combine['player_name'] == 'Brian Robinson', 'player_name'] = 'Brian Robinson Jr.'
combine.loc[combine['player_name'] == 'Pierre Strong', 'player_name'] = 'Pierre Strong Jr.'

In [1034]:
combine.head()

Unnamed: 0,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player_name,pos,school,ht,wt,forty,bench,vertical,broad_jump,cone,shuttle
0,2000,2000.0,New York Jets,1.0,13.0,AbraJo00,,John Abraham,OLB,South Carolina,6-4,252.0,4.55,,,,,
1,2000,2000.0,Seattle Seahawks,1.0,19.0,AlexSh00,shaun-alexander-1,Shaun Alexander,RB,Alabama,6-0,218.0,4.58,,,,,
2,2000,2000.0,Kansas City Chiefs,6.0,188.0,AlfoDa20,,Darnell Alford,OT,Boston Col.,6-4,334.0,5.56,23.0,25.0,94.0,8.48,4.98
3,2000,,,,,,,Kyle Allamon,TE,Texas Tech,6-2,253.0,4.97,,29.0,104.0,7.29,4.49
4,2000,2000.0,Carolina Panthers,1.0,23.0,AndeRa21,,Rashard Anderson,CB,Jackson State,6-2,206.0,4.55,,34.0,123.0,7.18,4.15


In [1035]:
combine.dtypes

season           int32
draft_year     float64
draft_team      object
draft_round    float64
draft_ovr      float64
pfr_id          object
cfb_id          object
player_name     object
pos             object
school          object
ht              object
wt             float64
forty          float64
bench          float64
vertical       float64
broad_jump     float64
cone           float64
shuttle        float64
dtype: object

In [1036]:
combine['ht'].unique()

array(['6-4', '6-0', '6-2', '5-10', '6-3', '5-9', '6-6', '6-1', '5-11',
       '6-5', '5-8', '6-7', '5-7', '6-8', '5-6', '6-9', '6-10', '5-5',
       None, '5-4'], dtype=object)

In [1037]:
# combine['ht'].unique()
# combine.loc[combine['ht'].isnull()]['ht'] = 0
combine.loc[combine['ht'].isnull(), 'ht'] = '0-0'

In [1038]:
# combine[['feet', 'inches']] = combine['ht'].str.split('-', expand = True).astype(int)
combine[['feet', 'inches']] = combine['ht'].str.split('-', expand = True).astype(int)
combine['ht'] = combine['feet'] * 12 + combine['inches']
combine.drop(['feet', 'inches'], axis = 1, inplace = True)
combine.head()

Unnamed: 0,season,draft_year,draft_team,draft_round,draft_ovr,pfr_id,cfb_id,player_name,pos,school,ht,wt,forty,bench,vertical,broad_jump,cone,shuttle
0,2000,2000.0,New York Jets,1.0,13.0,AbraJo00,,John Abraham,OLB,South Carolina,76,252.0,4.55,,,,,
1,2000,2000.0,Seattle Seahawks,1.0,19.0,AlexSh00,shaun-alexander-1,Shaun Alexander,RB,Alabama,72,218.0,4.58,,,,,
2,2000,2000.0,Kansas City Chiefs,6.0,188.0,AlfoDa20,,Darnell Alford,OT,Boston Col.,76,334.0,5.56,23.0,25.0,94.0,8.48,4.98
3,2000,,,,,,,Kyle Allamon,TE,Texas Tech,74,253.0,4.97,,29.0,104.0,7.29,4.49
4,2000,2000.0,Carolina Panthers,1.0,23.0,AndeRa21,,Rashard Anderson,CB,Jackson State,74,206.0,4.55,,34.0,123.0,7.18,4.15


In [1039]:
combine = combine[['player_name', 'ht', 'wt', 'forty', 'bench', 'vertical', 'broad_jump', 'cone', 'shuttle']]

In [1040]:
aggregate = aggregate.merge(combine, how = 'left', on = 'player_name')

In [1041]:
aggregate.head()

Unnamed: 0,player_name,player_position,season_year,reception,yards,att_yards,yards_after_catch,yards_after_contact,broken_tackles,catchable,touchdown,difficult_attempt,difficult_catch,weather_attempt,weather_catch,qb_bf_attempt,qb_bf_catch,clutch_catch,conversion_catch,redzone_catch,cross_attempt,cross_catch,curl_attempt,curl_catch,post_attempt,post_catch,underneath_screen_attempt,underneath_screen_catch,flat_attempt,flat_catch,slant_attempt,slant_catch,wr_screen_attempt,wr_screen_catch,comeback_attempt,comeback_catch,go_attempt,go_catch,in_attempt,in_catch,deep_attempt,deep_catch,large_yac_catch,play_action_attempt,play_action_catch,rpo_attempt,rpo_catch,tackle_breaker_catch,beast_catch,hurry_up_attempt,hurry_up_catch,deep_sideline_attempt,deep_sideline_catch,dpi_drawn,possession_saver_attempt,possession_saver_catch,adot,avg_yac,avg_yacon,cross_pct,curl_pct,post_pct,underneath_screen_pct,flat_pct,slant_pct,wr_screen_pct,comeback_pct,go_pct,in_pct,deep_pct,play_action_pct,rpo_pct,hurry_up_pct,deep_sideline_pct,possession_saver_pct,player_game_count,targets,receptions,contested_receptions,contested_targets,contested_catch_rate,drop_rate,drops,fumbles,grades_pass_block,pass_block_rate,inline_snaps,inline_rate,pass_blocks,pass_plays,routes,route_rate,slot_snaps,slot_rate,wide_snaps,wide_rate,catch_rate,yprr,ht,wt,forty,bench,vertical,broad_jump,cone,shuttle
0,Justin Jefferson,WR,2022.0,130.0,1809.0,1858.0,624.0,132.0,6.0,6.0,8.0,48,1,130,86,66,40,8,27,6,5,4,18,11,12,10,0,0,16,14,12,10,12,11,18,9,21,8,14,12,27,16,22,44,31,4,2,6,1,33,23,20,11,4,106,64,9.43,4.8,2.28,0.8,0.61,0.83,,0.88,0.83,0.92,0.5,0.38,0.86,0.59,0.7,0.5,0.7,0.55,0.6,17.0,176.0,128.0,22.0,39.0,0.56,5.2,7.0,0.0,,0.0,0.0,0.0,0.0,736.0,690.0,0.94,218.0,0.3,511.0,0.69,0.73,2.62,73.0,202.0,4.43,,37.5,126.0,,
1,Tyreek Hill,WR,2022.0,124.0,1710.0,2107.0,482.0,58.0,5.0,8.0,7.0,38,0,132,88,46,33,3,22,2,16,13,18,15,19,8,0,0,7,7,18,13,14,12,17,16,23,12,9,7,38,20,17,69,50,25,15,4,0,5,5,15,10,4,111,71,11.51,3.89,0.95,0.81,0.83,0.42,,1.0,0.72,0.86,0.94,0.52,0.78,0.53,0.72,0.6,1.0,0.67,0.64,17.0,167.0,119.0,13.0,25.0,0.52,3.3,4.0,1.0,,0.0,7.0,0.01,0.0,568.0,534.0,0.94,239.0,0.42,309.0,0.54,0.71,3.2,,,,,,,,
2,Davante Adams,WR,2022.0,104.0,1516.0,2129.0,493.0,95.0,9.0,7.0,14.0,64,0,91,52,68,31,4,23,4,6,6,21,12,15,7,0,0,8,8,14,6,7,6,17,6,35,14,20,16,36,14,16,36,23,8,6,8,1,9,5,20,8,4,100,45,11.26,4.74,1.06,1.0,0.57,0.47,,1.0,0.43,0.86,0.35,0.4,0.8,0.39,0.64,0.75,0.56,0.4,0.45,17.0,168.0,100.0,15.0,34.0,0.44,5.7,6.0,1.0,66.1,0.0,0.0,0.0,1.0,657.0,618.0,0.94,197.0,0.3,457.0,0.7,0.6,2.45,73.0,212.0,4.56,14.0,39.5,123.0,6.82,4.3
3,A.J. Brown,WR,2022.0,94.0,1496.0,1754.0,548.0,192.0,7.0,6.0,11.0,45,0,75,49,56,30,2,17,4,4,4,16,10,2,1,1,1,6,5,35,26,6,5,13,6,22,9,14,8,28,13,18,47,27,43,32,7,4,24,12,21,11,1,84,44,11.24,5.83,3.92,1.0,0.62,0.5,1.0,0.83,0.74,0.83,0.46,0.41,0.57,0.46,0.57,0.74,0.5,0.52,0.52,17.0,137.0,88.0,15.0,30.0,0.5,6.4,6.0,2.0,,0.0,0.0,0.0,0.0,611.0,578.0,0.95,157.0,0.26,453.0,0.74,0.64,2.59,72.0,226.0,4.49,19.0,36.5,120.0,,
4,Stefon Diggs,WR,2022.0,111.0,1429.0,1729.0,419.0,100.0,2.0,8.0,11.0,36,1,126,81,58,35,5,17,7,5,3,20,17,12,6,2,2,15,13,22,18,9,9,14,8,16,9,10,6,23,12,12,46,37,26,20,2,2,16,12,14,8,3,87,55,10.42,3.77,2.56,0.6,0.85,0.5,1.0,0.87,0.82,1.0,0.57,0.56,0.6,0.52,0.8,0.77,0.75,0.57,0.63,16.0,149.0,108.0,12.0,24.0,0.5,7.7,9.0,1.0,,0.0,0.0,0.0,0.0,607.0,573.0,0.94,207.0,0.34,400.0,0.66,0.72,2.49,72.0,195.0,4.46,,35.0,115.0,7.03,4.32


## Next Gen Stats

In [1042]:
wr_ng = nfl.import_ngs_data(stat_type = 'receiving')
wr_ng.head()

Unnamed: 0,season,season_type,week,player_display_name,player_position,team_abbr,avg_cushion,avg_separation,avg_intended_air_yards,percent_share_of_intended_air_yards,receptions,targets,catch_percentage,yards,rec_touchdowns,avg_yac,avg_expected_yac,avg_yac_above_expectation,player_gsis_id,player_first_name,player_last_name,player_jersey_number,player_short_name
0,2016,REG,0,Tyreek Hill,WR,KC,7.82,3.52,7.87,16.71,61,83,73.49,599.0,6,4.67,5.33,-0.66,00-0033040,Tyreek,Hill,10,T.Hill
1,2016,REG,0,Richard Rodgers,TE,GB,7.61,2.69,8.38,6.88,30,47,63.83,271.0,2,3.35,3.27,0.07,00-0031384,Richard,Rodgers,82,
2,2016,REG,0,Travis Benjamin,WR,LAC,7.6,3.02,13.33,19.54,47,75,62.67,677.0,4,5.68,4.97,0.71,00-0029269,Travis,Benjamin,12,
3,2016,REG,0,Cordarrelle Patterson,WR,MIN,7.47,3.23,5.56,9.53,52,70,74.29,453.0,2,6.28,5.62,0.66,00-0030578,Cordarrelle,Patterson,84,C.Patterson
4,2016,REG,0,DeSean Jackson,WR,WAS,7.3,2.89,16.04,28.09,56,100,56.0,1005.0,4,4.87,5.05,-0.18,00-0026189,DeSean,Jackson,11,D.Jackson


In [1043]:
wr_ng.dtypes

season                                   int32
season_type                             object
week                                     int32
player_display_name                     object
player_position                         object
team_abbr                               object
avg_cushion                            float64
avg_separation                         float64
avg_intended_air_yards                 float64
percent_share_of_intended_air_yards    float64
receptions                               int32
targets                                  int32
catch_percentage                       float64
yards                                  float64
rec_touchdowns                           int32
avg_yac                                float64
avg_expected_yac                       float64
avg_yac_above_expectation              float64
player_gsis_id                          object
player_first_name                       object
player_last_name                        object
player_jersey

In [1044]:
wr_ng = wr_ng[(wr_ng['season'] == 2022) & (wr_ng['season_type'] == 'REG') & (wr_ng['week'] == 0)][['player_display_name', 'avg_cushion', 'avg_separation']]

In [1045]:
wr_ng.shape

(122, 3)

In [1046]:
ng_names = wr_ng[['player_display_name']]
ng_names.to_csv('/mnt/c/Data_Science/Personal_Projects/nfl_wr_knn/ng_names.csv', index=False)

In [1047]:
wr_ng.loc[wr_ng['player_display_name'] == 'D.J. Moore', 'player_display_name'] = 'DJ Moore'
wr_ng.loc[wr_ng['player_display_name'] == 'Michael Pittman', 'player_display_name'] = 'Michael Pittman Jr.'
wr_ng.loc[wr_ng['player_display_name'] == 'Josh Palmer', 'player_display_name'] = 'Joshua Palmer'
wr_ng.loc[wr_ng['player_display_name'] == 'Marvin Jones', 'player_display_name'] = 'Marvin Jones Jr.'
wr_ng.loc[wr_ng['player_display_name'] == 'D.J. Chark', 'player_display_name'] = 'DJ Chark Jr.'
wr_ng.loc[wr_ng['player_display_name'] == 'Terrace Marshall', 'player_display_name'] = 'Terrace Marshall Jr.'
wr_ng.loc[wr_ng['player_display_name'] == 'Allen Robinson', 'player_display_name'] = 'Allen Robinson II'

In [1048]:
# wr_ng['player_display_name']
wr_ng.rename(columns={'player_display_name': 'player_name'}, inplace = True)

In [1049]:
aggregate = aggregate.merge(wr_ng, how = 'left', on = 'player_name')

## Column re-ordering

In [1050]:
aggregate = aggregate[['player_name', 'player_position', 'season_year', 'player_game_count', 'receptions', 'targets', 'catch_rate', 'yards', 'att_yards', 'adot', 'yards_after_catch', 'avg_yac', 'yards_after_contact', 'avg_yacon', 'touchdown', 'drops', 'drop_rate', 'fumbles', 
                       'broken_tackles', 'grades_pass_block', 'pass_blocks', 'pass_block_rate', 'routes', 'route_rate', 'pass_plays', 'yprr', 'avg_cushion', 'avg_separation', 'contested_receptions', 'contested_targets', 'contested_catch_rate', 'weather_attempt',
                       'weather_catch', 'qb_bf_attempt', 'qb_bf_catch', 'hurry_up_attempt', 'hurry_up_catch', 'hurry_up_pct', 'possession_saver_attempt', 'possession_saver_catch', 'possession_saver_pct', 'clutch_catch', 'conversion_catch', 'redzone_catch', 'dpi_drawn', 
                       'deep_attempt', 'deep_catch', 'deep_pct', 'deep_sideline_attempt', 'deep_sideline_catch', 'deep_sideline_pct', 'large_yac_catch', 'tackle_breaker_catch', 'beast_catch', 'play_action_attempt', 'play_action_catch', 'play_action_pct', 'rpo_attempt',
                       'rpo_catch', 'rpo_pct', 'cross_attempt', 'cross_catch', 'cross_pct', 'curl_attempt', 'curl_catch', 'curl_pct', 'post_attempt', 'post_catch', 'post_pct', 'underneath_screen_attempt', 'underneath_screen_catch', 'underneath_screen_pct', 
                       'flat_attempt', 'flat_catch', 'flat_pct', 'slant_attempt', 'slant_catch', 'slant_pct', 'wr_screen_attempt', 'wr_screen_catch', 'wr_screen_pct', 'comeback_attempt', 'comeback_catch', 'comeback_pct', 'go_attempt', 'go_catch', 'go_pct', 'in_attempt', 
                       'in_catch', 'in_pct', 'inline_snaps', 'inline_rate', 'slot_snaps', 'slot_rate', 'wide_snaps', 'wide_rate', 'ht', 'wt', 'forty', 'bench', 'vertical', 'broad_jump', 'cone', 'shuttle']]

In [1051]:
aggregate.head()

Unnamed: 0,player_name,player_position,season_year,player_game_count,receptions,targets,catch_rate,yards,att_yards,adot,yards_after_catch,avg_yac,yards_after_contact,avg_yacon,touchdown,drops,drop_rate,fumbles,broken_tackles,grades_pass_block,pass_blocks,pass_block_rate,routes,route_rate,pass_plays,yprr,avg_cushion,avg_separation,contested_receptions,contested_targets,contested_catch_rate,weather_attempt,weather_catch,qb_bf_attempt,qb_bf_catch,hurry_up_attempt,hurry_up_catch,hurry_up_pct,possession_saver_attempt,possession_saver_catch,possession_saver_pct,clutch_catch,conversion_catch,redzone_catch,dpi_drawn,deep_attempt,deep_catch,deep_pct,deep_sideline_attempt,deep_sideline_catch,deep_sideline_pct,large_yac_catch,tackle_breaker_catch,beast_catch,play_action_attempt,play_action_catch,play_action_pct,rpo_attempt,rpo_catch,rpo_pct,cross_attempt,cross_catch,cross_pct,curl_attempt,curl_catch,curl_pct,post_attempt,post_catch,post_pct,underneath_screen_attempt,underneath_screen_catch,underneath_screen_pct,flat_attempt,flat_catch,flat_pct,slant_attempt,slant_catch,slant_pct,wr_screen_attempt,wr_screen_catch,wr_screen_pct,comeback_attempt,comeback_catch,comeback_pct,go_attempt,go_catch,go_pct,in_attempt,in_catch,in_pct,inline_snaps,inline_rate,slot_snaps,slot_rate,wide_snaps,wide_rate,ht,wt,forty,bench,vertical,broad_jump,cone,shuttle
0,Justin Jefferson,WR,2022.0,17.0,128.0,176.0,0.73,1809.0,1858.0,9.43,624.0,4.8,132.0,2.28,8.0,7.0,5.2,0.0,6.0,,0.0,0.0,690.0,0.94,736.0,2.62,5.43,3.09,22.0,39.0,0.56,130,86,66,40,33,23,0.7,106,64,0.6,8,27,6,4,27,16,0.59,20,11,0.55,22,6,1,44,31,0.7,4,2,0.5,5,4,0.8,18,11,0.61,12,10,0.83,0,0,,16,14,0.88,12,10,0.83,12,11,0.92,18,9,0.5,21,8,0.38,14,12,0.86,0.0,0.0,218.0,0.3,511.0,0.69,73.0,202.0,4.43,,37.5,126.0,,
1,Tyreek Hill,WR,2022.0,17.0,119.0,167.0,0.71,1710.0,2107.0,11.51,482.0,3.89,58.0,0.95,7.0,4.0,3.3,1.0,5.0,,0.0,0.0,534.0,0.94,568.0,3.2,6.38,3.31,13.0,25.0,0.52,132,88,46,33,5,5,1.0,111,71,0.64,3,22,2,4,38,20,0.53,15,10,0.67,17,4,0,69,50,0.72,25,15,0.6,16,13,0.81,18,15,0.83,19,8,0.42,0,0,,7,7,1.0,18,13,0.72,14,12,0.86,17,16,0.94,23,12,0.52,9,7,0.78,7.0,0.01,239.0,0.42,309.0,0.54,,,,,,,,
2,Davante Adams,WR,2022.0,17.0,100.0,168.0,0.6,1516.0,2129.0,11.26,493.0,4.74,95.0,1.06,14.0,6.0,5.7,1.0,9.0,66.1,1.0,0.0,618.0,0.94,657.0,2.45,5.55,2.95,15.0,34.0,0.44,91,52,68,31,9,5,0.56,100,45,0.45,4,23,4,4,36,14,0.39,20,8,0.4,16,8,1,36,23,0.64,8,6,0.75,6,6,1.0,21,12,0.57,15,7,0.47,0,0,,8,8,1.0,14,6,0.43,7,6,0.86,17,6,0.35,35,14,0.4,20,16,0.8,0.0,0.0,197.0,0.3,457.0,0.7,73.0,212.0,4.56,14.0,39.5,123.0,6.82,4.3
3,A.J. Brown,WR,2022.0,17.0,88.0,137.0,0.64,1496.0,1754.0,11.24,548.0,5.83,192.0,3.92,11.0,6.0,6.4,2.0,7.0,,0.0,0.0,578.0,0.95,611.0,2.59,5.58,2.6,15.0,30.0,0.5,75,49,56,30,24,12,0.5,84,44,0.52,2,17,4,1,28,13,0.46,21,11,0.52,18,7,4,47,27,0.57,43,32,0.74,4,4,1.0,16,10,0.62,2,1,0.5,1,1,1.0,6,5,0.83,35,26,0.74,6,5,0.83,13,6,0.46,22,9,0.41,14,8,0.57,0.0,0.0,157.0,0.26,453.0,0.74,72.0,226.0,4.49,19.0,36.5,120.0,,
4,Stefon Diggs,WR,2022.0,16.0,108.0,149.0,0.72,1429.0,1729.0,10.42,419.0,3.77,100.0,2.56,11.0,9.0,7.7,1.0,2.0,,0.0,0.0,573.0,0.94,607.0,2.49,5.36,2.83,12.0,24.0,0.5,126,81,58,35,16,12,0.75,87,55,0.63,5,17,7,3,23,12,0.52,14,8,0.57,12,2,2,46,37,0.8,26,20,0.77,5,3,0.6,20,17,0.85,12,6,0.5,2,2,1.0,15,13,0.87,22,18,0.82,9,9,1.0,14,8,0.57,16,9,0.56,10,6,0.6,0.0,0.0,207.0,0.34,400.0,0.66,72.0,195.0,4.46,,35.0,115.0,7.03,4.32


In [1052]:
aggregate.shape

(522, 104)

#### Remove non-WRs
- Although some running backs (e.e. Christian McCaffrey) and tight ends (e.g. Travis Kelce) are noteworthy pass-catchers, they are not officially wide receivers and are thus used differently. They should be excluded from this analysis.

In [1053]:
aggregate = aggregate[aggregate['player_position'] == 'WR']
aggregate.shape

(231, 104)

In [1054]:
# aggregate = aggregate[aggregate['targets'] >= 10]
aggregate.shape

(231, 104)

In [1055]:
# Find nulls
columns = aggregate.columns
field = [var for var in columns if aggregate[var].isnull().sum() != 0]
print(aggregate[field].isnull().sum())

avg_yac                    7
avg_yacon                 24
drop_rate                  6
grades_pass_block        160
avg_cushion              134
avg_separation           134
contested_catch_rate      36
hurry_up_pct              48
possession_saver_pct      17
deep_pct                  37
deep_sideline_pct         53
play_action_pct           41
rpo_pct                   63
cross_pct                 77
curl_pct                  38
post_pct                  76
underneath_screen_pct    178
flat_pct                  76
slant_pct                 56
wr_screen_pct             80
comeback_pct              73
go_pct                    42
in_pct                    64
ht                        67
wt                        69
forty                     85
bench                    128
vertical                  90
broad_jump                92
cone                     132
shuttle                  129
dtype: int64


In [1056]:
# aggregate[aggregate['play_action_pct'].isnull()]

### Integer, float, and percentage conversions

In [1058]:
int_columns = ['season_year','targets','receptions','yards','att_yards','yards_after_catch','yards_after_contact','broken_tackles', 'touchdown', 
               'weather_attempt','weather_catch','qb_bf_attempt','qb_bf_catch','clutch_catch','conversion_catch','redzone_catch','cross_attempt','cross_catch',
               'curl_attempt','curl_catch','post_attempt','post_catch','underneath_screen_attempt','underneath_screen_catch','flat_attempt','flat_catch','slant_attempt','slant_catch',
               'wr_screen_attempt','wr_screen_catch','comeback_attempt','comeback_catch','go_attempt','go_catch','in_attempt','in_catch','deep_attempt','deep_catch','large_yac_catch',
               'play_action_attempt','play_action_catch','rpo_attempt','rpo_catch','tackle_breaker_catch','beast_catch','hurry_up_attempt','hurry_up_catch','deep_sideline_attempt',
               'deep_sideline_catch','dpi_drawn','possession_saver_attempt','possession_saver_catch','player_game_count','receptions','contested_receptions','contested_targets','drops',
               'fumbles','inline_snaps','pass_blocks','pass_plays','routes','slot_snaps','wide_snaps', 'ht', 'wt']

for col in int_columns:
    aggregate[col] = aggregate[col].fillna(0)
    aggregate[col] = aggregate[col].astype(int)

In [1059]:
float_columns = ['adot', 'avg_yac', 'avg_yacon', 'cross_pct', 'curl_pct', 'post_pct', 'underneath_screen_pct', 'flat_pct', 'slant_pct', 'wr_screen_pct', 'comeback_pct', 'go_pct', 'in_pct', 'deep_pct', 
                 'play_action_pct', 'rpo_pct', 'hurry_up_pct', 'deep_sideline_pct', 'possession_saver_pct', 'contested_catch_rate', 'grades_pass_block', 'pass_block_rate', 'inline_rate', 'route_rate', 'slot_rate', 'wide_rate',
                'forty', 'bench', 'vertical', 'broad_jump', 'cone', 'shuttle', 'avg_cushion', 'avg_separation']

for col in float_columns:
    aggregate[col] = aggregate[col].fillna(0)
    aggregate[col] = aggregate[col].astype(float)

In [1060]:
pct_columns = ['catch_rate', 'cross_pct', 'curl_pct', 'post_pct', 'underneath_screen_pct', 'flat_pct', 'slant_pct', 'wr_screen_pct', 'comeback_pct', 'go_pct', 'in_pct', 'deep_pct', 'play_action_pct', 
               'rpo_pct', 'hurry_up_pct', 'deep_sideline_pct', 'possession_saver_pct', 'contested_catch_rate', 'drop_rate', 'pass_block_rate', 'inline_rate', 'route_rate', 'slot_rate', 'wide_rate']

for col in pct_columns:
    aggregate[col] = aggregate[col] * 100

In [1061]:
aggregate.head()

Unnamed: 0,player_name,player_position,season_year,player_game_count,receptions,targets,catch_rate,yards,att_yards,adot,yards_after_catch,avg_yac,yards_after_contact,avg_yacon,touchdown,drops,drop_rate,fumbles,broken_tackles,grades_pass_block,pass_blocks,pass_block_rate,routes,route_rate,pass_plays,yprr,avg_cushion,avg_separation,contested_receptions,contested_targets,contested_catch_rate,weather_attempt,weather_catch,qb_bf_attempt,qb_bf_catch,hurry_up_attempt,hurry_up_catch,hurry_up_pct,possession_saver_attempt,possession_saver_catch,possession_saver_pct,clutch_catch,conversion_catch,redzone_catch,dpi_drawn,deep_attempt,deep_catch,deep_pct,deep_sideline_attempt,deep_sideline_catch,deep_sideline_pct,large_yac_catch,tackle_breaker_catch,beast_catch,play_action_attempt,play_action_catch,play_action_pct,rpo_attempt,rpo_catch,rpo_pct,cross_attempt,cross_catch,cross_pct,curl_attempt,curl_catch,curl_pct,post_attempt,post_catch,post_pct,underneath_screen_attempt,underneath_screen_catch,underneath_screen_pct,flat_attempt,flat_catch,flat_pct,slant_attempt,slant_catch,slant_pct,wr_screen_attempt,wr_screen_catch,wr_screen_pct,comeback_attempt,comeback_catch,comeback_pct,go_attempt,go_catch,go_pct,in_attempt,in_catch,in_pct,inline_snaps,inline_rate,slot_snaps,slot_rate,wide_snaps,wide_rate,ht,wt,forty,bench,vertical,broad_jump,cone,shuttle
0,Justin Jefferson,WR,2022,17,128,176,72.73,1809,1858,9.43,624,4.8,132,2.28,8,7,520.0,0,6,0.0,0,0.0,690,93.75,736,2.62,5.43,3.09,22,39,56.41,130,86,66,40,33,23,69.7,106,64,60.38,8,27,6,4,27,16,59.26,20,11,55.0,22,6,1,44,31,70.45,4,2,50.0,5,4,80.0,18,11,61.11,12,10,83.33,0,0,0.0,16,14,87.5,12,10,83.33,12,11,91.67,18,9,50.0,21,8,38.1,14,12,85.71,0,0.0,218,29.62,511,69.43,73,202,4.43,0.0,37.5,126.0,0.0,0.0
1,Tyreek Hill,WR,2022,17,119,167,71.26,1710,2107,11.51,482,3.89,58,0.95,7,4,330.0,1,5,0.0,0,0.0,534,94.01,568,3.2,6.38,3.31,13,25,52.0,132,88,46,33,5,5,100.0,111,71,63.96,3,22,2,4,38,20,52.63,15,10,66.67,17,4,0,69,50,72.46,25,15,60.0,16,13,81.25,18,15,83.33,19,8,42.11,0,0,0.0,7,7,100.0,18,13,72.22,14,12,85.71,17,16,94.12,23,12,52.17,9,7,77.78,7,1.23,239,42.08,309,54.4,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,Davante Adams,WR,2022,17,100,168,59.52,1516,2129,11.26,493,4.74,95,1.06,14,6,570.0,1,9,66.1,1,0.15,618,94.06,657,2.45,5.55,2.95,15,34,44.12,91,52,68,31,9,5,55.56,100,45,45.0,4,23,4,4,36,14,38.89,20,8,40.0,16,8,1,36,23,63.89,8,6,75.0,6,6,100.0,21,12,57.14,15,7,46.67,0,0,0.0,8,8,100.0,14,6,42.86,7,6,85.71,17,6,35.29,35,14,40.0,20,16,80.0,0,0.0,197,29.98,457,69.56,73,212,4.56,14.0,39.5,123.0,6.82,4.3
3,A.J. Brown,WR,2022,17,88,137,64.23,1496,1754,11.24,548,5.83,192,3.92,11,6,640.0,2,7,0.0,0,0.0,578,94.6,611,2.59,5.58,2.6,15,30,50.0,75,49,56,30,24,12,50.0,84,44,52.38,2,17,4,1,28,13,46.43,21,11,52.38,18,7,4,47,27,57.45,43,32,74.42,4,4,100.0,16,10,62.5,2,1,50.0,1,1,100.0,6,5,83.33,35,26,74.29,6,5,83.33,13,6,46.15,22,9,40.91,14,8,57.14,0,0.0,157,25.7,453,74.14,72,226,4.49,19.0,36.5,120.0,0.0,0.0
4,Stefon Diggs,WR,2022,16,108,149,72.48,1429,1729,10.42,419,3.77,100,2.56,11,9,770.0,1,2,0.0,0,0.0,573,94.4,607,2.49,5.36,2.83,12,24,50.0,126,81,58,35,16,12,75.0,87,55,63.22,5,17,7,3,23,12,52.17,14,8,57.14,12,2,2,46,37,80.43,26,20,76.92,5,3,60.0,20,17,85.0,12,6,50.0,2,2,100.0,15,13,86.67,22,18,81.82,9,9,100.0,14,8,57.14,16,9,56.25,10,6,60.0,0,0.0,207,34.1,400,65.9,72,195,4.46,0.0,35.0,115.0,7.03,4.32


In [1062]:
aggregate = aggregate[~aggregate['drop_rate'].isnull()]

In [1063]:
aggregate.shape

(225, 104)

In [1064]:
# Find nulls
columns = aggregate.columns
field = [var for var in columns if aggregate[var].isnull().sum() != 0]
print(aggregate[field].isnull().sum())

Series([], dtype: float64)


In [1065]:
aggregate.columns

Index(['player_name', 'player_position', 'season_year', 'player_game_count',
       'receptions', 'targets', 'catch_rate', 'yards', 'att_yards', 'adot',
       'yards_after_catch', 'avg_yac', 'yards_after_contact', 'avg_yacon',
       'touchdown', 'drops', 'drop_rate', 'fumbles', 'broken_tackles',
       'grades_pass_block', 'pass_blocks', 'pass_block_rate', 'routes',
       'route_rate', 'pass_plays', 'yprr', 'avg_cushion', 'avg_separation',
       'contested_receptions', 'contested_targets', 'contested_catch_rate',
       'weather_attempt', 'weather_catch', 'qb_bf_attempt', 'qb_bf_catch',
       'hurry_up_attempt', 'hurry_up_catch', 'hurry_up_pct',
       'possession_saver_attempt', 'possession_saver_catch',
       'possession_saver_pct', 'clutch_catch', 'conversion_catch',
       'redzone_catch', 'dpi_drawn', 'deep_attempt', 'deep_catch', 'deep_pct',
       'deep_sideline_attempt', 'deep_sideline_catch', 'deep_sideline_pct',
       'large_yac_catch', 'tackle_breaker_catch', 'beast_c

# OBSERVATIONS
- No PBP data captures which receiver was held on a defensive holding penalty
- Seems like we can capture DPI
- Yardline needs to be fixed
  - Make a separate column based on the possession team and the side of the field, indicated by the location fields
  - Add 50 yards if the possession team alias equals the location team alias
- YFD equals yards from the goal line if `goaltogo == 1`

# PFF DATA
- Receiving grades
  - PASS: pass play snaps
  - RECV: snaps when route was run
  - FUM: fumbles
  - CTC: contested catches
  - CTT: contested targets
  - PBLK: pass blocking grade
  - SLT: snaps from the slot
  - WID: snaps from out wide
  - INL: snaps from inline
  - ADOT: average depth of target
  - DRP: drops
  - Receptions
 
- Receiving by coverage - Man vs Zone
  - Drops
  - CTC
  - CTT
  - Yards
  - TGT
  - REC
  - YDS
  - ADOT

# NFL COMBINE DATA
- player_name
- pos
- ht
- wt
- forty
- bench
- vertical
- broad_jump
- cone
- shuttle

# NFL NEXT GEN DATA
- season
- season_type
- week
  - Should be 0 for entire season
- player_display_name
- avg_cushion
- avg_separation
- avg_expected_yac

Calculate separately
- Average YAC
- % share of air yards