In [1]:
import pandas as pd


# NFL Data Breakdown

## 1st and Future - Turf

#### Database key info: 

- Injury Record:__PlayerKey__GameID__PlayKey
- PlayFile:________PlayerKey__GameID__PlayKey
- PlayerTrack:___________________________PlayKey

In [2]:
injuries = pd.read_csv("../NFL_Turf/InjuryRecord.csv")  # 105 rows
injuries.head()

Unnamed: 0,PlayerKey,GameID,PlayKey,BodyPart,Surface,DM_M1,DM_M7,DM_M28,DM_M42
0,39873,39873-4,39873-4-32,Knee,Synthetic,1,1,1,1
1,46074,46074-7,46074-7-26,Knee,Natural,1,1,0,0
2,36557,36557-1,36557-1-70,Ankle,Synthetic,1,1,1,1
3,46646,46646-3,46646-3-30,Ankle,Natural,1,0,0,0
4,43532,43532-5,43532-5-69,Ankle,Synthetic,1,1,1,1


#### injuries

- PlayerKey = Unique player ID
- GameID = Unique ID for that player's games (not in temporal order)
- PlayKey = Unique ID for that player's plays within a game (in sequential order)
- BodyPart = identifies injured body part
- Surface = playing surface at time of injury
- DM_M1 = one or more days missed due to injury
- DM_M7 = 7 or more days missed due to injury
- DM_M28 = 28 or more days due to injury
- DM_M42 = 42 or more days due to injury

In [4]:
players = pd.read_csv('../NFL_Turf/PlayerTrackData.csv')  # 76 million rows, this took 1.1 min to load
players.head()


Unnamed: 0,PlayKey,time,event,x,y,dir,dis,o,s
0,26624-1-1,0.0,huddle_start_offense,87.46,28.93,288.24,0.01,262.33,0.13
1,26624-1-1,0.1,,87.45,28.92,283.91,0.01,261.69,0.12
2,26624-1-1,0.2,,87.44,28.92,280.4,0.01,261.17,0.12
3,26624-1-1,0.3,,87.44,28.92,278.79,0.01,260.66,0.1
4,26624-1-1,0.4,,87.44,28.92,275.44,0.01,260.27,0.09


#### players

- PlayKey = index for player and game 
- time = in seconds since the start of the NGS track for the play = time index for player track
- event = play details as a function of time during the play
- x = player position along the long axis of the field in yards over time (0 - 120), so 50 yd line is at 60 and the Home Endzone is at 0
- y = player position along the short axis of the field in yards over time (0-53.3 yards)
- dir = direction - angle of player motion in degrees (0-360, where 0 degrees points to the visitor sideline, or up in the y-direction)
- dis = distance traveled from prior time point over the time index in yards
- o = orientation - the angle that the player is facing in degrees, same orientation as dir
- s = estimated speed at that particular point in time over the time index in yards per second

In [5]:
plays = pd.read_csv('../NFL_Turf/PlayList.csv') # 267,000 rows
plays.head()

Unnamed: 0,PlayerKey,GameID,PlayKey,RosterPosition,PlayerDay,PlayerGame,StadiumType,FieldType,Temperature,Weather,PlayType,PlayerGamePlay,Position,PositionGroup
0,26624,26624-1,26624-1-1,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Pass,1,QB,QB
1,26624,26624-1,26624-1-2,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Pass,2,QB,QB
2,26624,26624-1,26624-1-3,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Rush,3,QB,QB
3,26624,26624-1,26624-1-4,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Rush,4,QB,QB
4,26624,26624-1,26624-1-5,Quarterback,1,1,Outdoor,Synthetic,63,Clear and warm,Pass,5,QB,QB


#### plays

- PlayerKey (XXXX) = unique ID for player
- GameID (PlayerKey-X) = uniquely identifies player-game, not in temporal order
- PlayKey (PlayerKey-GameID-X) = uniquely identifies a player's plays within that game (in sequential order within the game)
- Roster position - player's position
- PlayerDay = int seuqnece that reflects the timeline of player's participation in games; this can be used to sequence player's participation
- PlayerGame = uniquely identifies player's games; matches last int of the GameID (not in order)
- StatiumType = open, closed, dome, etc.
- FieldType = Natural or Synthetic
- Temperature = on-field temp at the start of the game (not always provided as Domes and indoor are temp-controlled)
- Weather = description of weather (not always provided for domes)
- PlayType = pass, run, kickoff
- PlayerGamePlay = ordered index (int) denoting the running count of plays the player has participated in the game
- Position = categorical var denoting the player's position for that play - not always same as roster position
- PositionGroup = categorical var denoting the player's position group for the position held during the play

--- 

## Punt Analytics

#### Database key info: 

- Game Data:_________GameKey
- Play Information:____GameKey__PlayID
- Player Punt Data:_________________________GSISID
- Play Player Role:_____GameKey__PlayID___GSISID
- Video Review:________GameKey__PlayID___GSISID
- NGS:_________________GameKey___PlayID__GSISID

In [6]:
games = pd.read_csv("../NFL_Punt/game_data.csv") # 666 rows
games.head()

Unnamed: 0,GameKey,Season_Year,Season_Type,Week,Game_Date,Game_Day,Game_Site,Start_Time,Home_Team,HomeTeamCode,Visit_Team,VisitTeamCode,Stadium,StadiumType,Turf,GameWeather,Temperature,OutdoorWeather
0,1,2016,Pre,1,2016-08-07 00:00:00.000,Sunday,Indianapolis,20:00,Indianapolis Colts,IND,Green Bay Packers,GB,Tom Benson Hall of Fame Stadium,Outdoor,Turf,,,
1,2,2016,Pre,2,2016-08-13 00:00:00.000,Saturday,Los Angeles,17:00,Los Angeles Rams,LA,Dallas Cowboys,DAL,Los Angeles Memorial Coliseum,Outdoor,Grass,Sunny,79.0,Sunny
2,3,2016,Pre,2,2016-08-11 00:00:00.000,Thursday,Baltimore,19:30,Baltimore Ravens,BLT,Carolina Panthers,CAR,M&T Bank Stadium,Outdoor,Natural Grass,Party Cloudy,94.0,Partly Cloudy
3,4,2016,Pre,2,2016-08-12 00:00:00.000,Friday,Green Bay,19:00,Green Bay Packers,GB,Cleveland Browns,CLV,Lambeau Field,Outdoor,DD GrassMaster,,73.0,
4,5,2016,Pre,2,2016-08-11 00:00:00.000,Thursday,Chicago,19:00,Chicago Bears,CHI,Denver Broncos,DEN,Soldier Field,Outdoor,Grass,"Partly Cloudy, Chance of Rain 80%",88.0,


#### games

- Season_Year: NFL Season (YYYY)
- Season_Type: Preseason (Pre), Regular season (Reg), Post season (Post)
- GameKey: Numeric game identifier, unique across seasons (####)
- Game_Date: Date of game (MM/DD/YYYY)
- Week: Week number, defined within Season_Type (##)
- Game_Day: Day of the week (text)
- Game_Site: Hosting city (text)
- Start_Time: Time at start of game (24:00:00)
- Home_Team: Long format of hosting team (text)
- HomeTeamCode Team abbreviation (ABC)
- Visit_Team: Long format of visiting team (text)
- VisitTeamCode: Team abbreviation (ABC)
- Stadium: Stadium Name (text)
- StadiumType: Indoor, outdoor or closure type - semi categorical (text)
- Turf: Natural or type of synthetic turf (text)
- GameWeather: Description of indoor weather, same as "OutdoorWeather" for open stadiums (text)
- Temperature: Stadium temperature (Degrees F)
- OutdoorWeather: Description of outdoor weather (free-form text)

In [7]:
play_info = pd.read_csv("../NFL_Punt/play_information.csv") # 6681 rows
play_info.head()

Unnamed: 0,Season_Year,Season_Type,GameKey,Game_Date,Week,PlayID,Game_Clock,YardLine,Quarter,Play_Type,Poss_Team,Home_Team_Visit_Team,Score_Home_Visiting,PlayDescription
0,2016,Pre,2,08/13/2016,2,191,12:30,LA 47,1,Punt,LA,LA-DAL,0 - 7,"(12:30) J.Hekker punts 52 yards to DAL 1, Cent..."
1,2016,Pre,2,08/13/2016,2,1132,12:08,LA 29,2,Punt,LA,LA-DAL,7 - 21,"(12:08) J.Hekker punts 51 yards to DAL 20, Cen..."
2,2016,Pre,2,08/13/2016,2,1227,10:01,DAL 18,2,Punt,DAL,LA-DAL,7 - 21,"(10:01) C.Jones punts 40 yards to LA 42, Cente..."
3,2016,Pre,2,08/13/2016,2,1864,00:21,DAL 46,2,Punt,LA,LA-DAL,7 - 24,"(:21) J.Hekker punts 31 yards to DAL 15, Cente..."
4,2016,Pre,2,08/13/2016,2,2247,10:26,DAL 15,3,Punt,DAL,LA-DAL,14 - 24,"(10:26) M.Wile punts 40 yards to LA 45, Center..."


#### play_info

- Season_Year: NFL Season (YYYY)
- Season_Type: Preseason (Pre), Regular Season (Reg), Post Season (Post)
- GameKey: Numeric game identifier, unique across seasons (#####)
- Game_Date: Date of game (MM/DD/YYYY)
- Week: Week number, week is defined within Season_Type (##)
- PlayID: Numeric play identifier, not unique across games, requires GameKey (####)
- Game_Clock: Game clock at start of play (mm:ss)
- YardLine: Play level line of srimmage (ABC - ##)
- Quarter: First quarter, Second quarter, Third quarter, Fourth quarter, Overtime (1, 2, 3, 4, 5)
- Play_Type: Rushing / run play (Rush), Passing play (Pass), Kickoff play (Kickoff), Punt play (Punt), Extra Point (Extra Point), Field Goal (Field Goal)
- Poss_Team : Identifies team with possession (Same as "Team")
- Home_Team_Visit_Team: Abbreviation for Home - Visiting Team, use with "Score_Home_Visiting" (ABC-ABC)
- Score_Home_Visiting: Current score for Home - Visiting Team, play level game score (##-##)
- PlayDescription: Description of play events, searchable for specific events (Free form)


In [8]:
punt = pd.read_csv('../NFL_Punt/player_punt_data.csv') # 3259 rows
punt.head()

Unnamed: 0,GSISID,Number,Position
0,32069,36,SS
1,30095,11,WR
2,31586,22,FS
3,29520,35,SS
4,30517,51,OLB


#### punt

- GSISID: Unique player identification, unique across seasons (#####)
- Position: Typical player position - not punt specific (ABC)
- Number: Player jersey number (##)

In [9]:
play_player = pd.read_csv('../NFL_Punt/play_player_role_data.csv') # 146,573 rows
play_player.head()

Unnamed: 0,Season_Year,GameKey,PlayID,GSISID,Role
0,2017,414,188,33704,PDL2
1,2017,414,1107,33704,PDL2
2,2017,424,1113,33704,PDR3
3,2017,424,1454,33704,PLR2
4,2017,424,644,33704,PRG


#### play_player

- Season_Year: NFL Season (YYYY)
- GameKey: Numeric game identifier, unique across seasons (#####)
- PlayID: Numeric play identifier, not unique across games, requires Gamekey(####)
- GSISID: Unique player identification, unique across seasons (#####)
- Role: Punt specific player information (see diagram in appendix) (ABC)

In [10]:
video_review = pd.read_csv('../NFL_Punt/video_review.csv') # 37 rows
video_review.head()

Unnamed: 0,Season_Year,GameKey,PlayID,GSISID,Player_Activity_Derived,Turnover_Related,Primary_Impact_Type,Primary_Partner_GSISID,Primary_Partner_Activity_Derived,Friendly_Fire
0,2016,5,3129,31057,Tackling,No,Helmet-to-body,32482,Tackled,No
1,2016,21,2587,29343,Blocked,No,Helmet-to-helmet,31059,Blocking,No
2,2016,29,538,31023,Tackling,No,Helmet-to-body,31941,Tackled,No
3,2016,45,1212,33121,Tackling,No,Helmet-to-body,28249,Tackled,No
4,2016,54,1045,32444,Blocked,No,Helmet-to-body,31756,Blocked,Yes


#### video_review

- Season_Year: NFL Season (####)
- GameKey: Numeric game identifier, unique across seasons (#####)
- PlayID: Numeric play identifier, not unique across games, requires GameKey (####)
- GSISID: Unique player identification, unique across seasons (#####)
- Player_Activity_Derived: Player activity during primary injury causing event (6 unique values)
- Turnover_Related: Identifies concussions that were related to a turnover during the play (3 unique values)
- Primary_Partner_GSISID: Unique player identification, impacting player involved with primary helmet impact (not applicable for helmet to ground impacts) (#####)
- Primary_Partner_Activity_Derived (6 unique values)
- Friendly_Fire: Friendly fire occurs when the primary impact results from contact between two players on the same team (6 unique values)



In [8]:
# These each have 11 columns

ngs_2016_pre = pd.read_csv('../NFL_Punt/ngs-2016-pre.csv')  # 1 million rows
ngs_2016_early = pd.read_csv('../NFL_Punt/ngs-2016-reg-wk1-6.csv')  # 8.7 million rows
ngs_2016_mid = pd.read_csv('../NFL_Punt/ngs-2016-reg-wk7-12.csv')  # 8.4 million rows
ngs_2016_late = pd.read_csv('../NFL_Punt/ngs-2016-reg-wk13-17.csv')  # 7.6 million rows
ngs_2016_post = pd.read_csv('../NFL_Punt/ngs-2016-post.csv')  # 900,000 rows


  exec(code_obj, self.user_global_ns, self.user_ns)


In [None]:
# These each have 11 columns

ngs_2017_pre = pd.read_csv('../NFL_Punt/ngs-2017-pre.csv')  # 6.6 million rows
ngs_2017_early = pd.read_csv('../NFL_Punt/ngs-2017-reg-wk1-6.csv')  # 9.4 million rows
ngs_2017_mid = pd.read_csv('../NFL_Punt/ngs-2017-reg-wk7-12.csv')  # 8.6 million rows
ngs_2017_late = pd.read_csv('../NFL_Punt/ngs-2017-reg-wk13-17.csv')  # 8.3 million rows
ngs_2017_post = pd.read_csv('../NFL_Punt/ngs-2017-post.csv')  # 1 million rows


In [9]:
ngs_2016_early.head()

Unnamed: 0,Season_Year,GameKey,PlayID,GSISID,Time,x,y,dis,o,dir,Event
0,2016,66,2035,27675,2016-09-09 02:22:30.700,71.959999,30.809999,0.55,196.910004,282.0,
1,2016,66,2035,31550,2016-09-09 02:22:30.700,53.91,29.98,0.82,185.470001,275.890015,
2,2016,66,2035,30463,2016-09-09 02:22:30.700,57.77,22.26,0.71,176.639999,267.309998,
3,2016,66,2035,22824,2016-09-09 02:22:30.800,74.07,26.91,0.08,167.850006,19.540001,
4,2016,66,2035,32994,2016-09-09 02:22:30.800,56.540001,33.080002,0.86,193.350006,285.429993,


#### NGS datasets

- Season_Year: NFL Season (####)
- GameKey: Numeric game identifier, unique across seasons (#####)
- PlayID: Numeric play identifier, not unique across games, requires GameKey (####)
- GSISID: Unique player identification, unique across seasons (#####)
- Time: Time of day at start, does not equal game clock, should be set to zero at start of play (MM/DD/YYYY HH:MM:SS)
- x: Player position along the long axis of the field (yards), 0 - 120 yards (numeric)
- y: Player position along the short axis of the field (yards), 0 - 53.3 yards (numeric)
- dis: Distance traveled from prior time point, distance in yards (numeric)
- o: Orientation - angle that the player is facing (deg), 0 - 360 degrees (numeric)
- dir: Direction - angle of player motion (deg), 0 - 360 degrees (numeric)
- Event: Play details as a function of time during the play (text)

#### The Video_Footage files provide links to videos of the injuries at that time of the game

In [11]:
video_footage1 = pd.read_csv("../NFL_Punt/video_footage-control.csv")
video_footage1.head() 

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,season,Season_Type,Week,Home_team,Visit_Team,Qtr,PlayDescription,gamekey,playid,Preview Link
1,2017,Reg,2,Cincinnati Bengals,Houston Texans,1,"(14:14) S.Lechler punts 58 yards to CIN 13, Ce...",57250,124,http://a.video.nfl.com//films/vodzilla/153511/...
2,2017,Reg,2,Cincinnati Bengals,Houston Texans,1,"(12:09) K.Huber punts 58 yards to HST 3, Cente...",57250,299,http://a.video.nfl.com//films/vodzilla/153512/...
3,2017,Reg,2,Cincinnati Bengals,Houston Texans,1,"(10:19) S.Lechler punts 53 yards to CIN 37, Ce...",57250,398,http://a.video.nfl.com//films/vodzilla/153513/...
4,2017,Reg,2,Cincinnati Bengals,Houston Texans,1,"(8:23) K.Huber punts 31 yards to HST 15, Cente...",57250,526,http://a.video.nfl.com//films/vodzilla/153516/...


In [13]:
video_footage2 = pd.read_csv("../NFL_Punt/video_footage-injury.csv")
video_footage2.head()


Unnamed: 0,season,Type,Week,Home_team,Visit_Team,Qtr,PlayDescription,gamekey,playid,PREVIEW LINK (5000K)
0,2016,Pre,2,Chicago Bears,Denver Broncos,3,(3:44) (Punt formation) P.O'Donnell punts 58 y...,56840,3129,http://a.video.nfl.com//films/vodzilla/153233/...
1,2016,Pre,3,Tennessee Titans,Carolina Panthers,3,(5:52) (Punt formation) K.Redfern punts 36 yar...,56856,2587,http://a.video.nfl.com//films/vodzilla/153234/...
2,2016,Pre,3,Washington Redskins,New York Jets,1,"(4:46) L.Edwards punts 51 yards to WAS 27, Cen...",56864,538,http://a.video.nfl.com//films/vodzilla/153235/...
3,2016,Pre,4,New York Jets,New York Giants,2,"(8:29) B.Wing punts 44 yards to NYJ 10, Center...",56880,1212,http://a.video.nfl.com//films/vodzilla/153236/...
4,2016,Pre,5,Detroit Lions,Buffalo Bills,1,"(:38) C.Schmidt punts 46 yards to DET 8, Cente...",56895,905,http://a.video.nfl.com//films/vodzilla/153237/...
