In [None]:
%%HTML
<style type="text/css">
/* Layout shared by the title banner (div.h1) and section banners (div.h2). */
div.h1,
div.h2 {
    color: white;
    padding: 8px;
    padding-right: 300px;
    font-size: 35px;
    max-width: 1500px;
    margin: auto;
    margin-top: 50px;
}

/* The two banner kinds differ only in background colour. */
div.h1 {
    background-color: #eebbcb;
}

div.h2 {
    background-color: #2ca9e1;
}
</style>

# <div class="h1"> NFL Big Data Bowl</div>

## **Content**
1. [Introduction](#1)
1. [Libraries and dataset](#2)
1. [Data overview](#3)
1. [Visualization](#4)

<a id="1"></a> <br>
# <div class="h2">Introduction</div>

In this competition, our goal is to use data science to better understand the schemes and players that make for a successful defense against passing plays.

In American football, there are a plethora of defensive strategies and outcomes. The National Football League (NFL) has used previous competitions to focus on offensive plays.

But this year, we focus on defense.

We’ll employ player tracking data for all drop-back pass plays from the **2018 regular season**. The goal of submissions is to identify unique and impactful approaches to measure defensive performance on these plays. 

### What are "drop-back pass plays"?

> A drop-back pass or dropping back to pass is a passing style employed in American football in which the quarterback initially takes a three-step drop, backpedaling into the pocket to make a pass. It is the most common way of passing the ball in gridiron football. Kinds include a three-step drop, a five-step drop, and a seven-step drop.
https://en.wikipedia.org/wiki/Drop-back_pass

In [None]:
from IPython.display import YouTubeVideo

# ID of the YouTube video to embed in this cell's output.
video_id = 'UUqWQNGYGAA'
YouTubeVideo(video_id)

<a id="2"></a> <br>
# <div class="h2">Libraries and dataset</div>

In [None]:
from collections import Counter

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly as py
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True) 

import warnings
warnings.filterwarnings('ignore')

The 2021 Big Data Bowl data contains player tracking, play, game, and player level information for all possible passing plays during the 2018 regular season. 

There are four kinds of data:

- Game data

- Player data

- Play data

- Tracking data

In [None]:
# List the contents of the competition input directory.
!ls ../input/nfl-big-data-bowl-2021

Here, I'll load the game, play, and player tables, plus the Week 1 tracking data.

In [None]:
# All competition tables are read from one input directory; change it here
# to point the notebook at a different copy of the data.
DATA_DIR = "../input/nfl-big-data-bowl-2021"

df_games = pd.read_csv(f"{DATA_DIR}/games.csv")
df_plays = pd.read_csv(f"{DATA_DIR}/plays.csv")
df_players = pd.read_csv(f"{DATA_DIR}/players.csv")
# Only the first weekly tracking file is loaded in this cell.
df_week1 = pd.read_csv(f"{DATA_DIR}/week1.csv")

<a id="3"></a> <br>
# <div class="h2">Data overview</div>

Let's get an overview of the loaded data. We identify the columns and data types that exist to get a feel for what to visualize.

### Game data

In [None]:
# Print the (rows, columns) shape of the game table, then preview its first rows.
print(df_games.shape)
df_games.head()

In [None]:
# Summarize the game table's columns, non-null counts, and dtypes.
df_games.info()

### Player data

In [None]:
# Print the (rows, columns) shape of the player table, then preview its first rows.
print(df_players.shape)
df_players.head()

In [None]:
# Summarize the player table's columns, non-null counts, and dtypes.
df_players.info()

### Play data

In [None]:
# Print the (rows, columns) shape of the play table, then preview its first rows.
print(df_plays.shape)
df_plays.head()

In [None]:
# Summarize the play table's columns, non-null counts, and dtypes.
df_plays.info()

### Tracking data

Each of the 17 week[week].csv files contain player tracking data from all passing plays during Week [week] of the 2018 regular season. Nearly all plays from each [gameId] are included; certain plays or games with insufficient data are dropped. Each team and player plays no more than 1 game in a given week.

![](https://www.googleapis.com/download/storage/v1/b/kaggle-user-content/o/inbox%2F3258%2F820e86013d48faacf33b7a32a15e814c%2FIncreasing%20Dir%20and%20O.png?generation=1572285857588233&alt=media)

In [None]:
# Print the (rows, columns) shape of the Week 1 tracking table, then preview its first rows.
print(df_week1.shape)
df_week1.head()

In [None]:
# Summarize the Week 1 tracking table's columns, non-null counts, and dtypes.
df_week1.info()

<a id="4"></a> <br>
# <div class="h2">Visualization</div>

I'll visualize the four kinds of data and check their trends and characteristics.

### Game data

This table has one row for each game held in 2018.

In [None]:
# Distinct values of characters 6-9 of every gameDate string.
# NOTE(review): assumes gameDate looks like MM/DD/YYYY so this slice is the year — confirm.
{game_date[6:10] for game_date in df_games["gameDate"]}

All games were held in 2018.

In [None]:
# One pie per column: how often each team appears as the home team (left)
# and as the visiting team (right).
f, ax = plt.subplots(1, 2, figsize=(13, 5))

for axis, column in zip(ax, ["homeTeamAbbr", "visitorTeamAbbr"]):
    c = Counter(df_games[column])
    # Materialize the dict views so matplotlib receives plain sequences.
    axis.pie(list(c.values()), labels=list(c.keys()))
    axis.set_title(column)

# Render explicitly, like the other plotting cells, instead of echoing the
# repr of the last set_title() call.
plt.show()

In [None]:
f, ax = plt.subplots(1, 2, figsize=(13, 5))

# Left: number of games per value of the first two characters of gameDate.
# NOTE(review): assumes gameDate looks like MM/DD/YYYY so this is the month — confirm.
# The Series is passed as `x=` because newer seaborn releases interpret a
# positional first argument as `data`, not as the variable to count.
sns.countplot(x=df_games["gameDate"].str[:2], ax=ax[0])
ax[0].set_xlabel("Month")
ax[0].set_ylabel("Count")
ax[0].set_title('The month the game hold')

# Right: number of games per value of the week column.
sns.countplot(x=df_games["week"], ax=ax[1])
ax[1].set_xlabel("week")
ax[1].set_ylabel("Count")
ax[1].set_title('The week the game hold')

plt.show()

In [None]:
# Number of games per value of characters 3-4 of gameDate.
# NOTE(review): assumes gameDate looks like MM/DD/YYYY so this is the day of month — confirm.
fig, ax = plt.subplots(figsize=(10, 5))
# Pass the Series as `x=`; a positional first argument is read as `data`
# by newer seaborn releases.
sns.countplot(x=df_games["gameDate"].str[3:5], ax=ax)
ax.set_title('The date the game hold')
plt.show()

In [None]:
# Number of games per value of the first two characters of gameTimeEastern.
# NOTE(review): assumes gameTimeEastern looks like HH:MM:SS so this is the hour — confirm.
fig, ax = plt.subplots(figsize=(5, 5))
# Pass the Series as `x=`; a positional first argument is read as `data`
# by newer seaborn releases.
sns.countplot(x=df_games["gameTimeEastern"].str[:2], ax=ax)
ax.set_title('The Eastern time the game start')
plt.show()

### Player data

This table contains data on the players.

In [None]:
# Tally how many players list each collegeName value.
dict_collegeName = Counter(df_players["collegeName"])

# One row per college, holding its tally in a single "Number" column.
df_collegeName = pd.DataFrame(
    list(dict_collegeName.values()),
    index=list(dict_collegeName.keys()),
    columns=["Number"],
)
df_collegeName = df_collegeName.sort_values('Number', ascending=False)

# Show the ten largest tallies.
df_collegeName.head(10)

In [None]:
# Inspired by https://www.kaggle.com/fatihbilgin/nfl-big-data-visualization
#
# Parse the height column into feet/inches/centimetres without modifying the
# original column, so the cell can be re-run safely.
# A "feet-inches" value such as "6-2" is split on the dash. A value with no
# dash used to be read as whole feet, so e.g. "72" became 72 ft (~2194 cm).
# NOTE(review): dash-less values of 12 or more are now treated as total
# inches; confirm against players.csv. Dash-less values below 12 are still
# read as whole feet, as before.
height_text = df_players["height"].astype(str).str.strip()
feet_and_inches = height_text.str.extract(r"^(\d+)-(\d+)$").astype(float)
bare_number = pd.to_numeric(height_text, errors="coerce")
bare_as_inches = bare_number.where(bare_number >= 12, bare_number * 12)
total_inches = (feet_and_inches[0] * 12 + feet_and_inches[1]).fillna(bare_as_inches)

# astype(int) still raises on an unparseable/missing height, as before.
df_players["HeightFt"] = (total_inches // 12).astype(int)
df_players["HeightIn"] = (total_inches % 12).astype(int)
df_players["HeightCm"] = df_players["HeightFt"]*30.48 + df_players["HeightIn"]*2.54

df_players["WeightKg"] = df_players["weight"]*0.45359237

# Count players per height using a normalized "ft-in" label, so the same
# height written in two formats falls into a single group.
height_label = df_players["HeightFt"].astype(str) + "-" + df_players["HeightIn"].astype(str)
df_height = (
    df_players.assign(height=height_label)
    .groupby(['height', 'HeightFt', 'HeightIn'])
    .size()
    .reset_index(name="Count")
    .sort_values(["HeightFt", "HeightIn"])
)

In [None]:
f, ax = plt.subplots(1, 2, figsize=(13, 5))

# Left: bar chart of the player count for each height label in df_height.
df_height.loc[:, ["height", "Count"]].set_index("height").plot(ax=ax[0], color='green', kind='bar')
ax[0].set_xlabel("Height")
ax[0].set_ylabel("Count")
ax[0].get_legend().remove()
ax[0].set_title('Player Height (ft-in)')

# Right: distribution of the weight column. The target axes is passed
# explicitly instead of relying on whichever axes happens to be current.
sns.distplot(df_players["weight"], ax=ax[1])
ax[1].set_xlabel("Weight")
ax[1].set_title('Player Weight (lbs)')

plt.show()

### Play data

This table contains data for each play.

In [None]:
# Preview the first rows of the play table again before plotting its columns.
df_plays.head()

In [None]:
# Number of plays per quarter (left) and per down (right).
f, ax = plt.subplots(1, 2, figsize=(13, 5))

# Pass each Series as `x=`; a positional first argument is read as `data`
# by newer seaborn releases.
sns.countplot(x=df_plays["quarter"], ax=ax[0])
# The previous title ("o distribution") was copied from another cell.
ax[0].set_title("quarter distribution")

sns.countplot(x=df_plays["down"], ax=ax[1])
ax[1].set_title("down distribution")

plt.show()

In [None]:
# Number of plays per yardsToGo value (left) and per possessionTeam (right).
f, ax = plt.subplots(1, 2, figsize=(13, 5))

# Pass each Series as `x=`; a positional first argument is read as `data`
# by newer seaborn releases. countplot returns the Axes it drew on.
g1 = sns.countplot(x=df_plays["yardsToGo"], ax=ax[0])
g2 = sns.countplot(x=df_plays["possessionTeam"], ax=ax[1])

# Turn the x tick labels vertical on both panels.
for axis in (g1, g2):
    axis.tick_params(axis="x", labelrotation=90)

plt.show()

In [None]:
# Number of plays per yardlineSide (left) and per yardlineNumber (right).
f, ax = plt.subplots(1, 2, figsize=(20, 5))

# Pass each Series as `x=`; a positional first argument is read as `data`
# by newer seaborn releases. countplot returns the Axes it drew on.
g1 = sns.countplot(x=df_plays["yardlineSide"], ax=ax[0])
g2 = sns.countplot(x=df_plays["yardlineNumber"], ax=ax[1])

# Turn the x tick labels vertical on both panels.
for axis in (g1, g2):
    axis.tick_params(axis="x", labelrotation=90)

plt.show()

In [None]:
# Number of plays per offenseFormation (left) and per defendersInTheBox (right).
f, ax = plt.subplots(1, 2, figsize=(13, 5))

# Pass each Series as `x=`; a positional first argument is read as `data`
# by newer seaborn releases. countplot returns the Axes it drew on.
g1 = sns.countplot(x=df_plays["offenseFormation"], ax=ax[0])
g2 = sns.countplot(x=df_plays["defendersInTheBox"], ax=ax[1])

# Turn the x tick labels vertical on both panels.
for axis in (g1, g2):
    axis.tick_params(axis="x", labelrotation=90)

plt.show()

In [None]:
# Number of plays per numberOfPassRushers (left) and per typeDropback (right).
f, ax = plt.subplots(1, 2, figsize=(13, 5))

# Pass each Series as `x=`; a positional first argument is read as `data`
# by newer seaborn releases. countplot returns the Axes it drew on.
g1 = sns.countplot(x=df_plays["numberOfPassRushers"], ax=ax[0])
g2 = sns.countplot(x=df_plays["typeDropback"], ax=ax[1])

# Turn the x tick labels vertical on both panels.
for axis in (g1, g2):
    axis.tick_params(axis="x", labelrotation=90)

plt.show()

In [None]:
# Number of plays per preSnapHomeScore (left) and per preSnapVisitorScore (right).
f, ax = plt.subplots(1, 2, figsize=(20, 5))

# Pass each Series as `x=`; a positional first argument is read as `data`
# by newer seaborn releases. countplot returns the Axes it drew on.
g1 = sns.countplot(x=df_plays["preSnapHomeScore"], ax=ax[0])
g2 = sns.countplot(x=df_plays["preSnapVisitorScore"], ax=ax[1])

# Turn the x tick labels vertical on both panels.
for axis in (g1, g2):
    axis.tick_params(axis="x", labelrotation=90)

plt.show()

In [None]:
# Number of plays per passResult value.
fig, ax = plt.subplots(figsize=(10, 5))
# Draw on the axes created above explicitly, and pass the Series as `x=`
# (a positional first argument is read as `data` by newer seaborn releases).
sns.countplot(x=df_plays["passResult"], ax=ax)
# Render explicitly, like the other plotting cells, instead of echoing the Axes repr.
plt.show()

In [None]:
# Bar chart of how often each offensePlayResult value occurs.
c = Counter(df_plays["offensePlayResult"])
# The counts are passed as plain lists and without a data_frame: the bars come
# from the aggregated Counter, not from the rows of df_plays.
fig = px.bar(
    x=list(c.keys()),
    y=list(c.values()),
    labels={"x": "offensePlayResult", "y": "Count"},
)
fig.show()

In [None]:
# Bar chart of how often each playResult value occurs.
c = Counter(df_plays["playResult"])
# The counts are passed as plain lists and without a data_frame: the bars come
# from the aggregated Counter, not from the rows of df_plays.
fig = px.bar(
    x=list(c.keys()),
    y=list(c.values()),
    labels={"x": "playResult", "y": "Count"},
)
fig.show()

### Tracking data

In [None]:
# Scatter every tracking row's x value against its y value.
x_coords = df_week1["x"]
y_coords = df_week1["y"]

fig = px.scatter(x=x_coords, y=y_coords)
fig.show()

In [None]:
# Distributions of the s, a and dis tracking columns, one panel each.
# The previous axis labels and titles ("Height", "Weight", "Player Height
# (ft-in)", ...) were copied from the player height/weight cell and did not
# describe these columns; they now follow the "<column> distribution"
# convention used for the o/dir plots.
f, ax = plt.subplots(1, 3, figsize=(13, 5))

for axis, column in zip(ax, ["s", "a", "dis"]):
    sns.distplot(df_week1[column], ax=axis)
    axis.set_xlabel(column)
    axis.set_title(f"{column} distribution")

# distplot draws a density-normalized histogram when a KDE is overlaid (its
# default), so the y axis is a density rather than a count.
ax[0].set_ylabel("Density")

plt.show()

In [None]:
# Distributions of the o and dir tracking columns, side by side.
f, ax = plt.subplots(1, 2, figsize=(13, 5))

panels = [("o", "o distribution"), ("dir", 'dir distribution')]
for axis, (column, title) in zip(ax, panels):
    sns.distplot(df_week1[column], ax=axis)
    axis.set_title(title)

# Only the right-hand panel sets its x label explicitly.
ax[1].set_xlabel("dir")

plt.show()

In [None]:
# Number of tracking rows per event (left) and per position (right).
f, ax = plt.subplots(1, 2, figsize=(13, 5))

# Pass each Series as `x=`; a positional first argument is read as `data`
# by newer seaborn releases. countplot returns the Axes it drew on.
g = sns.countplot(x=df_week1["event"], ax=ax[0])
ax[0].set_title("event distribution")
# Turn the event labels vertical.
g.tick_params(axis="x", labelrotation=90)

sns.countplot(x=df_week1["position"], ax=ax[1])
# The previous title ("o distribution") was copied from another cell.
ax[1].set_title("position distribution")

plt.show()

In [None]:
# Number of tracking rows per playDirection (left) and per route (right).
f, ax = plt.subplots(1, 2, figsize=(13, 5))

# Pass each Series as `x=`; a positional first argument is read as `data`
# by newer seaborn releases. countplot returns the Axes it drew on.
g1 = sns.countplot(x=df_week1["playDirection"], ax=ax[0])
ax[0].set_title("playDirection distribution")

g2 = sns.countplot(x=df_week1["route"], ax=ax[1])
# The previous title ("o distribution") was copied from another cell.
ax[1].set_title("route distribution")
# Rotate this plot's own tick labels. The earlier code took the labels from
# `g`, a variable left over from a different cell, which relabelled the route
# axis with another plot's categories.
g2.tick_params(axis="x", labelrotation=90)

plt.show()

# Updating...

## If you like, please Upvote😹

## Reference

1. https://www.kaggle.com/fatihbilgin/nfl-big-data-visualization