# Import Required Libraries

In [None]:
# Data Manipulation libraries:
import numpy as np
import pandas as pd
from copy import deepcopy

# Plotting libraries
import mplsoccer
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import matplotlib.patheffects as path_effects

from highlight_text import fig_text
from plotly.subplots import make_subplots

In [None]:
!pip install highlight_text

# Load the Data

In [None]:
# Event-level data for the tournament.
# low_memory=False makes pandas read the file in one pass so mixed-type
# columns get a single inferred dtype.
eventsPath = "../data/matchwise_events_data_WC18.csv"
eventDataWC18 = pd.read_csv(eventsPath, low_memory=False)

In [None]:
# Match-level metadata for the tournament (scores, teams, dates are read
# from it further down).
seasonMetaPath = "../data/season_matchwise_data_WC18.csv"
seasonMetaDataWC18 = pd.read_csv(seasonMetaPath, low_memory=False)

In [None]:
# Raise the notebook display limits so wide / long frames are not truncated.
for optionName, optionLimit in (("display.max_columns", 50),
                                ("display.max_rows", 100)):
    pd.set_option(optionName, optionLimit)

# Feature Engineering

In [None]:
eventDataWC18.shape

In [None]:
seasonMetaDataWC18.shape

## `startX`, `startY`

In [None]:
# Start location for any action:
# Start location for any action.
# "location" is a stringified coordinate list (presumably "[x, y]" — confirm
# against the raw file). Both brackets are stripped *before* splitting so the
# y value is never truncated when a location carries more than two
# components, the split is performed once, and each column is converted with
# a single vectorised pd.to_numeric call instead of one call per row.
locationParts = eventDataWC18["location"]\
    .str.strip("[]").str.split(", ", expand=True)
eventDataWC18["startX"] = pd.to_numeric(locationParts[0])
eventDataWC18["startY"] = pd.to_numeric(locationParts[1])

## `winner` Column

In [None]:
seasonMetaDataWC18.columns

In [None]:
# Label every match with the name of the side that scored more goals, or
# "Draw" when the scores are level.
# NOTE(review): a match decided after level scores (e.g. by a shoot-out)
# would be labelled "Draw" here — confirm this is intended.
homeScore = seasonMetaDataWC18["home_score"]
awayScore = seasonMetaDataWC18["away_score"]
homeWinCond = homeScore > awayScore
awayWinCond = homeScore < awayScore

awayOrDraw = np.where(awayWinCond,
                      seasonMetaDataWC18["away_team.away_team_name"],
                      "Draw")
seasonMetaDataWC18["winner"] = np.where(homeWinCond,
                                        seasonMetaDataWC18["home_team.home_team_name"],
                                        awayOrDraw)

In [None]:
seasonMetaDataWC18.head()

## `started` Column

In [None]:
from ast import literal_eval

In [None]:
""" Get the Line-up Data """
# Build one row per (match, starting player).
# For each match, the rows with type.id == 35 carry a stringified list of
# player dicts in "tactics.lineup"; the first two such rows are used
# (presumably one per team — confirm against the data).
# Frames are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0, and appending inside the loop re-copied the
# accumulated frame on every iteration.
lineUpFrames = []
for m_id in eventDataWC18["match_id"].unique():
    print(m_id, "\n")
    match_df = eventDataWC18[(eventDataWC18["match_id"] == m_id)
                             & (eventDataWC18["type.id"] == 35)]["tactics.lineup"].apply(literal_eval)

    df1 = pd.json_normalize(match_df.iloc[0])
    df2 = pd.json_normalize(match_df.iloc[1])

    # Original row labels are kept (no ignore_index), as append() did.
    df = pd.concat([df1, df2])
    df.insert(0, "match_id", m_id)

    df["started"] = "Yes"

    lineUpFrames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
finalLineUpdf = pd.concat(lineUpFrames) if lineUpFrames else pd.DataFrame()

In [None]:
df.columns

In [None]:
""" Merge Line-up data with Events Data """
# Left join keeps every event; events of players absent from the line-up
# frame get NaN in "jersey_number" and "started".
lineUpCols = ['match_id', 'player.id', 'jersey_number', 'started']
eventDataWC18 = eventDataWC18.merge(finalLineUpdf[lineUpCols],
                                    how="left",
                                    on=['match_id', 'player.id'])

In [None]:
eventDataWC18.shape

In [None]:
# Sanity check: distinct counts of starters per match
# (one row per player per match after de-duplication).
playersPerMatch = eventDataWC18.drop_duplicates(subset=["match_id", "player.id"])
playersPerMatch.groupby(["match_id"]).apply(lambda x: (x["started"] == "Yes").sum()).unique()

## `minsPlayed` Column

In [None]:
""" Compute the minute when the sub came on to the pitch  """

# Sub-in data: rows that name a replacement player. The replacement's id and
# the event minute are renamed so they can be joined onto that player's own
# events (a chained rename avoids mutating a derived frame in place).
subInData = (
    eventDataWC18[["match_id", "substitution.replacement.id", "minute"]]
    .dropna(subset=["substitution.replacement.id"])
    .rename(columns={"substitution.replacement.id": "subID",
                     "minute": "minMinute"})
)
# Attach the entry minute to every event of the player who came on:
eventDataWC18 = eventDataWC18.merge(subInData, how="left",
                                    left_on=["match_id", "player.id"],
                                    right_on=["match_id", "subID"])

In [None]:
""" Compute the minute when the sub left the pitch  """

# Sub-out data: rows with a substitution outcome; "player.id" on those rows
# is treated as the player leaving, and the event minute becomes "maxMinute".
isSubstitution = eventDataWC18['substitution.outcome.id'].notnull()
subOutData = eventDataWC18.loc[isSubstitution,
                               ["match_id", "player.id", "minute"]]\
    .rename(columns={"minute": "maxMinute"})
# Attach the exit minute to every event of the player who went off:
eventDataWC18 = eventDataWC18.merge(subOutData,
                                    how="left",
                                    on=["match_id", "player.id"])

In [None]:
""" Computing minimum and maximum value of mins for every player """

# Last recorded minute of each match, broadcast to every event of that match.
eventDataWC18["maxMinsMatch"] = \
    eventDataWC18.groupby(["match_id"])["minute"].transform("max")

# Starters are on the pitch from minute 0; everyone else keeps the sub-in
# minute (NaN when the player never came on).
startedMask = eventDataWC18["started"] == "Yes"
eventDataWC18["minMinute"] = eventDataWC18["minMinute"].mask(startedMask, 0)

# Players who were never substituted off stay until the last minute.
eventDataWC18["maxMinute"] = \
    eventDataWC18["maxMinute"].fillna(eventDataWC18["maxMinsMatch"])

In [1]:
""" Compute Final total Mins Played for every player in every match """

# Minutes on the pitch = exit minute minus entry minute.
eventDataWC18["minsPlayed"] = eventDataWC18["maxMinute"] - eventDataWC18["minMinute"]

eventDataWC18.head(10)

NameError: name 'eventDataWC18' is not defined

## Merge Season Meta data to Event Data

In [None]:
seasonMetaDataWC18.head()

In [None]:
seasonMetaDataWC18.info()

In [None]:
seasonMetaDataWC18["match_date"].sort_values()

In [None]:
pd.to_datetime(seasonMetaDataWC18["match_date"])

In [None]:
# Parse the date strings so that sorting below is chronological.
parsedMatchDates = pd.to_datetime(seasonMetaDataWC18["match_date"])
seasonMetaDataWC18["match_date"] = parsedMatchDates

In [None]:
seasonMetaDataWC18["match_date"].sort_values()

In [None]:
seasonMetaDataWC18.sort_values(["match_date"])

In [None]:
# Order matches by date, in place; the match numbering below relies on it.
seasonMetaDataWC18.sort_values(by="match_date", inplace=True)

In [None]:
(seasonMetaDataWC18["match_id"].astype(str) + seasonMetaDataWC18["match_date"].astype(str))

In [None]:
# One key string per match: match id followed by match date.
matchIdStr = seasonMetaDataWC18["match_id"].astype(str)
matchDateStr = seasonMetaDataWC18["match_date"].astype(str)
uniqueStr = matchIdStr + matchDateStr

In [None]:
pd.factorize(["a", "b", "CA"])[0]

In [None]:
pd.factorize(uniqueStr)[0] + 1

In [None]:
# factorize numbers keys in order of first appearance (0-based); +1 makes the
# match number start at 1.
matchCodes, _ = pd.factorize(uniqueStr)
seasonMetaDataWC18["match_no"] = matchCodes + 1

In [None]:
# Attach match-level metadata to every event. Columns that exist in both
# frames keep their event-side name; the metadata copy gets a "_meta" suffix.
eventDataWC18Merged = eventDataWC18.merge(seasonMetaDataWC18,
                                          how="left",
                                          on=["match_id"],
                                          suffixes=("", "_meta"))

# Road to the Final

In [None]:
seasonMetaDataWC18["winner"]

In [None]:
sum(seasonMetaDataWC18["winner"] == "France")

In [None]:
pd.get_dummies(seasonMetaDataWC18["winner"])

In [None]:
pd.get_dummies(seasonMetaDataWC18["winner"]).cumsum()

In [None]:
# Running number of wins per team (plus a "Draw" column), in the current row
# order of the metadata frame, appended as extra columns.
winsToDate = pd.get_dummies(seasonMetaDataWC18["winner"]).cumsum()
updatedSeasonData = pd.concat([seasonMetaDataWC18, winsToDate], axis=1)

In [None]:
updatedSeasonData


In [None]:
pd.get_dummies(seasonMetaDataWC18["winner"]).columns

In [None]:
# Every distinct value of "winner" (team names and "Draw").
winnerDummies = pd.get_dummies(seasonMetaDataWC18["winner"])
teamNamesList = winnerDummies.columns.tolist()

In [None]:
updatedSeasonData["Argentina"]

In [None]:
fig = go.Figure()

# One cumulative-wins line per team; the "Draw" column is not a team.
for team in teamNamesList:
    if team != "Draw":
        winsTrace = go.Scatter(x=updatedSeasonData["match_date"],
                               y=updatedSeasonData[team],
                               name=team)
        fig.add_trace(winsTrace)
fig.update_layout(width=900, height=700,
                  plot_bgcolor='#383838',
                  xaxis={"tickmode": "linear"},
                  xaxis_showgrid=False, yaxis_showgrid=False)

# Attacking Analysis | Croatia

## Shot Maps

In [None]:
eventDataWC18Merged[["team.id", "team.name"]].drop_duplicates()

In [None]:
# "team.id" value used for Croatia throughout the notebook
# (presumably read off the team.id / team.name table displayed above).
croatiaTeamid = 785

In [None]:
# All shot events (type.id == 16) taken by Croatia.
isShot = eventDataWC18Merged["type.id"] == 16
isCroatia = eventDataWC18Merged["team.id"] == croatiaTeamid
croatiaShotData = eventDataWC18Merged[isShot & isCroatia]

In [None]:
""" Shot Distribution of Croatia """
colormap = "seismic"

# Vertical half-pitch, three panels side by side: KDE, hexbin, raw scatter.
pitch = mplsoccer.VerticalPitch(line_color='white',
                                line_zorder=2,
                                pitch_color='#101010',
                                half=True,
                                pad_bottom=10)
fig, ax = pitch.draw(nrows=1, ncols=3, figsize=(14, 25))
# Panel 1 - kernel density of shot locations.
# fill=True replaces seaborn's deprecated shade=True keyword.
kdeplot = pitch.kdeplot(croatiaShotData["startX"],
                        croatiaShotData["startY"],
                        ax=ax[0], fill=True,
                        levels=100, cmap=colormap)
# Panel 2 - shot counts binned into hexagons.
hexmap = pitch.hexbin(croatiaShotData["startX"],
                      croatiaShotData["startY"],
                      ax=ax[1], edgecolors='#f4f4f4',
                      gridsize=(15, 15), cmap=colormap)
# Panel 3 - one marker per shot.
scatter = pitch.scatter(croatiaShotData["startX"],
                        croatiaShotData["startY"],
                        ax=ax[2],
                        s=50, c="white", edgecolors="red")

NOTE: To learn more about colormaps in matplotlib, see the [matplotlib colormap documentation](https://matplotlib.org/stable/tutorials/colors/colormaps.html).

In [None]:
eventDataWC18Merged["period"].unique()

In [None]:
""" Shot Data excluding Penalty Shoot-outs """
# Croatia's shots (type.id == 16) outside period 5, i.e. without the
# penalty shoot-outs.
isShot = eventDataWC18Merged["type.id"] == 16
isCroatia = eventDataWC18Merged["team.id"] == croatiaTeamid
notShootOut = eventDataWC18Merged["period"] != 5
croatiaShotData = eventDataWC18Merged[isShot & isCroatia & notShootOut]

In [None]:
""" Shot Distribution excluding Penalty Shoot-outs """
colormap = "seismic"

# Vertical half-pitch, three panels side by side: KDE, hexbin, raw scatter.
pitch = mplsoccer.VerticalPitch(line_color='white',
                                line_zorder=2,
                                pitch_color='#101010',
                                half=True,
                                pad_bottom=10)
fig, ax = pitch.draw(nrows=1, ncols=3, figsize=(14, 20))
# Panel 1 - kernel density of shot locations.
# fill=True replaces seaborn's deprecated shade=True keyword.
kdeplot = pitch.kdeplot(croatiaShotData["startX"],
                        croatiaShotData["startY"],
                        ax=ax[0], fill=True,
                        levels=100, cmap=colormap)
# Panel 2 - shot counts binned into hexagons.
hexmap = pitch.hexbin(croatiaShotData["startX"], croatiaShotData["startY"],
                      ax=ax[1], edgecolors='#f4f4f4',
                      gridsize=(15, 15), cmap=colormap)
# Panel 3 - one marker per shot.
scatter = pitch.scatter(croatiaShotData["startX"], croatiaShotData["startY"],
                        ax=ax[2],
                        s=30, c="white", edgecolors="red")

In [None]:
croatiaShotData["match_id"].nunique()

In [None]:
seasonMetaDataWC18.head()

In [None]:
""" Match-wise Shot Distribution Maps """
colormap = "bwr"

pitch = mplsoccer.VerticalPitch(line_color='white',
                                pitch_color='#101010',
                                half=True,
                                line_zorder=2)
fig, axs = pitch.grid(nrows=2, ncols=4, space=0.1, figheight=40,
                      title_height=0, endnote_height=0,
                      grid_width=0.9, grid_height=0.98,
                      bottom=0.01, left=0.05)
# Matches in playing order; panels beyond the number of matches stay empty.
matchIDs = croatiaShotData.sort_values("match_no")["match_id"].unique()
for ax, matchID in zip(axs['pitch'].flat, matchIDs):
    # Shots of the match shown in this panel:
    matchShotsData = croatiaShotData[croatiaShotData["match_id"] == matchID]
    # Hexbin of the shot locations:
    hexmap = pitch.hexbin(matchShotsData["startX"],
                          matchShotsData["startY"],
                          ax=ax, edgecolors='#f4f4f4',
                          gridsize=(15, 15), cmap=colormap)
    # Panel title "<match number> | <stage>"; .item() requires exactly one
    # distinct value per match.
    matchNo = matchShotsData["match_no"].unique().item()
    matchStage = matchShotsData["competition_stage.name"].unique().item()
    ax.set_title(str(matchNo) + " | " + matchStage,
                 fontsize=100)

In [1]:
""" Match-wise Shot Distribution Maps (goals and non-goal shots separated)"""
colormap = "bwr"

pitch = mplsoccer.VerticalPitch(line_color='white',
                                pitch_color='#101010',
                                half=True,
                                line_zorder=2)
fig, axs = pitch.grid(nrows=4, ncols=2, space=0.1, figheight=40,
                      title_height=0, endnote_height=0,
                      grid_width=0.9, grid_height=0.98,
                      bottom=0.01, left=0.05)
# Matches in playing order; panels beyond the number of matches stay empty.
matchIDs = croatiaShotData.sort_values("match_no")["match_id"].unique()
for ax, matchID in zip(axs['pitch'].flat, matchIDs):
    # Shots of the match shown in this panel, split by outcome
    # (shot.outcome.id == 97 is plotted as a goal):
    matchShotsData = croatiaShotData[croatiaShotData["match_id"] == matchID]
    isGoal = matchShotsData["shot.outcome.id"] == 97
    ngplayerData = matchShotsData[~isGoal]
    gplayerData = matchShotsData[isGoal]

    # Non-goals: red squares, marker area scaled by xG (with a minimum size).
    sc = pitch.scatter(ngplayerData["startX"], ngplayerData["startY"],
                       s=(ngplayerData["shot.statsbomb_xg"] * 5000) + 100,
                       c="red",
                       edgecolors='#ffffff',  # white marker border
                       marker="s",
                       ax=ax)
    # Goals: football markers, same xG-based scaling.
    sc2 = pitch.scatter(gplayerData["startX"], gplayerData["startY"],
                        s=(gplayerData["shot.statsbomb_xg"] * 5000) + 100,
                        c="blue",
                        edgecolors='#383838',  # charcoal marker border
                        marker="football",
                        ax=ax)
    # Panel title "<match number> | <stage>":
    matchNo = matchShotsData["match_no"].unique().item()
    matchStage = matchShotsData["competition_stage.name"].unique().item()
    ax.set_title(str(matchNo) + " | " + matchStage,
                 fontsize=50)

NameError: name 'mplsoccer' is not defined

# Defensive Analysis | Croatia

## Def Action Map

In [None]:
""" Def Action Data """
# Croatia's defensive events, selected by event type id: 9 = clearance,
# 10 = interception, 22 = foul committed; 4 is presumably duels (confirm
# against the event spec).
isDefAction = eventDataWC18Merged["type.id"].isin([4, 9, 10, 22])
isCroatia = eventDataWC18Merged["team.id"] == croatiaTeamid
croatiaDefActionData = eventDataWC18Merged[isDefAction & isCroatia]

In [None]:
colormap = "seismic"

# Full vertical pitch, three panels side by side: KDE, hexbin, raw scatter.
pitch = mplsoccer.VerticalPitch(line_color='white',
                                line_zorder=2,
                                pitch_color='#101010',
                                pad_bottom=10)
fig, ax = pitch.draw(nrows=1, ncols=3, figsize=(14, 25))
# Panel 1 - kernel density of defensive-action locations.
# fill=True replaces seaborn's deprecated shade=True keyword.
kdeplot = pitch.kdeplot(croatiaDefActionData["startX"],
                        croatiaDefActionData["startY"],
                        ax=ax[0], fill=True,
                        levels=100, cmap=colormap)
# Panel 2 - coarse hexbin counts.
hexmap = pitch.hexbin(croatiaDefActionData["startX"],
                      croatiaDefActionData["startY"],
                      ax=ax[1], edgecolors='#f4f4f4',
                      gridsize=(3, 3), cmap=colormap)
# Panel 3 - one marker per action.
scatter = pitch.scatter(croatiaDefActionData["startX"],
                        croatiaDefActionData["startY"],
                        ax=ax[2],
                        s=20, c="white", edgecolors="red")

In [None]:
""" DefAction Data excluding Clearances """
# Same selection as before but without type id 9 (clearances).
isDefAction = eventDataWC18Merged["type.id"].isin([4, 10, 22])
isCroatia = eventDataWC18Merged["team.id"] == croatiaTeamid
croatiaDefActionData = eventDataWC18Merged[isDefAction & isCroatia]

In [None]:
colormap = "seismic"

# Full vertical pitch, three panels side by side: KDE, hexbin, raw scatter.
pitch = mplsoccer.VerticalPitch(line_color='white',
                                line_zorder=2,
                                pitch_color='#101010',
                                pad_bottom=10)
fig, ax = pitch.draw(nrows=1, ncols=3, figsize=(14, 20))
# Panel 1 - kernel density of defensive-action locations.
# fill=True replaces seaborn's deprecated shade=True keyword.
kdeplot = pitch.kdeplot(croatiaDefActionData["startX"],
                        croatiaDefActionData["startY"],
                        ax=ax[0], fill=True,
                        levels=100, cmap=colormap)
# Panel 2 - hexbin counts.
hexmap = pitch.hexbin(croatiaDefActionData["startX"], croatiaDefActionData["startY"],
                      ax=ax[1], edgecolors='#f4f4f4',
                      gridsize=(5, 5), cmap=colormap)
# Panel 3 - one marker per action.
scatter = pitch.scatter(croatiaDefActionData["startX"], croatiaDefActionData["startY"],
                        ax=ax[2],
                        s=20, c="white", edgecolors="red")

In [None]:
""" Match-wise Def Actions Distribution Maps """
colormap = "bwr"

pitch = mplsoccer.VerticalPitch(line_color='white',
                                pitch_color='#101010',
                                line_zorder=2)
fig, axs = pitch.grid(nrows=2, ncols=4, space=0.1, figheight=40,
                      title_height=0, endnote_height=0,
                      grid_width=0.9, grid_height=0.98,
                      bottom=0.01, left=0.05)
# Matches in playing order; panels beyond the number of matches stay empty.
matchIDs = croatiaDefActionData.sort_values("match_no")["match_id"].unique()
for ax, matchID in zip(axs['pitch'].flat, matchIDs):
    # Defensive actions of the match shown in this panel:
    matchDefData = croatiaDefActionData[croatiaDefActionData["match_id"] == matchID]
    # Hexbin of the defensive-action locations:
    hexmap = pitch.hexbin(matchDefData["startX"],
                          matchDefData["startY"],
                          ax=ax, edgecolors='#f4f4f4',
                          gridsize=(5, 5), cmap=colormap)
    # Panel title "<match number> | <stage>":
    matchNo = matchDefData["match_no"].unique().item()
    matchStage = matchDefData["competition_stage.name"].unique().item()
    ax.set_title(str(matchNo) + " | " + matchStage,
                 fontsize=100)

## Pressure Map

In [None]:
""" Pressure Data """
# Croatia's pressure events (type.id == 17).
isPressure = eventDataWC18Merged["type.id"] == 17
isCroatia = eventDataWC18Merged["team.id"] == croatiaTeamid
croatiaPressureData = eventDataWC18Merged[isPressure & isCroatia]

In [None]:
""" Distribution of Pressure on a Pitch Map """

# Set the Text Effects: a black outline keeps labels legible on any cell colour.
path_eff = [path_effects.Stroke(linewidth=3, foreground='black'),
            path_effects.Normal()]

# Distribution of Pressure on a Pitch Map
# (Horizontal, Vertical and Full)

# Set the Pitch Parameters:
pitch = mplsoccer.VerticalPitch(pitch_color='#101010', line_zorder=2, line_color='#ffffff')
# Draw the pitch according to the set Pitch Parameters:
fig, axs = pitch.grid(nrows=1, ncols=3, title_height=0.08,
                      axis=False)

# Figure title: drawn once. It used to be re-drawn inside the loop, stacking
# three identical text artists on top of each other.
axs['title'].text(0.5, 0.5, "Positional Pressure Maps", color='#dee6ea',
                  va='center', ha='center', path_effects=path_eff,
                  fontsize=25)

pitchPos = ["horizontal", "vertical", "full"]
for ax, pos in zip(axs["pitch"], pitchPos):
    # Share of pressure events falling in each positional zone:
    bin_statistic = pitch.bin_statistic_positional(croatiaPressureData["startX"],
                                                   croatiaPressureData["startY"],
                                                   statistic='count',
                                                   positional=pos,
                                                   normalize=True)
    # Plot the Heatmap according to the positions selected above
    pitch.heatmap_positional(bin_statistic,
                             ax=ax,
                             cmap='coolwarm',
                             edgecolors='#22312b')
    # Plot the points at the exact location of where the pressure was applied:
    pitch.scatter(croatiaPressureData["startX"],
                  croatiaPressureData["startY"],
                  c='white', s=5, ax=ax, alpha=0.3)
    # Add the percentage label for each section of the pitch:
    labels = pitch.label_heatmap(bin_statistic, color='white', fontsize=20,
                                 ax=ax, ha='center', va='center',
                                 str_format='{:.0%}', path_effects=path_eff)
    ax.set_title(pos.capitalize())
    

In [None]:
colormap = "seismic"

# Full vertical pitch, three panels side by side: KDE, hexbin, raw scatter.
pitch = mplsoccer.VerticalPitch(line_color='white',
                                line_zorder=2,
                                pitch_color='#101010',
                                pad_bottom=10)
fig, ax = pitch.draw(nrows=1, ncols=3, figsize=(14, 20))
# Panel 1 - kernel density of pressure locations.
# fill=True replaces seaborn's deprecated shade=True keyword.
kdeplot = pitch.kdeplot(croatiaPressureData["startX"],
                        croatiaPressureData["startY"],
                        ax=ax[0], fill=True,
                        levels=100, cmap=colormap)
# Panel 2 - hexbin counts.
hexmap = pitch.hexbin(croatiaPressureData["startX"], croatiaPressureData["startY"],
                      ax=ax[1], edgecolors='#f4f4f4',
                      gridsize=(5, 5), cmap=colormap)
# Panel 3 - one marker per pressure event.
scatter = pitch.scatter(croatiaPressureData["startX"], croatiaPressureData["startY"],
                        ax=ax[2],
                        s=20, c="white", edgecolors="red")

In [None]:
""" Match-wise Def Actions Distribution Maps """
# Match-wise positional *pressure* maps (vertical zones), one panel per match.
# Relies on `path_eff` defined in the positional pressure map cell.
colormap = "bwr"

pitch = mplsoccer.VerticalPitch(line_color='white',
                                pitch_color='#101010',
                                line_zorder=2)
fig, axs = pitch.grid(nrows=2, ncols=4, space=0.1, figheight=40,
                      title_height=0, endnote_height=0,
                      grid_width=0.9, grid_height=0.98,
                      bottom=0.01, left=0.05)
# Match ids come from the pressure data itself. They were previously taken
# from the defensive-action frame, so a match present there but without
# pressure rows produced an empty subset and made .item() below fail.
matchIDs = croatiaPressureData.sort_values("match_no")["match_id"].unique()
for ax, matchID in zip(axs['pitch'].flat, matchIDs):
    # Pressure events of the match shown in this panel:
    matchPressureData = croatiaPressureData[croatiaPressureData["match_id"] == matchID]
    # Share of pressure events falling in each vertical zone:
    bin_statistic = pitch.bin_statistic_positional(matchPressureData["startX"],
                                                   matchPressureData["startY"],
                                                   statistic='count',
                                                   positional="vertical",
                                                   normalize=True)
    # Plot the Heatmap according to the positions selected above
    pitch.heatmap_positional(bin_statistic,
                             ax=ax,
                             cmap='coolwarm',
                             edgecolors='#22312b')
    # Plot the points at the exact location of where the pressure was applied:
    pitch.scatter(matchPressureData["startX"],
                  matchPressureData["startY"],
                  c='white', s=150, ax=ax, alpha=0.3)
    # Add the percentage label for each section of the pitch:
    labels = pitch.label_heatmap(bin_statistic, color='white', fontsize=80,
                                 ax=ax, ha='center', va='center',
                                 str_format='{:.0%}', path_effects=path_eff)
    # Panel title "<match number> | <stage>":
    matchNo = matchPressureData["match_no"].unique().item()
    matchStage = matchPressureData["competition_stage.name"].unique().item()
    ax.set_title(str(matchNo) + " | " + matchStage,
                 fontsize=100)

# Comparison Analysis | All Teams

## Attacking Comparison | All Teams

Attacking Parameters:
1. Goals
1. xG
1. Shots
1. Shots OT
1. Shot Accuracy
1. Goal Conversion
1. Fouls Won

In [None]:
# Raw per-team totals that are later divided by matches played
# to obtain "<name>PerMatch" columns.
attParams = ["Goals", "xG", "totShots", "shotsOT", "foulsWon"]

In [None]:
# Column names of the per-match versions of the raw attacking totals.
attParamsPerMatch = [f"{col}PerMatch" for col in attParams]

In [None]:
attParamsPerMatch

In [None]:
# Per-match metrics plus the two percentage metrics (already normalised).
finalAttParams = [*attParamsPerMatch, "shotAcc", "gcRate"]

In [None]:
finalAttParams

### Compute Params for all teams

In [None]:
eventDataWC18Merged.groupby(["team.id"]).agg({"team.name": "first",
                                              "match_id": "nunique",
                                              "shot.outcome.id": [lambda x: (x == 97).sum(),
                                                                  lambda x: x.isin([97, 100]).sum(),
                                                                  "count"],
                                              "shot.statsbomb_xg": "sum",
                                              "type.id": lambda x: (x == 21).sum()})

In [None]:
# Per-team attacking totals. Output column order follows this spec:
# team name, matches played, then for shot.outcome.id: count of outcome 97,
# count of outcomes 97 or 100, count of non-null outcomes; then summed xG and
# the number of events with type.id == 21.
attAggSpec = {"team.name": "first",
              "match_id": "nunique",
              "shot.outcome.id": [lambda x: (x == 97).sum(),
                                  lambda x: x.isin([97, 100]).sum(),
                                  "count"],
              "shot.statsbomb_xg": "sum",
              "type.id": lambda x: (x == 21).sum()}
allTeamsAttData = eventDataWC18Merged.groupby(["team.id"]).agg(attAggSpec)

In [None]:
# Replace the two-level aggregation header with flat names; the order must
# match the order of the aggregation spec.
attColNames = ["teamName", "nMatches", "Goals", "shotsOT",
               "totShots", "xG", "foulsWon"]
allTeamsAttData.columns = attColNames

In [None]:
# Shot accuracy: shots on target as a percentage of all shots (1 decimal).
onTargetShare = allTeamsAttData["shotsOT"] / allTeamsAttData["totShots"]
allTeamsAttData["shotAcc"] = (onTargetShare * 100).round(1)

In [None]:
# Goal conversion rate: goals as a percentage of all shots (1 decimal).
goalShare = allTeamsAttData["Goals"] / allTeamsAttData["totShots"]
allTeamsAttData["gcRate"] = (goalShare * 100).round(1)

In [None]:
allTeamsAttData.head()

In [None]:
# Normalise every raw attacking total by the number of matches played.
matchesPlayed = allTeamsAttData["nMatches"]
for col in attParams:
    allTeamsAttData[f"{col}PerMatch"] = allTeamsAttData[col] / matchesPlayed

In [None]:
allTeamsAttData

In [None]:
# Column means across teams. numeric_only=True skips the string "teamName"
# column, which makes DataFrame.mean() raise a TypeError on pandas >= 2.0.
allTeamsAttData.mean(numeric_only=True)

## Comparison using Pizza Plot

In [None]:
# Metrics shown on the attacking pizza plot (alias of the final list).
params = finalAttParams

In [None]:
# Empty two-row table: one row for Croatia, one for the other teams' average.
pizzaRows = ["Croatia", "League Avg."]
pizzaData = pd.DataFrame(columns=params, index=pizzaRows)

In [None]:
pizzaData

In [None]:
allTeamsAttData.loc[croatiaTeamid, params].astype(float)

In [None]:
""" Get stats for Croatia """
# Croatia's values for the plotted metrics, rounded to one decimal.
croatiaAttStats = allTeamsAttData.loc[croatiaTeamid, params].astype(float)
pizzaData.loc["Croatia"] = croatiaAttStats.round(1).tolist()

In [None]:
pizzaData

In [None]:
allTeamsAttData.loc[(allTeamsAttData.index != croatiaTeamid),
                                                   params].round(1)

In [None]:
allTeamsAttData.loc[(allTeamsAttData.index != croatiaTeamid),
                                                   params].mean().round(1).tolist()

In [None]:
""" Get stats for Rest of the Teams """
# Mean of every other team's values (Croatia excluded), one decimal.
otherTeams = allTeamsAttData.index != croatiaTeamid
leagueAttStats = allTeamsAttData.loc[otherTeams, params].mean()
pizzaData.loc["League Avg."] = leagueAttStats.round(1).tolist()

In [None]:
pizzaData

In [None]:
pizzaData.loc[["Croatia", "League Avg."]].min()

In [None]:
# Slice values for the pizza plot: Croatia first, comparison second.
values, values_2 = (pizzaData.loc[rowName].tolist()
                    for rowName in ("Croatia", "League Avg."))

In [None]:
values

In [None]:
values_2

In [None]:
allTeamsAttData[params].min()

In [None]:
# Per-metric axis limits: the smallest and largest value over all teams.
attMetricTable = allTeamsAttData[params]
minRange = attMetricTable.min().tolist()
maxRange = attMetricTable.max().tolist()

In [None]:
minRange

In [None]:
maxRange

In [None]:
finalAttParams

In [None]:
# instantiate PyPizza class
baker = mplsoccer.PyPizza(
    params=params,                  # list of parameters
    min_range=minRange,             # per-parameter lower axis limit
    max_range=maxRange,             # per-parameter upper axis limit
    background_color="#383838",     # background color
    straight_line_color="#222222",  # color for straight lines
    straight_line_lw=1,             # linewidth for straight lines
    last_circle_lw=1,               # linewidth of last circle
    last_circle_color="#222222",    # color of last circle
    other_circle_ls="-.",           # linestyle for other circles
    other_circle_lw=1               # linewidth for other circles
)

# plot pizza
fig, ax = baker.make_pizza(
    values,                     # list of values
    compare_values=values_2,    # comparison values
    figsize=(12, 8),             # adjust figsize according to your need
    kwargs_slices=dict(
        facecolor="#1A78CF", edgecolor="#222222",
        zorder=2, linewidth=1
    ),                          # values to be used when plotting slices
    kwargs_compare=dict(
        facecolor="#FF9300", edgecolor="#222222",
        zorder=2, linewidth=1,
    ),                          # values to be used when plotting comparison slices
    kwargs_params=dict(
        color="#ffffff", fontsize=12,
        va="center"
    ),                          # values to be used when adding parameter
    kwargs_values=dict(
        color="#000000", fontsize=12,
        zorder=3,
        bbox=dict(
            edgecolor="#000000", facecolor="cornflowerblue",
            boxstyle="round,pad=0.2", lw=1
        )
    ),                          # values to be used when adding parameter-values labels
    kwargs_compare_values=dict(
        color="#000000", fontsize=12, zorder=3,
        bbox=dict(edgecolor="#000000", facecolor="#FF9300", boxstyle="round,pad=0.2", lw=1)
    ),                          # values to be used when adding comparison-values labels
)

# adjust the texts (one flag per parameter; True marks the label to shift)
params_offset = [False, False, False, False, False, True, False]
# to adjust text for comparison-values-text pass adj_comp_values=True
baker.adjust_texts(params_offset, offset=-0.2)

# add title
# `font_bold` is not defined anywhere in this notebook, so the previous
# `fontproperties=font_bold.prop` raised a NameError; a plain bold weight
# keeps the intended look without an extra font object.
fig_text(
    0.515, 0.99, "<Croatia> vs <League Average>",
    size=20, fig=fig,
    highlight_textprops=[{"color": '#1A78CF'}, {"color": '#FF9300'}],
    ha="center", fontweight="bold", color="#F2F2F2"
)

# add subtitle
fig.text(
    0.515, 0.932,
    "Attacking Comparison | 2018",
    size=15,
    ha="center", color="#ffffff"
)

## Defensive Comparison | All Teams

Defensive Parameters:
1. Tackles
1. Challenges
1. Aerial Duels Won %
1. Fouls Committed
1. Interceptions
1. Pressure

In [None]:
# Raw per-team defensive totals that are later divided by matches played.
defParams = [
    "Tackles",
    "Challenges",
    "foulsCommitted",
    "Interceptions",
    "Pressure",
]

In [None]:
# Column names of the per-match versions of the raw defensive totals.
defParamsPerMatch = [f"{col}PerMatch" for col in defParams]

In [None]:
defParamsPerMatch

In [None]:
# Per-match metrics plus the aerial win percentage (already normalised).
finalDefParams = [*defParamsPerMatch, "aerialWinPer"]

In [None]:
eventDataWC18Merged["clearance.aerial_won"].unique()

In [None]:
eventDataWC18Merged.groupby(["team.id"]).agg({"team.name": "first",
                                              "match_id": "nunique",
                                              "duel.type.id": [lambda x: (x == 11).sum(),
                                                               "count",
                                                               lambda x: (x == 10).sum()],
                                              "clearance.aerial_won": "sum",
                                              "type.id": [lambda x: (x == 22).sum(),
                                                          lambda x: (x == 10).sum(),
                                                          lambda x: (x == 17).sum()]})

In [None]:
# Per-team defensive totals. Output column order follows this spec:
# team name, matches played, then for duel.type.id: count of type 11, count
# of non-null values, count of type 10; then the sum of clearance.aerial_won;
# then for type.id: counts of ids 22, 10 and 17.
defAggSpec = {"team.name": "first",
              "match_id": "nunique",
              "duel.type.id": [lambda x: (x == 11).sum(),
                               "count",
                               lambda x: (x == 10).sum()],
              "clearance.aerial_won": "sum",
              "type.id": [lambda x: (x == 22).sum(),
                          lambda x: (x == 10).sum(),
                          lambda x: (x == 17).sum()]}
allTeamsDefData = eventDataWC18Merged.groupby(["team.id"]).agg(defAggSpec)

In [None]:
# Replace the two-level aggregation header with flat names; the order must
# match the order of the aggregation spec.
defColNames = ["teamName", "nMatches",
               "Tackles", "Challenges", "aerialLost",
               "aerialWon",
               "foulsCommitted", "Interceptions", "Pressure"]
allTeamsDefData.columns = defColNames

In [None]:
# Aerial win percentage: won / (won + lost), as a percentage with 1 decimal.
aerialTotal = allTeamsDefData[["aerialLost", "aerialWon"]].sum(axis=1)
aerialWinShare = allTeamsDefData["aerialWon"] / aerialTotal
allTeamsDefData["aerialWinPer"] = (aerialWinShare * 100).round(1)

In [None]:
# Normalise every raw defensive total by the number of matches played.
matchesPlayed = allTeamsDefData["nMatches"]
for col in defParams:
    allTeamsDefData[f"{col}PerMatch"] = allTeamsDefData[col] / matchesPlayed

In [None]:
allTeamsDefData

In [None]:
# Column means across teams. numeric_only=True skips the string "teamName"
# column, which makes DataFrame.mean() raise a TypeError on pandas >= 2.0.
allTeamsDefData.mean(numeric_only=True)

## Comparison using Pizza Plot

In [None]:
# Metrics shown on the defensive pizza plot (alias of the final list).
params = finalDefParams

In [None]:
# Empty two-row table: one row for Croatia, one for the other teams' average.
pizzaRows = ["Croatia", "League Avg."]
pizzaData = pd.DataFrame(columns=params, index=pizzaRows)

In [None]:
allTeamsDefData.loc[croatiaTeamid, params].astype(float)

In [None]:
""" Get stats for Croatia """
# Croatia's values for the plotted metrics, rounded to one decimal.
croatiaDefStats = allTeamsDefData.loc[croatiaTeamid, params].astype(float)
pizzaData.loc["Croatia"] = croatiaDefStats.round(1).tolist()

In [2]:
allTeamsDefData.loc[(allTeamsDefData.index != croatiaTeamid),
                                                   params].round(1)

NameError: name 'allTeamsDefData' is not defined

In [None]:
allTeamsDefData.loc[(allTeamsDefData.index != croatiaTeamid),
                                                   params].mean().round(1).tolist()

In [None]:
""" Get stats for Rest of the Teams """
# Mean of every other team's values (Croatia excluded), one decimal.
otherTeams = allTeamsDefData.index != croatiaTeamid
leagueDefStats = allTeamsDefData.loc[otherTeams, params].mean()
pizzaData.loc["League Avg."] = leagueDefStats.round(1).tolist()

In [None]:
# Column-wise minimum over the "Croatia" and "League Avg." rows.
pizzaData.loc[["Croatia", "League Avg."]].min()

In [None]:
# Store, per parameter, the smaller and the larger of the two compared values.
# The two-row selection is taken once, before the new rows are appended.
comparedRows = pizzaData.loc[["Croatia", "League Avg."]]
pizzaData.loc["valuesMin"] = comparedRows.min()
pizzaData.loc["valuesMax"] = comparedRows.max()

In [None]:
# Plain lists for plotting: Croatia's values and the comparison values.
values, values_2 = (
    pizzaData.loc[rowLabel].tolist() for rowLabel in ("Croatia", "League Avg.")
)

In [None]:
# Croatia's values, in the column order of ``pizzaData``.
values

In [None]:
# "League Avg." values, in the column order of ``pizzaData``.
values_2

In [None]:
# Smallest value of each selected parameter across all teams.
allTeamsDefData[params].min()

In [None]:
# Per-parameter lower and upper bounds across all teams, used as the
# min/max range of each pizza slice.
plottedMetrics = allTeamsDefData[params]
minRange, maxRange = plottedMetrics.min().tolist(), plottedMetrics.max().tolist()

In [None]:
# Lower bound of each plotted parameter.
minRange

In [None]:
# Upper bound of each plotted parameter.
maxRange

In [None]:
# Names of the parameters that will label the pizza slices.
params

In [None]:
# Pizza chart: Croatia vs. the average of the other teams (defensive metrics).
croatiaColor = "#1A78CF"   # Croatia slices and title highlight
averageColor = "#FF9300"   # comparison slices, labels and title highlight
outlineColor = "#222222"   # slice edges, straight lines and last circle

# One slice per entry in ``params``; ``minRange``/``maxRange`` hold the
# per-parameter bounds.
baker = mplsoccer.PyPizza(
    params=params,
    min_range=minRange,
    max_range=maxRange,
    background_color="#383838",
    straight_line_color=outlineColor,
    last_circle_lw=1,
    last_circle_color=outlineColor,
    other_circle_lw=0,
)

# Styling for the slices, parameter names and value labels.
sliceStyle = {
    "facecolor": croatiaColor, "edgecolor": outlineColor,
    "zorder": 2, "linewidth": 1,
}
compareSliceStyle = {
    "facecolor": averageColor, "edgecolor": outlineColor,
    "zorder": 2, "linewidth": 1,
}
paramLabelStyle = {"color": "#ffffff", "fontsize": 12, "va": "center"}
valueLabelStyle = {
    "color": "#000000", "fontsize": 12, "zorder": 3,
    "bbox": {
        "edgecolor": "#000000", "facecolor": "cornflowerblue",
        "boxstyle": "round,pad=0.2", "lw": 1,
    },
}
compareValueLabelStyle = {
    "color": "#000000", "fontsize": 12, "zorder": 3,
    "bbox": {
        "edgecolor": "#000000", "facecolor": averageColor,
        "boxstyle": "round,pad=0.2", "lw": 1,
    },
}

# Draw Croatia's values with the comparison values overlaid.
fig, ax = baker.make_pizza(
    values,
    compare_values=values_2,
    figsize=(12, 8),
    kwargs_slices=sliceStyle,
    kwargs_compare=compareSliceStyle,
    kwargs_params=paramLabelStyle,
    kwargs_values=valueLabelStyle,
    kwargs_compare_values=compareValueLabelStyle,
)

# Shift the value label flagged True (the second parameter) so it does not overlap.
# To adjust the comparison-value labels instead, pass adj_comp_values=True.
# NOTE(review): the list is hard-coded for six parameters - confirm it matches len(params).
params_offset = [False, True, False, False, False, False]
baker.adjust_texts(params_offset, offset=-0.2)

# Title: each <...> segment is coloured by the matching highlight_textprops entry.
# NOTE(review): font_bold is defined outside this cell - confirm it is available.
fig_text(
    0.515, 0.99, "<Croatia> vs <League Average>",
    size=20,
    fig=fig,
    highlight_textprops=[{"color": croatiaColor}, {"color": averageColor}],
    ha="center",
    fontproperties=font_bold.prop,
    color="#F2F2F2",
)

# Subtitle
fig.text(
    0.515, 0.932, "Defensive Comparison | 2018",
    size=15, ha="center", color="#ffffff",
)

# Comparison Analysis | Other Underdogs

## Who are the Underdogs?

We treat every team that has never won the World Cup as an underdog.

In [None]:
# List the distinct team names present in the merged event data.
eventDataWC18Merged["team.name"].unique()

In [None]:
# Teams treated as underdogs in this notebook, i.e. sides that have never
# won the World Cup.
underdogsList = [
    "Croatia", "Denmark", "Nigeria", "Iceland", "Poland",
    "Colombia", "Costa Rica", "Mexico", "Australia", "Peru",
    "Serbia", "Senegal", "Panama", "Tunisia", "Switzerland",
    "Russia", "Belgium", "Japan", "Egypt", "Saudi Arabia",
    "South Korea", "Morocco", "Portugal", "Iran", "Sweden",
]

## Attacking Comparison

In [None]:
# Keep only the events whose team is in the underdog list.
isUnderdogEvent = eventDataWC18Merged["team.name"].isin(underdogsList)
udEventData = eventDataWC18Merged[isUnderdogEvent]

In [None]:
# Per-team attacking totals for the underdog sides, grouped by "team.id".
# The output columns are renamed positionally in the next cell
# (teamName, nMatches, Goals, shotsOT, totShots, xG, foulsWon).
attackingAggSpec = {
    "team.name": "first",
    "match_id": "nunique",
    "shot.outcome.id": [
        lambda s: (s == 97).sum(),
        lambda s: s.isin([97, 100]).sum(),
        "count",
    ],
    "shot.statsbomb_xg": "sum",
    "type.id": lambda s: (s == 21).sum(),
}
allTeamsAttData = udEventData.groupby(["team.id"]).agg(attackingAggSpec)

In [None]:
# Flatten the aggregated column labels; the order follows the ``agg`` output.
allTeamsAttData.columns = [
    "teamName",
    "nMatches",
    "Goals",
    "shotsOT",
    "totShots",
    "xG",
    "foulsWon",
]

In [None]:
# Shot accuracy: shots on target as a percentage of all shots (1 decimal).
allTeamsAttData["shotAcc"] = (
    allTeamsAttData["shotsOT"] / allTeamsAttData["totShots"] * 100
).round(1)

In [None]:
# Goals as a percentage of all shots (1 decimal).
allTeamsAttData["gcRate"] = (
    allTeamsAttData["Goals"] / allTeamsAttData["totShots"] * 100
).round(1)

In [None]:
# Add a "<metric>PerMatch" column for every metric listed in ``attParams``
# by dividing the team total by the number of matches the team played.
for col in attParams:
    allTeamsAttData[f"{col}PerMatch"] = (
        allTeamsAttData[col] / allTeamsAttData["nMatches"]
    )

### Analysing using Pizza Plot

In [None]:
# Parameters (columns of ``allTeamsAttData``) to show as slices of the pizza plot.
params = finalAttParams

In [None]:
# Empty comparison table: one row per side, one column per plotted parameter.
pizzaData = pd.DataFrame(
    columns=params,
    index=["Croatia", "League Avg."],
)

In [None]:
""" Get stats for Croatia """
# Croatia's row: selected parameters as floats, rounded to one decimal.
pizzaData.loc["Croatia"] = (
    allTeamsAttData.loc[croatiaTeamid, params]
    .astype(float)
    .round(1)
    .tolist()
)

In [None]:
""" Get stats for Rest of the Teams """
# Comparison row: per-parameter mean over every team in ``allTeamsAttData``
# except Croatia, rounded to one decimal.
pizzaData.loc["League Avg."] = (
    allTeamsAttData.loc[allTeamsAttData.index != croatiaTeamid, params]
    .mean()
    .round(1)
    .tolist()
)

In [None]:
# Plain lists for plotting: Croatia's values and the comparison values.
values, values_2 = (
    pizzaData.loc[rowLabel].tolist() for rowLabel in ("Croatia", "League Avg.")
)

In [None]:
# Per-parameter lower and upper bounds across the teams in ``allTeamsAttData``,
# used as the min/max range of each pizza slice.
plottedMetrics = allTeamsAttData[params]
minRange, maxRange = plottedMetrics.min().tolist(), plottedMetrics.max().tolist()

In [None]:
# Pizza chart: Croatia vs. the average of the other underdogs (attacking metrics).
croatiaColor = "#1A78CF"   # Croatia slices and title highlight
averageColor = "#FF9300"   # comparison slices, labels and title highlight
outlineColor = "#222222"   # slice edges, straight lines and last circle

# One slice per entry in ``params``; ``minRange``/``maxRange`` hold the
# per-parameter bounds.
baker = mplsoccer.PyPizza(
    params=params,
    min_range=minRange,
    max_range=maxRange,
    background_color="#383838",
    straight_line_color=outlineColor,
    straight_line_lw=1,
    last_circle_lw=1,
    last_circle_color=outlineColor,
    other_circle_ls="-.",
    other_circle_lw=1,
)

# Styling for the slices, parameter names and value labels.
sliceStyle = {
    "facecolor": croatiaColor, "edgecolor": outlineColor,
    "zorder": 2, "linewidth": 1,
}
compareSliceStyle = {
    "facecolor": averageColor, "edgecolor": outlineColor,
    "zorder": 2, "linewidth": 1,
}
paramLabelStyle = {"color": "#ffffff", "fontsize": 12, "va": "center"}
valueLabelStyle = {
    "color": "#000000", "fontsize": 12, "zorder": 3,
    "bbox": {
        "edgecolor": "#000000", "facecolor": "cornflowerblue",
        "boxstyle": "round,pad=0.2", "lw": 1,
    },
}
compareValueLabelStyle = {
    "color": "#000000", "fontsize": 12, "zorder": 3,
    "bbox": {
        "edgecolor": "#000000", "facecolor": averageColor,
        "boxstyle": "round,pad=0.2", "lw": 1,
    },
}

# Draw Croatia's values with the comparison values overlaid.
fig, ax = baker.make_pizza(
    values,
    compare_values=values_2,
    figsize=(12, 8),
    kwargs_slices=sliceStyle,
    kwargs_compare=compareSliceStyle,
    kwargs_params=paramLabelStyle,
    kwargs_values=valueLabelStyle,
    kwargs_compare_values=compareValueLabelStyle,
)

# Title: each <...> segment is coloured by the matching highlight_textprops entry.
fig_text(
    0.515, 0.99, "<Croatia> vs <Underdog Average>",
    size=20,
    fig=fig,
    highlight_textprops=[{"color": croatiaColor}, {"color": averageColor}],
    ha="center",
    color="#F2F2F2",
)

# Subtitle
fig.text(
    0.515, 0.932, "Attacking Comparison | 2018",
    size=15, ha="center", color="#ffffff",
)

## Defensive Comparison

In [None]:
# Per-team defensive totals for the underdog sides, grouped by "team.id".
# The output columns are renamed positionally in the next cell
# (teamName, nMatches, Tackles, Challenges, aerialLost, aerialWon,
# foulsCommitted, Interceptions, Pressure).
defensiveAggSpec = {
    "team.name": "first",
    "match_id": "nunique",
    "duel.type.id": [
        lambda s: (s == 11).sum(),
        "count",
        lambda s: (s == 10).sum(),
    ],
    "clearance.aerial_won": "sum",
    "type.id": [
        lambda s: (s == 22).sum(),
        lambda s: (s == 10).sum(),
        lambda s: (s == 17).sum(),
    ],
}
allTeamsDefData = udEventData.groupby(["team.id"]).agg(defensiveAggSpec)

In [None]:
# Flatten the aggregated column labels; the order follows the ``agg`` output.
allTeamsDefData.columns = [
    "teamName",
    "nMatches",
    "Tackles",
    "Challenges",
    "aerialLost",
    "aerialWon",
    "foulsCommitted",
    "Interceptions",
    "Pressure",
]

In [None]:
# Aerial win percentage: won / (lost + won) * 100, rounded to one decimal.
allTeamsDefData["aerialWinPer"] = (
    allTeamsDefData["aerialWon"]
    / allTeamsDefData[["aerialLost", "aerialWon"]].sum(axis=1)
    * 100
).round(1)

In [None]:
# Add a "<metric>PerMatch" column for every metric listed in ``defParams``
# by dividing the team total by the number of matches the team played.
for col in defParams:
    allTeamsDefData[f"{col}PerMatch"] = (
        allTeamsDefData[col] / allTeamsDefData["nMatches"]
    )

### Analysing using Pizza Plot

In [None]:
# Parameters (columns of ``allTeamsDefData``) to show as slices of the pizza plot.
params = finalDefParams

In [None]:
# Empty comparison table: one row per side, one column per plotted parameter.
pizzaData = pd.DataFrame(
    columns=params,
    index=["Croatia", "League Avg."],
)

In [None]:
""" Get stats for Croatia """
# Croatia's row: selected parameters as floats, rounded to one decimal.
pizzaData.loc["Croatia"] = (
    allTeamsDefData.loc[croatiaTeamid, params]
    .astype(float)
    .round(1)
    .tolist()
)

In [None]:
""" Get stats for Rest of the Teams """
# Comparison row: per-parameter mean over every team in ``allTeamsDefData``
# except Croatia, rounded to one decimal.
pizzaData.loc["League Avg."] = (
    allTeamsDefData.loc[allTeamsDefData.index != croatiaTeamid, params]
    .mean()
    .round(1)
    .tolist()
)

In [None]:
# Plain lists for plotting: Croatia's values and the comparison values.
values, values_2 = (
    pizzaData.loc[rowLabel].tolist() for rowLabel in ("Croatia", "League Avg.")
)

In [None]:
# Per-parameter lower and upper bounds across the teams in ``allTeamsDefData``,
# used as the min/max range of each pizza slice.
plottedMetrics = allTeamsDefData[params]
minRange, maxRange = plottedMetrics.min().tolist(), plottedMetrics.max().tolist()

In [None]:
# Pizza chart: Croatia vs. the average of the other underdogs (defensive metrics).
croatiaColor = "#1A78CF"   # Croatia slices and title highlight
averageColor = "#FF9300"   # comparison slices, labels and title highlight
outlineColor = "#222222"   # slice edges, straight lines and last circle

# One slice per entry in ``params``; ``minRange``/``maxRange`` hold the
# per-parameter bounds.
baker = mplsoccer.PyPizza(
    params=params,
    min_range=minRange,
    max_range=maxRange,
    background_color="#383838",
    straight_line_color=outlineColor,
    last_circle_lw=1,
    last_circle_color=outlineColor,
    other_circle_lw=0,
)

# Styling for the slices, parameter names and value labels.
sliceStyle = {
    "facecolor": croatiaColor, "edgecolor": outlineColor,
    "zorder": 2, "linewidth": 1,
}
compareSliceStyle = {
    "facecolor": averageColor, "edgecolor": outlineColor,
    "zorder": 2, "linewidth": 1,
}
paramLabelStyle = {"color": "#ffffff", "fontsize": 12, "va": "center"}
valueLabelStyle = {
    "color": "#000000", "fontsize": 12, "zorder": 3,
    "bbox": {
        "edgecolor": "#000000", "facecolor": "cornflowerblue",
        "boxstyle": "round,pad=0.2", "lw": 1,
    },
}
compareValueLabelStyle = {
    "color": "#000000", "fontsize": 12, "zorder": 3,
    "bbox": {
        "edgecolor": "#000000", "facecolor": averageColor,
        "boxstyle": "round,pad=0.2", "lw": 1,
    },
}

# Draw Croatia's values with the comparison values overlaid.
fig, ax = baker.make_pizza(
    values,
    compare_values=values_2,
    figsize=(12, 8),
    kwargs_slices=sliceStyle,
    kwargs_compare=compareSliceStyle,
    kwargs_params=paramLabelStyle,
    kwargs_values=valueLabelStyle,
    kwargs_compare_values=compareValueLabelStyle,
)

# Title: each <...> segment is coloured by the matching highlight_textprops entry.
# NOTE(review): font_bold is defined outside this cell - confirm it is available.
fig_text(
    0.515, 0.99, "<Croatia> vs <Underdog Average>",
    size=20,
    fig=fig,
    highlight_textprops=[{"color": croatiaColor}, {"color": averageColor}],
    ha="center",
    fontproperties=font_bold.prop,
    color="#F2F2F2",
)

# Subtitle
fig.text(
    0.515, 0.932, "Defensive Comparison | 2018",
    size=15, ha="center", color="#ffffff",
)