<a href="https://colab.research.google.com/github/guhanakilan/data-visualization/blob/main/Fifawc22_analyser_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [None]:
# Load the matches CSV into `df`. The path is absolute (`/content/...`), so
# the file must be present at that location before this cell runs.
df = pd.read_csv('/content/Fifa_world_cup_matches.csv')
# Show the first rows as the cell output.
df.head()

In [None]:
# Print the column names, non-null counts and dtypes of `df`.
df.info()

In [None]:
# Map malformed column headers to a consistent "<stat> team<N>" form:
# two headers lack the space before "team1" and one has a double space
# before "team2". Each key is listed once; repeated keys in a dict literal
# simply overwrite one another and add nothing.
fix_name = {
    'completed defensive line breaksteam1': 'completed defensive line breaks team1',
    'attempts inside the penalty area  team2': 'attempts inside the penalty area team2',
    'completed line breaksteam1': 'completed line breaks team1',
}
# axis=1 applies the mapping to column labels.
df = df.rename(fix_name, axis=1)

In [None]:
# Convert the three possession columns from text to float64 by dropping the
# final character of every value before the cast.
# NOTE(review): presumably that character is a trailing '%' - confirm against the CSV.
for possession_col in ('possession team1', 'possession team2', 'possession in contest'):
    trimmed = df[possession_col].str[:-1]
    df[possession_col] = trimmed.astype('float64')

In [None]:
# Conversion rate = goals / total attempts, rounded to 2 decimals, computed
# for each side. NaN results (e.g. 0 goals from 0 attempts) are replaced by 0.
conversion_specs = (
    ('conversion rate team1', 'number of goals team1', 'total attempts team1'),
    ('conversion rate team2', 'number of goals team2', 'total attempts team2'),
)
for rate_col, goals_col, attempts_col in conversion_specs:
    ratio = df[goals_col] / df[attempts_col]
    df[rate_col] = np.round(ratio, 2).fillna(0)

In [None]:
# Partition the columns by side using the trailing digit of the column name.
# A suffix test is used (rather than "digit appears anywhere") because
# create_country_stat strips exactly the last character of these names; a
# column with a '1' or '2' elsewhere in its name must stay in `same_col`.
team_1_col = [col for col in df.columns if col.endswith('1')]
team_2_col = [col for col in df.columns if col.endswith('2')]
# Columns that belong to neither side (shared match information).
same_col = [col for col in df.columns if not col.endswith(('1', '2'))]

In [None]:
# Every team name that occurs in either the `team1` or the `team2` column.
ls = df['team1'].unique().tolist() + df['team2'].unique().tolist()
# Collapse to a set so a team appearing in both columns is kept once.
countries = set(ls)

def create_country_stat(country):
    """Build one frame holding every row of `df` that involves `country`.

    Rows where the country is `team1` and rows where it is `team2` are
    relabelled into a common layout: the country's own columns lose their
    trailing digit (e.g. ``'possession team1'`` -> ``'possession team'``) and
    the other side's columns additionally get an ``'_opponent'`` suffix
    (e.g. ``'possession team_opponent'``).

    Args:
        country: Team name to match against the `team1` / `team2` columns.

    Returns:
        DataFrame with the `team1` rows first, then the `team2` rows, and a
        fresh 0..n-1 index.
    """

    def _from_perspective(side_col, own_cols, other_cols):
        # Drop the trailing digit from own columns; other side's columns
        # also get the '_opponent' suffix.
        relabel = {name: name[:-1] for name in own_cols}
        relabel.update({name: name[:-1] + '_opponent' for name in other_cols})
        return df[df[side_col] == country].rename(columns=relabel)

    as_team_1 = _from_perspective('team1', team_1_col, team_2_col)
    as_team_2 = _from_perspective('team2', team_2_col, team_1_col)

    combined = pd.concat([as_team_1, as_team_2], axis=0)
    return combined.reset_index(drop=True)

In [None]:
# Map each team name to its per-team frame. `countries` is a set, whose
# iteration order for strings is not guaranteed to be the same between
# interpreter sessions; iterating in sorted order keeps the dict order (and
# therefore the position of every team in the plot grids) reproducible.
# key=str keeps the sort from failing should a non-string name be present.
country_collection = {
    country: create_country_stat(country)
    for country in sorted(countries, key=str)
}

In [None]:
def change_label(stat):
    """Turn a stat column name into a possessive, human-readable label.

    The name is split on single spaces. If the token ``'team_opponent'`` is
    present, the last token is dropped and ``"opponent's"`` is put in front;
    otherwise, if the token ``'team'`` is present, the last token is dropped
    and ``"team's"`` is put in front. Names containing neither token are
    returned unchanged.

    Example: ``'possession team'`` -> ``"team's possession"``.
    """
    words = stat.split(' ')
    if 'team_opponent' in words:
        owner = "opponent's"
    elif 'team' in words:
        owner = "team's"
    else:
        # Neither marker token found: nothing to rewrite.
        return ' '.join(words)
    # Always the *last* token is removed, whichever position the marker had.
    return ' '.join([owner, *words[:-1]])

In [None]:
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

def plot_paired_team_stats(stat1=None, stat2=None, y_min=None, y_max=None, color1=sns.color_palette("tab10")[0], color2=sns.color_palette("tab10")[3]):
    """Plot two statistics side by side for every team, one panel per team.

    For each entry of ``country_collection`` a panel is drawn with two bars
    per row of that team's frame (``stat1`` on the left, ``stat2`` on the
    right) and the ``team_opponent`` column as x tick labels. The panels are
    laid out four per row; the number of rows follows the number of teams, so
    the grid no longer breaks when there are more or fewer than 32 teams.

    Args:
        stat1: Column plotted as the left bar; legend entry 'team'.
        stat2: Column plotted as the right bar; legend entry 'opponent'.
        y_min: Lower y limit applied to every panel (None lets matplotlib choose).
        y_max: Upper y limit applied to every panel (None lets matplotlib choose).
        color1: Bar colour for ``stat1``.
        color2: Bar colour for ``stat2``.
    """
    n_cols = 4
    team_names = list(country_collection)
    # Ceiling division; at least one row so plt.subplots always gets a valid grid.
    n_rows = max(1, -(-len(team_names) // n_cols))
    # 5 inches of height per row reproduces the (20, 40) figure for 8 rows.
    # squeeze=False keeps `axes` two-dimensional even for a single row.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 5 * n_rows), squeeze=False)
    panels = axes.ravel()

    # y label is stat1 without its ' team' suffix; only strip when it is there.
    suffix = ' team'
    if isinstance(stat1, str) and stat1.endswith(suffix):
        y_label = stat1[:-len(suffix)]
    else:
        y_label = stat1

    for ax, name in zip(panels, team_names):
        team = country_collection[name]
        x = np.arange(len(team))

        # Offset the two bar series around each integer position.
        ax.bar(x - 0.2, team[stat1], color=color1, width=0.35, label=stat1)
        ax.bar(x + 0.2, team[stat2], color=color2, width=0.35, label=stat2)

        ax.set_ylim(y_min, y_max)
        ax.set_ylabel(y_label)
        ax.set_xticks(x)
        ax.set_xticklabels(team['team_opponent'], rotation=45)
        # NOTE(review): the title text always says "goals", whatever stat1/stat2 are.
        ax.set_title(f'{name} goals scored and conceded versus')

        # Fixed legend entries instead of the raw column names.
        patch1 = mpatches.Patch(color=color1, label='team')
        patch2 = mpatches.Patch(color=color2, label='opponent')
        ax.legend(handles=[patch1, patch2])

    # Hide panels of the last row that have no team assigned.
    for spare in panels[len(team_names):]:
        spare.set_visible(False)

    # Spacing is a figure-level setting, so apply it once rather than per panel.
    fig.subplots_adjust(hspace=0.5, wspace=0.25)
    plt.show()

In [None]:
def plot_team_stats_single(team, stat, ax, y_min, y_max, color = sns.color_palette("tab10")[0] ):
    """Draw one team's values of `stat` as a labelled bar chart.

    Args:
        team: Key into ``country_collection`` naming the team to plot.
        stat: Column of the team's frame used for the bar heights.
        ax: Axes to draw on. When None, seaborn draws on the current Axes and
            the same formatting is applied there (previously it was skipped).
        y_min: Lower y limit (None lets matplotlib choose).
        y_max: Upper y limit (None lets matplotlib choose).
        color: Bar colour.
    """
    name = team
    team_df = country_collection[name]
    # barplot returns the Axes it drew on, which is the one to format even
    # when the caller passed ax=None.
    ax = sns.barplot(data = team_df, x = team_df.index, y = stat, color = color, width = 0.5, ax = ax)
    # Print each bar's value above it.
    ax.bar_label(ax.containers[0], fontsize=10)
    # Replace the positional tick labels with the opponent names.
    ax.set_xticks(ax.get_xticks(), labels = team_df['team_opponent'], rotation = 45)
    ax.set_xlabel(f"{name} versus", fontsize = 15)
    ax.set_ylabel(stat)
    ax.set_ylim(y_min, y_max)

In [None]:
def plot_team_stats(stat = None, y_min = None, y_max = None, color = sns.color_palette("tab10")[0]):
    """Plot `stat` for every team in ``country_collection``, one panel per team.

    Panels are laid out four per row and the number of rows follows the
    number of teams, so the grid works for any team count (8 rows for 32
    teams). Each panel is drawn by ``plot_team_stats_single`` rather than by
    a private copy of the same drawing code.

    Args:
        stat: Column of each team's frame used for the bar heights.
        y_min: Lower y limit applied to every panel (None lets matplotlib choose).
        y_max: Upper y limit applied to every panel (None lets matplotlib choose).
        color: Bar colour used in every panel.
    """
    n_cols = 4
    team_names = list(country_collection)
    # Ceiling division; at least one row so plt.subplots always gets a valid grid.
    n_rows = max(1, -(-len(team_names) // n_cols))
    # 5 inches of height per row reproduces the (20, 40) figure for 8 rows.
    # squeeze=False keeps `axes` two-dimensional even for a single row.
    fig, axes = plt.subplots(n_rows, n_cols, figsize = (20, 5 * n_rows), squeeze = False)
    panels = axes.ravel()

    for ax, country in zip(panels, team_names):
        plot_team_stats_single(country, stat, ax, y_min, y_max, color = color)

    # Hide panels of the last row that have no team assigned.
    for spare in panels[len(team_names):]:
        spare.set_visible(False)

    fig.subplots_adjust(hspace = 1, wspace = 0.25)

In [None]:
def plot_avg_stats(stat, top_n = 5):
    """Show a horizontal bar chart of the teams with the highest mean `stat`.

    For every team in ``country_collection`` the mean of column `stat` is
    taken and rounded to one decimal; the teams are ranked in descending
    order and the first `top_n` are plotted. The axis label and title are
    ``change_label(stat) + ' per match'``.

    Side effect: sets the global seaborn palette to 'pastel'.

    Args:
        stat: Column name present in every team's frame.
        top_n: How many of the highest-ranked teams to display.
    """
    # Mean per team, rounded to 1 decimal.
    averages = {
        country_name: np.round(country_df[stat].mean(), 1)
        for country_name, country_df in country_collection.items()
    }
    ranked = pd.Series(list(averages.values()), index=list(averages.keys()))
    ranked = ranked.sort_values(ascending = False).head(top_n)

    axis_text = change_label(stat) + ' per match'

    sns.set_palette('pastel')
    fig, ax = plt.subplots(1, 1)
    sns.barplot(data = ranked.reset_index(), x = ranked.values, y = ranked.index, ax = ax, orient = 'h')
    # Print each bar's value at its end.
    ax.bar_label(ax.containers[0], fontsize=10)
    ax.set_ylabel('Team')
    ax.set_xlabel(axis_text)
    ax.set_title(axis_text, fontsize = 20)

    # One tick per plotted team, labelled with the team name.
    ax.set_yticks(range(len(ranked.index)), ranked.index)

    plt.show()

In [None]:
# Grid of per-team bar charts of 'possession team', y axis fixed to 0-100.
plot_team_stats('possession team', y_min = 0, y_max = 100)

In [None]:
# Ten teams with the highest mean 'possession team'.
plot_avg_stats(stat = 'possession team', top_n = 10)

In [None]:
# Per-team panels pairing the team's goals with the opponent's goals, y axis fixed to 0-10.
plot_paired_team_stats(stat1='number of goals team', stat2='number of goals team_opponent', y_min=0, y_max=10)

In [None]:
# Ten teams with the highest mean 'number of goals team'.
plot_avg_stats(stat = 'number of goals team', top_n = 10)

In [None]:
# Ten teams with the highest mean 'number of goals team_opponent' (the opponent-side goals column).
plot_avg_stats(stat = 'number of goals team_opponent', top_n = 10)

In [None]:
# Ten teams with the highest mean 'total attempts team'.
plot_avg_stats(stat = 'total attempts team', top_n = 10)

In [None]:
# Ten teams with the highest mean 'on target attempts team'.
plot_avg_stats(stat = 'on target attempts team', top_n = 10)

In [None]:
# Ten teams with the highest mean 'penalties scored team'.
plot_avg_stats(stat = 'penalties scored team', top_n = 10)

In [None]:
# Ten teams with the highest mean 'defensive pressures applied team'.
plot_avg_stats(stat ='defensive pressures applied team', top_n = 10)

In [None]:
# Ten teams with the highest mean 'fouls against team'.
plot_avg_stats('fouls against team', top_n = 10)

In [None]:
# Ten teams with the highest mean 'yellow cards team'.
plot_avg_stats('yellow cards team', top_n = 10)

In [None]:
# Stack every team's frame into one long frame with a single concat call;
# concatenating inside a loop re-copies the accumulated rows on every
# iteration. Original per-team index values are kept (no ignore_index).
# pd.concat raises on an empty list, so fall back to an empty frame.
final_df = (
    pd.concat(list(country_collection.values()), axis = 0)
    if country_collection
    else pd.DataFrame()
)
final_df.head()