# EDA & Visualization of FIFA 21 dataset

FIFA is a series of association football simulation video games developed and released annually by Electronic Arts under the EA Sports label. This dataset comes from the FIFA 21 release.
This project aims to show how the players in the FIFA 21 database are distributed by nationality, club, position, age, etc.

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file
# found in it, one path per line.
input_file_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Remove pandas' display limits so that every column and every row of a frame
# is rendered instead of being truncated with "...".
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [3]:
# Load the player dataset from the Kaggle input directory into `fifa`,
# then preview its first five rows.
FIFA21_CSV_PATH = '../input/fifa-21-complete-player-dataset/fifa21_male2.csv'
fifa = pd.read_csv(FIFA21_CSV_PATH)
fifa.head(5)

In [4]:
# Print a concise summary of the frame: column names, non-null counts and dtypes.
fifa.info()

In [5]:
# Show descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
fifa.describe()

In [6]:
# Show the dtype pandas inferred for each column.
fifa.dtypes

In [7]:
# Count the missing values in each column.
fifa.isna().sum()

In [8]:
# Impute missing values in the skill-rating columns below with each column's mean.
#
# The filled columns are assigned back to `fifa` rather than calling
# `fifa[col].replace({np.NaN: ...}, inplace=True)` because:
#   * the `np.NaN` alias was removed in NumPy 2.0 (only `np.nan` remains), and
#   * an in-place method on `fifa[col]` is a chained operation on a temporary
#     Series, which does not update `fifa` once pandas' Copy-on-Write
#     behaviour is active.
skill_cols_to_impute = [
    'Volleys', 'Curve', 'Agility', 'Balance', 'Jumping',
    'Interceptions', 'Positioning', 'Vision', 'Composure', 'Sliding Tackle',
]
# `mean()` ignores NaN; passing the resulting Series to `fillna` fills each
# column with its own mean.
fifa[skill_cols_to_impute] = fifa[skill_cols_to_impute].fillna(
    fifa[skill_cols_to_impute].mean()
)

# Nationwise distribution of players in FIFA

In [9]:
# Count players per nationality and plot the 20 nations with the most players.
#
# The built-in `count()` aggregation replaces `groupby(...).apply(lambda ...)`:
# it yields the same per-group count of non-null IDs without calling a Python
# function per group, and avoids the pandas >= 2.2 deprecation warning about
# `apply` operating on the grouping column.
nat_cnt = (
    fifa.groupby('Nationality')['ID']
    .count()
    .reset_index(name='Counts')
    .sort_values(by='Counts', ascending=False)
)
top_20_nat_cnt = nat_cnt.head(20)
fig = px.bar(top_20_nat_cnt, x='Nationality', y='Counts',
             color='Counts', title='Nationwise Distribution of players in FIFA')
fig.show()

# Nations with the most players and the highest average OVA

In [10]:
# Compare nations by average overall rating (OVA) and by player count,
# restricted to nations with at least 200 players.
#
# Built-in aggregations on a single groupby replace the two
# `groupby(...).apply(lambda ...)` calls (no per-group Python call, no
# pandas >= 2.2 deprecation warning about `apply` touching the grouping column).
# NOTE: `mean()` skips NaN, so it matches `np.average` whenever 'OVA' has no
# missing values.
nat_groups = fifa.groupby('Nationality')
nat_best_avg = nat_groups['OVA'].mean().reset_index(name='Overall Ratings')
nat_best_cnt = nat_groups['ID'].count().reset_index(name='Player Count')

# Both frames are keyed by 'Nationality', so a plain `on=` join is sufficient.
nat_best = nat_best_avg.merge(nat_best_cnt, how='inner', on='Nationality')

# Keep only well-represented nations, best-rated first (ties: more players first).
nat_best = nat_best[nat_best['Player Count'] >= 200].sort_values(
    by=['Overall Ratings', 'Player Count'], ascending=[False, False]
)

# Last expression of the cell: the notebook renders the returned figure.
px.scatter(nat_best, x='Overall Ratings', y='Player Count', color='Player Count',
           size='Overall Ratings', hover_data=['Nationality'],
           title='Nationwise player count and overall average')


# Which clubs have the most players, and which have the highest average OVA

In [11]:
# Count players per club and plot the counts, largest clubs first.
#
# The built-in `count()` aggregation replaces `groupby(...).apply(lambda ...)`:
# same per-club count of non-null IDs, no per-group Python call, and no
# pandas >= 2.2 deprecation warning about `apply` operating on the grouping column.
club_cnt = (
    fifa.groupby('Club')['ID']
    .count()
    .reset_index(name='Club Player Count')
    .sort_values(by='Club Player Count', ascending=False)
)
fig = px.scatter(club_cnt, x='Club', y='Club Player Count',
                 color='Club Player Count', title='Clubwise Player Count')
fig.show()

## Clubs with the most players and the highest average OVA

In [12]:
# Compare clubs by average overall rating (OVA) and by player count,
# restricted to clubs with more than 25 players.
#
# Built-in aggregations on a single groupby replace the two
# `groupby(...).apply(lambda ...)` calls (no per-group Python call, no
# pandas >= 2.2 deprecation warning about `apply` touching the grouping column).
# NOTE: `mean()` skips NaN, so it matches `np.average` whenever 'OVA' has no
# missing values.
club_groups = fifa.groupby('Club')
club_best_avg = club_groups['OVA'].mean().reset_index(name='Ratings')
club_best_cnt = club_groups['ID'].count().reset_index(name='Count')

# Both frames are keyed by 'Club', so a plain `on=` join is sufficient.
club_best = club_best_avg.merge(club_best_cnt, how='inner', on='Club')

# Keep only the larger squads, best-rated first (ties: more players first).
club_best = club_best[club_best['Count'] > 25].sort_values(
    by=['Ratings', 'Count'], ascending=[False, False]
)

# Last expression of the cell: the notebook renders the returned figure.
px.scatter(club_best, x='Ratings', y='Count', color='Count', size='Ratings',
           hover_data=['Club'],
           title='Clubwise distribution of players with high OVA')

# Position wise player distribution

In [13]:
# Count players per value of the 'BP' column (player position) and plot the
# counts, most common position first.
#
# The built-in `count()` aggregation replaces `groupby(...).apply(lambda ...)`:
# same per-group count of non-null IDs, no per-group Python call, and no
# pandas >= 2.2 deprecation warning about `apply` operating on the grouping column.
pos_cnt = (
    fifa.groupby('BP')['ID']
    .count()
    .reset_index(name='Count')
    .sort_values(by='Count', ascending=False)
)
fig = px.bar(pos_cnt, x='BP', y='Count', color='Count', title='Position wise player count')
fig.show()

# Player value vs. wage comparison

In [14]:
def _split_euro_amounts(raw):
    """Split strings such as '€4K' or '€1.5M' into a number and a multiplier.

    Args:
        raw: Series of strings made of an optional '€' prefix, a number and
            an optional 'K' (thousand) or 'M' (million) suffix.

    Returns:
        A ``(number, factor)`` pair: the numeric part as a numeric Series and
        an integer array holding 1000, 1000000 or 1 according to the suffix.

    Raises:
        ValueError: if what remains after stripping '€', 'K' and 'M' from the
            ends of a value is not a number (raised by ``pd.to_numeric``).
    """
    suffix = raw.str[-1]
    factor = np.where(suffix == 'K', 1000,
                      np.where(suffix == 'M', 1000000, 1))
    number = pd.to_numeric(raw.str.strip('€KM'))
    return number, factor


# Work on an explicit copy: adding columns to a bare slice of `fifa` triggers
# SettingWithCopyWarning and leaves it ambiguous which frame is being modified.
wage_val = fifa[['Name', 'Club', 'Nationality', 'Wage', 'Value', 'BP']].copy()

# Wage: keep the multiplier ('FactW'), overwrite 'Wage' with its numeric part
# and store the full amount in 'Wage in €'.
wage_number, wage_factor = _split_euro_amounts(wage_val['Wage'])
wage_val['FactW'] = wage_factor
wage_val['Wage'] = wage_number
wage_val['Wage in €'] = wage_val['Wage'] * wage_val['FactW']

# Value: same treatment, with the multiplier kept in 'FactV'.
value_number, value_factor = _split_euro_amounts(wage_val['Value'])
wage_val['FactV'] = value_factor
wage_val['Value'] = value_number
wage_val['Value in €'] = wage_val['Value'] * wage_val['FactV']

fig = px.scatter(wage_val, x='Value in €', y='Wage in €', color='Value in €',
                 size='Wage in €', hover_data=['Name', 'Nationality', 'Club', 'BP'],
                 title='Player value vs Wage comparision')
fig.show()

# Age-wise player distribution

In [15]:
# Count players per age and plot the distribution.
#
# The built-in `count()` aggregation replaces `groupby(...).apply(lambda ...)`:
# same per-age count of non-null IDs, no per-group Python call, and no
# pandas >= 2.2 deprecation warning about `apply` operating on the grouping column.
age_cnt = fifa.groupby('Age')['ID'].count().reset_index(name='Count')
fig = px.bar(age_cnt, x='Age', y='Count', color='Count', title='Agewise Player Distribution')
fig.show()