In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# NOTE(review): the CSV loaded further down is read from a relative 'data/' path,
# not from /content/drive -- confirm whether this mount is still required.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
# Load the FIFA 20 sample; later cells read its 'Country', 'Name' and 'Overall' columns.
fifa20_df = pd.read_csv('data/fifa20_sample.csv')

## Get Top 10 Represented Countries

In [None]:
# Players per country: count the non-null 'Name' entries within each 'Country' group.
cnt_df = (
    fifa20_df[['Country', 'Name']]
    .groupby('Country')
    .count()
    .rename(columns={'Name': 'Number of Players'})
)

In [None]:
# Order the countries from most to fewest players.
cnt_df_sorted = cnt_df.sort_values(
    by='Number of Players',
    ascending=False,
)

In [None]:
# Keep only the ten best-represented countries (the first ten rows of the sorted frame).
top_10_df = cnt_df_sorted.iloc[:10]

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Bar chart of the ten countries with the most players in the dataset.
fig, ax = plt.subplots(figsize=(10,8))
ax.bar(top_10_df.index, top_10_df['Number of Players'], color='green')
ax.set_ylabel('Number of Players by Country')
# Rotate the existing country tick labels instead of re-setting them with
# set_xticklabels, which expects the tick positions to be fixed beforehand.
ax.tick_params(axis='x', labelrotation=45)
# Save BEFORE plt.show(): with inline/Colab backends the figure is released once
# it has been shown, so a later plt.savefig() would write an empty canvas.
# Saving through `fig` also ties the output to this specific figure.
# NOTE(review): 'mty_path' looks like a typo for the intended file name -- confirm.
fig.savefig('mty_path')
plt.show()

In [None]:
# Preview the first five rows of the raw data.
fifa20_df.head(n=5)

## Histogram
### Can we infer some distribution in the data?

In [None]:
def normalize_data(fifa20_df, column='Overall'):
    """Return the z-scores of one numeric column as a single-column DataFrame.

    Every value has the column mean subtracted and is then divided by the
    column's standard deviation as computed by ``Series.std()`` (the sample
    standard deviation, ddof=1).  Rows whose result is NaN are dropped; the
    remaining rows keep their original index labels.

    Parameters
    ----------
    fifa20_df : pandas.DataFrame
        Frame that contains ``column``.
    column : str, default 'Overall'
        Name of the numeric column to standardize.  The default reproduces
        the original behaviour of always normalizing 'Overall'.

    Returns
    -------
    pandas.DataFrame
        A frame with a single column named ``column`` holding the z-scores.
        It is empty when the standard deviation is zero or undefined (for
        example, fewer than two non-missing values), because every result is
        NaN in that case and is filtered out.

    Raises
    ------
    KeyError
        If ``column`` is not present in ``fifa20_df``.
    """
    values = fifa20_df[column]
    z_scores = (values - values.mean()) / values.std()
    # Missing inputs (and 0/0 from a constant column) surface as NaN; drop them.
    return z_scores[z_scores.notna()].to_frame()

In [None]:
# Z-score the 'Overall' column of the full dataset; normalize_data drops rows whose result is NaN.
normalized_df = normalize_data(fifa20_df)

In [None]:
# Histogram of the z-scored 'Overall' ratings using matplotlib's default binning.
fig, ax = plt.subplots()
ax.hist(normalized_df['Overall'], label='overall skill')
# The plotted values are the normalized 'Overall' ratings, not wages,
# so the axis label names that quantity.
ax.set_xlabel('Normalized Overall Skill')
ax.set_ylabel('Number of observations')
# Fixed symmetric window of +/-5 standard deviations around the mean.
ax.set_xlim([-5,5])
ax.legend()
plt.show()

In [None]:
# Same histogram of the z-scored 'Overall' ratings, now with 70 bins for finer resolution.
fig, ax = plt.subplots()
ax.hist(normalized_df['Overall'], label='overall skill', bins=70)
# The plotted values are the normalized 'Overall' ratings, not wages,
# so the axis label names that quantity.
ax.set_xlabel('Normalized Overall Skill')
ax.set_ylabel('Number of observations')
# Fixed symmetric window of +/-5 standard deviations around the mean.
ax.set_xlim([-5,5])
ax.legend()
plt.show()

In [None]:
# Z-score the 'Overall' rating separately for each country of interest.
# Each subset is normalized against its own mean and standard deviation.
england, italy, spain = (
    normalize_data(
        fifa20_df[fifa20_df["Country"] == country_name].reset_index(drop=True)
    )
    for country_name in ('England', 'Italy', 'Spain')
)

In [None]:
# Overlay the per-country histograms of z-scored 'Overall' ratings on one axes.
fig, ax = plt.subplots()
ax.hist(england['Overall'], label='England - Overall', bins=70)
ax.hist(italy['Overall'], label='Italy - Overall', bins=70)
ax.hist(spain['Overall'], label='Spain - Overall', bins=70)
# The plotted values are the normalized 'Overall' ratings, not wages,
# so the axis label names that quantity.
ax.set_xlabel('Normalized Overall Skill')
ax.set_ylabel('Number of observations')
# Fixed symmetric window of +/-5 standard deviations around each country's mean.
ax.set_xlim([-5,5])
ax.legend()
plt.show()

In [None]:
# Same per-country comparison drawn as unfilled 'step' outlines so the
# three distributions do not cover one another.
fig, ax = plt.subplots()
ax.hist(england['Overall'], label='England - Overall', bins=70, histtype='step', color='purple')
ax.hist(italy['Overall'], label='Italy - Overall', bins=70, histtype='step')
ax.hist(spain['Overall'], label='Spain - Overall', bins=70, histtype='step')
# The plotted values are the normalized 'Overall' ratings, not wages,
# so the axis label names that quantity.
ax.set_xlabel('Normalized Overall Skill')
ax.set_ylabel('Number of observations')
# Fixed symmetric window of +/-5 standard deviations around each country's mean.
ax.set_xlim([-5,5])
ax.legend()
plt.show()

## Box Plot
### The statistical way to visualize data

In [None]:
fig, ax = plt.subplots()
# Box plot of the z-scored 'Overall' ratings, one box per country.
# Keeping labels and series together in one mapping keeps their order in sync.
country_scores = {
    'England': england['Overall'],
    'Italy': italy['Overall'],
    'Spain': spain['Overall'],
}
bp = ax.boxplot(
    list(country_scores.values()),
    showfliers=True,
    patch_artist=True,
)
ax.set_xticklabels(list(country_scores.keys()))
ax.set_ylabel('Overall Skill')
plt.show()

## End