# Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Loading Data

In [None]:
# Read the EPL 2020/21 CSV into a DataFrame. The path points at the Kaggle
# input mount, so outside Kaggle it has to be adjusted to the local copy.
csv_path = "/kaggle/input/english-premier-league202021/EPL_20_21.csv"
EPL_df = pd.read_csv(csv_path)

# Data Exploration

In [None]:
# Peek at the first five rows to see which columns the CSV provides.
EPL_df.head(5)

In [None]:
# Print each column's dtype and non-null count (useful for spotting missing data).
EPL_df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
EPL_df.describe()

# Minutes per Match & Goals per Match

In [None]:
# Average minutes played per appearance, stored as a whole number
# (the integer cast drops the fractional part).
# NOTE(review): the cast fails on non-finite values, so a row with
# Matches == 0 would raise here — confirm the dataset has none.
minutes_ratio = EPL_df["Mins"].div(EPL_df["Matches"])
EPL_df["mins_per_match"] = minutes_ratio.astype(int)

In [None]:
# Goals scored per appearance, kept as a float so fractions are preserved.
goals_ratio = EPL_df["Goals"].div(EPL_df["Matches"])
EPL_df["goals_per_match"] = goals_ratio.astype(float)

In [None]:
# Show the first ten rows to check the two derived per-match columns.
EPL_df.head(10)

# Total Goals of the Season

In [None]:
# Season-wide tally: add up every row of the "Goals" column.
goals_column = EPL_df["Goals"]
total_goals = goals_column.sum()
print(total_goals)

# Total Penalty Goals of the Season

In [None]:
# Sum of the "Penalty_Goals" column across all rows.
penalty_goals_column = EPL_df["Penalty_Goals"]
total_penalty_goals = penalty_goals_column.sum()
print(total_penalty_goals)

# Total Penalty Attempts

In [None]:
# Sum of the "Penalty_Attempted" column across all rows.
penalty_attempts_column = EPL_df["Penalty_Attempted"]
total_penalty_attempts = penalty_attempts_column.sum()
print(total_penalty_attempts)

# Plot the Percentage of Scored vs. Missed Penalties

In [None]:
# Penalties that were attempted but did not end up as a goal.
not_scored_penalties = total_penalty_attempts - total_penalty_goals

# Wedge sizes and their labels, listed in the same order.
labels = ["missed", "scored"]
penalty_data = [not_scored_penalties, total_penalty_goals]

# autopct prints each wedge's share as a whole-number percentage.
plt.pie(x=penalty_data, labels=labels, autopct='%.0f%%')
plt.show()

# List of "FW" Players

In [None]:
# Rows whose "Position" value is exactly "FW".
# NOTE(review): this is an exact string match; if the column can hold
# combined positions, those rows are not selected — confirm this is intended.
EPL_df.loc[EPL_df["Position"].eq("FW")]

# Number of Different Nationalities among EPL Players

In [None]:
# Count the distinct values in "Nationality" via the size of the unique array.
EPL_df["Nationality"].unique().size

In [None]:
# Same count as the previous cell, read from the first (only) axis of the unique array.
EPL_df["Nationality"].unique().shape[0]

# Which Country has the most Players

In [None]:
# Number of rows (players) per nationality, largest group first.
players_per_nation = EPL_df.groupby("Nationality").size()
Nations = players_per_nation.sort_values(ascending=False)

# Display the ten nationalities with the most players.
Nations.iloc[:10]

# Plot the Top 10 Nations with the Most Players

In [None]:
# Vertical bar chart of the ten largest nationality groups
# (Nations is already sorted in descending order).
top_ten_nations = Nations.head(10)
top_ten_nations.plot.bar()
plt.show()

# Number of Players per Team (Largest Squads First)

In [None]:
# Players per club. The descending sort is spelled out because the plots
# below take head()/tail() of this Series to get the largest/smallest squads.
Clubs = EPL_df["Club"].value_counts(sort=True, ascending=False)
Clubs

# Plot the 5 Teams with the Most Players

In [None]:
# Horizontal bar chart of the first five entries of Clubs (the largest counts).
largest_squads = Clubs.head(5)
largest_squads.plot.barh()
plt.show()

# Plot the 5 Teams with the Fewest Players

In [None]:
# Horizontal bar chart of the last five entries of Clubs (the smallest counts).
smallest_squads = Clubs.tail(5)
smallest_squads.plot.barh()
plt.show()

# Group Players based on their Age

In [None]:
# Split players into four age buckets. Every upper bound is inclusive, so
# "Under_20" also contains players aged exactly 20, "btwn_20_25" covers
# ages 21 through 25, and so on.
ages = EPL_df["Age"]
Under_20 = EPL_df[ages.le(20)]
btwn_20_25 = EPL_df[ages.gt(20) & ages.le(25)]
btwn_25_30 = EPL_df[ages.gt(25) & ages.le(30)]
Above_30 = EPL_df[ages.gt(30)]

# Display the oldest bucket.
Above_30

# Plot Age Groups

In [None]:
# Size of each age bucket, measured as the number of non-null "Name" entries.
# The bucket order must match the label order below.
age_buckets = (Under_20, btwn_20_25, btwn_25_30, Above_30)
Age_Groups = np.array([bucket["Name"].count() for bucket in age_buckets])
labels = ['Under_20', 'btwn_20_25', 'btwn_25_30', 'Above_30']

# autopct prints each wedge's share as a whole-number percentage.
plt.pie(x=Age_Groups, labels=labels, autopct='%.0f%%')
plt.show()

# Players above 30 in Arsenal

In [None]:
# Narrow the over-30 bucket down to rows whose "Club" is exactly 'Arsenal'.
Above_30.loc[Above_30['Club'].eq('Arsenal')]

# Age Distribution of Players in each Club

In [None]:
# One box per club showing how player ages are spread within that squad.
# Hint: call plt.figure(figsize=(20, 20)) first if a larger canvas is needed.
sns.boxplot(x='Club', y='Age', data=EPL_df)

# Club names are long, so rotate the tick labels to keep them readable.
plt.xticks(rotation=90)
plt.show()