# ISL Defender Comparative Analysis

## Import all required libraries

In [1]:
# Data Manipulation libraries:
import io
import pandas as pd
from copy import deepcopy
from google.colab import files
from sklearn.preprocessing import MinMaxScaler

# Plotting libraries:
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## Import required data

The cells below upload and read a CSV file stored on your local system.

In [2]:
# Upload the CSV from the local machine via google.colab's files.upload();
# the returned mapping is indexed by file name when the data is read in the next cell.
uploaded = files.upload()

Saving isl_player_final.csv to isl_player_final (2).csv


In [3]:
# Read the uploaded CSV into a dataframe.
# Colab can save a re-uploaded file under a different name (e.g.
# "isl_player_final (2).csv"), so the expected key may be missing from `uploaded`.
# In that case fall back to the first (normally the only) uploaded file.
csv_name = "isl_player_final.csv"
if csv_name not in uploaded:
    csv_name = next(iter(uploaded))
isl_df = pd.read_csv(io.BytesIO(uploaded[csv_name]))
isl_df.head()

Unnamed: 0,tour_id,tour_name,id,jersey_no,name,short_name,position_id,position,position_short,team_id,...,goaltenders.catches,goaltenders.punches,goaltenders.penalty_kicks.shots_faced,goaltenders.penalty_kicks.goals_allowed,goaltenders.penalty_kicks.saves,country_id,country_name,dob,player_foot,height
0,148,ISL6,1514,3,Asamoah Gyan,Asamoah Gyan,2,Forward,FWD,504,...,0,0,0,0,0,43,Ghana,,,
1,148,ISL6,2475,26,Deshorn Dwayne Brown,Deshorn Brown,2,Forward,FWD,656,...,0,0,0,0,0,56,Jamaica,12/22/1990,Right,187.0
2,148,ISL6,2900,9,Amine Chermiti,Amine Chermiti,2,Forward,FWD,506,...,0,0,0,0,0,106,Tunisia,12/26/1987,left,177.0
3,148,ISL6,3753,7,Francisco Medina Luna,Piti,3,Midfielder,MF,1159,...,0,0,0,0,0,19,Spain,5/26/1981,left,179.0
4,148,ISL6,3809,4,Rafael Lopez Gomez,Rafael Lopez Gomez,1,Defender,DEF,1536,...,0,0,0,0,0,19,Spain,4/9/1985,right,183.0


## Data Preparation

Explore and prepare your data for the analysis to be performed. E.g., we are going to analyse defenders, so we need to filter out all the data from positions other than Defender.

In [4]:
# List each position_id once, next to its human-readable position label:
isl_df[["position_id", "position"]].drop_duplicates(subset=["position_id"])

Unnamed: 0,position_id,position
0,2,Forward
3,3,Midfielder
4,1,Defender
9,4,Goalkeeper


### Slicing and Storing for only Defender data

Pro-tip: It is an expert practice to always use [deepcopy](https://www.geeksforgeeks.org/copy-python-deep-copy-shallow-copy/) to save sliced dataframes in a new variable

In [5]:
# Preview the slice we are about to keep: defenders (position_id == 1) from
# India (country_id == 1) whose row has more than 1000 minutes played.
isl_df[(isl_df["position_id"] == 1) & (isl_df["minutes_played"] > 1000) & (isl_df["country_id"] == 1)]


Unnamed: 0,tour_id,tour_name,id,jersey_no,name,short_name,position_id,position,position_short,team_id,...,goaltenders.catches,goaltenders.punches,goaltenders.penalty_kicks.shots_faced,goaltenders.penalty_kicks.goals_allowed,goaltenders.penalty_kicks.saves,country_id,country_name,dob,player_foot,height
26,148,ISL6,10256,21,Narayan Das,Narayan Das,1,Defender,DEF,1499,...,0,0,0,0,0,1,India,9/25/1993,Left,175.0
28,148,ISL6,10624,33,Prabir Das,Prabir Das,1,Defender,DEF,499,...,0,0,0,0,0,1,India,12/20/1993,Right,172.0
44,148,ISL6,10694,10,Harmanjot Singh Khabra,Harmanjot Khabra,1,Defender,DEF,656,...,0,0,0,0,0,1,India,3/18/1988,right,181.0
47,148,ISL6,10712,20,Pritam Kotal,Pritam Kotal,1,Defender,DEF,499,...,0,0,0,0,0,1,India,8/9/1993,Right,179.0
50,148,ISL6,10770,5,Raju Eknath Gaikwad,Raju Gaikwad,1,Defender,DEF,498,...,0,0,0,0,0,1,India,9/25/1990,Right,180.0
67,148,ISL6,16577,13,Gurtej Singh,Gurtej Singh,1,Defender,DEF,1536,...,0,0,0,0,0,1,India,5/2/1990,right,181.0
76,148,ISL6,19129,12,Keisham Reagan Singh,Reagan Singh,1,Defender,DEF,504,...,0,0,0,0,0,1,India,4/1/1991,Right,173.0
91,148,ISL6,21201,2,Rahul Shankar Bheke,Rahul Bheke,1,Defender,DEF,656,...,0,0,0,0,0,1,India,12/6/1990,right,179.0
107,148,ISL6,26616,2,Pratik Prabhakar Chaudhari,Pratik Chaudhari,1,Defender,DEF,506,...,0,0,0,0,0,1,India,10/4/1989,right,186.0
111,148,ISL6,26713,18,Jerry Lalrinzuala,Jerry Lalrinzuala,1,Defender,DEF,505,...,0,0,0,0,0,1,India,7/13/1998,Left,0.0


In [6]:
# Store an independent copy of the filtered rows for the rest of the analysis:
indian_defenders = deepcopy(
    isl_df.query("position_id == 1 and minutes_played > 1000 and country_id == 1")
)

In [7]:
# Renumber the rows 0..n-1, discarding the index labels inherited from isl_df:
indian_defenders = indian_defenders.reset_index(drop=True)

### Exploration of defender data

#### Meta-Level information

In [8]:
# Always start with a structural overview of the dataframe:
# row count, column names, non-null counts and dtypes.
indian_defenders.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43 entries, 0 to 42
Data columns (total 93 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   tour_id                                  43 non-null     int64  
 1   tour_name                                43 non-null     object 
 2   id                                       43 non-null     int64  
 3   jersey_no                                43 non-null     int64  
 4   name                                     43 non-null     object 
 5   short_name                               43 non-null     object 
 6   position_id                              43 non-null     int64  
 7   position                                 43 non-null     object 
 8   position_short                           43 non-null     object 
 9   team_id                                  43 non-null     int64  
 10  team_name                                43 non-null

#### Basic Statistical exploration

In [9]:
# After the overview, look at basic statistics (count, mean, std, min, quartiles, max)
# for the numerical columns, rounded to one decimal place:
indian_defenders.describe().round(1)

Unnamed: 0,tour_id,id,jersey_no,position_id,team_id,is_started,is_onbench,is_goalkeeper,is_substitute,minutes_played,...,goaltenders.shots_on_goal_faced,goaltenders.goals_allowed,goaltenders.saves,goaltenders.catches,goaltenders.punches,goaltenders.penalty_kicks.shots_faced,goaltenders.penalty_kicks.goals_allowed,goaltenders.penalty_kicks.saves,country_id,height
count,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,...,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,34.0
mean,176.9,28949.3,16.0,1.0,878.1,16.2,3.2,0.0,3.4,1443.7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,131.2
std,27.3,16937.0,11.0,0.0,513.9,3.2,2.4,0.0,3.2,270.7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80.0
min,148.0,3988.0,2.0,1.0,496.0,11.0,0.0,0.0,0.0,1002.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
25%,148.0,10741.0,5.0,1.0,501.5,13.0,1.0,0.0,1.5,1183.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,42.8
50%,202.0,30980.0,15.0,1.0,506.0,17.0,3.0,0.0,3.0,1504.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,175.5
75%,202.0,36316.0,21.0,1.0,1499.0,18.0,5.0,0.0,4.5,1620.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,179.0
max,202.0,68806.0,44.0,1.0,1874.0,22.0,7.0,0.0,13.0,1980.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,187.0


#### Display all columns
Pro-tip: If not all the columns of a dataframe are displayed, use the pandas `set_option` function to display as many columns as you wish.

In [10]:

# Raise the column display limit to 85 so the wide describe() summary of
# indian_defenders is shown in full instead of being elided with "...":
pd.set_option("display.max_columns", 85)

In [11]:
# Run describe() again now that the display limit is raised, to see every column:
indian_defenders.describe().round(1)

Unnamed: 0,tour_id,id,jersey_no,position_id,team_id,is_started,is_onbench,is_goalkeeper,is_substitute,minutes_played,bonus_points,injury_minutes_played,actual_minutes_played,goaltenders,events.goals,events.own_goals,events.assists,events.key_passes,events.chances_created,events.shots,events.shots_on_target,events.shots_off_target,events.fouls_committed,events.fouls_suffered,events.yellow_cards,events.red_cards,events.is_second_yellow_card,events.offsides,events.corner_kicks,events.crosses,events.free_kicks,events.throw_in,events.punches,events.catches,events.goals_conceded,events.clean_sheet,events.cleansheet,events.fantasy_assists,events.penalties_saved,events.penalties_earned,events.penalties_conceded,events.penalties_missed,events.freekicks_conceded_goal,events.freekicks_earned_goal,events.blocked_shots,events.defensive_blocked_shots,events.penalty_kicks.total,events.penalty_kicks.goals,touches.total,touches.total_passes,touches.good_passes,touches.bad_passes,touches.interceptions,touches.blocks,touches.tackles,touches.successful_tackles,touches.unsuccessful_tackles,touches.clearance,touches.saves,touches.take_on_total,touches.take_on_successful,touches.take_on_unsuccessful,touches.last_man_tackle_successful,touches.last_man_tackle_unsuccessful,touches.successful_passes,touches.interceptions_won,touches.aerial_duel.total,touches.aerial_duel.won,touches.aerial_duel.lost,touches.ground_duel.total,touches.ground_duel.won,touches.ground_duel.lost,goaltenders.shots_faced,goaltenders.shots_on_goal_faced,goaltenders.goals_allowed,goaltenders.saves,goaltenders.catches,goaltenders.punches,goaltenders.penalty_kicks.shots_faced,goaltenders.penalty_kicks.goals_allowed,goaltenders.penalty_kicks.saves,country_id,height
count,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,43.0,34.0
mean,176.9,28949.3,16.0,1.0,878.1,16.2,3.2,0.0,3.4,1443.7,0.0,84.3,1534.2,0.0,0.2,0.0,0.8,7.9,8.6,5.8,1.8,2.8,18.3,15.2,3.3,0.1,0.1,0.8,2.1,27.2,8.2,127.1,0.0,0.0,18.3,0.0,5.2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,0.0,0.0,0.0,840.2,549.2,372.5,176.7,27.1,25.9,52.0,18.6,33.4,67.0,0.0,31.6,6.6,2.8,0.1,0.0,0.0,0.0,19.6,10.3,9.3,0.3,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,131.2
std,27.3,16937.0,11.0,0.0,513.9,3.2,2.4,0.0,3.2,270.7,0.0,24.0,288.3,0.0,0.4,0.2,1.2,4.2,5.0,4.0,1.8,2.0,7.9,7.6,1.7,0.4,0.3,1.0,7.2,16.9,7.1,64.2,0.0,0.3,5.8,0.0,3.3,1.3,0.0,0.0,0.0,0.0,0.0,0.0,1.6,0.0,0.0,0.0,189.1,133.8,105.7,57.3,9.7,9.6,18.8,7.6,12.7,32.5,0.0,26.7,5.7,2.9,0.3,0.0,0.0,0.0,11.1,7.1,5.2,0.5,0.5,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80.0
min,148.0,3988.0,2.0,1.0,496.0,11.0,0.0,0.0,0.0,1002.0,0.0,45.0,1063.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,500.0,295.0,158.0,64.0,11.0,10.0,17.0,5.0,12.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
25%,148.0,10741.0,5.0,1.0,501.5,13.0,1.0,0.0,1.5,1183.0,0.0,64.0,1261.0,0.0,0.0,0.0,0.0,5.0,5.5,2.0,1.0,1.0,13.0,9.0,2.0,0.0,0.0,0.0,0.0,15.0,3.5,86.0,0.0,0.0,14.5,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,659.5,434.5,293.5,125.0,20.5,18.0,39.0,13.0,25.0,45.5,0.0,7.0,3.5,0.0,0.0,0.0,0.0,0.0,11.0,5.0,5.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,42.8
50%,202.0,30980.0,15.0,1.0,506.0,17.0,3.0,0.0,3.0,1504.0,0.0,83.0,1593.0,0.0,0.0,0.0,1.0,7.0,7.0,5.0,1.0,2.0,18.0,15.0,4.0,0.0,0.0,1.0,0.0,28.0,7.0,129.0,0.0,0.0,18.0,0.0,6.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,837.0,547.0,374.0,179.0,26.0,25.0,48.0,18.0,32.0,59.0,0.0,28.0,5.0,2.0,0.0,0.0,0.0,0.0,17.0,10.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,175.5
75%,202.0,36316.0,21.0,1.0,1499.0,18.0,5.0,0.0,4.5,1620.0,0.0,103.0,1727.5,0.0,0.0,0.0,1.0,10.0,10.5,8.5,2.5,4.0,21.0,19.5,4.0,0.0,0.0,1.0,0.0,39.0,11.0,172.0,0.0,0.0,22.0,0.0,7.5,1.5,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,994.5,655.0,442.0,221.5,32.5,32.0,65.0,24.0,42.0,83.0,0.0,58.5,8.5,4.0,0.0,0.0,0.0,0.0,26.0,13.5,12.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,179.0
max,202.0,68806.0,44.0,1.0,1874.0,22.0,7.0,0.0,13.0,1980.0,0.0,139.0,2119.0,0.0,1.0,1.0,5.0,20.0,26.0,17.0,8.0,8.0,45.0,33.0,7.0,1.0,1.0,4.0,38.0,66.0,34.0,228.0,0.0,2.0,36.0,0.0,12.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,1250.0,827.0,625.0,319.0,55.0,51.0,101.0,38.0,63.0,149.0,0.0,78.0,24.0,10.0,1.0,0.0,0.0,0.0,55.0,30.0,25.0,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,187.0


#### Level the playing Field



In [12]:
# Total minutes per player, summed across all of that player's rows:
defender_mins_df = (
    indian_defenders
    .groupby(["id", "name"], as_index=False)["minutes_played"]
    .sum()
)

In [13]:
# Display the per-player minute totals computed above:
defender_mins_df

Unnamed: 0,id,name,minutes_played
0,3988,Gurjinder Kumar,1519
1,10255,Mandar Rao Dessai,1082
2,10256,Narayan Das,2982
3,10624,Prabir Das,2860
4,10641,Sandesh Jhingan,1892
5,10694,Harmanjot Singh Khabra,1619
6,10710,Ashutosh Mehta,1406
7,10712,Pritam Kotal,3870
8,10770,Raju Eknath Gaikwad,1022
9,16577,Gurtej Singh,1035


In [14]:
# Bar chart of total minutes played per player, with the value written on each bar:
px.bar(
    defender_mins_df,
    x="name",
    y="minutes_played",
    text="minutes_played",
)

In [15]:
# Interceptions scaled to a per-90-minutes rate so players with different playing time are comparable:
indian_defenders["interceptionsPer90"] = (
    indian_defenders["touches.interceptions"] / indian_defenders["minutes_played"] * 90
)

In [16]:
# Remaining defensive metrics as per-90-minutes rates: new column -> raw count column.
for per90_col, raw_col in {
    "blocksPer90": "touches.blocks",
    "tacklesPer90": "touches.tackles",
    "clearancePer90": "touches.clearance",
    "tackles_sucessPer90": "touches.successful_tackles",
}.items():
    indian_defenders[per90_col] = (
        indian_defenders[raw_col] / indian_defenders["minutes_played"] * 90
    )

In [17]:
# Blocks per 90 minutes for every defender row.
# A player can have rows in more than one tournament under the same name; with a
# plain x="name" those rows are stacked into one bar. Colour by tour_name and
# group the bars so each tournament is shown separately.
px.bar(
    indian_defenders,
    x="name",
    y="blocksPer90",
    text="blocksPer90",
    color="tour_name",
    barmode="group",
    title="Blocks per 90 minutes",
).update_traces(texttemplate="%{text:.2f}")  # keep the bar labels to two decimals

In [18]:
# Tackles per 90 minutes for every defender row.
# A player can have rows in more than one tournament under the same name; with a
# plain x="name" those rows are stacked into one bar. Colour by tour_name and
# group the bars so each tournament is shown separately.
px.bar(
    indian_defenders,
    x="name",
    y="tacklesPer90",
    text="tacklesPer90",
    color="tour_name",
    barmode="group",
    title="Tackles per 90 minutes",
).update_traces(texttemplate="%{text:.2f}")  # keep the bar labels to two decimals

In [19]:
# Interceptions per 90 minutes for every defender row.
# A player can have rows in more than one tournament under the same name; with a
# plain x="name" those rows are stacked into one bar. Colour by tour_name and
# group the bars so each tournament is shown separately.
px.bar(
    indian_defenders,
    x="name",
    y="interceptionsPer90",
    text="interceptionsPer90",
    color="tour_name",
    barmode="group",
    title="Interceptions per 90 minutes",
).update_traces(texttemplate="%{text:.2f}")  # keep the bar labels to two decimals

In [20]:
# Clearances per 90 minutes for every defender row.
# A player can have rows in more than one tournament under the same name; with a
# plain x="name" those rows are stacked into one bar. Colour by tour_name and
# group the bars so each tournament is shown separately.
px.bar(
    indian_defenders,
    x="name",
    y="clearancePer90",
    text="clearancePer90",
    color="tour_name",
    barmode="group",
    title="Clearances per 90 minutes",
).update_traces(texttemplate="%{text:.2f}")  # keep the bar labels to two decimals

In [21]:
# Successful tackles per 90 minutes for every defender row.
# A player can have rows in more than one tournament under the same name; with a
# plain x="name" those rows are stacked into one bar. Colour by tour_name and
# group the bars so each tournament is shown separately.
px.bar(
    indian_defenders,
    x="name",
    y="tackles_sucessPer90",
    text="tackles_sucessPer90",
    color="tour_name",
    barmode="group",
    title="Successful tackles per 90 minutes",
).update_traces(texttemplate="%{text:.2f}")  # keep the bar labels to two decimals

In [22]:
# Per-90 metrics that form the axes of each radar chart, in plotting order:
cols_for_radar = [
    "interceptionsPer90",
    "blocksPer90",
    "tacklesPer90",
    "clearancePer90",
    "tackles_sucessPer90",
]


### Normalization of Values

In [23]:
# Min-max scale every radar metric to the 0-1 range (across all rows) so the
# radar axes are comparable. MinMaxScaler comes from the imports cell at the
# top of the notebook, so it is not imported again here.
# NOTE: this overwrites the per-90 columns in place; re-running the bar-chart
# cells after this one would plot the scaled values instead of the raw rates.
scaler = MinMaxScaler()
indian_defenders[cols_for_radar] = scaler.fit_transform(indian_defenders[cols_for_radar])

In [24]:
# Draw one radar chart per row of indian_defenders.
# Each chart uses that row's own normalised values. Summing every row that shares
# the player's id would add a player's tournaments together (radii above 1) and
# draw the same chart once per row, so multi-tournament players would look
# better than single-tournament ones.
for _index, row in indian_defenders.iterrows():
    print(row["name"])
    fig = px.line_polar(
        # iterrows() yields a mixed-type row, so cast the metrics back to floats
        r=row[cols_for_radar].astype(float).tolist(),
        theta=cols_for_radar,
        line_close=True,
        range_r=[0, 1],  # same radial scale on every chart so they can be compared
        # include the tournament so charts of the same player can be told apart
        title=f'{row["name"]} ({row["tour_name"]})',
    )
    fig.update_traces(fill='toself')
    fig.show()

Narayan Das


Prabir Das


Harmanjot Singh Khabra


Pritam Kotal


Raju Eknath Gaikwad


Gurtej Singh


Keisham Reagan Singh


Rahul Shankar Bheke


Pratik Prabhakar Chaudhari


Jerry Lalrinzuala


Nishu Kumar Tashni


Seriton Benny Fernandes


Subhasish Prodyut Bose


Sarthak Golui


Rakesh Pradhan


Asish Rai


Sumit Rathi


Shubham Sarangi


Mohamad Rakip


Jessel Allan Carneiro


Gurjinder Kumar


Mandar Rao Dessai


Narayan Das


Prabir Das


Sandesh Jhingan


Ashutosh Mehta


Pritam Kotal


Keisham Reagan Singh


Rahul Shankar Bheke


Chinglensana Singh Konsham


Jerry Lalrinzuala


Amey Ganesh Ranawade


Seriton Benny Fernandes


Subhasish Prodyut Bose


Laldinliana Renthlei


Ricky Lallawmawma


Deepak Tangri


Gaurav Bora


Asish Rai


Saviour Gama


Soraisham Sandeep Singh


Akash Mishra


Jessel Allan Carneiro


## Summary

We analysed several metrics to find the best defender. After feature selection, tackles, interceptions, clearances and blocks were chosen as the most important parameters for judging a defender. Based on these statistics, Asish Rai is the front runner, followed by Subhasish Prodyut Bose and Pritam Kotal.