In [13]:
# importing necessary libraries.
import pandas as pd
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot  as plt
import seaborn as sns

In [14]:
# Load the T20I bowling statistics from CSV and preview the first five rows.
bowler_t20 = pd.read_csv(filepath_or_buffer='bowler data t20i.csv')
bowler_t20.head(n=5)

Unnamed: 0,Player Name,Country,Time Period,Matches,Played,Overs,Maiden Overs,Runs,Wickets,Best Figure,Bowling Average,Economy Rate,Strike Rate,Four Wickets,Five Wickets
0,B Kumar,India,2016-2022,6,6,23.0,3,123,13,5/4,9.46,5.34,10.6,1,1
1,Amjad Javed,UAE,2016-2016,7,7,23.0,0,169,12,3/25,14.08,7.34,11.5,0,0
2,Al-Amin Hossain,Bangladesh,2016-2016,5,5,16.5,0,134,11,3/25,12.18,7.96,9.1,0,0
3,Mohammad Naveed,UAE,2016-2016,7,7,27.4,2,145,11,3/14,13.18,5.24,15.0,0,0
4,Rashid Khan,Afghanistan,2016-2022,8,8,31.0,0,202,11,3/22,18.36,6.51,16.9,0,0


In [15]:
# Columns kept for the analysis: the player identifier plus the three
# bowling metrics used for clustering.
features = ['Player Name', 'Wickets', 'Bowling Average', 'Economy Rate']

# Discard rows with a missing value in any kept column, then narrow the frame
# to those columns (copied so later edits do not touch the loaded frame).
bowler_t20 = bowler_t20.dropna(subset=features)[features].copy()
bowler_t20

Unnamed: 0,Player Name,Wickets,Bowling Average,Economy Rate
0,B Kumar,13,9.46,5.34
1,Amjad Javed,12,14.08,7.34
2,Al-Amin Hossain,11,12.18,7.96
3,Mohammad Naveed,11,13.18,5.24
4,Rashid Khan,11,18.36,6.51
5,HH Pandya,11,18.81,7.01
6,PWH de Silva,9,18.88,7.39
7,Shadab Khan,8,14.12,6.05
8,Haris Rauf,8,19.12,7.65
9,Mohammad Nawaz,8,21.75,7.05


In [16]:
# Standardise the numeric features with StandardScaler so that no single
# metric dominates the distance computation in k-means.
scaler = StandardScaler()
features = ['Wickets','Bowling Average','Economy Rate']

# fit_transform returns a bare NumPy array; rebuild the frame with the source
# frame's index so each scaled row keeps the same label as its player row in
# bowler_t20 (the labels are no longer 0..n-1 if dropna removed any rows).
scaled_data = pd.DataFrame(
    scaler.fit_transform(bowler_t20[features]),
    columns=features,
    index=bowler_t20.index,
)
scaled_data

Unnamed: 0,Wickets,Bowling Average,Economy Rate
0,2.62608,-1.372356,-1.28023
1,2.221308,-0.830858,0.131725
2,1.816537,-1.053552,0.569431
3,1.816537,-0.936345,-1.350828
4,1.816537,-0.32921,-0.454236
5,1.816537,-0.276467,-0.101248
6,1.006993,-0.268262,0.167024
7,0.602221,-0.82617,-0.778986
8,0.602221,-0.240132,0.350578
9,0.602221,0.068123,-0.073008


In [17]:
def elbow_plot(min_k, max_k, k_max_iter, data=None, random_state=None):
    """Plot k-means inertia against k to help choose the number of clusters.

    One KMeans model (n_init=50) is fitted for every k in the inclusive range
    [min_k, max_k]; each model's ``inertia_`` is plotted against k.

    Parameters
    ----------
    min_k, max_k : int
        Smallest and largest number of clusters to try (both inclusive).
    k_max_iter : int
        Forwarded to KMeans as ``max_iter``.
    data : array-like, optional
        Feature matrix to cluster. When omitted, the notebook-level
        ``scaled_data[features]`` is used, so bookkeeping columns added to
        ``scaled_data`` by later cells ('Cluster', 'Player Name') are never
        fed to the model.
    random_state : int, optional
        Forwarded to KMeans as ``random_state``. The default None leaves the
        initialisation unseeded.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    if data is None:
        # Restrict to the feature columns: the global frame may already hold
        # cluster labels and player names by the time this is called.
        data = scaled_data[features]

    k_range = range(min_k, max_k + 1)
    sum_squared_distances = []
    for k in k_range:
        km = KMeans(n_clusters=k, max_iter=k_max_iter, n_init=50,
                    random_state=random_state)
        km.fit(data)
        sum_squared_distances.append(km.inertia_)

    # Plot the score for each value of k
    plt.plot(k_range, sum_squared_distances, 'bx-')
    plt.xlabel('k')
    plt.ylabel('Sum of squared distances')
    plt.title('Elbow Method For Optimal k')
    plt.show()

In [18]:
# Uncomment to draw the elbow plot for k = 2..12 with max_iter=15:
# elbow_plot(2,12,15)

In [19]:
# Cluster the players with k-means (3 clusters, 50 initialisations).
# The model is fitted on the feature columns only: fitting on the whole frame
# would, on a re-run of this cell, feed the previous 'Cluster' labels back in
# as a feature, or fail once the 'Player Name' strings have been added.
# A fixed random_state makes the runs deterministic, so the cluster numbering
# is the same every time the notebook is executed (the numbers may differ
# from outputs saved by earlier unseeded runs).
km = KMeans(n_clusters=3, n_init=50, random_state=42)
scaled_data['Cluster'] = km.fit_predict(scaled_data[features]) # assigning the cluster number for each datapoint in the dataframe.
scaled_data



Unnamed: 0,Wickets,Bowling Average,Economy Rate,Cluster
0,2.62608,-1.372356,-1.28023,2
1,2.221308,-0.830858,0.131725,2
2,1.816537,-1.053552,0.569431,2
3,1.816537,-0.936345,-1.350828,2
4,1.816537,-0.32921,-0.454236,2
5,1.816537,-0.276467,-0.101248,2
6,1.006993,-0.268262,0.167024,2
7,0.602221,-0.82617,-0.778986,1
8,0.602221,-0.240132,0.350578,1
9,0.602221,0.068123,-0.073008,1


In [20]:
# Attach the player names as the first column of the clustered frame.
# The names are passed as a plain array so they are matched to rows by
# position: insert() aligns a Series on index labels, and bowler_t20 keeps its
# original labels after dropna() while scaled_data may be indexed differently,
# which would shift names onto the wrong rows or leave NaN.
# The membership check keeps the cell re-runnable (insert() raises if the
# column already exists).
if 'Player Name' not in scaled_data.columns:
    scaled_data.insert(0, 'Player Name', bowler_t20['Player Name'].to_numpy())
scaled_data

Unnamed: 0,Player Name,Wickets,Bowling Average,Economy Rate,Cluster
0,B Kumar,2.62608,-1.372356,-1.28023,2
1,Amjad Javed,2.221308,-0.830858,0.131725,2
2,Al-Amin Hossain,1.816537,-1.053552,0.569431,2
3,Mohammad Naveed,1.816537,-0.936345,-1.350828,2
4,Rashid Khan,1.816537,-0.32921,-0.454236,2
5,HH Pandya,1.816537,-0.276467,-0.101248,2
6,PWH de Silva,1.006993,-0.268262,0.167024,2
7,Shadab Khan,0.602221,-0.82617,-0.778986,1
8,Haris Rauf,0.602221,-0.240132,0.350578,1
9,Mohammad Nawaz,0.602221,0.068123,-0.073008,1


In [21]:
# Split the clustered frame into one sub-frame per cluster label (0, 1, 2).
cluster_labels = scaled_data['Cluster']
b0, b1, b2 = (scaled_data.loc[cluster_labels == label] for label in (0, 1, 2))

In [22]:
# Number of rows assigned to cluster 0.
b0.shape[0]

13

In [23]:
# Number of rows assigned to cluster 1.
b1.shape[0]

21

In [24]:
# Number of rows assigned to cluster 2.
b2.shape[0]

7