<a href="https://colab.research.google.com/github/bdominique/Real-Madrid/blob/master/silhouette.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#Code to generate the average Silhouette Score for each value of k from 2 to 10.
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pandas as pd




In [0]:

def silhouette(filename, range_n_clusters=None, random_state=10):
  """Run a KMeans silhouette analysis on a CSV file for several cluster counts.

  The file is loaded with ``pd.read_csv`` and the resulting frame is handed to
  KMeans as-is. For every cluster count the average silhouette score is
  printed and a new figure is drawn with the sorted per-sample silhouette
  values of each cluster, plus a dashed red line at the average. All figures
  are shown by one ``plt.show()`` after the last cluster count is processed.

  Parameters
  ----------
  filename : str or path-like
      CSV file to analyse; anything ``pd.read_csv`` accepts.
  range_n_clusters : iterable of int, optional
      Cluster counts to evaluate. Defaults to 2 through 10 inclusive.
  random_state : int, optional
      Seed passed to ``KMeans`` for reproducible clusterings. Defaults to 10.

  Returns
  -------
  None

  Notes
  -----
  The silhouette score is only defined for ``2 <= n_clusters <= n_samples - 1``.
  Cluster counts outside that window are skipped with a printed message
  instead of letting scikit-learn raise part-way through the run.
  """
  data = pd.read_csv(filename)
  if range_n_clusters is None:
    range_n_clusters = range(2, 11)

  n_samples = len(data)
  gap = 10  # blank rows inserted between the silhouettes of adjacent clusters

  for n_clusters in range_n_clusters:
    if not 2 <= n_clusters <= n_samples - 1:
      print("Skipping n_clusters =", n_clusters,
            "- the silhouette score needs 2 <= n_clusters <= n_samples - 1",
            "and the data has", n_samples, "samples")
      continue

    _, ax = plt.subplots()

    # The silhouette coefficient lies in [-1, 1].
    ax.set_xlim([-1, 1])
    # Leave room for one gap below, between and above the cluster silhouettes.
    ax.set_ylim([0, n_samples + (n_clusters + 1) * gap])

    # Fixed seed so the same clustering is produced on every run.
    clusterer = KMeans(n_clusters=n_clusters, random_state=random_state)
    cluster_labels = clusterer.fit_predict(data)

    # Mean silhouette over all samples: a summary of how dense and well
    # separated the clusters are.
    silhouette_avg = silhouette_score(data, cluster_labels)
    print("For n_clusters =", n_clusters,
          "The average silhouette_score is :", silhouette_avg)

    # Per-sample silhouette values, used to draw each cluster's profile.
    sample_silhouette_values = silhouette_samples(data, cluster_labels)

    y_lower = gap
    for i in range(n_clusters):
      # Sorted silhouette values of the samples assigned to cluster i.
      ith_cluster_silhouette_values = np.sort(
          sample_silhouette_values[cluster_labels == i])

      size_cluster_i = ith_cluster_silhouette_values.shape[0]
      y_upper = y_lower + size_cluster_i

      color = cm.nipy_spectral(float(i) / n_clusters)
      ax.fill_betweenx(np.arange(y_lower, y_upper),
                       0, ith_cluster_silhouette_values,
                       facecolor=color, edgecolor=color, alpha=0.7)

      # Put the cluster number at the vertical middle of its silhouette.
      ax.text(-0.15, y_lower + 0.5 * size_cluster_i, str(i))

      # The next cluster starts one gap above the current one.
      y_lower = y_upper + gap

    ax.set_xlabel("The silhouette coefficient values")
    ax.set_ylabel("Cluster label")

    # Vertical marker at the average silhouette score of all samples.
    ax.axvline(x=silhouette_avg, color="red", linestyle="--")

    ax.set_yticks([])  # y positions are only stacking offsets, so hide ticks
    ax.set_xticks([-1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1])

    ax.set_title(("Silhouette analysis for KMeans clustering on Dataset "
                  "with n_clusters = %d" % n_clusters),
                 fontsize=14, fontweight='bold')

  plt.show()

In [0]:
# CSV inputs for Barcelona; the later cells pick files by position.
fcb_data = [
    "fcb_data_pt1_v1.csv",        # 0
    "fcb_data_pt2_v1.csv",        # 1
    "fcb_data_all_v1.csv",        # 2
    "fcb_data_guardiola.csv",     # 3
    "fcb_data_enrique.csv",       # 4
    "fcb_data_title_wins.csv",    # 5
    "fcb_data_title_losses.csv",  # 6
]

# CSV inputs for Real Madrid; the later cells pick files by position.
rm_data = [
    "rm_data_pt1_v1.csv",         # 0
    "rm_data_pt2_v1.csv",         # 1
    "rm_data_all_v1.csv",         # 2
    "rm_data_mourinho.csv",       # 3
    "rm_data_ancelotti.csv",      # 4
    "rm_data_zidane.csv",         # 5
    "rm_data_title_wins.csv",     # 6
    "rm_data_title_losses.csv",   # 7
]

In [0]:
# Silhouette analysis of the 2009/10-2013/14 seasons, one run per team.
for banner, csv_name in [
    ("-------------------------------- Silhouette Values for 2009/10-2013/14 Seasons (Barcelona) --------------------------------", fcb_data[0]),
    ("-------------------------------- Silhouette Values for 2009/10-2013/14 Seasons (Real Madrid) --------------------------------", rm_data[0]),
]:
  print(banner)
  silhouette(csv_name)



In [0]:
# Silhouette analysis of the 2014/15-2017/18 seasons, one run per team.
for banner, csv_name in [
    ("-------------------------------- Silhouette Values for 2014/15-2017/18 Seasons (Barcelona) --------------------------------", fcb_data[1]),
    ("-------------------------------- Silhouette Values for 2014/15-2017/18 Seasons (Real Madrid) --------------------------------", rm_data[1]),
]:
  print(banner)
  silhouette(csv_name)



In [0]:
# Silhouette analysis of the full 2009/10-2017/18 span, one run per team.
for banner, csv_name in [
    ("-------------------------------- Silhouette Values for 2009/10-2017/18 Seasons (Barcelona) --------------------------------", fcb_data[2]),
    ("-------------------------------- Silhouette Values for 2009/10-2017/18 Seasons (Real Madrid) --------------------------------", rm_data[2]),
]:
  print(banner)
  silhouette(csv_name)



In [0]:
# Silhouette analysis per Real Madrid manager.
for banner, csv_name in [
    ("-------------------------------- Silhouette Values for Mourinho --------------------------------", rm_data[3]),
    ("-------------------------------- Silhouette Values for Ancelotti --------------------------------", rm_data[4]),
    ("-------------------------------- Silhouette Values for Zidane --------------------------------", rm_data[5]),
]:
  print(banner)
  silhouette(csv_name)



In [0]:
# Silhouette analysis per Barcelona manager.
for banner, csv_name in [
    ("-------------------------------- Silhouette Values for Guardiola --------------------------------", fcb_data[3]),
    ("-------------------------------- Silhouette Values for Enrique --------------------------------", fcb_data[4]),
]:
  print(banner)
  silhouette(csv_name)



In [0]:
# Silhouette analysis of title-winning seasons: Barcelona first, then Real Madrid.
for banner, csv_name in [
    ("-------------------------------- Silhouette Values for Title Winning Seasons (Barcelona) --------------------------------", fcb_data[5]),
    ("-------------------------------- Silhouette Values for Title Winning Seasons (Real Madrid) --------------------------------", rm_data[6]),
]:
  print(banner)
  silhouette(csv_name)


In [0]:
# Silhouette analysis of title-losing seasons: Barcelona first, then Real Madrid.
for banner, csv_name in [
    ("-------------------------------- Silhouette Values for Title Losing Seasons (Barcelona) --------------------------------", fcb_data[6]),
    ("-------------------------------- Silhouette Values for Title Losing Seasons (Real Madrid) --------------------------------", rm_data[7]),
]:
  print(banner)
  silhouette(csv_name)