In [1]:
# We use k-means clustering (KMC) to find the cluster centers of each dataset #
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np

In [2]:
def KMC(filename, n):
  """Fit k-means to a CSV file and print the center of every cluster.

  Args:
    filename: Path of the CSV file, loaded with pandas.read_csv. Every column
      of the file is passed to KMeans as a feature.
      NOTE(review): KMeans needs numeric input, so the file is assumed to
      contain only numeric columns -- confirm against the data.
    n: Number of clusters to fit.

  Returns:
    None. For each cluster this prints a header, one line per column with the
    center value rounded to 4 decimals, and the number of rows assigned to
    that cluster.
  """
  data = pd.read_csv(filename)
  # n_init is pinned explicitly: scikit-learn 1.4 changed its default from 10
  # to 'auto', so leaving it out makes the clusters depend on the installed
  # version. 10 is the value the older default used.
  kmeans = KMeans(n_clusters=n, n_init=10, random_state=0)
  kmeans.fit(data)
  # NOTE(review): the features are clustered on their raw scale. In the
  # recorded output elo_diff is in the hundreds while most other columns stay
  # below ~25, so it probably dominates the distance -- consider whether the
  # columns should be standardized before fitting.
  # Print features that correspond to the center of the clusters #
  for c, center in enumerate(kmeans.cluster_centers_):
    print("---------Values for Cluster", c+1, "---------")
    # Center coordinates come back in the same order as the CSV columns.
    for column, value in zip(data.columns, center):
      print(column, ": ", np.round(value, 4))
    print(np.count_nonzero(kmeans.labels_ == c), "Games included in this cluster")
  return

In [5]:
# CSV paths for each club. Later cells pick files by position, so the order
# of these lists must not change.
united_data = [
  "/content/united_pt1.csv",         # [0]
  "/content/united_pt2.csv",         # [1]
  "/content/united_moyes.csv",       # [2]
  "/content/united_lvg_2.csv",       # [3]
  "/content/united_lvg_1.csv",       # [4]
  "/content/united_jose_x_ole.csv",  # [5]
  "/content/united_combined.csv",    # [6]
]
city_data = [
  "/content/city_pt1.csv",                   # [0]
  "/content/city_pt2.csv",                   # [1]
  "/content/city_pep.csv",                   # [2]
  "/content/city_pellegrini_2.csv",          # [3]
  "/content/city_pellegrini_1.csv",          # [4]
  "/content/13-14 Title.csv",                # [5]
  "/content/17-18 and 18:19 Titles.csv",     # [6]
  "/content/city_combined.csv",              # [7]
]

In [None]:
# Pre-xG era (2013/14 and 2014/15 seasons): two clusters for United, then City #
for banner, csv_path in [
  ("-------------------------------- Clusters for 2013/14-2014/15 Seasons (United) --------------------------------", united_data[0]),
  ("-------------------------------- Clusters for 2013/14-2014/15 Seasons (City) --------------------------------", city_data[0]),
]:
  print(banner)
  KMC(csv_path, 2)

-------------------------------- Clusters for 2013/14-2014/15 Seasons (United) --------------------------------
---------Values for Cluster 1 ---------
matchday :  18.0
home_0_away_1 :  0.6154
points_won :  0.5
GF :  0.8462
GA :  1.4615
elo_diff :  189.7388
united_win_odds :  1.8319
opp_final_pos :  11.3077
days_since_last_game_united :  6.8462
days_since_last_game_opp :  7.2308
26 Games included in this cluster
---------Values for Cluster 2 ---------
matchday :  21.5455
home_0_away_1 :  0.5455
points_won :  0.3636
GF :  0.3636
GA :  1.7273
elo_diff :  -23.8882
united_win_odds :  3.4491
opp_final_pos :  2.3636
days_since_last_game_united :  7.3636
days_since_last_game_opp :  7.0909
11 Games included in this cluster
-------------------------------- Clusters for 2013/14-2014/15 Seasons (City) --------------------------------
---------Values for Cluster 1 ---------
matchday :  21.2
home_0_away_1 :  0.7
points_won :  0.4
GF :  1.1
GA :  1.9
elo_diff :  48.086
city_win_odds :  2.564
opp_fin

In [None]:
# xG era (2015/16 through 2018/19 seasons): two clusters for United, then City #
for banner, csv_path in [
  ("-------------------------------- Clusters for 2015/16-2018/19 Seasons (United) --------------------------------", united_data[1]),
  ("-------------------------------- Clusters for 2015/16-2018/19 Seasons (City) --------------------------------", city_data[1]),
]:
  print(banner)
  KMC(csv_path, 2)

-------------------------------- Clusters for 2015/16-2018/19 Seasons (United) --------------------------------
---------Values for Cluster 1 ---------
matchday :  19.625
home_0_away_1 :  0.575
Poss :  62.6
points_won :  0.6
GF :  0.775
GA :  1.35
xG_diff :  0.4742
elo_diff :  212.897
united_win_odds :  1.6702
opp_final_pos :  13.3
days_since_last_game_united :  7.575
days_since_last_opp_game :  7.325
40 Games included in this cluster
---------Values for Cluster 2 ---------
matchday :  20.2258
home_0_away_1 :  0.5484
Poss :  48.5806
points_won :  0.4839
GF :  0.5806
GA :  1.5806
xG_diff :  -0.4974
elo_diff :  -15.4748
united_win_odds :  3.5223
opp_final_pos :  3.9677
days_since_last_game_united :  8.129
days_since_last_opp_game :  8.1935
31 Games included in this cluster
-------------------------------- Clusters for 2015/16-2018/19 Seasons (City) --------------------------------
---------Values for Cluster 1 ---------
matchday :  20.8182
home_0_away_1 :  0.5455
Poss :  59.2727
points_w

In [None]:
# United, one two-cluster fit per manager spell #
# Note the list positions: united_data[4] is the lvg_1 file and
# united_data[3] is the lvg_2 file, so 2014/15 is run before 2015/16.
for banner, csv_path in [
  ("-------------------------------- Clusters for David Moyes (2013/14) --------------------------------", united_data[2]),
  ("-------------------------------- Clusters for LVG (2014/15) --------------------------------", united_data[4]),
  ("-------------------------------- Clusters for LVG with xG (2015/16) --------------------------------", united_data[3]),
  ("-------------------------------- Clusters for Jose and OGS Interim Period (2016/17, 2017/18 and 2018/19) --------------------------------", united_data[5]),
]:
  print(banner)
  KMC(csv_path, 2)


-------------------------------- Clusters for David Moyes (2013/14) --------------------------------
---------Values for Cluster 1 ---------
matchday :  18.0
home_0_away_1 :  0.5455
points_won :  0.2727
GF :  0.4545
GA :  1.9091
elo_diff :  43.5727
united_win_odds :  2.9991
opp_final_pos :  3.4545
days_since_last_game_united :  6.7273
days_since_last_game_opp :  6.5455
11 Games included in this cluster
---------Values for Cluster 2 ---------
matchday :  15.0
home_0_away_1 :  0.3333
points_won :  0.5
GF :  1.1667
GA :  1.6667
elo_diff :  261.925
united_win_odds :  1.47
opp_final_pos :  13.8333
days_since_last_game_united :  8.1667
days_since_last_game_opp :  8.1667
6 Games included in this cluster
-------------------------------- Clusters for LVG (2014/15) --------------------------------
---------Values for Cluster 1 ---------
matchday :  18.0714
home_0_away_1 :  0.7857
points_won :  0.5714
GF :  0.7857
GA :  1.4286
elo_diff :  180.745
united_win_odds :  1.8293
opp_final_pos :  12.1429

In [6]:
# City, one fit per manager spell; the cluster count differs per spell #
# city_data[4] is the pellegrini_1 file and city_data[3] is the pellegrini_2 file.
for banner, csv_path, n_clusters in [
  ("-------------------------------- Clusters for Pellegrini (2013/14 and 2014/15) --------------------------------", city_data[4], 2),
  ("-------------------------------- Clusters for Pellegrini (2015/2016) --------------------------------", city_data[3], 3),
  ("-------------------------------- Clusters for Pep (2016/17, 2017/18 and 2018/19) --------------------------------", city_data[2], 2),
]:
  print(banner)
  KMC(csv_path, n_clusters)

-------------------------------- Clusters for Pellegrini (2013/14 and 2014/15) --------------------------------
---------Values for Cluster 1 ---------
matchday :  21.2
home_0_away_1 :  0.7
points_won :  0.4
GF :  1.1
GA :  1.9
elo_diff :  48.086
city_win_odds :  2.564
opp_final_pos :  3.0
days_since_last_game_city :  8.2
days_since_last_game_opp :  8.1
10 Games included in this cluster
---------Values for Cluster 2 ---------
matchday :  15.7333
home_0_away_1 :  0.7333
points_won :  0.5333
GF :  1.0
GA :  1.4667
elo_diff :  281.4533
city_win_odds :  1.4593
opp_final_pos :  14.4
days_since_last_game_city :  7.1333
days_since_last_game_opp :  8.1333
15 Games included in this cluster
-------------------------------- Clusters for Pellegrini (2015/2016) --------------------------------
---------Values for Cluster 1 ---------
matchday :  22.5
home_0_away_1 :  0.7
Poss :  56.1
points_won :  0.4
GF :  0.8
GA :  2.3
xG_diff :  -0.27
elo_diff :  153.432
city_win_odds :  1.915
opp_final_pos :  6.

In [7]:
# City's title-winning seasons: three clusters for 2013/14, two for 2017/18 and 2018/19 #
for banner, csv_path, n_clusters in [
  ("-------------------------------- Clusters for Pellegrini (2013/14) --------------------------------", city_data[5], 3),
  ("-------------------------------- Clusters for Pep (2017/18 and 2018/19) --------------------------------", city_data[6], 2),
]:
  print(banner)
  KMC(csv_path, n_clusters)

-------------------------------- Clusters for Pellegrini (2013/14) --------------------------------
---------Values for Cluster 1 ---------
matchday :  24.75
home_0_away_1 :  0.75
points_won :  0.25
GF :  1.0
GA :  1.75
elo_diff :  35.3775
city_win_odds :  2.525
opp_final_pos :  3.0
days_since_last_game_city :  6.25
days_since_last_game_opp :  6.0
4 Games included in this cluster
---------Values for Cluster 2 ---------
matchday :  16.0
home_0_away_1 :  0.75
points_won :  0.5
GF :  1.0
GA :  1.5
elo_diff :  308.6175
city_win_odds :  1.3425
opp_final_pos :  16.5
days_since_last_game_city :  5.5
days_since_last_game_opp :  6.75
4 Games included in this cluster
---------Values for Cluster 3 ---------
matchday :  8.3333
home_0_away_1 :  1.0
points_won :  0.6667
GF :  1.0
GA :  1.3333
elo_diff :  201.4433
city_win_odds :  1.6567
opp_final_pos :  10.6667
days_since_last_game_city :  7.6667
days_since_last_game_opp :  8.0
3 Games included in this cluster
-------------------------------- Cluste