In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import AffinityPropagation
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.model_selection import train_test_split
from collections import Counter

# Load the preprocessed feature table (path is relative to the notebook's working directory).
data = pd.read_csv('spotify_data/processed_data.csv')

# Clustering input: every column except the two dropped here.
train = data.drop(columns=['genres', 'name'])

# Keep the genre column aside; it is excluded from the clustering input above.
genres = data['genres']

# test_size=0.9 holds out 90% of the rows, so X_train is a 10% subsample.
# random_state pins the shuffle so the same subsample (and therefore the same
# downstream results) is produced on every Restart & Run All.
X_train, X_test = train_test_split(train, test_size=0.9, random_state=0)
X_train.shape

(13687, 14)

In [None]:
# Standardize using statistics of the rows that are actually clustered.
# Fitting on X_train (rather than the full `train` frame) keeps the held-out
# rows out of the preprocessing step and guarantees the clustered matrix has
# zero mean / unit variance per feature.
scaler = StandardScaler()
train_scale = scaler.fit_transform(X_train)

# Score histories; each list receives one entry per fitted model.
s_scores = []   # silhouette (higher is better)
ch_scores = []  # Calinski-Harabasz (higher is better)
db_scores = []  # Davies-Bouldin (lower is better)

# Affinity propagation picks the number of clusters itself; random_state fixes
# its internal randomness so the run is repeatable.
model = AffinityPropagation(random_state=0)
y_pred = model.fit_predict(train_scale)

# Evaluate the clustering with three internal (label-free) metrics.
s_scores.append(silhouette_score(train_scale, y_pred))
ch_scores.append(calinski_harabasz_score(train_scale, y_pred))
db_scores.append(davies_bouldin_score(train_scale, y_pred))
print(f'{s_scores[-1]}, {ch_scores[-1]}, {db_scores[-1]}')

In [None]:
# How many of the most distinctive features to list per cluster.
TOP_N = 5

# Per-feature mean of the scaled data: the baseline each cluster is compared to.
x_avg = train_scale.mean(axis=0)
# Absolute per-feature gap between every cluster center and that baseline
# (broadcasts to one row per cluster, one column per feature).
dists = np.absolute(x_avg - model.cluster_centers_)

# Clamp to the number of available features so narrow inputs do not raise
# an IndexError; slicing from an explicit start index also handles TOP_N == 0.
n_features = dists.shape[1]
top_n = min(TOP_N, n_features)
# argsort is ascending, so the last `top_n` columns are the largest gaps.
indices = np.argsort(dists, axis=1)[:, n_features - top_n:]

features = train.columns

for cluster, ranked in enumerate(indices):
    print(f'Cluster {cluster} Top Features:')
    # Walk the slice backwards to print the largest gap first.
    for feature_idx in ranked[::-1]:
        print(f'\t{features[feature_idx]}')