In [1]:

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import numpy as np
import pandas as pd
import warnings
import time
import json
import random

from sklearn.model_selection import train_test_split

from sklearn import preprocessing

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D
from sklearn import metrics

from itertools import cycle
from itertools import combinations

from tqdm import tqdm
import apr_constants
import apr_functions_ul
import common_functions

In [4]:
# --- Notebook configuration -------------------------------------------------
# Column layout of the K-Means metrics table built by this notebook.
Parameters = [
    'Compared_Genre', 'With_Correlation',
    'Inertia', 'Homo', 'Compl', 'V-Meas', 'Ari', 'Ami', 'Silhouette',
    'Clusters',
]
# Empty results table carrying only the column headers above.
k_means_metrics = pd.DataFrame(columns=Parameters)

# Plain literals: feature file to analyse, MFCC folder tag, cluster count.
FEATURES_FILE_NAME = '20MFCC_10000_10GEN_GTZAN.csv'
MFCCS_FOLDER = '20'
CLUSTERS_NUM = 2

# Input location: 'test_data/' prefix plus the project-level DATA constant.
DATA_PATH = 'test_data/' + apr_constants.DATA
print('DATA_PATH: ', DATA_PATH)

# Output location: 'test_data/' prefix plus the project-level notebooks root.
SAVE_ROOT = 'test_data/' + apr_constants.ROOT_SAVE_PATH_NOTEBOOKS
print('SAVE_ROOT: ', SAVE_ROOT)
# NOTE(review): presumably creates SAVE_ROOT when it is missing -- confirm in
# common_functions.
common_functions.check_create_directory(SAVE_ROOT)

# Names derived from the feature file name.
FEATURES_FILE_PATH = DATA_PATH + FEATURES_FILE_NAME
SAVING_FILE_NAME = 'K_MEANS_METRICS_' + FEATURES_FILE_NAME
IMAGE_FILE_NAME = FEATURES_FILE_NAME.replace('.csv', '')

# One row per configuration to evaluate, exposed as two columns.
# NOTE(review): the exact shape of getPermutation's result is not visible
# here -- verify it yields exactly two fields per row.
permutations_to_eval = pd.DataFrame(
    apr_functions_ul.getPermutation(apr_constants.GENRE_TARGET_NAMES, 2),
    columns=['Compared_Genre', 'With_Correlation'],
)

DATA_PATH:  test_data/Data/
SAVE_ROOT:  test_data/notebooks_results/


In [None]:
# Run K-Means for every row of `permutations_to_eval`, score the clustering
# against the true genre labels, and save one metrics row per run to CSV.
#
# The cell rebuilds `k_means_metrics` from scratch, so it can be re-run
# without duplicating rows.
Parameters = ['Compared_Genre', 'With_Correlation', 'Inertia', 'Homo', 'Compl', 'V-Meas', 'Ari', 'Ami', 'Silhouette', 'Clusters']
normalization = 'min_max'  # one of 'std', 'min_max' or None
target_col = 'genre'

# Start the clock once so the reported time covers the whole sweep (and is
# defined even when there is nothing to iterate over).
start_time = time.time()

# The feature table is identical for every iteration: read it a single time.
original_file = pd.read_csv(FEATURES_FILE_PATH)

# One dict per evaluated pair; turned into a DataFrame once, after the loop.
metric_rows = []

for _, row in tqdm(permutations_to_eval.iterrows(), total=len(permutations_to_eval.index)):
    row_values = row.to_list()
    new_genre = row_values[0]   # indexed below as the two genres to compare
    corr_info = row_values[1]   # truthy -> also run the correlation step
    With_Correlation = False

    # Keep only the two genres under comparison. The explicit .copy() makes
    # the column assignment below write to an independent frame instead of a
    # slice of `original_file` (avoids SettingWithCopyWarning).
    genre_mask = (original_file[target_col] == new_genre[0]) | (original_file[target_col] == new_genre[1])
    df = original_file[genre_mask].copy()

    # Replace the genre names with ordinal codes.
    ord_enc = preprocessing.OrdinalEncoder()
    df[target_col] = ord_enc.fit_transform(df[[target_col]])

    # Split df into features (X) and target (y).
    X = df.loc[:, df.columns != target_col]
    y = df.loc[:, target_col]

    x_cols = X.columns
    if normalization == 'std':
        # Normalize X with the standard scaler.
        resized_data = preprocessing.StandardScaler()
        np_scaled = resized_data.fit_transform(X)
    elif normalization == 'min_max':
        # Normalize X with the min-max scaler.
        resized_data = preprocessing.MinMaxScaler()
        np_scaled = resized_data.fit_transform(X)
    elif normalization is None:
        np_scaled = X
    else:
        # Fail loudly rather than silently reusing a stale `np_scaled`.
        raise ValueError('Unknown normalization: {!r}'.format(normalization))

    X = pd.DataFrame(np_scaled, columns=x_cols)
    y = pd.DataFrame(y).fillna(0).astype(int)

    if corr_info:
        # Get Correlation Matrix and Plot.
        # NOTE(review): the return value is discarded, so X is not visibly
        # reduced here -- confirm whether correlated features should be
        # dropped before clustering.
        apr_functions_ul.getCorrelatedFeatures(X, 0.8, True, False, False, IMAGE_FILE_NAME)
        With_Correlation = True

    # Get K-means results.
    labels, y_clust, centroids, k_means = apr_functions_ul.runKmeans(X, CLUSTERS_NUM, save_model=False)

    y_true = y[target_col]
    metric_rows.append({
        'Compared_Genre': new_genre[0] + '-' + new_genre[1],
        'With_Correlation': With_Correlation,
        'Inertia': k_means.inertia_,
        'Homo': metrics.homogeneity_score(y_true, y_clust),
        'Compl': metrics.completeness_score(y_true, y_clust),
        'V-Meas': metrics.v_measure_score(y_true, y_clust),
        'Ari': metrics.adjusted_rand_score(y_true, y_clust),
        'Ami': metrics.adjusted_mutual_info_score(y_true, y_clust),
        'Silhouette': metrics.silhouette_score(X, y_clust, metric='euclidean'),
        # Record the cluster count that was actually passed to runKmeans.
        'Clusters': CLUSTERS_NUM,
    })

# Build the results table in one go (DataFrame.append no longer exists in
# pandas >= 2.0, and growing a frame row by row is quadratic).
k_means_metrics = pd.DataFrame(metric_rows, columns=Parameters)

k_means_metrics.to_csv(SAVE_ROOT+SAVING_FILE_NAME, index=False)
executionTime = time.time() - start_time
print()
print('Save CSV!')
print('Execution Time: ', executionTime)