In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd 
import librosa 

In [2]:
X_train = np.load('X_train.npy')
y_train = np.load('y_train.npy')

In [13]:
# train test split 
from sklearn.model_selection import train_test_split
X_train, X_part, y_train, y_part = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
X_valid, X_test, y_valid, y_test = train_test_split(X_part, y_part, test_size=0.5, random_state=42)

In [25]:
# number of clusters 
K = 3

In [26]:
def assign_centroid(X_train, centroids, distance_type = "l2"):

    m = X_train.shape[0]
    idx = np.zeros((m,))
    
    if distance_type == "l2":
        for i in range(m):
            min_idx = np.argmin(np.linalg.norm(centroids - X_train[i], axis=1))
            idx[i] = min_idx
        return idx
    elif distance_type == "inf":
        for i in range(m):
            min_idx = np.argmin(np.linalg.norm(centroids - X_train[i], axis=1, ord="inf"))
            idx[i] = min_idx
        return idx

In [27]:
def update_centroid(X_train, idx, K):
    m, n = X_train.shape
    centroids = np.zeros((K, n))

    for i in range(K):
        curr_len = len(X_train[idx == i])
        if curr_len == 0:
            centroids[i] = X_train[np.random.choice(m, 1)]
        else :
            centroids[i] = np.sum(X_train[idx == i], axis=0)/curr_len

    return centroids

In [28]:
def compute_cost(X_train, idx, centroids):
    m = X_train.shape[0]
    K = centroids.shape[0]
    loss = 0
    for i  in range(K):
        loss += np.sum((X_train[idx == i] - centroids[i])**2)

    return loss/m

In [29]:
losses = []
centroids_hist = []

for i in range(10):   ## Using 10 random Initializations corrs. to 10 independent runs of KMeans
    c0, c1, c2 = np.random.choice(X_train.shape[0], 3)
    centroids = X_train[[c0,c1,c2]]
    
    for iter in range(20):    ## Iterating over the dataset 20 times
        idx = assign_centroid(X_train, centroids)
        centroids = update_centroid(X_train, idx, 3)
    loss = compute_cost(X_train, idx, centroids)
    losses.append(loss)
    centroids_hist.append(centroids)

In [30]:
index = np.argmin(losses) 

In [31]:
centroid = centroids_hist[index]

### Saving the centroids 

In [32]:
np.save('saved_model/kmc/centroid', centroid)

In [39]:
# finding out the cluster for music 
total = 0 
correct = 0 

for i in range(X_valid.shape[0]):
    if(y_valid[i][0] != 1):
        continue 
    else : 
        total += 1 
        x = X_valid[i] 
        d0 = np.linalg.norm(centroid[0] - x)
        d1 = np.linalg.norm(centroid[1] - x)
        d2 = np.linalg.norm(centroid[2] - x)
        if d2 <= d0 and d2 <= d1 : 
            correct += 1 
        
print(correct/total)

# We get the best accuracy for music = centroid 2 cluster 
# for music = centroid 0, accuracy = 0
# music = centroid 1, accuracy = 0.36
# music = centroid 2, accuracy = 0.64



0.6430921052631579


In [44]:
# finding out the cluster for speech 
total = 0 
correct = 0 

for i in range(X_valid.shape[0]):
    if(y_valid[i][1] != 1):
        continue 
    else : 
        total += 1 
        x = X_valid[i] 
        d0 = np.linalg.norm(centroid[0] - x)
        d1 = np.linalg.norm(centroid[1] - x)
        d2 = np.linalg.norm(centroid[2] - x)
        if d1 <= d2 and d1 <= d0 : 
            correct += 1 
        
print(correct/total)

# We get the best accuracy for speech = centroid 1 cluster 
# for speech = centroid 0, accuracy = 0
# speech = centroid 1, accuracy = 0.70
# speech = centroid 2, accuracy = 0.29


0.7028571428571428


From the above analysis we get that the music is the centroid 2, speech is the centroid 1 and silence is the centroid 0 

### Testing the Model

In [46]:
total = 0 
correct = 0 

for i in range(X_test.shape[0]):
    total += 1 
    x = X_test[i] 
    d0 = np.linalg.norm(centroid[0] - x)
    d1 = np.linalg.norm(centroid[1] - x)
    d2 = np.linalg.norm(centroid[2] - x)
    
    if y_test[i][0] == 1:
        if d2 <= d1 and d2 <= d0 : 
            correct += 1 
        
    if y_test[i][1] == 1:
        if d1 <= d0 and d1 <= d2 : 
            correct += 1 

    if y_test[i][2] == 1:
        if d0 <= d1 and d0 <= d2 : 
            correct += 1 

print(correct/total)


0.7759290072102052
