In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd 
import librosa 

In [2]:
X_train = np.load('X_train.npy')
y_train = np.load('y_train.npy')

In [3]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

### Making data 0 mean and computing covariance matrix

In [4]:
mu = np.mean(X_train, axis=0)
X_train_subset = X_train - mu

In [5]:
covariance_matrix = np.cov(X_train_subset.T)
covariance_matrix[:4]

array([[549.04376947, 513.55137845, 479.7201979 , ..., 532.60787173,
        532.36929241, 512.93402293],
       [513.55137845, 560.28908995, 528.87662373, ..., 518.74699087,
        518.67379822, 500.18344376],
       [479.7201979 , 528.87662373, 548.7854355 , ..., 488.42532325,
        488.26022602, 471.04514173],
       [427.39846149, 464.25677258, 475.06124134, ..., 434.47056589,
        434.11619038, 419.07782982]])

In [6]:
evalues, evectors = np.linalg.eig(covariance_matrix)
evalues[:5]

array([263897.73881923,   5563.57262242,   2553.758691  ,   1931.52841899,
         1127.76136041])

In [7]:
explained_variances = []
for i in range(len(evalues)):
    explained_variances.append(evalues[i] / np.sum(evalues))
 
print(np.sum(explained_variances))
print(explained_variances)

1.0000000000000004
[0.8856646583586443, 0.018671852468060895, 0.008570644935088129, 0.006482383914948712, 0.0037848690346806847, 0.00309791935702875, 0.0023078710843184437, 0.0019813441272988905, 0.0018670796663160856, 0.0013421823776278677, 0.0011909857553348256, 0.0010844205612322287, 0.0010256450734964075, 0.0009346807043260315, 0.0009091055436895487, 0.0008974495897302061, 0.0008389658277320779, 0.000834137882240317, 0.0007749799421074719, 0.0007069390793947006, 0.0006644133185751791, 0.0006263606744890566, 0.000616549119244162, 0.0005878863678340898, 0.0005677438762313714, 0.0005200385892733857, 0.0005178207020070637, 0.0005046660442152526, 0.0004928425866161622, 0.0004768169678091593, 0.0004599695932692947, 0.0004439570082783314, 0.0004374433455004572, 0.00042508303903935653, 0.0004194542339699272, 0.00040721812737647134, 0.00039798631685968505, 0.00038643244308436025, 0.0003838062915468922, 0.00037173515188966945, 0.0003619666262318171, 0.0003645488252589191, 0.00035079409311879

### Choosing the first K dimensions for new data

In [8]:
K = 100

In [9]:
U = evectors[:K]
X_transformed = np.dot(X_train_subset,U.T)

In [10]:
print(X_transformed.shape)

(112680, 100)


In [11]:
from sklearn.model_selection import train_test_split
X_train, X_part, y_train, y_part = train_test_split(X_transformed, y_train, test_size=0.2, random_state=42)
X_valid, X_test, y_valid, y_test = train_test_split(X_part, y_part, test_size=0.5, random_state=42)

In [12]:
# number of clusters 
K = 3

In [13]:
def assign_centroid(X_train, centroids, distance_type = "l2"):

    m = X_train.shape[0]
    idx = np.zeros((m,))
    
    if distance_type == "l2":
        for i in range(m):
            min_idx = np.argmin(np.linalg.norm(centroids - X_train[i], axis=1))
            idx[i] = min_idx
        return idx
    elif distance_type == "inf":
        for i in range(m):
            min_idx = np.argmin(np.linalg.norm(centroids - X_train[i], axis=1, ord="inf"))
            idx[i] = min_idx
        return idx

In [14]:
def update_centroid(X_train, idx, K):
    m, n = X_train.shape
    centroids = np.zeros((K, n))

    for i in range(K):
        curr_len = len(X_train[idx == i])
        if curr_len == 0:
            centroids[i] = X_train[np.random.choice(m, 1)]
        else :
            centroids[i] = np.sum(X_train[idx == i], axis=0)/curr_len

    return centroids

In [15]:
def compute_cost(X_train, idx, centroids):
    m = X_train.shape[0]
    K = centroids.shape[0]
    loss = 0
    for i  in range(K):
        loss += np.sum((X_train[idx == i] - centroids[i])**2)

    return loss/m

In [16]:
losses = []
centroids_hist = []

for i in range(10):   ## Using 10 random Initializations corrs. to 10 independent runs of KMeans
    c0, c1, c2 = np.random.choice(X_train.shape[0], 3)
    centroids = X_train[[c0,c1,c2]]
    
    for iter in range(20):    ## Iterating over the dataset 20 times
        idx = assign_centroid(X_train, centroids)
        centroids = update_centroid(X_train, idx, 3)
    loss = compute_cost(X_train, idx, centroids)
    losses.append(loss)
    centroids_hist.append(centroids)

In [17]:
index = np.argmin(losses) 

In [18]:
centroid = centroids_hist[index]

### Saving the centroids 

In [19]:
np.save('saved_model/kmc+pca/centroid', centroid)

In [22]:
# finding out the cluster for music 
total = 0 
correct = 0 

for i in range(X_valid.shape[0]):
    if(y_valid[i][0] != 1):
        continue 
    else : 
        total += 1 
        x = X_valid[i] 
        d0 = np.linalg.norm(centroid[0] - x)
        d1 = np.linalg.norm(centroid[1] - x)
        d2 = np.linalg.norm(centroid[2] - x)
        if d0 <= d1 and d0 <= d2 : 
            correct += 1 
        
print(correct/total)

# We get the best accuracy for music = centroid 0 cluster 
# for music = centroid 0, accuracy = 0.66
# music = centroid 1, accuracy = 0.34
# music = centroid 2, accuracy = 0



0.656732592793281


In [23]:
# finding out the cluster for speech 
total = 0 
correct = 0 

for i in range(X_valid.shape[0]):
    if(y_valid[i][1] != 1):
        continue 
    else : 
        total += 1 
        x = X_valid[i] 
        d0 = np.linalg.norm(centroid[0] - x)
        d1 = np.linalg.norm(centroid[1] - x)
        d2 = np.linalg.norm(centroid[2] - x)
        if d1 <= d2 and d1 <= d0 : 
            correct += 1 
        
print(correct/total)

# We get the best accuracy for speech = centroid 1 cluster 
# for speech = centroid 0, accuracy = 0
# speech = centroid 1, accuracy = 0.67
# speech = centroid 2, accuracy = 0.33


0.6706067769897557


From the above analysis we get that the music is the centroid 0, speech is the centroid 1 and silence is the centroid 2 

### Testing the Model

In [24]:
total = 0 
correct = 0 

for i in range(X_test.shape[0]):
    total += 1 
    x = X_test[i] 
    d0 = np.linalg.norm(centroid[0] - x)
    d1 = np.linalg.norm(centroid[1] - x)
    d2 = np.linalg.norm(centroid[2] - x)
    
    if y_test[i][0] == 1:
        if d0 <= d1 and d0 <= d1 : 
            correct += 1 
        
    if y_test[i][1] == 1:
        if d1 <= d0 and d1 <= d2 : 
            correct += 1 

    if y_test[i][2] == 1:
        if d2 <= d1 and d2 <= d0 : 
            correct += 1 

print(correct/total)


0.7784877529286475
