In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd 
import librosa 

In [2]:
X_train = np.load('X_train.npy')
y_train = np.load('y_train.npy')

In [3]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

X_train_subset = X_train[:10000]
y_train_subset = y_train[:10000]

### Making data 0 mean and computing covariance matrix

In [4]:
mu = np.mean(X_train_subset, axis=0)
X_train_subset = X_train_subset - mu

In [5]:
covariance_matrix = np.cov(X_train_subset.T)
covariance_matrix[:4]

array([[549.29860001, 513.36601593, 477.06816083, ..., 530.49830076,
        529.61574461, 511.27850877],
       [513.36601593, 558.54660462, 524.30431997, ..., 515.27249275,
        514.43665514, 496.87785177],
       [477.06816083, 524.30431997, 542.38025823, ..., 480.63872586,
        479.74065634, 463.05682171],
       [427.68536239, 463.7834444 , 471.80014971, ..., 431.62508288,
        430.42791702, 415.3173658 ]])

In [6]:
evalues, evectors = np.linalg.eig(covariance_matrix)
evalues[:5]

array([264521.50819269,   5687.66425022,   2534.02481444,   1915.00670257,
         1099.15973955])

In [7]:
explained_variances = []
for i in range(len(evalues)):
    explained_variances.append(evalues[i] / np.sum(evalues))
 
print(np.sum(explained_variances))
print(explained_variances)

0.9999999999999996
[0.8860556828459801, 0.019051710635786714, 0.008488107839125436, 0.006414610982281197, 0.0036818054616359285, 0.003040511851901905, 0.0022811019267993953, 0.0019761736566456987, 0.0018502975357352638, 0.0012935351688686781, 0.001155758001531887, 0.00108516919438287, 0.0010264170984062725, 0.0009293155007250596, 0.0009046694558863024, 0.0008705702092405998, 0.0008520123894388361, 0.0008323747995988331, 0.0007709397655240504, 0.0007182434586022405, 0.000669393580138956, 0.0006274164084264372, 0.0006227567298273284, 0.000584002154767179, 0.0005662075968315397, 0.0005337608825147483, 0.0005198317980555218, 0.0005046133141362033, 0.000495678313945689, 0.0004917904878203296, 0.00046058050890171914, 0.0004476816636790948, 0.0004381087692930121, 0.0004293399342143859, 0.0004205009843536306, 0.00041766750803782176, 0.0004105809062308242, 0.00039784831293328824, 0.00038977485471001697, 0.00037162241503379157, 0.0003702920928287853, 0.0003592994279879058, 0.00034994105697639505

### Choosing the first K dimensions for new data

In [8]:
K = 100

In [9]:
U = evectors[:K]
X_transformed = np.dot(X_train_subset,U.T)

In [10]:
print(X_transformed.shape)

(10000, 100)


#### Converting the y values to Integer values rather than vectors 

In [11]:
y_train_final = []
for i in range(y_train_subset.shape[0]):
    if y_train_subset[i][0] == 1 :
        y_train_final.append(0)
    elif y_train_subset[i][1] == 1 :
        y_train_final.append(1)
    else:
        y_train_final.append(2)

y_train_final = np.array(y_train_final)
X_train_final = X_transformed

In [12]:
print(X_train_final.shape, y_train_final.shape)

(10000, 100) (10000,)


### Creating the Model

In [13]:
# y = Wx + b
# 3x1 = (3,100) * (100,1) + (3,1)
W = np.random.rand(3, 100)
b = np.random.rand(3, 1)

In [14]:
def forward_pass(x, W, b):
    y_pred = np.array([np.zeros([3]) for i in range(x.shape[0])])

    for i in range(x.shape[0]): 
        y_pred[i] = (W.dot(x[i].reshape(-1,1)) + b).reshape(-1)

    y_pred_prob = np.array([np.zeros([3]) for i in range(y_pred.shape[0])])

    from scipy.special import softmax
    
    for i in range(y_pred.shape[0]):
        y_pred_prob[i] = softmax(y_pred[i])

    predictions = np.array([np.argmax(i) for i in y_pred_prob])

    return y_pred_prob, predictions

In [15]:
y_pred_prob, predictions = forward_pass(X_train_final, W, b)
print(predictions.shape)

(10000,)


In [16]:
def accuracy(predictions, target):
    correct = 0
    for i in range(len(predictions)):
        if predictions[i] == target[i]:
            correct += 1
    accuracy = correct/len(predictions)*100
    return accuracy

In [17]:
accuracy = accuracy(predictions, y_train_final) #calculating accuracy for our model
print(accuracy)

68.32000000000001


### Making the training and test sets 

In [18]:
# train test split 
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_train_final, y_train_final, test_size=0.2, random_state=42)

### Training and Evaluating

In [19]:
def train(x,y,W,b,lr,epochs):
    for i in range(epochs):
        y_pred_prob, _ = forward_pass(x, W, b)
        y_pred_prob[np.arange(x.shape[0]),y] -= 1

        grad_W = y_pred_prob.T.dot(x) 
        grad_b = np.sum(y_pred_prob, axis = 0).reshape(-1,1)

        W -= (lr * grad_W)
        b -= (lr * grad_b)

    return W, b

In [21]:
W, b = train(X_train_final, y_train_final, W, b, 0.01, 300)

In [22]:
testProbabilities, testPredictions = forward_pass(X_test, W, b)

correctPreds = 0
for i in range(len(testPredictions)):
    if testPredictions[i] == y_test[i]:
        correctPreds += 1
acc = correctPreds / len(testPredictions) * 100
print("Model accuracy on test dataset - {}".format(acc))

Model accuracy on test dataset - 98.05


### Saving the model

In [24]:
np.save('saved_model/pca/W', W)
np.save('saved_model/pca/b',b)