# Lanjutan Speech Command Recognition -after preprocessing
## and Adding Another Layer

### Import library

In [15]:
import numpy as np
import matplotlib.pyplot as plt
import json
from sklearn.model_selection import train_test_split
import os

JSON_PATH = "data_balanced_augmented.json"

### Load JSON data into np array

In [2]:
with open(JSON_PATH, "r") as f:
    data = json.load(f)

mfcc_nested = data['mfcc']
label_idx = data['labels']
labels = data['mapping']
# print(np.array(data['mfcc'][0][0]).shape)

preX = []
X = []
y = []

for sample, label in zip(mfcc_nested, label_idx):
    mfcc_array = np.array(sample)               
    X.append(mfcc_array.flatten())                 
    y.append(label)

X = np.array(X, dtype=np.float32)   
y = np.array(y, dtype=np.int32)     

print(X.shape)
print(y.shape)

(41862, 1313)
(41862,)


### Split into test val and test

In [None]:
X_train, X_dev, y_train, y_dev = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42, stratify=y_train)

print(X_train.shape, X_test.shape, X_dev.shape)
print(y_train.shape, y_test.shape, y_dev.shape)

In [16]:
def read_from_json(filename):
    with open(os.path.join('splitted_data', filename), 'r') as f:
        return np.array(json.load(f))

def load_serialized_dataset():
    X_train = read_from_json('X_train.json')
    X_test = read_from_json('X_test.json')
    X_dev = read_from_json('X_dev.json')
    y_train = read_from_json('y_train.json')
    y_test = read_from_json('y_test.json')
    y_dev = read_from_json('y_dev.json')
    return X_train, X_test, X_dev, y_train, y_test, y_dev

X_train, X_test, X_dev, y_train, y_test, y_dev = load_serialized_dataset()

print(X_train.shape, X_test.shape, X_dev.shape)
print(y_train.shape, y_test.shape, y_dev.shape)

(26791, 1313) (6698, 1313) (8373, 1313)
(26791,) (6698,) (8373,)


## Initialize Model

In [18]:
def init_params():
    input_dim = 1313
    hidden_dim1 = 512
    hidden_dim2 = 512
    output_dim = 14


    w = np.load("1600layer1.npz")
    weight1 = w["weight1"]
    bias1 = w["bias1"]

    # weight1 = np.random.randn(hidden_dim1, input_dim) * np.sqrt(2. / hidden_dim1)
    # bias1 = np.zeros(hidden_dim2)

    weight2 = np.random.randn(hidden_dim2, hidden_dim1) * np.sqrt(2. / hidden_dim1)
    bias2 = np.zeros(hidden_dim2)

    weight3 = np.random.randn(output_dim, hidden_dim2) * np.sqrt(2. / hidden_dim2)
    bias3 = np.zeros(output_dim)

    return weight1, bias1, weight2, bias2, weight3, bias3

def ReLu(outZ):
    return np.maximum(0, outZ)

def softmax(Z):
    expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))
    return expZ / np.sum(expZ, axis=1, keepdims=True)

def forward_prop(weight1, bias1, weight2, bias2, weight3, bias3, x):
    outZ1 = x.dot(weight1.T) + bias1
    activatedA1 = ReLu(outZ1)

    outZ2 = activatedA1.dot(weight2.T) + bias2
    activatedA2 = ReLu(outZ2)

    outZ3 = activatedA2.dot(weight3.T) + bias3
    activatedA3 = softmax(outZ3)

    return outZ1, activatedA1, outZ2, activatedA2, outZ3, activatedA3


def one_hot(y):
    one_hot_y = np.zeros((y.size, np.max(y) + 1))
    one_hot_y[np.arange(y.size), y] = 1
    return one_hot_y

def derive_ReLu(Z):
    return Z > 0

def backward_prop(outZ1, activatedA1, outZ2, activatedA2, outZ3, activatedA3, weight2, weight3, x, y):
    m = y.size
    one_hot_y = one_hot(y)

    difOutZ3 = activatedA3 - one_hot_y
    difWeight3 = 1 / m * difOutZ3.T.dot(activatedA2)
    difBias3 = 1 / m * np.sum(difOutZ3, axis=0)

    difActivate2 = difOutZ3.dot(weight3)
    difOutZ2 = difActivate2 * derive_ReLu(outZ2)
    difWeight2 = 1 / m * difOutZ2.T.dot(activatedA1)
    difBias2 = 1 / m * np.sum(difOutZ2, axis=0)

    difActivate1 = difOutZ2.dot(weight2)
    difOutZ1 = difActivate1 * derive_ReLu(outZ1)
    difWeight1 = 1 / m * difOutZ1.T.dot(x)
    difBias1 = 1 / m * np.sum(difOutZ1, axis=0)

    return difWeight1, difBias1, difWeight2, difBias2, difWeight3, difBias3
    # return difWeight2, difBias2, difWeight3, difBias3

def update_params(weight1, bias1, weight2, bias2, weight3, bias3, difWeight1, difBias1, difWeight2, difBias2, difWeight3, difBias3, alpha):
    weight1 = weight1 - alpha * difWeight1
    bias1 = bias1 - alpha * difBias1
    weight2 = weight2 - alpha * difWeight2
    bias2 = bias2 - alpha * difBias2
    weight3 = weight3 - alpha * difWeight3
    bias3 = bias3 - alpha * difBias3

    return weight1, bias1, weight2, bias2, weight3, bias3
    # return weight2, bias2, weight3, bias3

In [19]:
def get_predictions(activatedA3):
    return np.argmax(activatedA3, axis=1)

def get_accuracy(predictions, y):
    return np.sum(predictions == y) / y.size

def gradient_descent(X, y, alpha, iterations, devX, devy):
    weight1, bias1, weight2, bias2, weight3, bias3 = init_params()
    best_accuracy = 0.0
    best_params = (weight1.copy(), bias1.copy(), weight2.copy(), bias2.copy(), weight3.copy(), bias3.copy())

    for i in range(iterations):
        outZ1, activatedA1, outZ2, activatedA2, outZ3, activatedA3 = forward_prop(weight1=weight1, bias1=bias1, weight2=weight2, bias2=bias2, weight3=weight3, bias3=bias3, x=X)
        
        difWeight1, difBias1, difWeight2, difBias2, difWeight3, difBias3 = backward_prop(outZ1=outZ1, activatedA1=activatedA1, outZ2=outZ2, activatedA2=activatedA2, outZ3=outZ3, activatedA3=activatedA3, weight2=weight2, weight3=weight3, x=X, y=y)
        
        weight1, bias1, weight2, bias2, weight3, bias3 = update_params(weight1=weight1, bias1=bias1, weight2=weight2, bias2=bias2, weight3=weight3, bias3=bias3, difWeight1=difWeight1, difBias1=difBias1, difWeight2=difWeight2, difBias2=difBias2, difWeight3=difWeight3, difBias3=difBias3, alpha=alpha)

        predictions = get_predictions(activatedA3)
        acc = get_accuracy(predictions, y)

        if acc > best_accuracy:
            best_accuracy = acc
            best_params = (weight1.copy(), bias1.copy(), weight2.copy(), bias2.copy(), weight3.copy(), bias3.copy())

        if i % 10 == 0:
            print(f"Iteration {i}: Accuracy = {acc:.4f}")
            _, _, _, _, _, ActivatedA3dev = forward_prop(weight1=weight1, bias1=bias1, weight2=weight2, bias2=bias2, weight3=weight3, bias3=bias3, x=devX)
    
            predictionsDev = get_predictions(ActivatedA3dev)
            accDev= get_accuracy(predictionsDev, devy)
            print(f"Unknown Data: Accuracy = {accDev:.4f}")
    
    print(f"Iteration {iterations}: Accuracy = {acc:.4f}")
    print(f"Best Iteration : Accuracy = {best_accuracy:.4f}")

    return best_params

In [20]:
w1, b1, w2, b2, w3, b3 = gradient_descent(X_train, y_train, 0.0072, 1600, X_dev, y_dev)

Iteration 0: Accuracy = 0.0935
Unknown Data: Accuracy = 0.1304
Iteration 10: Accuracy = 0.2396
Unknown Data: Accuracy = 0.2393
Iteration 20: Accuracy = 0.3833
Unknown Data: Accuracy = 0.3598
Iteration 30: Accuracy = 0.4661
Unknown Data: Accuracy = 0.4415
Iteration 40: Accuracy = 0.5105
Unknown Data: Accuracy = 0.4888
Iteration 50: Accuracy = 0.5632
Unknown Data: Accuracy = 0.5308
Iteration 60: Accuracy = 0.5765
Unknown Data: Accuracy = 0.5403
Iteration 70: Accuracy = 0.6056
Unknown Data: Accuracy = 0.5771
Iteration 80: Accuracy = 0.6346
Unknown Data: Accuracy = 0.5863
Iteration 90: Accuracy = 0.6339
Unknown Data: Accuracy = 0.5933
Iteration 100: Accuracy = 0.6605
Unknown Data: Accuracy = 0.6000
Iteration 110: Accuracy = 0.6721
Unknown Data: Accuracy = 0.6085
Iteration 120: Accuracy = 0.6725
Unknown Data: Accuracy = 0.6226
Iteration 130: Accuracy = 0.6952
Unknown Data: Accuracy = 0.6357
Iteration 140: Accuracy = 0.6825
Unknown Data: Accuracy = 0.6318
Iteration 150: Accuracy = 0.7131
Unk

In [21]:
np.savez("1600layer2_92-77.npz", weight1=w1, bias1=b1, weight2=w2, bias2=b2, weigh3=w3, bias3=b3)

## Now for Prediction

In [None]:
def predictTest(X, y, addr):
    param = np.load(addr)
    weight1 = param['weight1']
    bias1 = param['bias1']
    weight2 = param['weight2']
    bias2 = param['bias2']
    weight3 = param['weigh3']
    bias3 = param['bias3']
    ActivatedA3 = 0

    _, _, _, _, _, ActivatedA3 = forward_prop(weight1=weight1, bias1=bias1, weight2=weight2, bias2=bias2, weight3=weight3, bias3=bias3, x=X)
    
    predictions = get_predictions(ActivatedA3)
    acc = get_accuracy(predictions, y)

    return acc

akurasi = predictTest(X_test, y_test, "1600layer2_92-77.npz")
print("Testing Accuracy =", akurasi)

def predict(addr):
    param = np.load(addr)
    weight1 = param['weight1']
    bias1 = param['bias1']
    weight2 = param['weight2']
    bias2 = param['bias2']
    weight3 = param['weigh3']
    bias3 = param['bias3']
    ActivatedA3 = 0

    _, _, _, _, _, ActivatedA3 = forward_prop(weight1=weight1, bias1=bias1, weight2=weight2, bias2=bias2, weight3=weight3, bias3=bias3, x=X)

Testing Accuracy = 0.772767990444909
