# Speech Command Recognition (continued) - after preprocessing

### Import libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import json
from sklearn.model_selection import train_test_split
import os

# Path of the JSON dataset that the loading cell below reads.
# Alternative input file, currently disabled:
# JSON_PATH = "preprocessed_data.json"
JSON_PATH = "data_balanced_augmented.json"

### Load JSON data into NumPy arrays

In [3]:
# Read the whole dataset file into memory.
with open(JSON_PATH, "r") as f:
    data = json.load(f)

# The three top-level entries used by this notebook.
mfcc_nested, label_idx, labels = data['mfcc'], data['labels'], data['mapping']
# print(np.array(data['mfcc'][0][0]).shape)

preX = []  # not used in the visible code; kept in case another cell reads it

# zip() pairs every sample with its label and stops at the shorter sequence,
# so X and y always end up with the same number of rows.
paired = list(zip(mfcc_nested, label_idx))

# Each sample's nested MFCC values are flattened into one 1-D feature vector.
X = np.array(
    [np.array(sample).flatten() for sample, _ in paired],
    dtype=np.float32,
)
y = np.array([label for _, label in paired], dtype=np.int32)

print(X.shape)
print(y.shape)

(41862, 1313)
(41862,)


### Split into train, dev (validation), and test sets

In [4]:
# First split: hold out 20% of all samples as the dev set (stratified by label).
X_train, X_dev, y_train, y_dev = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Second split: hold out 20% of the remaining samples as the test set.
# Overall this gives roughly 64% train / 16% test / 20% dev.
X_train, X_test, y_train, y_test = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train,
)

print(X_train.shape, X_test.shape, X_dev.shape)
print(y_train.shape, y_test.shape, y_dev.shape)

(26791, 1313) (6698, 1313) (8373, 1313)
(26791,) (6698,) (8373,)


In [5]:
def write_to_json(data, filename):
    """Dump ``data`` as JSON to ``splitted_data/<filename>``.

    The ``splitted_data`` directory (relative to the current working
    directory) is created if it does not exist yet. An existing file with
    the same name is overwritten.
    """
    out_dir = 'splitted_data'
    os.makedirs(out_dir, exist_ok=True)

    target_path = os.path.join(out_dir, filename)
    with open(target_path, 'w') as f:
        json.dump(data, f)

def serialize_dataset(X_train, X_test, X_dev, y_train, y_test, y_dev):
    """Write the six split arrays to JSON files, one file per array.

    Each array is converted with ``.tolist()`` and handed to
    ``write_to_json`` under the name ``<variable>.json``.
    """
    outputs = (
        ('X_train.json', X_train),
        ('X_test.json', X_test),
        ('X_dev.json', X_dev),
        ('y_train.json', y_train),
        ('y_test.json', y_test),
        ('y_dev.json', y_dev),
    )
    for filename, array in outputs:
        # Convert one array at a time so only one Python list copy is alive.
        write_to_json(array.tolist(), filename)

# Persist the current train/test/dev split to disk.
serialize_dataset(
    X_train=X_train,
    X_test=X_test,
    X_dev=X_dev,
    y_train=y_train,
    y_test=y_test,
    y_dev=y_dev,
)

In [3]:
def read_from_json(filename):
    """Load ``splitted_data/<filename>`` and return its content as an array.

    The JSON document is parsed completely and then passed to ``np.array``,
    so the resulting dtype is whatever NumPy infers from the parsed values.
    """
    source_path = os.path.join('splitted_data', filename)
    with open(source_path, 'r') as f:
        payload = json.load(f)
    return np.array(payload)

def load_serialized_dataset():
    """Read the six split arrays back from their JSON files.

    Returns:
        Tuple ``(X_train, X_test, X_dev, y_train, y_test, y_dev)``, each
        element loaded with ``read_from_json`` from ``<name>.json``.
    """
    split_names = ('X_train', 'X_test', 'X_dev', 'y_train', 'y_test', 'y_dev')
    return tuple(read_from_json(f'{name}.json') for name in split_names)

# Restore the split from disk so this cell can run without the cells above.
(
    X_train,
    X_test,
    X_dev,
    y_train,
    y_test,
    y_dev,
) = load_serialized_dataset()

print(X_train.shape, X_test.shape, X_dev.shape)
print(y_train.shape, y_test.shape, y_dev.shape)

(26791, 1313) (6698, 1313) (8373, 1313)
(26791,) (6698,) (8373,)


## Initialize Model

In [6]:
def init_params(input_dim=1313, hidden_dim=512, output_dim=14):
    """Create the initial parameters of the two-layer network.

    Weights are drawn from a standard normal distribution and scaled by
    ``sqrt(2 / fan_in)`` (He initialization); biases start at zero.

    Args:
        input_dim: Number of input features per sample (1313 by default,
            the flattened feature length used in this notebook).
        hidden_dim: Number of hidden units; can be tuned.
        output_dim: Number of output classes (14 by default; the original
            comment indicates this is the number of labels).

    Returns:
        Tuple ``(weight1, bias1, weight2, bias2)`` with shapes
        ``(hidden_dim, input_dim)``, ``(hidden_dim,)``,
        ``(output_dim, hidden_dim)`` and ``(output_dim,)``.
    """
    weight1 = np.random.randn(hidden_dim, input_dim) * np.sqrt(2. / input_dim)
    bias1 = np.zeros(hidden_dim)

    # Optional warm start of the first layer from a saved run (disabled):
    # w1 = np.load("aug_dataset_1layer_0.9135.npz")
    # weight1 = w1['weight1']
    # bias1 = w1['bias1']

    weight2 = np.random.randn(output_dim, hidden_dim) * np.sqrt(2. / hidden_dim)
    bias2 = np.zeros(output_dim)

    return weight1, bias1, weight2, bias2

def ReLu(outZ1):
    """Apply the rectified linear unit element-wise: ``max(0, value)``."""
    lower_bound = 0
    rectified = np.maximum(lower_bound, outZ1)
    return rectified

def softmax(Z):
    """Return the row-wise softmax of a 2-D array of scores.

    The maximum of each row is subtracted before exponentiating so that
    large scores do not overflow; this does not change the result.
    """
    row_max = np.max(Z, axis=1, keepdims=True)
    exponentials = np.exp(Z - row_max)
    row_totals = np.sum(exponentials, axis=1, keepdims=True)
    return exponentials / row_totals

def forward_prop(weight1, bias1, weight2, bias2, x):
    """Run one forward pass through the two-layer network.

    The hidden layer is an affine map followed by ``ReLu``; the output layer
    is an affine map followed by ``softmax``. Weights are stored as
    ``(out_features, in_features)``, hence the transposes.

    Returns:
        Tuple ``(outZ1, activatedA1, outZ2, activatedA2)``: hidden
        pre-activation, hidden activation, output pre-activation and the
        softmax output, in that order.
    """
    # Hidden layer
    hidden_pre = np.dot(x, weight1.T) + bias1
    hidden_act = ReLu(hidden_pre)

    # Output layer
    logits = np.dot(hidden_act, weight2.T) + bias2
    probabilities = softmax(logits)

    return hidden_pre, hidden_act, logits, probabilities


def one_hot(y, num_classes=None):
    """Encode integer class labels as a one-hot matrix.

    Args:
        y: 1-D integer array of class indices.
        num_classes: Number of columns of the result. When omitted, it is
            inferred as ``max(y) + 1``, which is too narrow whenever the
            highest class does not occur in ``y``; pass the real class count
            to get a fixed width.

    Returns:
        Float array of shape ``(y.size, num_classes)`` with a single 1 per
        row at the column given by the label.

    Raises:
        ValueError: If ``y`` is empty and ``num_classes`` is not given.
        IndexError: If a label is outside ``[-num_classes, num_classes)``.
    """
    y = np.asarray(y)
    if num_classes is None:
        if y.size == 0:
            raise ValueError("num_classes is required when y is empty")
        num_classes = int(np.max(y)) + 1  # +1 ensures index max(y) is included

    one_hot_y = np.zeros((y.size, num_classes))
    one_hot_y[np.arange(y.size), y] = 1
    return one_hot_y

def derive_ReLu(Z):
    """Return the ReLU derivative as a boolean mask: True where ``Z`` is positive."""
    is_positive = 0 < Z
    return is_positive

def backward_prop(outZ1, activatedA1, outZ2, activatedA2, weight2, x, y):
    """Compute the gradients of the mean cross-entropy loss for one batch.

    Args:
        outZ1: Hidden-layer pre-activations, one row per sample.
        activatedA1: Hidden-layer activations, one row per sample.
        outZ2: Output-layer pre-activations (accepted for interface
            symmetry with ``forward_prop``; not needed for the gradient).
        activatedA2: Softmax outputs, shape ``(samples, classes)``.
        weight2: Output-layer weights, shape ``(classes, hidden)``.
        x: Input batch, one row per sample.
        y: 1-D integer array with the true class index of every sample.

    Returns:
        Tuple ``(difWeight1, difBias1, difWeight2, difBias2)`` averaged over
        the batch.
    """
    m = y.size

    # Softmax + cross-entropy gradient: probabilities minus the one-hot
    # target. Subtracting 1 at the true-class positions avoids building a
    # one-hot matrix whose width is inferred from max(y); that inference
    # breaks whenever the batch does not contain the highest class.
    difOutZ2 = np.array(activatedA2, dtype=float)  # copy; do not mutate the caller's array
    difOutZ2[np.arange(m), y] -= 1

    difWeight2 = difOutZ2.T.dot(activatedA1) / m
    difBias2 = np.sum(difOutZ2, axis=0) / m

    # Back-propagate through the output weights, then through ReLU, whose
    # derivative is 1 where the pre-activation was positive and 0 elsewhere.
    difOutZ1 = difOutZ2.dot(weight2) * (outZ1 > 0)
    difWeight1 = difOutZ1.T.dot(x) / m
    difBias1 = np.sum(difOutZ1, axis=0) / m

    return difWeight1, difBias1, difWeight2, difBias2

def update_params(weight1, bias1, weight2, bias2, difWeight1, difBias1, difWeight2, difBias2, alpha):
    """Take one gradient-descent step of size ``alpha`` on every parameter.

    Returns:
        Tuple ``(weight1, bias1, weight2, bias2)`` holding new arrays; the
        inputs are not modified in place.
    """
    parameters = (weight1, bias1, weight2, bias2)
    gradients = (difWeight1, difBias1, difWeight2, difBias2)
    return tuple(
        param - (alpha * grad)
        for param, grad in zip(parameters, gradients)
    )

In [4]:
def get_predictions(activatedA2):
    """Return, for every row, the column index holding the largest value."""
    class_axis = 1
    predicted = np.argmax(activatedA2, axis=class_axis)
    return predicted

def get_accuracy(predictions, y):
    """Return the fraction of entries in ``predictions`` that equal ``y``."""
    # print(predictions, y)
    hits = predictions == y
    n_correct = np.sum(hits)
    return n_correct / y.size

def gradient_descent(X, y, alpha, iterations, devX, devy):
    """Train the two-layer network with full-batch gradient descent.

    Parameters come from ``init_params``. Every iteration runs a forward
    pass over all of ``X``, measures the training accuracy of the current
    parameters, and then applies one update step. Every 10th iteration the
    training accuracy and the accuracy on ``devX``/``devy`` are printed;
    both numbers describe the same (pre-update) parameters.

    Args:
        X: Training inputs, one row per sample.
        y: Integer training labels.
        alpha: Learning rate.
        iterations: Number of update steps.
        devX: Held-out inputs used only for progress reporting.
        devy: Integer labels for ``devX``.

    Returns:
        Tuple ``(weight1, bias1, weight2, bias2)`` with copies of the
        parameters that reached the highest training accuracy. If no
        iteration beats an accuracy of 0.0, the initial parameters are
        returned.
    """
    weight1, bias1, weight2, bias2 = init_params()
    best_accuracy = 0.0
    best_params = (weight1.copy(), bias1.copy(), weight2.copy(), bias2.copy())
    acc = 0.0  # keeps the summary below valid when iterations == 0

    for i in range(iterations):
        outZ1, activatedA1, outZ2, activatedA2 = forward_prop(weight1=weight1, bias1=bias1, weight2=weight2, bias2=bias2, x=X)

        predictions = get_predictions(activatedA2)
        acc = get_accuracy(predictions, y)

        # Snapshot BEFORE the update: acc was measured with these parameters,
        # so these are the ones that must be stored alongside it.
        # NOTE: the best model is selected on training accuracy, not on dev.
        if acc > best_accuracy:
            best_accuracy = acc
            best_params = (weight1.copy(), bias1.copy(), weight2.copy(), bias2.copy())

        if i % 10 == 0:
            print(f"Iteration {i}: Accuracy = {acc:.4f}")
            _, _, _, activatedA2dev = forward_prop(weight1=weight1, bias1=bias1, weight2=weight2, bias2=bias2, x=devX)

            predictionsDev = get_predictions(activatedA2dev)
            accDev = get_accuracy(predictionsDev, devy)
            print(f"Unknown Data: Accuracy = {accDev:.4f}")

        difWeight1, difBias1, difWeight2, difBias2 = backward_prop(outZ1=outZ1, activatedA1=activatedA1, outZ2=outZ2, activatedA2=activatedA2, weight2=weight2, x=X, y=y)

        weight1, bias1, weight2, bias2 = update_params(weight1=weight1, bias1=bias1, weight2=weight2, bias2=bias2, difWeight1=difWeight1, difBias1=difBias1, difWeight2=difWeight2, difBias2=difBias2, alpha=alpha)

    print(f"Iteration {iterations}: Accuracy = {acc:.4f}")
    print(f"Best Iteration : Accuracy = {best_accuracy:.4f}")

    return best_params

In [7]:
# Train on the training split; the dev split is used for progress reporting.
w1, b1, w2, b2 = gradient_descent(
    X=X_train,
    y=y_train,
    alpha=0.0054,
    iterations=1600,
    devX=X_dev,
    devy=y_dev,
)

Iteration 0: Accuracy = 0.0588
Unknown Data: Accuracy = 0.0917
Iteration 10: Accuracy = 0.1395
Unknown Data: Accuracy = 0.1502
Iteration 20: Accuracy = 0.1814
Unknown Data: Accuracy = 0.1689
Iteration 30: Accuracy = 0.2038
Unknown Data: Accuracy = 0.1969
Iteration 40: Accuracy = 0.2280
Unknown Data: Accuracy = 0.2194
Iteration 50: Accuracy = 0.2573
Unknown Data: Accuracy = 0.2398
Iteration 60: Accuracy = 0.2689
Unknown Data: Accuracy = 0.2512
Iteration 70: Accuracy = 0.3047
Unknown Data: Accuracy = 0.2871
Iteration 80: Accuracy = 0.3309
Unknown Data: Accuracy = 0.3083
Iteration 90: Accuracy = 0.3584
Unknown Data: Accuracy = 0.3373
Iteration 100: Accuracy = 0.3783
Unknown Data: Accuracy = 0.3528
Iteration 110: Accuracy = 0.3994
Unknown Data: Accuracy = 0.3678
Iteration 120: Accuracy = 0.4210
Unknown Data: Accuracy = 0.3831
Iteration 130: Accuracy = 0.4387
Unknown Data: Accuracy = 0.4071
Iteration 140: Accuracy = 0.4609
Unknown Data: Accuracy = 0.4198
Iteration 150: Accuracy = 0.4738
Unk

In [8]:
# Store the parameters returned by training in a single .npz archive.
np.savez(
    "1600layer1.npz",
    weight1=w1,
    bias1=b1,
    weight2=w2,
    bias2=b2,
)