## GENRE CLASSIFIER

In [5]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
# Path to the JSON dataset file that load_data() opens; the loader reads
# the top-level "mfcc" and "labels" entries from it.

dataset_path = "data_10.json"

def load_data(dataset_path):
    """Read a JSON dataset and return its features and labels as arrays.

    The file is expected to hold a JSON object with an ``"mfcc"`` entry and
    a ``"labels"`` entry, both stored as (nested) lists.

    Args:
        dataset_path: Path of the JSON file to read.

    Returns:
        A ``(inputs, targets)`` tuple of numpy arrays built from the
        ``"mfcc"`` and ``"labels"`` entries respectively.
    """
    with open(dataset_path, "r") as json_file:
        payload = json.load(json_file)

    # JSON only gives us plain lists; wrap each one in a numpy array.
    features, labels = (np.array(payload[key]) for key in ("mfcc", "labels"))
    return features, labels

if __name__ == "__main__":

    # Read the features and labels from the JSON dataset.
    # The author describes inputs as a 3D array
    # (segment #, MFCC interval, MFCC value); the Flatten layer below relies
    # on inputs.shape[1] and inputs.shape[2] being available.
    inputs, targets = load_data(dataset_path)

    # Hold out 30% of the samples with sklearn; the remainder is the
    # training set. The held-out part is also passed to fit() as the
    # validation data further down.
    (inputs_train, inputs_test,
     targets_train, targets_test) = train_test_split(inputs, targets, test_size=0.3)

    # Build the network architecture with keras, one layer at a time.
    model = keras.Sequential()

    # Input layer: flattens each 2D sample into a 1D vector.
    model.add(keras.layers.Flatten(input_shape=(inputs.shape[1], inputs.shape[2])))

    # Three fully connected hidden layers, each using the ReLU activation.
    model.add(keras.layers.Dense(512, activation="relu"))
    model.add(keras.layers.Dense(256, activation="relu"))
    model.add(keras.layers.Dense(64, activation="relu"))

    # Output layer: 10 units (one per category); softmax normalizes the
    # outputs into the 0 - 1 range.
    model.add(keras.layers.Dense(10, activation="softmax"))

    # Compile with the Adam optimizer (a variation of SGD). The loss type
    # depends on the task; here the targets are integer class labels.
    optimizer = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )

    model.summary()

    # Train the network; batch_size is the number of samples consumed per
    # gradient update.
    model.fit(
        inputs_train,
        targets_train,
        batch_size=32,
        epochs=50,
        validation_data=(inputs_test, targets_test),
    )

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 1690)              0         
                                                                 
 dense_4 (Dense)             (None, 512)               865792    
                                                                 
 dense_5 (Dense)             (None, 256)               131328    
                                                                 
 dense_6 (Dense)             (None, 64)                16448     
                                                                 
 dense_7 (Dense)             (None, 10)                650       
                                                                 
Total params: 1,014,218
Trainable params: 1,014,218
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50


