In [1]:
import pandas as pd
import numpy as np
import keras
import csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
import matplotlib.pyplot as plt

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# This function should transform data into a usable format 
def featureProcess(path):
    data = pd.read_csv(path)
    #get features data from loudness to vect148
    features = data.loc[:, "loudness":"vect_148"]

    return features.values.tolist()

In [3]:
# This function should transform data into a usable format 
def featureNormalProcess(path):
    data = pd.read_csv(path)
    #get audio data from vect1 to vect148
    #featureData = data.loc[:, "vect_1":"vect_148"]
    
    #get feature data from loudness to vect148
    featureData = data.loc[:, "loudness":"vect_148"]
    
    # normalization
    featureData=(featureData-featureData.mean())/featureData.std()
    
    return featureData.values.tolist()

In [4]:
# This function should transform data into a usable format 
def labelProcess(path):
    data = pd.read_csv(path)
    #get classes
    labels = data.loc[:,"genre"].values.tolist()
    
    return labels

In [5]:
def toOneHot(classes, labels):
    one_hot_labels = []

    for label in labels:
        one_hot_vector = [0] * len(classes)
        one_hot_vector[classes.index(label)] = 1

        one_hot_labels.append(one_hot_vector)
    return one_hot_labels

In [6]:
def MLP(trainData, trainLabel):
    
    num_classes = len(trainLabel[0])  # total classes to train
    
    model = keras.models.Sequential()
    model.add(Dense(trainData.shape[1], input_shape = trainData.shape[1:], activation="relu", kernel_initializer="random_uniform"))
    model.add(Dropout(0.5))
    model.add(Dense(1000, activation="relu", kernel_initializer="random_uniform"))
    model.add(Dropout(0.5))
    model.add(Dense(500, activation="relu", kernel_initializer="random_uniform"))
    model.add(Dropout(0.5))
    model.add(Dense(100, activation="relu", kernel_initializer="random_uniform"))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation="softmax", kernel_initializer="random_uniform"))

    #Define loss, optimizer, and metrics
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [7]:
def train(trainData, trainLabel, model):
    
    batch_size = 32  #the number of data to feed into model per batch
    num_epoch = 50  # go through your training data epoch times
    
    #callbacks stop training if val_loss is not improving.
    callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose = 1)]
    
    history = model.fit(trainData, trainLabel, validation_split=0.2, epochs = num_epoch, batch_size=batch_size, verbose=1, callbacks = callbacks)
                 
    return history, model

In [8]:
def evaluate(validData, validLabel, model):
    loss, accuracy = model.evaluate(validData, validLabel)
    return loss, accuracy

In [9]:
def predict(testData, model):
    result = model.predict_classes(testData, verbose=1)
    return result

DATA Preprocess

In [10]:
trainData = np.array(featureNormalProcess(r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\train_features.csv"))
validData = np.array(featureNormalProcess(r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\valid_features.csv"))
testData = np.array(featureNormalProcess(r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\test_features.csv"))

In [11]:
trainLabel = labelProcess(r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\train_labels.csv")
validLabel = labelProcess(r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\valid_labels.csv")
#get classes
classes = list(set(trainLabel))

oneHotTrain = np.array(toOneHot(classes, trainLabel))
oneHotvalid = np.array(toOneHot(classes, validLabel))

Training

In [12]:
model = MLP(trainData, oneHotTrain)
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 148)               22052     
_________________________________________________________________
dropout_1 (Dropout)          (None, 148)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1000)              149000    
_________________________________________________________________
dropout_2 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 500)               500500    
_________________________________________________________________
dropout_3 (Dropout)  

In [13]:
history, _= train(trainData, oneHotTrain, model)
print("Finish training")

Instructions for updating:
Use tf.cast instead.
Train on 6142 samples, validate on 1536 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 00037: early stopping
Finish training


In [14]:
loss, accuracy = evaluate(validData, oneHotvalid, model)
print("loss, accuracy:", loss, accuracy)

loss, accuracy: 1.6877009942796495 0.46444444444444444


In [None]:
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

Predict

In [15]:
trackID = pd.read_csv(r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\test_features.csv").loc[:, "trackID"].values.tolist()
prediction = predict(testData, model)



In [16]:
results_name = []

for result in prediction:
    results_name.append(classes[result])

result = list(zip(trackID,results_name))

In [18]:
#file = "audio_MLP_outPut.csv"
file = "meta+audio_MLP_outPut.csv"
with open(file, 'w', newline='') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerow(["trackID", "genre"])
    writer.writerows(result)