In [1]:
import pandas as pd
import numpy as np
import keras
import csv
from keras.models import Sequential, Model
from keras.layers.merge import concatenate
from keras.layers import Dense, Input
from keras.layers import Dropout
from keras.layers import Conv1D, MaxPooling1D, Flatten
from sklearn.feature_extraction.text import CountVectorizer

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def audioProcess(path):
    """Load the audio feature columns of a CSV file and z-score normalise them.

    Parameters
    ----------
    path : str or file-like
        CSV source handed to ``pandas.read_csv``. It must contain the columns
        ``vect_1`` and ``vect_148``; every column between them (inclusive) is
        used.

    Returns
    -------
    list of list of float
        One inner list per CSV row. Each column is centred on its mean and
        divided by its standard deviation, both computed from this file only.

    NOTE(review): because the statistics come from the file being loaded,
    each split is scaled with its own mean/std - confirm this is intended.
    """
    data = pd.read_csv(path)
    # Label-based slice: all columns from "vect_1" through "vect_148".
    audioData = data.loc[:, "vect_1":"vect_148"]

    # A constant column has a standard deviation of 0, and 0 / 0 would turn
    # the whole column into NaN. Dividing by 1 instead leaves it at 0.
    std = audioData.std().replace(0, 1.0)
    audioData = (audioData - audioData.mean()) / std

    return audioData.values.tolist()

In [3]:
def textProcess(path):
    """Return the ``tags`` column of the CSV at ``path`` as a Python list.

    ``path`` is passed straight to ``pandas.read_csv``; the result holds one
    entry per CSV row, in file order.
    """
    # Only the "tags" column is kept; every other column is ignored.
    return pd.read_csv(path).loc[:, "tags"].values.tolist()

In [4]:
def labelProcess(path):
    """Return the ``genre`` column of the CSV at ``path`` as a Python list.

    ``path`` is passed straight to ``pandas.read_csv``; the result holds one
    label per CSV row, in file order.
    """
    frame = pd.read_csv(path)
    genre_column = frame.loc[:, "genre"]
    return genre_column.values.tolist()

In [5]:
def toOneHot(classes, labels):
    """Encode each label as a one-hot list over ``classes``.

    Parameters
    ----------
    classes : list
        Ordered class names; a label's position in this list is the index of
        its ``1`` entry.
    labels : iterable
        Labels to encode.

    Returns
    -------
    list of list of int
        One vector of length ``len(classes)`` per label.

    Raises
    ------
    ValueError
        From ``classes.index`` when a label is not present in ``classes``.
    """
    width = len(classes)
    encoded = []
    for label in labels:
        hot_position = classes.index(label)
        encoded.append([1 if column == hot_position else 0 for column in range(width)])
    return encoded

In [6]:
def train(audioData, textData, trainLabel, model,
          batch_size=32, num_epoch=50, patience=3, validation_split=0.2):
    """Fit ``model`` on the audio and text inputs with early stopping.

    Parameters
    ----------
    audioData, textData
        The two model inputs; passed to ``model.fit`` as ``[audioData, textData]``.
    trainLabel
        Training targets passed to ``model.fit``.
    model
        Object exposing a Keras-style ``fit`` method.
    batch_size : int, default 32
        Number of samples fed to the model per batch.
    num_epoch : int, default 50
        Maximum number of passes over the training data.
    patience : int, default 3
        ``patience`` argument of the ``EarlyStopping`` callback that monitors
        ``val_loss``.
    validation_split : float, default 0.2
        ``validation_split`` argument forwarded to ``model.fit``.

    Returns
    -------
    tuple
        ``(history, model)`` - the value returned by ``model.fit`` and the
        same ``model`` object that was passed in.
    """
    # Stop training once val_loss has stopped improving.
    callbacks = [
        keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, verbose=1)
    ]

    history = model.fit(
        [audioData, textData],
        trainLabel,
        validation_split=validation_split,
        epochs=num_epoch,
        batch_size=batch_size,
        verbose=1,
        callbacks=callbacks,
    )

    return history, model

In [7]:
def models(textData, trainLabel):
    """Build and compile the two-input (audio + text) classifier.

    The audio input of shape ``(148, 1)`` goes through
    Conv1D(128, 10) -> Dropout(0.5) -> MaxPooling1D(2) -> Conv1D(64, 10) ->
    Dropout(0.5) -> Flatten. The flattened result is concatenated with the
    raw text input and fed to a single softmax ``Dense`` layer.

    Parameters
    ----------
    textData
        Object with a ``shape`` attribute; ``shape[1:]`` is used as the shape
        of the text input.
    trainLabel
        Indexable collection of label vectors; ``len(trainLabel[0])`` sets the
        number of output units.

    Returns
    -------
    A ``Model`` compiled with categorical cross-entropy loss, the Adam
    optimizer and the accuracy metric.
    """
    n_outputs = len(trainLabel[0])

    # Options shared by both convolution layers.
    conv_options = dict(padding="same", activation="relu", kernel_initializer="random_uniform")

    # Audio branch.
    audio_in = Input(shape=(148, 1))
    audio = Conv1D(128, 10, **conv_options)(audio_in)
    audio = Dropout(0.5)(audio)
    audio = MaxPooling1D(pool_size=(2))(audio)
    audio = Conv1D(64, 10, **conv_options)(audio)
    audio = Dropout(0.5)(audio)
    audio_flat = Flatten()(audio)

    # Text branch: the input is used as-is, without any hidden layer.
    text_in = Input(shape=textData.shape[1:])

    joined = concatenate([audio_flat, text_in])
    probabilities = Dense(
        n_outputs, activation="softmax", kernel_initializer="random_uniform"
    )(joined)

    network = Model([audio_in, text_in], probabilities)

    # Loss, optimizer and reported metric.
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

In [8]:
def predict(testData, model):
    """Return ``model.predict(testData)`` unchanged."""
    return model.predict(testData)

In [9]:
def evaluate(test_label, prediction):
    """Return the accuracy of ``prediction`` against ``test_label``.

    Both arguments are forwarded unchanged to
    ``sklearn.metrics.accuracy_score``.

    NOTE(review): ``accuracy_score`` compares labels, so ``prediction``
    presumably has to be in the same label format as ``test_label`` (not raw
    class probabilities) - confirm at the call site.
    """
    # Imported here because no module named `metrics` is in scope in this
    # notebook; without this import the call raises NameError.
    from sklearn.metrics import accuracy_score

    return accuracy_score(test_label, prediction)

Data Preprocessing

In [10]:
# Load the raw tag strings of the train / validation / test feature files,
# one NumPy array per split (same order as the paths below).
trainText, validText, testText = (
    np.array(textProcess(csv_path))
    for csv_path in (
        r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\train_features.csv",
        r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\valid_features.csv",
        r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\test_features.csv",
    )
)

In [11]:
# Create the bag-of-words features.
# Commas are removed from every tag string before vectorising.
trainTxt = [tags.replace(",", "") for tags in trainText]
validTxt = [tags.replace(",", "") for tags in validText]
testTxt = [tags.replace(",", "") for tags in testText]

# The vocabulary is learned from the training split only; the validation and
# test splits are projected onto that same vocabulary.
vectorizer = CountVectorizer()
trainVectors = vectorizer.fit_transform(trainTxt)
validVectors = vectorizer.transform(validTxt)
testVectors = vectorizer.transform(testTxt)

In [13]:
# Load the audio features of each split and append a trailing axis of size 1
# (axis=2) to every array.
trainAudio, validAudio, testAudio = (
    np.expand_dims(np.array(audioProcess(csv_path)), axis=2)
    for csv_path in (
        r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\train_features.csv",
        r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\valid_features.csv",
        r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\test_features.csv",
    )
)

In [14]:
trainLabel = labelProcess(r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\train_labels.csv")
validLabel = labelProcess(r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\valid_labels.csv")

# Distinct classes seen in the training labels. They are sorted because the
# iteration order of a set of strings can differ between interpreter sessions
# (string hashing is randomised), which would silently change which one-hot
# column each genre maps to after a kernel restart.
classes = sorted(set(trainLabel))

# One-hot targets; column i corresponds to classes[i].
oneHotTrain = np.array(toOneHot(classes, trainLabel))
oneHotvalid = np.array(toOneHot(classes, validLabel))

Training

In [15]:
# Build the compiled network (the text input width is taken from the
# bag-of-words matrix, the output width from the one-hot labels) and print
# its layer table.
model = models(textData=trainVectors, trainLabel=oneHotTrain)
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 148, 1)       0                                            
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 148, 128)     1408        input_1[0][0]                    
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 148, 128)     0           conv1d_1[0][0]                   
__________________________________________________________________________________________________
max_pooling1d_1 (MaxP

In [16]:
# train() returns a (history, model) pair; unpack it so that `history` is the
# fit history itself rather than the whole tuple.
history, model = train(trainAudio, trainVectors, oneHotTrain, model)
print("Finish training")

Instructions for updating:
Use tf.cast instead.
Train on 6142 samples, validate on 1536 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 00013: early stopping
Finish training


In [17]:
# Score the trained model on the held-out validation split.
valid_inputs = [validAudio, validVectors]
loss, accuracy = model.evaluate(valid_inputs, oneHotvalid)
print("loss, accuracy:", loss, accuracy)

loss, accuracy: 1.004422158135308 0.7


Predict

In [None]:
# Track identifiers of the test rows, in file order.
test_features = pd.read_csv(r"D:\Unimelb\Introduction to Machine Learning\assignment2\dataset\dataset\test_features.csv")
trackID = test_features.loc[:, "trackID"].values.tolist()

# Model output for the test split (audio and text inputs together).
prediction = predict([testAudio, testVectors], model)

In [None]:
# Pick the highest-scoring class for every test row. np.argmax returns the
# first index of the maximum, the same tie-breaking as list.index(max(...)).
# `prediction` is deliberately not rebound, so this cell can be re-run.
predicted_indices = np.argmax(prediction, axis=1)
results_name = [classes[class_index] for class_index in predicted_indices]

# (trackID, genre) pairs in test-file order.
result = list(zip(trackID, results_name))

In [None]:
# Write the header row followed by one (trackID, genre) row per prediction.
# newline='' lets the csv module control line endings itself.
with open("final_outPut.csv", 'w', newline='') as out_file:
    submission = csv.writer(out_file)
    submission.writerows([["trackID", "genre"], *result])