# <span style="color:#0b486b">  FIT5215: Deep Learning (2021)</span>
***
*CE/Lecturer:*  **Dr Trung Le** | trunglm@monash.edu <br/>
*Head TA:*  **Dr Van Nguyen** | van.nguyen1@monash.edu <br/>
*Tutors:* **Mr Anh Bui** \[tuananh.bui@monash.edu\] | **Mr Tuan Nguyen** \[tuan.ng@monash.edu\] | **Dr Binh Nguyen** \[binh.nguyen1@monash.edu\] | **Dr Mahmoud Mohammad** \[mahmoud.hossam@monash.edu\]
<br/> <br/>
Faculty of Information Technology, Monash University, Australia
***


## Sentence Classification with Convolutional Neural Network

In [None]:
%matplotlib inline
from __future__ import print_function
import collections
import math
import numpy as np
import os
import random
import tensorflow as tf
import zipfile
from matplotlib import pylab
from six.moves import range
from six.moves.urllib.request import urlretrieve
import tensorflow as tf 
from sklearn import preprocessing
from sklearn.cross_validation import train_test_split

In [None]:
class DataManager:
    """Download, parse, pad, index and batch a question-classification file.

    Each line of the label file is expected to look like
    ``<label>:<question text>``.  The text before the first colon is the
    class label; everything after it is lower-cased and split on whitespace
    into tokens.

    Attributes set by the methods below:
        questions:        list of token lists, one per question.
        labels:           list of label strings, aligned with ``questions``.
        numeral_labels:   integer-encoded labels (from ``LabelEncoder``).
        str_classes:      the distinct label strings, in encoder order.
        num_classes:      ``len(str_classes)``.
        numeral_data:     ``questions`` with every token replaced by its id.
        max_sentence_len: longest question (in tokens) seen so far.
        cur_pos:          cursor into the training split used by ``next_batch``.
    """

    def __init__(self, verbose=True):
        """Create an empty manager; ``verbose`` toggles progress printing."""
        self.verbose = verbose
        self.max_sentence_len = 0
        self.questions = list()
        # `labels` is what read_data() fills; initialised here so the
        # attribute exists before any file has been read.
        self.labels = list()
        # Kept for backward compatibility; no method in this class uses it.
        self.str_labels = list()
        self.numeral_labels = list()
        self.numeral_data = list()
        self.cur_pos = 0

    def maybe_download(self, dir_name, file_name, url):
        """Fetch ``url + file_name`` into ``dir_name`` unless it already exists.

        Args:
            dir_name: target directory; created (including parents) if missing.
            file_name: name of the file, appended to ``url`` and to ``dir_name``.
            url: base URL, expected to end with a ``/``.
        """
        # makedirs instead of mkdir: the target used by this notebook is a
        # nested path ("Data/question-classif-data"), which mkdir cannot
        # create when the parent directory is missing.
        os.makedirs(dir_name, exist_ok=True)
        file_path = os.path.join(dir_name, file_name)
        if not os.path.exists(file_path):
            urlretrieve(url + file_name, file_path)
        if self.verbose:
            print("Downloaded successfully {}".format(file_name))

    @staticmethod
    def _parse_row(row):
        """Split one ``label:question`` line into ``(label, tokens)``.

        Only the FIRST colon separates label from question, so colons inside
        the question text (e.g. ``5:30``) are preserved.

        Raises:
            ValueError: if the row contains no colon at all.
        """
        label, question = row.split(":", 1)
        return label, question.lower().split()

    def read_data(self, dir_name, file_name):
        """Read a label file and populate questions, labels and class info.

        Resets ``questions`` and ``labels``; ``max_sentence_len`` is only ever
        increased, so it keeps the maximum across repeated calls.

        Raises:
            ValueError: if a non-blank line has no ``:`` separator.
        """
        file_path = os.path.join(dir_name, file_name)
        self.questions = list()
        self.labels = list()
        with open(file_path, "r", encoding="latin-1") as f:
            for row in f:
                if not row.strip():
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                label, tokens = self._parse_row(row)
                self.labels.append(label)
                self.questions.append(tokens)
                self.max_sentence_len = max(self.max_sentence_len, len(tokens))
        le = preprocessing.LabelEncoder()
        le.fit(self.labels)
        self.numeral_labels = le.transform(self.labels)
        self.str_classes = le.classes_
        self.num_classes = len(self.str_classes)
        if self.verbose:
            print("Sample questions \n")
            print(self.questions[0:5])
            print("Labels {}\n\n".format(self.str_classes))

    def padding(self, length):
        """Right-pad every question in place with ``"pad"`` up to ``length``.

        Questions that already have ``length`` or more tokens are unchanged.
        """
        for question in self.questions:
            # list.extend mutates in place and returns None, so its result
            # must not be assigned back.
            question.extend(["pad"] * (length - len(question)))

    def build_numeral_data(self, dictionary):
        """Map every token to its id using ``dictionary`` (word -> int).

        Raises:
            KeyError: if a token is missing from ``dictionary``.
        """
        self.numeral_data = [
            [dictionary[word] for word in question] for question in self.questions
        ]
        if self.verbose:
            print('Sample numeral data \n')
            print(self.numeral_data[0:5])

    def train_valid_split(self, train_size=0.9, rand_seed=33):
        """Split the indexed data into train/validation parts.

        Args:
            train_size: fraction kept for training; ``1 - train_size`` is
                passed to ``train_test_split`` as ``test_size``.
            rand_seed: forwarded as ``random_state`` for a reproducible split.
        """
        X_train, X_valid, y_train, y_valid = train_test_split(
            np.array(self.numeral_data),
            np.array(self.numeral_labels),
            test_size=1 - train_size,
            random_state=rand_seed,
        )
        self.train_numeral = X_train
        self.train_labels = y_train
        self.valid_numeral = X_valid
        self.valid_labels = y_valid

    @staticmethod
    def build_dictionary_count(questions):
        """Build the vocabulary from a list of token lists.

        Returns:
            (dictionary, reverse_dictionary, count) where ``count`` is the
            list of ``(word, frequency)`` pairs from ``Counter.most_common()``,
            ``dictionary`` maps each word to its rank in ``count`` and
            ``reverse_dictionary`` maps the rank back to the word.
        """
        count = collections.Counter(
            word for question in questions for word in question
        ).most_common()
        dictionary = {word: index for index, (word, _) in enumerate(count)}
        reverse_dictionary = {index: word for word, index in dictionary.items()}
        return dictionary, reverse_dictionary, count

    def next_batch(self, batch_size, vocab_size, input_len):
        """Return the next one-hot encoded training batch.

        Samples are taken sequentially from the training split starting at
        ``cur_pos``; the cursor wraps around at the end of the split.

        Returns:
            (data_batch, label_batch) with shapes
            ``[batch_size, input_len, vocab_size]`` and
            ``[batch_size, num_classes]``.
        """
        data_batch = np.zeros([batch_size, input_len, vocab_size])
        label_batch = np.zeros([batch_size, self.num_classes])
        train_size = len(self.train_numeral)
        for i in range(batch_size):
            sample = self.train_numeral[self.cur_pos]
            for j in range(input_len):
                data_batch[i, j, sample[j]] = 1
            label_batch[i, self.train_labels[self.cur_pos]] = 1
            self.cur_pos = (self.cur_pos + 1) % train_size
        return data_batch, label_batch

    def convert_to_feed(self, data_numeral, label_numeral, input_len, vocab_size):
        """One-hot encode a whole indexed data set and its labels.

        Args:
            data_numeral: array whose first dimension is the sample count and
                whose rows hold at least ``input_len`` token ids.
            label_numeral: integer class id per sample.

        Returns:
            (data2feed, label2feed) with shapes
            ``[n_samples, input_len, vocab_size]`` and
            ``[n_samples, num_classes]``.
        """
        n_samples = data_numeral.shape[0]
        data2feed = np.zeros([n_samples, input_len, vocab_size])
        label2feed = np.zeros([n_samples, self.num_classes])
        for i in range(n_samples):
            for j in range(input_len):
                data2feed[i, j, data_numeral[i][j]] = 1
            label2feed[i, label_numeral[i]] = 1
        return data2feed, label2feed
        

In [None]:
# One manager per split; each download is skipped by maybe_download when the
# target file is already present on disk.
train_dm = DataManager()
train_dm.maybe_download(
    dir_name="Data/question-classif-data",
    file_name="train_1000.label",
    url="http://cogcomp.org/Data/QA/QC/",
)

test_dm = DataManager()
test_dm.maybe_download(
    dir_name="Data/question-classif-data",
    file_name="TREC_10.label",
    url="http://cogcomp.org/Data/QA/QC/",
)

In [None]:
# Parse both label files.
train_dm.read_data(dir_name="Data/question-classif-data", file_name="train_1000.label")
test_dm.read_data(dir_name="Data/question-classif-data", file_name="TREC_10.label")

# Pad every question (train and test) to the longest sentence of either file.
pad_len = max(train_dm.max_sentence_len, test_dm.max_sentence_len)
train_dm.padding(pad_len)
test_dm.padding(pad_len)

# The vocabulary is built over the train and test questions together.
all_questions = train_dm.questions + test_dm.questions
dictionary, _, _ = DataManager.build_dictionary_count(all_questions)

# Replace tokens by their ids, then carve a validation part out of train.
train_dm.build_numeral_data(dictionary)
test_dm.build_numeral_data(dictionary)
train_dm.train_valid_split()

# Peek at one small one-hot batch as a sanity check.
data_batch, label_batch = train_dm.next_batch(
    batch_size=5, vocab_size=len(dictionary), input_len=pad_len
)
print("Sample data batch- label batch \n")
print(data_batch)
print(label_batch)

In [None]:
class Layers:
    """Static helpers that add TensorFlow 1.x graph ops for common layers.

    Every parametrised layer draws its initial weights and biases from a
    normal distribution (mean 0, stddev 0.1), creates them as variables
    named ``"W"`` and ``"b"`` inside ``tf.name_scope(name)``, and applies the
    optional activation ``act`` to the affine output.
    """

    @staticmethod
    def dense(inputs, output_size, name="dense1", act=None):
        """Fully connected layer: ``act(inputs @ W + b)``.

        The input width is read from dimension 1 of ``inputs``' static shape.
        """
        with tf.name_scope(name):
            fan_in = int(inputs.get_shape()[1])
            weights_init = tf.random_normal(
                [fan_in, output_size], mean=0, stddev=0.1, dtype=tf.float32
            )
            bias_init = tf.random_normal(
                [output_size], mean=0, stddev=0.1, dtype=tf.float32
            )
            weights = tf.Variable(weights_init, name="W")
            bias = tf.Variable(bias_init, name="b")
            affine = tf.matmul(inputs, weights) + bias
            return affine if act is None else act(affine)

    @staticmethod
    def conv2D(inputs, filter_shape, strides=[1,1,1,1], padding="SAME", name= "conv1", act=None):
        """2-D convolution plus bias; the bias length is ``filter_shape[3]``."""
        with tf.name_scope(name):
            kernel_init = tf.random_normal(
                filter_shape, mean=0, stddev=0.1, dtype=tf.float32
            )
            kernel = tf.Variable(kernel_init, name="W")
            bias_init = tf.random_normal(
                [int(filter_shape[3])], mean=0, stddev=0.1, dtype=tf.float32
            )
            bias = tf.Variable(bias_init, name="b")
            affine = tf.nn.conv2d(
                input=inputs, filter=kernel, strides=strides, padding=padding
            ) + bias
            return affine if act is None else act(affine)

    @staticmethod
    def conv1D(inputs, filter_shape, stride=1, padding="SAME", name="conv1", act=None):
        """1-D convolution plus bias; the bias length is ``filter_shape[2]``."""
        with tf.name_scope(name):
            kernel_init = tf.random_normal(
                filter_shape, mean=0, stddev=0.1, dtype=tf.float32
            )
            kernel = tf.Variable(kernel_init, name="W")
            bias_init = tf.random_normal([filter_shape[2]], mean=0, stddev=0.1)
            bias = tf.Variable(bias_init, name="b")
            affine = tf.nn.conv1d(
                value=inputs, filters=kernel, stride=stride, padding=padding
            ) + bias
            return affine if act is None else act(affine)

    @staticmethod
    def max_pool(inputs, ksize=[1,2,2,1],strides=[1,2,2,1], padding="SAME"):
        """Thin wrapper around ``tf.nn.max_pool`` with 2x2 defaults."""
        pooled = tf.nn.max_pool(
            value=inputs, ksize=ksize, strides=strides, padding=padding
        )
        return pooled

    @staticmethod
    def dropout(inputs, keep_prob):
        """Thin wrapper around ``tf.nn.dropout``."""
        dropped = tf.nn.dropout(inputs, keep_prob=keep_prob)
        return dropped

    @staticmethod
    def batch_norm(inputs, phase_train):
        """Batch normalisation via ``tf.contrib.layers.batch_norm``.

        ``phase_train`` is forwarded as ``is_training``.
        """
        normalised = tf.contrib.layers.batch_norm(
            inputs,
            decay=0.99,
            is_training=phase_train,
            center=True,
            scale=True,
            reuse=False,
        )
        return normalised


In [None]:
class SC_CNN:
    """Sentence-classification CNN built on the TensorFlow 1.x graph API.

    Three parallel 1-D convolutions (kernel sizes 3, 5 and 7, one output
    channel each) run over the one-hot encoded sentence, each is max-pooled
    over the sequence axis, and the three pooled values are concatenated and
    fed to a dense layer that produces the class logits.
    """

    def __init__(self, height, width, batch_size=32, epochs=100, num_classes=5, save_history=True,
                 verbose=True, optimizer=None, learning_rate=0.001):
        """Reset the default graph, store hyper-parameters and open a session.

        Args:
            height: sequence length of the input placeholder (tokens per sentence).
            width: size of the last input dimension (vocabulary size for one-hot input).
            batch_size, epochs: stored on the instance; not read by any method here.
            num_classes: number of output classes.
            save_history: when true, losses/accuracies are accumulated in ``self.H``.
            verbose: when true, per-epoch metrics are printed.
            optimizer: a ready-made TF1 optimizer.  When ``None`` (the default)
                an ``AdamOptimizer`` is created with ``learning_rate``.
            learning_rate: learning rate for the default Adam optimizer; a
                caller-supplied ``optimizer`` is used as given.
        """
        tf.reset_default_graph()
        self.height = height
        self.width = width
        self.batch_size = batch_size
        self.epochs = epochs
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        # The optimizer is created here rather than in the signature: a
        # default-argument instance is built once at definition time (before
        # the graph reset above) and shared by every model.  Assigning
        # `optimizer.learning_rate` afterwards had no effect because
        # AdamOptimizer does not read that attribute, so `learning_rate`
        # was silently ignored.
        if optimizer is None:
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.optimizer = optimizer
        self.verbose = verbose
        self.save_history = save_history
        if self.save_history:
            self.H = {"train_loss_batch": [], "train_acc_batch": [], "train_loss_epoch": [],
                      "train_acc_epoch": [], "valid_loss_epoch": [], "valid_acc_epoch": []}
        self.session = tf.Session()

    def build(self):
        """Create placeholders, the network, loss/accuracy ops and initialise variables."""
        self.X = tf.placeholder(shape=[None, self.height, self.width], dtype=tf.float32)
        self.y = tf.placeholder(shape=[None, self.num_classes], dtype=tf.float32)
        # Three parallel branches with different kernel widths, one filter each.
        conv1 = Layers.conv1D(inputs=self.X, filter_shape=[3, self.width, 1], name="conv1")
        conv2 = Layers.conv1D(inputs=self.X, filter_shape=[5, self.width, 1], name="conv2")
        conv3 = Layers.conv1D(inputs=self.X, filter_shape=[7, self.width, 1], name="conv3")
        # Max over axis 1 (the sequence axis of the placeholder).
        h1 = tf.reduce_max(conv1, axis=1)
        h2 = tf.reduce_max(conv2, axis=1)
        h3 = tf.reduce_max(conv3, axis=1)
        h = tf.concat([h1, h2, h3], axis=1)
        logits = Layers.dense(inputs=h, output_size=self.num_classes)
        with tf.name_scope("train"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=self.y, logits=logits)
            self.loss = tf.reduce_mean(cross_entropy)
            self.train = self.optimizer.minimize(self.loss)
        with tf.name_scope("predict"):
            self.y_pred = tf.argmax(logits, axis=1)
            y1 = tf.argmax(self.y, axis=1)
            corrections = tf.cast(tf.equal(self.y_pred, y1), dtype=tf.float32)
            self.accuracy = tf.reduce_mean(corrections)
        self.session.run(tf.global_variables_initializer())

    def partial_fit(self, data_batch, label_batch):
        """Run one optimisation step on a batch.

        When history is enabled, the batch loss/accuracy are evaluated in a
        second session run, i.e. after the parameters have been updated.
        """
        self.session.run([self.train], feed_dict={self.X: data_batch, self.y: label_batch})
        if self.save_history:
            self.compute_loss_acc(data_batch, label_batch, "train", "Iteration", 1)

    def predict(self, X, y):
        """Return ``(predicted class ids, accuracy)`` for inputs ``X`` and one-hot labels ``y``."""
        y_pred, acc = self.session.run([self.y_pred, self.accuracy], feed_dict={self.X: X, self.y: y})
        return y_pred, acc

    def compute_loss_acc(self, X, y, applied_set="train", applied_scope="Epoch", index=1):
        """Evaluate loss and accuracy on ``(X, y)`` and record/print them.

        Args:
            applied_set: ``"train"`` or anything else (treated as validation).
            applied_scope: ``"Iteration"`` stores into the ``*_batch`` history
                lists; any other value stores into the ``*_epoch`` lists.
                Printing happens only for ``"Epoch"`` and when ``verbose``.
            index: epoch/iteration number used in the printed message.
        """
        loss, acc = self.session.run([self.loss, self.accuracy], feed_dict={self.X: X, self.y: y})
        if self.verbose and applied_scope == "Epoch":
            print("{} {} {} loss= {}, acc={}".format(applied_scope, index, applied_set, loss, acc))
        if self.save_history:
            if applied_scope == "Iteration":
                # Per-iteration history only exists for the training set.
                subset, scope = "train", "batch"
            else:
                subset = "train" if applied_set == "train" else "valid"
                scope = "epoch"
            self.H["{}_loss_{}".format(subset, scope)].append(loss)
            self.H["{}_acc_{}".format(subset, scope)].append(acc)
            
        

In [None]:
# Training configuration.
batch_size = 32
epochs = 100
train_size = len(train_dm.train_numeral)
# Number of mini-batches needed to walk the training split once.
iter_per_epoch = math.ceil(train_size / batch_size)

network = SC_CNN(
    height=pad_len,
    width=len(dictionary),
    batch_size=batch_size,
    epochs=epochs,
    num_classes=train_dm.num_classes,
)
network.build()

# One-hot encode the full train / validation / test sets once, for evaluation.
train2feed, train_label2feed = train_dm.convert_to_feed(
    train_dm.train_numeral, train_dm.train_labels,
    input_len=pad_len, vocab_size=len(dictionary),
)
valid2feed, valid_label2feed = train_dm.convert_to_feed(
    train_dm.valid_numeral, train_dm.valid_labels,
    input_len=pad_len, vocab_size=len(dictionary),
)
test2feed, test_label2feed = test_dm.convert_to_feed(
    np.array(test_dm.numeral_data), np.array(test_dm.numeral_labels),
    input_len=pad_len, vocab_size=len(dictionary),
)

for epoch in range(epochs):
    # Mini-batch updates; next_batch walks the training split sequentially.
    for i in range(iter_per_epoch):
        data_batch, label_batch = train_dm.next_batch(
            batch_size=batch_size, vocab_size=len(dictionary), input_len=pad_len
        )
        network.partial_fit(data_batch, label_batch)
    # Full-set metrics at the end of every epoch (epochs are reported 1-based).
    network.compute_loss_acc(train2feed, train_label2feed, "train", "Epoch", epoch + 1)
    network.compute_loss_acc(valid2feed, valid_label2feed, "valid", "Epoch", epoch + 1)

print("Finish training and computing testing performance\n")
y_pred, test_acc = network.predict(test2feed, test_label2feed)
print("Testing accuracy: {}".format(test_acc))

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

plt.rcParams["figure.figsize"] = (20, 3)


def plot_history(history):
    """Plot per-epoch train/validation curves from a history dict.

    Draws two stacked panels: loss on top, accuracy below.  ``history`` must
    contain the keys ``train_loss_epoch``, ``valid_loss_epoch``,
    ``train_acc_epoch`` and ``valid_acc_epoch``.
    """
    for panel, metric in enumerate(("loss", "acc"), start=1):
        plt.subplot(2, 1, panel)
        train_curve = history["train_{}_epoch".format(metric)]
        valid_curve = history["valid_{}_epoch".format(metric)]
        plt.plot(train_curve, "r^-", label="train {} epoch".format(metric))
        plt.plot(valid_curve, "b*-", label="valid {} epoch".format(metric))
        plt.legend()

    plt.show()


plot_history(network.H)