# Theano version of simple DNN

In [1]:
import theano
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
import numpy as np

from sklearn import cross_validation as cv
from sklearn import metrics
from sklearn import grid_search as gs
from sklearn.metrics import accuracy_score
# from sklearn.base import BaseEstimator

from time import time

Using gpu device 0: GeForce GTX 980


## Data preprocess

In [3]:
### DataProcessing 

'''
[ INPUT  ] : model { wav, NFCC, ... }
[ OUTPUT ] : (Training_data, Validation_data, Testing_Data)

- Training_data : ( x, y, id )
- Validation_data : ( x, y, id )
- Testing_Data : ( x, id )

-- x format : 
---- NFCC : [ 39-vector ]'

-- y format :
---- Now we use 39-phonemes for all : [ 0 0 0 ... 1 .... 0 0 0 ]  as a number of 39
'''

def Dataset ( model, dratio ) :
    trains = {}
    tests_data = {}
    phones_mapping = {} # {48} to realNumber
    result_mapping = {} # {48} to {39}
    
    training_inputs = []
    training_result = []
    if model == "mfcc":
        # TRAINING X(INPUT)
        with open('./MLDS_HW1_RELEASE_v1/mfcc/train.ark') as f:
            for lines in f :
                frames = lines.split(' ')
                
                # trains[frames[0]] = frames[1:]
                trains[frames[0]] = frame2float

        # MAPPING 48 to number ( we map 48 to 39 later )
        with open('./MLDS_HW1_RELEASE_v1/phones/48_39.map') as f:    
            i = 0
            for lines in f :
                phones = lines.split('\t')
                phones_mapping[phones[0]] = i
                i += 1
        
        with open('./MLDS_HW1_RELEASE_v1/phones/48_39.map') as f:    
            for lines in f :
                phones = lines.split('\t')
                result_mapping[ phones_mapping[phones[0]] ] = phones[1]
        
        # TRAINING Y(OUTPUT)
        with open('./MLDS_HW1_RELEASE_v1/label/train.lab') as f:
            for lines in f :
                labels = lines.split(',')
                # trains[labels[0]].append(labels[1])
                training_inputs.append( np.reshape(trains.get(labels[0]), (39, 1) ) )
                training_result.append( vectorized_result(phones_mapping[labels[1].rstrip('\n')] ) )
        
        # 10% for validation
        dataSize_weUse = len(training_inputs) * dratio
        trainingRationTEST = int( dataSize_weUse * 0.9)
        trainingRationVARI = int( dataSize_weUse * 0.1)
        trainingRationVARIend = trainingRationVARI + trainingRationTEST
        print "slide ratio : " , trainingRationTEST
        if dratio != 1:
            training_data = zip(training_inputs[0:trainingRationTEST], training_result[0:trainingRationTEST])
            validation_data = zip(training_inputs[trainingRationTEST+1:trainingRationVARIend], training_result[trainingRationTEST+1:trainingRationVARIend])
        else:
        
            X_train, X_test, y_train, y_test = cv.train_test_split(training_inputs, training_result, test_size=0.1)
            y_train = np.array(y_train)
            y_train = ((y_train.flatten())).reshape(len(y_train.flatten())/48,48).astype(np.float32)
            X_train = np.array(X_train)
            X_train = ((X_train.flatten())).reshape(len(X_train.flatten())/39,39).astype(np.float32)
            y_test = np.array(y_test)
            y_test = ((y_test.flatten())).reshape(len(y_test.flatten())/48,48).astype(np.float32)
            X_test = np.array(X_test)
            X_test = ((X_test.flatten())).reshape(len(X_test.flatten())/39,39).astype(np.float32)
            
            
#             training_data = zip(training_inputs[0:trainingRationTEST], training_result[0:trainingRationTEST])
#             validation_data = zip(training_inputs[trainingRationTEST+1:], training_result[trainingRationTEST+1:])
            
#         print "Size of Training Data", len(training_data)
#         print "Size of Validation Data", len(validation_data)
        
        # Testing data
        with open('./MLDS_HW1_RELEASE_v1/mfcc/test.ark') as f:
            # i = 0
            for lines in f :
                frames = lines.split(' ')
                tests_data[frames[0]] = np.reshape([ float(x) for x in frames[1:] ], (39, 1) )
                # if i < 20:
                #     i += 1
                # else:
                #    break
        
#         tests_data = np.array(tests_data.values())
#         tests_data = np.array(((tests_data.flatten()))).reshape(len(tests_data.flatten())/39,39).astype(np.float32)
                
    else:
        print "Not implement yet"
        
#     return ( training_data, validation_data, tests_data, result_mapping )
    return (X_train, X_test, y_train, y_test, tests_data, result_mapping)
                                       
def vectorized_result ( j ) :
    e = np.zeros((48, 1))
    e[j] = 1.0
    return e

## Model part


In [4]:
srng = RandomStreams()

# translate data to theano data type
def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)

# initialize weight by random
def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))

# rectify
def rectify(X):
    return T.maximum(X, 0.)


def sigmoid(X):
    return 1.0/(1.0+ T.exp(-X))

# softmax
def softmax(X):
    e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x'))
    return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')

# method provided by Hinton
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))
    return updates

# momentum method
def momentum(loss, all_params, learning_rate, momentum=0.9):
    all_grads = theano.grad(loss, all_params)
    updates = []

    for param_i, grad_i in zip(all_params, all_grads):
        mparam_i = theano.shared(np.zeros(param_i.get_value().shape,
                                          dtype=theano.config.floatX),
                                 broadcastable=param_i.broadcastable)
        v = momentum * mparam_i - learning_rate * grad_i
        updates.append((mparam_i, v))
        updates.append((param_i, param_i + v))

    return updates

def multinominal_cross_entropy(z, X):
    
    L = - T.sum(X * T.log(z) + (1 - X) * T.log(1 - z), axis=1)
    loss = T.sum(L) / X.shape[0]
    
    return loss

# dropout
def dropout(X, p=0.):
    if p > 0:
        retain_prob = 1 - p
        X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
        X /= retain_prob
    return X

# def model(X, w_h, w_o, p_drop_input, p_drop_hidden):
#     X = dropout(X, p_drop_input)
#     h = rectify(T.dot(X, w_h))

#     h = dropout(h, p_drop_hidden)
#     h2 = rectify(T.dot(h, w_h2))

#     h2 = dropout(h2, p_drop_hidden)
#     py_x = softmax(T.dot(h2, w_o))
#     return h, h2, py_x

def model(X, w_h, w_o):
    h = sigmoid(T.dot(X,w_h))
    py_x = softmax(T.dot(h,w_o))
    
    return py_x

In [7]:
class DNN():    
    
    def __init__(self, input_shape, hidden_layer, batch, 
                 max_epochs, eval_size, output_num_units, learning_rate):
        self.input_shape = input_shape
        self.hidden_layer = hidden_layer
        self.batch = batch
        self.max_epochs = max_epochs
        self.eval_size = eval_size
        self.output_num_units = output_num_units
        self.learning_rate = learning_rate
        self.train_history_ = []
        
        w_h = init_weights((self.input_shape[1], self.hidden_layer))
        w_o = init_weights((self.hidden_layer, self.output_num_units))

        X = T.dmatrix()
        Y = T.dmatrix()

        # Construct Theano expression graph
        py_x = model(X, w_h, w_o)
        y_x = T.argmax(py_x, axis=1)

        # cost function need to modify
        # cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
        cost = multinominal_cross_entropy(py_x, Y) 
        params = [w_h, w_o]
        updates = momentum(cost, params, self.learning_rate)

        # Compile expressions to functions
        self.train = theano.function(inputs=[X, Y], outputs=[y_x, cost], 
                                updates=updates, allow_input_downcast=True, name = "train")
        self.predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True, name = "predict")

        
    def fit(self, x, y):
        X_train, X_test, y_train, y_test = cv.train_test_split(x, y, test_size= self.eval_size)
        yy = np.array(map((lambda x: np.argmax(x)), y_test))
        
        if any([x.op.__class__.__name__ in ['Gemv', 'CGemv', 'Gemm', 'CGemm'] for x in
        self.train.maker.fgraph.toposort()]):
            print 'Used the cpu'
        elif any([x.op.__class__.__name__ in ['GpuGemm', 'GpuGemv'] for x in
                  self.train.maker.fgraph.toposort()]):
            print 'Used the gpu'
        else:
            print 'ERROR, not able to tell if theano used the cpu or the gpu'
            print self.train.maker.fgraph.toposort()
            
        print " "
        print "start training!!!!"
        print " "
            
        epochs = 0
        for i in range(self.max_epochs):
            epochs +=1
            t0 = time()
            
            # To do: modify a function of mini batch and using random
            for start, end in zip(range(0, len(X_train), self.batch), range(self.batch, len(X_train), self.batch)):
                pred, err = self.train(X_train[start:end], y_train[start:end])
            
            score = accuracy_score(yy, self.predict(X_test))
            self.train_history_.append({"epoch":epochs, "err": err, "score":score})
            
            print 'epoch {0} : err = {1}, score = {2}, time ={3} s'.format(epochs, err, score, time() - t0)

    def prediction(self, x):
        return self.predict(x)

    def outputCSV(self, wfilename, test_data, mapd): # read dictionary for id 
        test_results = []
                
        for xid, xdata in test_data.iteritems():
            test_results.append( (xid, self.predict(xdata.T)) ) 
            
        f = open(wfilename, 'w+')
        f.write("Id,Prediction\n")
        for xid, y in test_results:
            f.write("{0},{1}".format(xid, mapd[y[0]]))
            # f.write("{0},{1},{2}".format(xid, mapd[y], y))
        f.close()
        print "MISSION COMPLETE"
    
    

# Start training!

 ## Data reading

In [8]:
### Data Processing 

data_ratio = 1 # the input we use ( to put more efford on improve parameter)

X_train, X_test, y_train, y_test, test_data, result_mapping = Dataset("mfcc", data_ratio)


slide ratio :  1012340


In [7]:
(np.array(y_train)).shape
# y_train = np.array(y_train)
# ((y_train.flatten())).reshape(len(y_train.flatten())/48,48).astype(np.float32)
# y_train[0].flatten()

(1012340, 48)

In [91]:
(np.array(X_train)).shape
# X_train = np.array(X_train)
# X_train[0].shape
# (X_train.flatten()).reshape(1012340,39)[0]

(1012340, 39)

In [19]:
(np.array(test_data)).shape
# len(np.array(test_data.values()[0]))
# np.array(test_data.values()).flatten()

# test_data = np.array(test_data.values())
# np.array(((test_data.flatten()))).reshape(len(test_data.flatten())/39,39).shape


()

## Train model!

In [9]:
net1 = DNN(input_shape=(128,39), hidden_layer=128, batch=128, 
                 max_epochs=25, eval_size=0.1, output_num_units=48, learning_rate=0.001)


In [10]:
%time net1.fit(X_train, y_train)

ERROR, not able to tell if theano used the cpu or the gpu
[HostFromGpu(<CudaNdarrayType(float32, matrix)>), GpuDimShuffle{1,0}(<CudaNdarrayType(float32, matrix)>), HostFromGpu(<CudaNdarrayType(float32, matrix)>), InplaceDimShuffle{1,0}(<TensorType(float64, matrix)>), Elemwise{sub,no_inplace}(TensorConstant{(1, 1) of 1.0}, <TensorType(float64, matrix)>), Shape_i{0}(<TensorType(float64, matrix)>), HostFromGpu(GpuDimShuffle{1,0}.0), dot(<TensorType(float64, matrix)>, HostFromGpu.0), InplaceDimShuffle{x}(Shape_i{0}.0), sigmoid(dot.0), Elemwise{inv,no_inplace}(InplaceDimShuffle{x}.0), dot(sigmoid.0, HostFromGpu.0), InplaceDimShuffle{1,0}(sigmoid.0), InplaceDimShuffle{0,x}(Elemwise{inv,no_inplace}.0), Reduce{maximum}{1}(dot.0), InplaceDimShuffle{0,x}(max), Elemwise{sub,no_inplace}(dot.0, InplaceDimShuffle{0,x}.0), Softmax(Elemwise{sub,no_inplace}.0), Elemwise{Exp}[(0, 0)](Elemwise{sub,no_inplace}.0), Sum{axis=[1], acc_dtype=float64}(Elemwise{Exp}[(0, 0)].0), InplaceDimShuffle{0,x}(Sum{axis=[

In [11]:
net1.train_history_

[{'epoch': 1, 'err': array(2.9767858026605394), 'score': 0.3905703617361756},
 {'epoch': 2, 'err': array(2.75452518875809), 'score': 0.43729379457494516},
 {'epoch': 3, 'err': array(2.627806974933892), 'score': 0.46118892862081912},
 {'epoch': 4, 'err': array(2.5309916526771397), 'score': 0.47722109172807553},
 {'epoch': 5, 'err': array(2.454988144782991), 'score': 0.48795859098721772},
 {'epoch': 6, 'err': array(2.3943695260869777), 'score': 0.49548570638323092},
 {'epoch': 7, 'err': array(2.341224334737912), 'score': 0.50287452832052471},
 {'epoch': 8, 'err': array(2.294661122103386), 'score': 0.50830748562735839},
 {'epoch': 9, 'err': array(2.2552580034164778), 'score': 0.51308848805737206},
 {'epoch': 10, 'err': array(2.2226925718169546), 'score': 0.51664460556729952},
 {'epoch': 11, 'err': array(2.1953466433118676), 'score': 0.52006242961850757},
 {'epoch': 12, 'err': array(2.171825802607993), 'score': 0.52318391054388846},
 {'epoch': 13, 'err': array(2.151271124465296), 'score': 

## Save model!!

In [13]:
import sys
sys.setrecursionlimit(10000)

import cPickle as pickle
with open('first_net.pickle', 'wb') as f:
    pickle.dump(net1, f, -1)


## Load model

In [14]:
import cPickle as pickle

with open('./first_net.pickle', 'rb') as f:
    net = pickle.load(f)


## validation result

In [15]:
y_pred = net1.prediction(X_test)

yyy = np.array(map((lambda x: np.argmax(x)), y_test))

print metrics.classification_report((yyy), (y_pred))
print accuracy_score(yyy, y_pred)


             precision    recall  f1-score   support

          0       0.39      0.49      0.43      2462
          1       0.43      0.65      0.52      2828
          2       0.28      0.26      0.27      1654
          3       0.42      0.38      0.40      2204
          4       0.29      0.19      0.23      1085
          5       0.38      0.22      0.28      1868
          6       0.49      0.49      0.49      2640
          7       0.54      0.45      0.49      1090
          8       0.47      0.19      0.27       807
          9       0.62      0.70      0.66      6688
         10       0.46      0.23      0.31      1207
         11       0.36      0.34      0.35      1415
         12       0.53      0.36      0.43       857
         13       0.35      0.28      0.31      2573
         14       0.44      0.22      0.29       995
         15       0.41      0.01      0.02       619
         16       0.41      0.19      0.26       375
         17       0.62      0.63      0.62   

  'precision', 'predicted', average, warn_for)


# write predict file

In [16]:
net1.outputCSV("net1_predict.csv", test_data, result_mapping)

MISSION COMPLETE
