In [1]:
import os
import tqdm
import sys
import keras

Using TensorFlow backend.


In [3]:
# Dataset configuration
LENGTH = 10         # dataset length, in seconds
FEATURE_DIM = 1582  # openSMILE feature dimension for one input chunk
TIME_STAMPS = 76    # size of the first (time) axis of each model input



In [9]:
# USER INPUT: features_dir
# features_dir is the directory used to save/load the openSMILE chunk features.
# All paths are derived from the repository root, taken as everything that
# precedes the first '/code' in the current working directory.
curr = os.getcwd()
repo_path = curr.partition('/code')[0]
features_path = repo_path + '/DIFv2' + '/features'
features_dir = features_path + '/' + str(LENGTH) + '/open_chunks'
saved_path = repo_path + '/saved_models'


# Generator and data split code

In [13]:
import csv
import numpy as np
def train_test_split(csv_path):
    '''
    Read a split CSV and group its rows into train / val / test partitions.

    Every non-empty row is read as ``split,label,filename``:
      * column 0 - partition name: 'train', 'val' or 'test'
      * column 1 - class name: 'Drunk' (mapped to 1) or 'Sober' (mapped to 0)
      * column 2 - file name; its last 4 characters are dropped to form the ID

    Input-  csv_path: path to the split csv file.
    Output- dict with keys 'train', 'val', 'test'. Each partition is a dict with
            'list'  -> list of IDs in file order, and
            'label' -> dict mapping ID to its integer label.
            If a row names an unknown partition, "Error in label" is printed
            and None is returned.

    Raises KeyError if a class name is neither 'Drunk' nor 'Sober'.
    '''
    label = {'Drunk': 1, 'Sober': 0}
    partition = {name: {'list': [], 'label': {}} for name in ('train', 'val', 'test')}

    # newline='' lets the csv module do its own line-ending handling.
    with open(csv_path, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing newline);
                # skip them instead of failing on row[2].
                continue
            filename = row[2][:-4]
            split = row[0]
            if split not in partition:
                print("Error in label")
                return None
            partition[split]['label'][filename] = label[row[1]]
            partition[split]['list'].append(filename)

    return partition

def count_classes(d):
    """
    Count class occurrences in a mapping of ID -> label.

    Returns a tuple (n_zero, n_rest): the number of values equal to 0 and the
    number of all remaining values.
    """
    n_zero = sum(1 for value in d.values() if value == 0)
    n_rest = len(d) - n_zero
    return (n_zero, n_rest)

class DataGenerator(keras.utils.Sequence):
    """
    Batch generator for Keras that loads one ``.npy`` file per sample.

    Parameters
    ----------
    list_IDs : list of sample IDs; each ID names the file ``<datapath>/<ID>.npy``.
    labels : mapping from sample ID to its integer class label.
    datapath : directory that holds the ``.npy`` files.
    batch_size : number of samples per batch.
    dim : shape of a single sample. A bare int is accepted and treated as a
        one-element shape.
    n_classes : number of classes used for the one-hot encoding of the labels.
    shuffle : if true, the sample order is reshuffled at construction time and
        every time ``on_epoch_end`` is called.
    """
    def __init__(self, list_IDs, labels,datapath, batch_size=32, dim=(1582,),n_classes=2, shuffle=True):
        'Initialization'
        # `(1582)` is an int, not a tuple, and `*self.dim` needs an iterable:
        # normalise ints so both the default and int callers work.
        if isinstance(dim, (int, np.integer)):
            dim = (int(dim),)
        self.dim = tuple(dim)
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.path = datapath
        # Builds the initial (optionally shuffled) index order.
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        # Floor division: samples that do not fill a whole batch are left out.
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __getitem__(self, index):
        'Generate one batch of data'
        # Positions (into list_IDs) that belong to batch number `index`
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)
        # Force a garbage-collection pass after every batch is built.
        import gc
        gc.collect()
        return X, y

    def __data_generation(self, list_IDs_temp):
        'Generates data containing one row per ID' # X : (n_samples, *dim)
        # Size the buffers from the IDs actually requested so that a short
        # batch never exposes uninitialised np.empty rows.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim))
        y = np.empty((n_samples), dtype=int)

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            X[i,] = np.load(self.path+'/' + ID + '.npy')

            # Store class
            y[i] = self.labels[ID]

        # Labels are returned one-hot encoded over n_classes columns.
        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

# Build Model


# 3.1 Model creation and summary

Reference on the ordering of batch normalization and dropout:
https://stackoverflow.com/questions/39691902/ordering-of-batch-normalization-and-dropout

In [6]:
from keras.optimizers import Adam
from keras.models import Model
from keras.models import load_model
from keras.layers import Dense, Input, Dropout, LSTM, Activation,BatchNormalization
from keras.callbacks import ModelCheckpoint
from keras.callbacks import TensorBoard
from time import time
import gc


In [27]:
def create_model(num_class,lstm_units,dropout,input_shape=(None,1582)):
    """
    Build a single-layer LSTM classifier.

    Layer order: Input -> BatchNormalization -> LSTM -> Dropout -> Dense.

    num_class   - number of units in the final sigmoid Dense layer
    lstm_units  - number of units in the LSTM layer
    dropout     - dropout rate applied to the LSTM output
    input_shape - shape of one input sample, passed to Input

    Returns the (uncompiled) keras Model.
    """
    inputs = Input(shape=input_shape)
    hidden = BatchNormalization()(inputs)
    hidden = LSTM(units=lstm_units)(hidden)
    hidden = Dropout(rate=dropout)(hidden)
    outputs = Dense(num_class, activation='sigmoid')(hidden)
    return Model(inputs=inputs, outputs=outputs)

In [28]:
def create_model2(num_class,lstm_units1,lstm_units2,dropout,input_shape=(None,1582)):
    """
    Build a two-layer (stacked) LSTM classifier.

    Layer order: Input -> BatchNormalization -> LSTM (returns the full
    sequence) -> Dropout -> LSTM -> Dropout -> Dense.

    num_class   - number of units in the final sigmoid Dense layer
    lstm_units1 - number of units in the first LSTM layer
    lstm_units2 - number of units in the second LSTM layer
    dropout     - dropout rate applied after each LSTM layer
    input_shape - shape of one input sample, passed to Input

    Returns the (uncompiled) keras Model.
    """
    inputs = Input(shape=input_shape)
    hidden = BatchNormalization()(inputs)

    # First LSTM emits one vector per time step so the second LSTM can consume it.
    hidden = LSTM(units=lstm_units1, return_sequences=True)(hidden)
    hidden = Dropout(rate=dropout)(hidden)

    hidden = LSTM(units=lstm_units2)(hidden)
    hidden = Dropout(rate=dropout)(hidden)

    outputs = Dense(num_class, activation='sigmoid')(hidden)
    return Model(inputs=inputs, outputs=outputs)

In [32]:
# Hyperparameters of this run
lstm_units1 = 128
lstm_units2 = 64
dropout = .4
hp = 6  # identifier of this hyperparameter set; part of the model directory name

model = create_model2(
    2,
    lstm_units1,
    lstm_units2,
    dropout,
    (TIME_STAMPS, FEATURE_DIM),
)

# USER INPUT: path to save/load the model
model_path = repo_path + '/saved_models/open_chunks/' + str(LENGTH) + '/hp' + str(hp)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, 76, 1582)          0         
_________________________________________________________________
batch_normalization_5 (Batch (None, 76, 1582)          6328      
_________________________________________________________________
lstm_7 (LSTM)                (None, 76, 128)           876032    
_________________________________________________________________
dropout_7 (Dropout)          (None, 76, 128)           0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 64)                49408     
_________________________________________________________________
dropout_8 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 2)                 130       
Total para

# 3.2 Fit the model

Class Imbalance 
https://groups.google.com/forum/#!topic/keras-users/MUO6v3kRHUw

class_weight in keras
https://keras.io/models/model/

tensorboard 
https://fizzylogic.nl/2017/05/08/monitor-progress-of-your-keras-based-neural-network-using-tensorboard/

Save a model checkpoint every k epochs

In [33]:
def load_keras_model(path):
    """
    Load a saved Keras model from `path`.

    Returns the result of load_model(path) when `path` is an existing regular
    file, and None otherwise.
    """
    if not os.path.isfile(path):
        return None
    return load_model(path)
#Loading data filenames split
split_path = '/media/netweb/2.0 TB/Vineet/repo/DIFv2/10/train_test_sets/1/split_4540_642_948.csv'## or enter path to the split.csv in the parent directory 
partition=train_test_split(split_path)
if partition is None:
    # train_test_split returns None when it rejects a row of the csv; stop here
    # with a clear message instead of failing on the subscript below.
    raise ValueError("Could not build the data split from " + split_path)
print("Number of training examples ")
print(len(partition['train']['list']))
print("Number of validation examples ")
print(len(partition['val']['list']))

# Keyword arguments shared by the DataGenerator instances
params = {'datapath':features_dir ,
          'dim': (TIME_STAMPS,FEATURE_DIM),
          'batch_size': 64,
          'n_classes': 2,
          'shuffle': True}

#weights for imbalance classes
# Each class is weighted by total/count, so the rarer class gets the larger weight.
count=count_classes(partition['train']['label'])
print("Class instances in training class.\n Sober:",count[0]," Drunk:",count[1])
if count[0]==0 or count[1]==0:
    # total/count is undefined for a class with no training samples
    raise ValueError("Both classes must be present in the training split to compute class weights")
weight_0=float(count[0]+count[1])/float(count[0])
weight_1=float(count[0]+count[1])/float(count[1])
class_weight={0:weight_0, 1:weight_1}

#instances in val set
count=count_classes(partition['val']['label'])
print("Class instances in val class.\n Sober:",count[0]," Drunk:",count[1])

#instances in test set
count=count_classes(partition['test']['label'])
print("Class instances in test class.\n Sober:",count[0]," Drunk:",count[1])


Number of training examples 
4540
Number of validation examples 
642
Class instances in training class.
 Sober: 1045  Drunk: 3495
Class instances in val class.
 Sober: 321  Drunk: 321
Class instances in test class.
 Sober: 306  Drunk: 642


In [None]:

model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=["accuracy"])

# The checkpoint files are written directly into model_path, so make sure the
# directory exists before training starts.
os.makedirs(model_path, exist_ok=True)

#saving a checkpoint every 5 epochs (save_best_only=False, so every one is kept)
checkpoint = ModelCheckpoint(model_path+'/model-{epoch:03d}-{val_acc:03f}.h5', verbose=1, monitor='val_acc',save_best_only=False, mode='max',period=5)


#tensorboard: one log directory per run, named after the start timestamp
tensorboard = TensorBoard(log_dir=model_path+"/log/{}".format(time()))

train_generator=DataGenerator(partition['train']['list'],partition['train']['label'], **params)
val_generator=DataGenerator(partition['val']['list'],partition['val']['label'], **params)
print("generator created")
# class_weight compensates for the class imbalance of the training split
model.fit_generator(generator=train_generator,epochs=50,validation_data=val_generator,
                    use_multiprocessing=True,
                    workers=6,callbacks=[checkpoint,tensorboard],class_weight=class_weight)