In [6]:
from sklearn import datasets, linear_model, metrics 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
import re
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

from mord import OrdinalRidge
from mord import LAD
from keras.models import Model
from keras.layers import Input
from keras.layers import concatenate
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import LSTM
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.layers import Dropout
from keras.layers import SpatialDropout1D
from keras.layers import Embedding
from keras.models import Sequential
from keras.models import load_model
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [8]:
def eval_confusion_matrix(actual, predicted):
    """Tally a binary confusion matrix for two index-aligned label sequences.

    Only positions whose ground-truth label is 0 or 1 are considered. At such
    a position a prediction that is neither equal to the truth, nor 0, nor 1
    is not counted in any cell.

    Args:
        actual: indexable sequence of ground-truth labels.
        predicted: indexable sequence of predicted labels, same positions.

    Returns:
        The tuple (TN, FP, FN, TP).
    """
    true_neg = false_pos = false_neg = true_pos = 0
    for idx in range(len(actual)):
        truth = actual[idx]
        # Non-binary ground-truth labels are skipped entirely.
        if not (truth == 0 or truth == 1):
            continue
        guess = predicted[idx]
        if truth == guess:
            if truth == 1:
                true_pos += 1
            elif truth == 0:
                true_neg += 1
        elif guess == 1:
            # Truth is binary and differs from the guess, so truth is 0 here.
            false_pos += 1
        elif guess == 0:
            # Likewise truth must be 1 here.
            false_neg += 1

    return true_neg, false_pos, false_neg, true_pos

def eval_recall(TP, FN):
    """Return recall, TP / (TP + FN).

    Args:
        TP: number of true positives.
        FN: number of false negatives.

    Returns:
        The recall, or 0 when there are no actual positives (TP + FN == 0).
        The zero guard follows the same convention as eval_precision and
        eval_f1, so a test split without positives no longer raises
        ZeroDivisionError.
    """
    if TP + FN == 0:
        return 0
    recall = TP / (TP + FN)
    return recall

def eval_acc(TN, FP, FN, TP):
    """Return accuracy, (TN + TP) / (TN + FP + FN + TP).

    Args:
        TN: number of true negatives.
        FP: number of false positives.
        FN: number of false negatives.
        TP: number of true positives.

    Returns:
        The accuracy, or 0 when all four counts are zero (nothing was
        evaluated). The zero guard follows the same convention as
        eval_precision and eval_f1 instead of raising ZeroDivisionError.
    """
    total = TN + FP + FN + TP
    if total == 0:
        return 0
    acc = (TN + TP) / total
    return acc

def eval_precision(TP, FP):
    """Return precision, TP / (TP + FP); 0 when nothing was predicted positive."""
    predicted_positive = TP + FP
    # Guard the division: no positive predictions means precision is reported as 0.
    return 0 if predicted_positive == 0 else TP / predicted_positive

def eval_f1(precision, recall):
    """Return the F1 score (harmonic mean of precision and recall).

    Returns 0 when precision + recall is 0, avoiding a division by zero.
    """
    denominator = precision + recall
    if denominator == 0:
        return 0
    numerator = 2 * (precision * recall)
    return numerator / denominator

def evaluate(actual, predicted):
    """Print recall, precision, F1 and accuracy for binary predictions.

    Args:
        actual: ground-truth labels.
        predicted: predicted labels, index-aligned with ``actual``.

    Returns:
        The F1 score.
    """
    TN, FP, FN, TP = eval_confusion_matrix(actual, predicted)

    # Compute every metric before printing anything.
    recall = eval_recall(TP, FN)
    prec = eval_precision(TP, FP)
    f1 = eval_f1(prec, recall)
    acc = eval_acc(TN, FP, FN, TP)

    report = (
        ("Recall: ", recall),
        ("Precision: ", prec),
        ("F1 Score: ", f1),
        ("Accuracy: ", acc),
    )
    for label, value in report:
        print(label, value)
    return f1

In [21]:
# Load the saved train/test feature matrices for both modalities:
# the *_t arrays come from ./text and the *_a arrays from ./audio.
X_train_t = np.load("./text/X_train.npy")
X_train_a = np.load("./audio/X_train.npy")
X_test_t = np.load("./text/X_test.npy")
X_test_a = np.load("./audio/X_test.npy")

# A single label set (stored under ./text) is used with both modalities below.
# NOTE(review): assumes the text and audio arrays are row-aligned so these
# labels apply to both — confirm against the preprocessing notebooks.
y_train = np.load("./text/y_train.npy")
y_test = np.load("./text/y_test.npy")

### Method 1
Use an additional logistic regression model that takes the outputs of the base models as its inputs and produces the final prediction (stacking).

In [274]:
# to combine multiple models for transcript only
def ensemble_models_t(models, X_t=None, y=None):
    """Fit a logistic-regression meta-model on the predictions of ``models``.

    Each model's ``predict`` output becomes one feature column of the
    meta-model's training matrix.

    Args:
        models: fitted models exposing ``predict``; all receive the same
            transcript features.
        X_t: transcript features to predict on. Defaults to the notebook
            global ``X_train_t``.
        y: target labels for the meta-model. Defaults to the notebook
            global ``y_train``.

    Returns:
        The fitted ``LogisticRegression`` meta-model.

    NOTE(review): when the base models were fit on the same ``X_t``, the
    meta-model is trained on in-sample predictions; consider out-of-fold
    predictions if this is used beyond a quick experiment.
    """
    # Fall back to the notebook-level training arrays so existing
    # single-argument calls keep working.
    if X_t is None:
        X_t = X_train_t
    if y is None:
        y = y_train

    preds = []
    for model in models:
        y_pred = model.predict(X_t)
        # One column per base model.
        preds.append(y_pred.reshape(y_pred.shape[0], 1))
    y_preds = np.concatenate(preds, axis=1)

    reg = linear_model.LogisticRegression()
    reg.fit(y_preds, y)
    return reg

def ensemble_prediction_t(models, reg, X_t=None):
    """Predict with a stacked ensemble of transcript models.

    Args:
        models: fitted base models exposing ``predict``, in the same order
            used when ``reg`` was fitted.
        reg: fitted meta-model exposing ``predict``.
        X_t: transcript features to predict on. Defaults to the notebook
            global ``X_test_t``; accepting it as an argument matches
            ``ensemble_prediction_ta``, which takes its features explicitly.

    Returns:
        Whatever ``reg.predict`` returns for the stacked base predictions.
    """
    # Fall back to the notebook-level test array so existing
    # two-argument calls keep working.
    if X_t is None:
        X_t = X_test_t

    preds = []
    for model in models:
        y_pred = model.predict(X_t)
        # One column per base model.
        preds.append(y_pred.reshape(y_pred.shape[0], 1))
    y_preds = np.concatenate(preds, axis=1)
    return reg.predict(y_preds)


# to combine 2 models (transcript and audio)
def ensemble_models_ta(models, X_t=None, X_a=None, y=None):
    """Fit a logistic-regression meta-model on a transcript model and an audio model.

    Args:
        models: sequence whose first element is the transcript model and
            whose second element is the audio model; both expose ``predict``.
            Any further elements are ignored.
        X_t: transcript features for ``models[0]``. Defaults to the notebook
            global ``X_train_t``.
        X_a: audio features for ``models[1]``. Defaults to the notebook
            global ``X_train_a``.
        y: target labels for the meta-model. Defaults to the notebook
            global ``y_train``.

    Returns:
        The fitted ``LogisticRegression`` meta-model.

    NOTE(review): when the base models were fit on the same features, the
    meta-model is trained on in-sample predictions; consider out-of-fold
    predictions if this is used beyond a quick experiment.
    """
    # Fall back to the notebook-level training arrays so existing
    # single-argument calls keep working.
    if X_t is None:
        X_t = X_train_t
    if X_a is None:
        X_a = X_train_a
    if y is None:
        y = y_train

    preds = []
    # models[0] sees the transcript features, models[1] the audio features.
    for position, features in enumerate((X_t, X_a)):
        y_pred = models[position].predict(features)
        preds.append(y_pred.reshape(y_pred.shape[0], 1))

    y_preds = np.concatenate(preds, axis=1)
    reg = linear_model.LogisticRegression()
    reg.fit(y_preds, y)
    return reg

def ensemble_prediction_ta(models, reg, X_t, X_a):
    """Predict with the stacked transcript + audio ensemble.

    Args:
        models: sequence whose first element predicts from ``X_t`` and whose
            second element predicts from ``X_a``.
        reg: fitted meta-model exposing ``predict``.
        X_t: transcript features.
        X_a: audio features.

    Returns:
        Whatever ``reg.predict`` returns for the two stacked prediction columns.
    """
    columns = []
    # Position 0 pairs with the transcript features, position 1 with audio.
    for position, features in enumerate((X_t, X_a)):
        raw = models[position].predict(features)
        columns.append(raw.reshape(raw.shape[0], 1))

    stacked = np.concatenate(columns, axis=1)
    return reg.predict(stacked)

In [3]:
# Testing using decision tree
# Ensembling transcript and audio model

In [263]:
from sklearn.tree import DecisionTreeClassifier

In [264]:
# Transcript base model 1: a depth-2 decision tree fit on the text features,
# then scored on the test split (evaluate prints the metrics and returns F1).
# NOTE(review): no random_state is passed; consider fixing one so the recorded
# metrics are reproducible — confirm against the scikit-learn docs.
model_t1 = DecisionTreeClassifier(max_depth=2, min_samples_leaf=18) 
model_t1.fit(X_train_t, y_train)
y_pred_t1 = model_t1.predict(X_test_t)
evaluate(y_test, y_pred_t1)

Recall:  1.0
Precision:  0.625
F1 Score:  0.7692307692307693
Accuracy:  0.7272727272727273


0.7692307692307693

In [273]:
# Audio base model: a depth-1 decision tree (a single split) fit on the audio
# features with the same labels as the text models, then scored on the test split.
# NOTE(review): no random_state is passed; consider fixing one for reproducibility.
model_a1 = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1) 
model_a1.fit(X_train_a, y_train)
y_pred_a1 = model_a1.predict(X_test_a)
evaluate(y_test, y_pred_a1)

Recall:  1.0
Precision:  0.5555555555555556
F1 Score:  0.7142857142857143
Accuracy:  0.6363636363636364


0.7142857142857143

In [283]:
# Stack the transcript tree and the audio tree. Order matters:
# ensemble_models_ta feeds models[0] the text features and models[1] the
# audio features.
models = [model_t1, model_a1]
reg = ensemble_models_ta(models)

In [285]:
# Predict on the test split with the text+audio ensemble, show predictions
# next to the ground truth, then print the metrics (evaluate returns the F1).
pred = ensemble_prediction_ta(models, reg, X_test_t, X_test_a)
print(pred)
print(y_test)
evaluate(y_test, pred)

[0 0 1 1 1 1 1 1 1 1 0]
[0 0 1 1 1 0 0 0 1 1 0]
Recall:  1.0
Precision:  0.625
F1 Score:  0.7692307692307693
Accuracy:  0.7272727272727273


0.7692307692307693

In [5]:
# Testing with other models
# Ensembling multiple transcript models

In [265]:
from sklearn.naive_bayes import GaussianNB

In [266]:
# Transcript base model 2: Gaussian naive Bayes with default settings,
# fit on the text features and scored on the test split.
model_t2 = GaussianNB()
model_t2.fit(X_train_t, y_train)
y_pred_t2 = model_t2.predict(X_test_t)
evaluate(y_test, y_pred_t2)

Recall:  0.6
Precision:  1.0
F1 Score:  0.7499999999999999
Accuracy:  0.8181818181818182


0.7499999999999999

In [267]:
# Transcript base model 3: logistic regression with default settings,
# fit on the text features and scored on the test split.
model_t3 = linear_model.LogisticRegression()
model_t3.fit(X_train_t, y_train)
y_pred_t3 = model_t3.predict(X_test_t)
evaluate(y_test, y_pred_t3)

Recall:  0.8
Precision:  0.6666666666666666
F1 Score:  0.7272727272727272
Accuracy:  0.7272727272727273


0.7272727272727272

In [268]:
# Stack the three transcript-only models.
# NOTE(review): this rebinds `models` and `reg`, which the text+audio ensemble
# cells also use; distinct names would keep both ensembles available.
models = [model_t1, model_t2, model_t3]
reg = ensemble_models_t(models)

In [269]:
# Predict with the transcript-only ensemble; ensemble_prediction_t reads the
# test features from the notebook global X_test_t. Show predictions next to
# the ground truth, then print the metrics (evaluate returns the F1).
pred = ensemble_prediction_t(models, reg)
print(pred)
print(y_test)
evaluate(y_test, pred)

[0 1 0 1 1 0 1 0 1 1 0]
[0 0 1 1 1 0 0 0 1 1 0]
Recall:  0.8
Precision:  0.6666666666666666
F1 Score:  0.7272727272727272
Accuracy:  0.7272727272727273


0.7272727272727272

### Method 2
Use the Keras functional API (`Model`) to build a neural network with separate text and audio inputs.

In [22]:
# One-hot encode the training labels for the 2-unit softmax output below.
# The guard makes this cell safe to re-run: labels that are already one-hot
# (2-D with more than one column) are left untouched instead of being
# encoded a second time.
# NOTE(review): this rebinds y_train, so the Method 1 cells above, which were
# written against the labels as loaded, must run before this cell.
if y_train.ndim == 1 or y_train.shape[-1] == 1:
    y_train = to_categorical(y_train)

In [42]:
# Hyperparameters of the text branch.
# Passed as the first argument of the Embedding layer (its vocabulary size).
# NOTE(review): assumes every token id in the text arrays is < max_features — confirm.
max_features = 5000
# Length of each text input; used for the Input shape and Embedding input_length.
maxlen = 1000
# Passed as the second argument of the Embedding layer (embedding vector size).
embedding_dim = 128

In [43]:
# Text branch: sequence of length maxlen -> embedding -> flatten -> Dense(2).
# No activation is passed to the Dense layer.
text_input = Input(shape=(maxlen,))
text_emb = Embedding(max_features, embedding_dim, input_length=maxlen)(text_input)
text_layer = (Flatten())(text_emb)
text_layer = (Dense(2))(text_layer)

In [44]:
# Audio branch: a 40-value feature vector -> Dense(2), no activation passed.
# NOTE(review): assumes the audio arrays have 40 features per sample — confirm.
audio_input = Input(shape=(40,))
audio_layer = (Dense(2))(audio_input)

In [45]:
# Merge the two 2-unit branch outputs and classify with a 2-unit softmax layer.
# The model takes its inputs in the order [text, audio].
z = concatenate([text_layer, audio_layer])
z = Dense(2, activation='softmax')(z)
model = Model(inputs=[text_input, audio_input], outputs=[z])

In [46]:
# Compile with the Adam optimizer and track accuracy.
# NOTE(review): the output is a 2-unit softmax trained on one-hot labels;
# categorical_crossentropy is the more conventional loss for that pairing —
# confirm binary_crossentropy is intentional.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [47]:
# Training callbacks, both monitoring validation accuracy (higher is better).
checkpoint_filepath = 'model1.h5'
# Early stopping is configured with patience 5 and min_delta 0.0001.
es = EarlyStopping(monitor = 'val_accuracy', mode = 'max', patience = 5, min_delta=0.0001)
# With save_best_only=True only the best-scoring model is kept at checkpoint_filepath.
checkpoint =  ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

In [48]:
# Train on [text, audio] inputs for up to 20 epochs with batch size 4, holding
# out 10% of the training data for validation; the callbacks may stop training
# early and save the best model to disk.
# NOTE(review): no random seed is set anywhere visible, so results may differ
# between runs — confirm whether that is acceptable.
history = model.fit([X_train_t, X_train_a], y_train, 
                    epochs=20, batch_size=4, validation_split=0.1, 
                    callbacks=[es, checkpoint])

Train on 89 samples, validate on 10 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


In [49]:
# Replace the in-memory model with the one the checkpoint callback saved
# at checkpoint_filepath.
model = load_model(checkpoint_filepath)

In [50]:
# Predict on the test split and turn the two softmax outputs into a class
# index with argmax. y_test was not one-hot encoded in this notebook, so the
# two printed arrays can be compared directly.
y_pred = model.predict([X_test_t,X_test_a])
y_pred = np.argmax(y_pred,axis=1)
print(y_pred)
print(y_test)

[1 1 1 1 1 1 1 1 1 1 1]
[0 0 1 1 1 0 0 0 1 1 0]


In [51]:
# Print the metrics for the neural network (evaluate returns the F1 score).
# In the recorded run the model predicts class 1 for every test sample, which
# is why recall is 1.0 while precision equals accuracy.
evaluate(y_test, y_pred)

Recall:  1.0
Precision:  0.45454545454545453
F1 Score:  0.625
Accuracy:  0.45454545454545453


0.625