In [1]:
import os
#import skvideo.io
import tqdm
import sys
import numpy as np
import csv
import cv2
import keras
from keras import backend as K
K.tensorflow_backend._get_available_gpus()
from keras.models import Model
from keras.models import load_model

Using TensorFlow backend.


In [2]:
#specifying path for relevant folders
curr=os.getcwd()
repo_path=curr.split('/code')[0]
data_path=repo_path+'/DIFv2'
features_path=repo_path+'/features'
saved_path=repo_path+'/saved_models'

In [3]:
#dataset length
LENGTH=10
FRAME_RATE=24
SAMPLE_RATE=2
NUM_FRAMES=int(LENGTH*FRAME_RATE/SAMPLE_RATE)
data_dir=data_path+'/'+str(LENGTH)+'/frame_video'
vgg_dir=features_path+'/'+str(LENGTH)+'/vgg_face'
open_dir=features_path+'/'+str(LENGTH)+'/opensmile'
saved_path=None

# train test split

In [4]:
'''
Input-  csv_file
Output- partition train, val test. Each partition consists of list of .npy files and dictionary of labels.
'''
def count_classes(d):
    values=list(d.values())
    zeros=values.count(0)
    return (zeros,len(values)-zeros)
def train_test_split(csv_path):
    label={'Drunk':1, 'Sober':0}
    partition={}
    train={}
    val={}
    test={}
    
    train_list=[]
    val_list=[]
    test_list=[]
    train_label={}
    val_label={}
    test_label={}
    
    with open(csv_path) as csvfile:
        reader=csv.reader(csvfile,delimiter=',')
        for row in reader:
            filename=row[2]
            filename=filename[:-4]
            if row[0]=='train':
                train_label[filename]=label[row[1]]
                train_list.append(filename)
            elif row[0]=='val':
                val_label[filename]=label[row[1]]
                val_list.append(filename)
            elif row[0]=='test':
                test_label[filename]=label[row[1]]
                test_list.append(filename)
            else:
                print("Error in label")
                return None
    train['list']=train_list
    val['list']=val_list
    test['list']=test_list
    
    train['label']=train_label
    val['label']=val_label
    test['label']=test_label
    
    partition['train']=train
    partition['val']=val
    partition['test']=test
    
    return partition


In [5]:
def load_keras_model(path):
    if os.path.isfile(path):
        return load_model(path)

# vgg lstm 

Best model

In [6]:
model_path='/home/ubuntu/Vineet/repo/saved_models/vgg_lstm/10/hp1/model-135-0.781250.h5'
model=load_keras_model(model_path)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 120, 4096)         0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 120, 4096)         16384     
_________________________________________________________________
lstm_2 (LSTM)                (None, 256)               4457472   
_________________________________________________________________
dropout_4 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 514       
Total params: 4,474,370
Trainable params: 4,466,178
Non-trainable params: 8,192
_________________________________________________________________


In [7]:
csv_path=repo_path+'/DIFv2/10/train_test_sets/1/split_4540_642_948.csv'
partition=train_test_split(csv_path)
lt =partition['test']['list']
dic=partition['test']['label']
count=count_classes(partition['test']['label'])
print("Sober ",count[0],"Drunk ",count[1])
total=count[0]+count[1]

lt_val =partition['val']['list']
dic_val=partition['val']['label']
count_val=count_classes(partition['val']['label'])
print("Val Sober ",count_val[0],"Drunk ",count_val[1])
total_val=count_val[0]+count_val[1]


Sober  306 Drunk  642
Val Sober  321 Drunk  321


In [8]:
vgg_pred={}
fp=float(0)
tp=float(0)
fn=float(0)
tn=float(0)

for i in lt:
    arr=np.expand_dims(np.load(vgg_dir+'/'+i+'.npy'),0)
    pred=model.predict(arr)
    pl=np.argmax(pred)
    prob=pred[0][1]
    vgg_pred[i]=prob
    if dic[i]==1:
        if pl==1:
            tp+=1
        else:
            fn+=1
    else:
        if pl==0:
            tn+=1
        else:
            fp+=1
acc=(tp+tn)/total
peci=(tp)/(tp+fp)
rec=(tp)/(tp+fn)
print("Accuracy ",acc)
print("precision ",peci)
print("recall ",rec)
print(tp)
print(tn)

Accuracy  0.7637130801687764
precision  0.7764550264550265
recall  0.9143302180685359
587.0
137.0


In [None]:
vgg_pred_val={}
fp=0
tp=0
fn=0
tn=0

for i in lt_val:
    arr=np.expand_dims(np.load(vgg_dir+'/'+i+'.npy'),0)
    pred=model.predict(arr)
    pl=np.argmax(pred)
    prob=pred[0][1]
    vgg_pred_val[i]=prob
    if dic_val[i]==1:
        if pl==1:
            tp+=1
        else:
            fn+=1
    else:
        if pl==0:
            tn+=1
        else:
            fp+=1
acc=(tp+tn)/total
peci=(tp)/(tp+fp)
rec=(tp)/(tp+fn)


In [45]:
print("Accuracy ",acc)
print("precision ",peci)
print("recall ",rec)
print(tp)
print(tn)

Accuracy  0.9419831223628692
precision  1.0
recall  0.9321824907521579
756
137


# audio 

In [9]:
am_path='/home/ubuntu/Vineet/repo/saved_models/audio_open/10/hp1/model-070-0.885156.h5'
amodel=load_keras_model(am_path)
amodel.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 1582)              0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 1582)              6328      
_________________________________________________________________
dense_4 (Dense)              (None, 512)               810496    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_6 (Dense)              (None, 2)                 514       
Total params: 948,666
Trainable params: 945,502
Non-trainable params: 3,164
_________________________________________________________________


In [10]:
audio_pred={}
fp1=float(0)
tp1=float(0)
fn1=float(0)
tn1=float(0)

for i in lt:
    arr=np.expand_dims(np.load(open_dir+'/'+i+'.npy'),0)
    pred=amodel.predict(arr)
    pl=np.argmax(pred)
    prob=pred[0][1]
    audio_pred[i]=prob
    if dic[i]==1:
        if pl==1:
            tp1+=1
        else:
            fn1+=1
    else:
        if pl==0:
            tn1+=1
        else:
            fp1+=1
acc=(tp1+tn1)/total
preci=(tp1)/(tp1+fp1)
rec=(tp1)/(tp1+fn1)
print("Accuracy ",acc)
print("precision ",preci)
print("recall ",rec)
print(tp1)
print(total)

Accuracy  0.8755274261603375
precision  0.8521505376344086
recall  0.9875389408099688
634.0
948


In [60]:
audio_pred_val={}
fp1=0
tp1=0
fn1=0
tn1=0

for i in lt_val:
    arr=np.expand_dims(np.load(open_dir+'/'+i+'.npy'),0)
    pred=amodel.predict(arr)
    pl=np.argmax(pred)
    prob=pred[0][1]
    audio_pred_val[i]=prob
    if dic_val[i]==1:
        if pl==1:
            tp1+=1
        else:
            fn1+=1
    else:
        if pl==0:
            tn1+=1
        else:
            fp1+=1
acc=(tp1+tn1)/total_val
preci=(tp1)/(tp1+fp1)
rec=(tp1)/(tp1+fn1)
print("Accuracy ",acc)
print("precision ",preci)
print("recall ",rec)

Accuracy  0.8566978193146417
precision  0.861198738170347
recall  0.8504672897196262


# C3D

In [11]:
c3d_pred={}
count=0
with open('3d_pred.csv') as csvfile:
    csv_reader = csv.reader(csvfile, delimiter=',')
    for row in csv_reader:
        fname=row[0].split('/')[3]
        ID=fname[:-4]
        c3d_pred[ID]=float(row[2])

In [12]:
fp1=0
tp1=0
fn1=0
tn1=0

for i in c3d_pred:
    gt=dic[i]
    pl=c3d_pred[i]>.5
    if gt==1:
        if pl:
            tp1+=1
        else:
            fn1+=1
    else:
        if not pl:
            tn1+=1
        else:
            fp1+=1
acc=(tp1+tn1)/total
preci=(tp1)/(tp1+fp1)
rec=(tp1)/(tp1+fn1)
print("Accuracy ",acc)
print("precision ",preci)
print("recall ",rec)

Accuracy  0.7742616033755274
precision  0.7907608695652174
recall  0.9065420560747663


# Ensmeble Scores

In [16]:
total_pred=float(87.55+76.37+77.42)
wt_vg=float(76.37)/total_pred
wt_a=float(87.55)/total_pred
wt_3d=float(77.42)/total_pred
print(wt_a)
print(wt_vg)
print(wt_3d)

0.362766221927571
0.31644153476423303
0.3207922433081959


In [17]:
fp1=0
tp1=0
fn1=0
tn1=0
ensem={}
for i in c3d_pred:
    gt=dic[i]
    prob=(wt_3d*c3d_pred[i] + wt_a*audio_pred[i] + wt_vg*vgg_pred[i])
    ensem[i]=prob
    pl=prob>.5
    if gt==1:
        if pl:
            tp1+=1
        else:
            fn1+=1
    else:
        if not pl:
            tn1+=1
        else:
            fp1+=1
acc=(tp1+tn1)/total
preci=(tp1)/(tp1+fp1)
rec=(tp1)/(tp1+fn1)
print("Accuracy ",acc)
print("precision ",preci)
print("recall ",rec)
print(tp1)
print(tn1)
print(total)

Accuracy  0.8839662447257384
precision  0.8594594594594595
recall  0.9906542056074766
636
202
948


## prediction observations 
Case 1: Wrong prediction by audio and right predicitons by video

In [20]:
for i in c3d_pred:
    if c3d_pred[i]<=.5 and vgg_pred[i]<=.5 and audio_pred[i]>=.5 and dic[i]==1 and ensem[i]>=.5:
        print(i)

videoplayback (26).mp4_14_59.mp4_aligned_3
videoplayback (26).mp4_20_104.mp4_aligned_1
videoplayback (27).mp4_26_98.mp4_aligned_1
videoplayback (27).mp4_36_148.mp4_aligned_1
videoplayback (36).mp4_23_4.mp4_aligned_2


In [21]:
for i in c3d_pred:
    if c3d_pred[i]>=.5 and vgg_pred[i]>=.5 and audio_pred[i]<=.5 and dic[i]==0 and ensem[i]<=.5:
        print(i)

Mark Zuckerberg- “I’m really sorry that this happened” - YouTube.MKV_18_33.mp4_aligned_1
Mark Zuckerberg- “I’m really sorry that this happened” - YouTube.MKV_27_49.mp4_aligned_4
Mark Zuckerberg- “I’m really sorry that this happened” - YouTube.MKV_27_49.mp4_aligned_7
Mark Zuckerberg- “I’m really sorry that this happened” - YouTube.MKV_27_49.mp4_aligned_8
Mark Zuckerberg- “I’m really sorry that this happened” - YouTube.MKV_37_72.mp4_aligned_3
What is your greatest weakness- - YouTube.MKV_1_1.mp4_aligned_44
