Insight DS, Jun 12, Seda Unal
#### Violence Detection in Videos

In [None]:
!mkdir test train validation Violence NonViolence

In [None]:
# Copy the Violence/ and NonViolence/ class folders into each split folder,
# so every split has one sub-folder per class.
!scp -r Violence NonViolence test/ 
!scp -r Violence NonViolence train/ 
!scp -r Violence NonViolence validation/

In [None]:
# Reset cell: run this only if the whole notebook has already been run once,
# because by then the content has been distributed to the split folders.
# It re-copies the source videos, deletes the split folders and recreates them.
# NOTE(review): assumes ViolenceData/ and NonViolenceData/ already exist -- confirm.
!scp ViolenceDataSet/* ViolenceData/. 
!scp NonViolenceDataSet/* NonViolenceData/. 
!rm -rf test train validation
!mkdir test train validation 
!scp -r Violence NonViolence test/ 
!scp -r Violence NonViolence train/ 
!scp -r Violence NonViolence validation/

In [None]:
!ls NonViolence//

##### Split data into Train, Validation and Test Folders

In [None]:
from glob import glob
import numpy as np

In [None]:
# Shuffle the video paths of each class with a fixed seed so that the
# train/validation/test split is reproducible across runs. glob() returns
# paths in arbitrary order, so they are sorted before shuffling.
split_rng = np.random.RandomState(42)
v_files = split_rng.permutation(sorted(glob("ViolenceData/*.avi")))
nv_files = split_rng.permutation(sorted(glob("NonViolenceData/*.avi")))

In [None]:
# Sanity check: number of videos per class and the container type.
(len(v_files), len(nv_files), type(v_files))

In [None]:
# Cut-off index per class: the first 80% of each shuffled list is kept for
# training + validation, the remainder becomes the test set.
train_v_idx, train_nv_idx = (int(len(paths) * .8) for paths in (v_files, nv_files))

In [None]:
# Preview: 80% of the violence train cut-off (where its validation slice starts).
int(train_v_idx * .8)

In [None]:
# Everything past each class's 80% cut-off is held out as test data.
test_v_files, test_nv_files = v_files[train_v_idx:], nv_files[train_nv_idx:]

In [None]:
from shutil import move

In [None]:
# Move every held-out video into its class folder under test/.
for destination, filenames in (("test/Violence/", test_v_files),
                               ("test/NonViolence/", test_nv_files)):
    for filename in filenames:
        move(filename, destination)

In [None]:
# Within the train+validation portion, the first 80% is used for training;
# these indices mark where the validation slice begins for each class.
valid_v_idx, valid_nv_idx = (int(cutoff * .8) for cutoff in (train_v_idx, train_nv_idx))

In [None]:
# Split the first 80% of each class into train ([:valid_idx]) and
# validation ([valid_idx:train_idx]).
# Each class must be bounded by its OWN train cut-off. Bounding the violence
# list by the non-violence cut-off makes the validation slice either reach
# into the test slice or drop videos whenever the class counts differ.
train_v_files, validation_v_files = v_files[:valid_v_idx], v_files[valid_v_idx:train_v_idx]
train_nv_files, validation_nv_files = nv_files[:valid_nv_idx], nv_files[valid_nv_idx:train_nv_idx]

In [None]:
# Move every training video into its class folder under train/.
for destination, filenames in (("train/Violence/", train_v_files),
                               ("train/NonViolence/", train_nv_files)):
    for filename in filenames:
        move(filename, destination)

In [None]:
# Move every validation video into its class folder under validation/.
for destination, filenames in (("validation/Violence/", validation_v_files),
                               ("validation/NonViolence/", validation_nv_files)):
    for filename in filenames:
        move(filename, destination)

##### Create .png images from videos (frame by frame depending on fps)

In [None]:
!find train/Violence/*.avi | while read f; do ffmpeg -i  $f ${f//.avi/-%03d.png}; sleep 5; done

In [None]:
!find train/NonViolence/*.avi | while read f; do ffmpeg -i  $f ${f//.avi/-%03d.png}; sleep 5; done

In [None]:
!find validation/Violence/*.avi | while read f; do ffmpeg -i  $f ${f//.avi/-%03d.png}; sleep 5; done

In [None]:
!find validation/NonViolence/*.avi | while read f; do ffmpeg -i  $f ${f//.avi/-%03d.png}; sleep 5; done

In [None]:
!find test/Violence/*.avi | while read f; do ffmpeg -i  $f ${f//.avi/-%03d.png}; sleep 5; done

In [None]:
!find test/NonViolence/*.avi | while read f; do ffmpeg -i  $f ${f//.avi/-%03d.png}; sleep 5; done

##### Re-training the Inception model

    Train and Validation Data Iterators

In [None]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Training images are augmented on the fly with random geometric transforms.
# NOTE(review): neither generator sets `rescale` or `preprocessing_function`,
# so raw pixel values reach the network -- confirm this is intended for the
# ImageNet-pretrained base model.
traindatagen = ImageDataGenerator(
    rotation_range=45,         # random rotations
    width_shift_range=0.2,     # random horizontal shifts
    height_shift_range=0.2,    # random vertical shifts
    shear_range=0.2,           # random shear
    zoom_range=0.2,            # random zoom
    horizontal_flip=True,      # random left/right flips
    fill_mode='nearest',       # how pixels exposed by a transform are filled
)

# Validation/test images are passed through without augmentation.
valdatagen = ImageDataGenerator()

In [None]:
batch_size = 4
from sklearn.utils import shuffle

# Directory iterator over train/: one sub-folder per class, shuffled batches,
# every image resized to 299x299. class_mode='categorical' yields one-hot
# labels, which is what the categorical_crossentropy loss used below expects.
train_generator = traindatagen.flow_from_directory(
    directory='train',
    target_size=(299, 299),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
)


In [None]:
# Despite its name, this iterator reads the validation/ folder; it is the
# validation data passed to training. Batches are not shuffled, images are
# resized to 299x299 and labels are one-hot (class_mode='categorical').
test_generator = valdatagen.flow_from_directory(
    directory='validation',
    target_size=(299, 299),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
)

    Model

In [None]:
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
from keras import optimizers
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

In [None]:
# Base network: InceptionV3 with ImageNet weights. include_top=False drops
# the fully-connected classifier at the top of the network.
base_model = InceptionV3(include_top=False, weights='imagenet')

# Collapse the spatial dimensions of its output with global average pooling.
x = GlobalAveragePooling2D()(base_model.output)

In [None]:
# Fully-connected hidden layer; the first argument is the output dimension.
# This cell rebinds x to its own output, so re-running it stacks another layer.
hidden_layer = Dense(16, activation='relu')
x = hidden_layer(x)

In [None]:
# Output layer: softmax over the 2 classes.
output_layer = Dense(2, activation='softmax')
predictions = output_layer(x)

In [None]:
# The model to train: InceptionV3 input -> new classification head.
model = Model(
    inputs=base_model.input,
    outputs=predictions,
)

In [None]:
# Stage 1: freeze every InceptionV3 layer so that only the newly added
# (randomly initialized) top layers are trained.
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# Compile *after* changing `trainable` so the setting takes effect.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

    Training

In [None]:
from keras.callbacks import EarlyStopping

nb_epoch = 2

# Stop training once val_loss has failed to improve for 2 epochs in a row.
early_stopping = EarlyStopping(patience=2, monitor='val_loss')

## Train the model
# Only 2 batches are drawn per epoch for both training and validation.
TrainingLogs = model.fit_generator(
    train_generator,
    epochs=nb_epoch,
    steps_per_epoch=2,
    validation_data=test_generator,
    validation_steps=2,
    callbacks=[early_stopping],
    pickle_safe=False,
    verbose=1,
)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from sklearn.metrics import confusion_matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix on the current matplotlib figure.

    Parameters
    ----------
    cm : numpy array, rows = true labels, columns = predicted labels.
    classes : iterable of class names used as tick labels (a dict
        contributes its keys, in iteration order).
    normalize : if True, each row is divided by its sum before printing
        and plotting.
    title : figure title.
    cmap : matplotlib colormap for the image.
    """
    # Imported locally so the function does not depend on a module-level
    # `import itertools` having been executed in another cell.
    import itertools

    # Normalize BEFORE drawing so the colours and the colour bar describe the
    # same numbers that are printed and written into the cells.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    labels = list(classes)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(labels))
    plt.xticks(tick_marks, labels, rotation=45)
    plt.yticks(tick_marks, labels)

    # Integer counts are shown as-is, fractions with two decimals.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')



In [None]:
# Iterator over the held-out test/ folder (images resized to 299x299).
# flow_from_directory shuffles by default; shuffle=False keeps the batches in
# directory order so that predictions made from this iterator line up with
# the `.classes` labels they are compared against.
heldout = valdatagen.flow_from_directory(
    directory='test',
    target_size=(299, 299),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
)




In [None]:
from keras.models import load_model
# Replace the in-memory model with the one stored in this checkpoint file.
model = load_model(
    'EarlyStopped_batch32_Epoch100_Steps230.h5'
)

In [None]:
from sklearn import svm, datasets
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
import itertools 
from sklearn.metrics import confusion_matrix



# real labels
# `.classes` holds the class index of every file under test/ in directory
# order. It only lines up with the predictions below if `heldout` iterates in
# that same order (i.e. it was created with shuffle=False).
itr2 = valdatagen.flow_from_directory('test', batch_size=1, target_size=(32,32))
real_vals = (itr2.classes)
n_classes = len(itr2.class_indices)

# predicted labels
# Score the WHOLE held-out set: steps=1 would predict a single batch only,
# leaving fewer predictions than true labels.
heldout.reset()
n_steps = int(np.ceil(heldout.samples / float(heldout.batch_size)))
res = model.predict_generator(heldout, steps=n_steps, max_q_size=1, workers=1, pickle_safe=False, verbose=0)
# Class 0 only when its score is strictly larger; ties go to class 1.
pred_vals = np.where(res[:, 0] > res[:, 1], 0, 1)
predicted_vals = pred_vals.tolist()

# Per-class counts: clas1/clas1r = number of label-1 entries among the
# predictions / true labels; clas2/clas2r = the remaining (label-0) entries.
clas1, clas1r = pred_vals.sum(), real_vals.sum()
clas2, clas2r = len(pred_vals) - clas1, len(real_vals) - clas1r

# Containers for the precision-recall results; filled in below and keyed by
# class index or by "micro".
precision, recall, average_precision = {}, {}, {}

def ytest(realy):
    """One-hot encode a sequence of binary class labels.

    Parameters
    ----------
    realy : 1-D sequence of class labels.

    Returns
    -------
    Integer array of shape (len(realy), 2): row [1, 0] for label 0 and
    row [0, 1] for any other label.

    The labels are taken from the `realy` argument (not from a global), so
    the function works for any label vector passed in.
    """
    labels = np.asarray(realy)
    is_second_class = (labels != 0).astype(int)
    return np.stack([1 - is_second_class, is_second_class], axis=1)
y_test = ytest(real_vals)

# Precision-recall curve and average precision for each class separately.
for i in range(n_classes):
    class_truth, class_scores = y_test[:, i], res[:, i]
    precision[i], recall[i], _ = precision_recall_curve(class_truth, class_scores)
    average_precision[i] = average_precision_score(class_truth, class_scores)

# Micro-average precision-recall: pool the (label, score) pairs of all classes.
precision["micro"], recall["micro"], _ = precision_recall_curve(
    y_test.ravel(), res.ravel())
average_precision["micro"] = average_precision_score(
    y_test, res, average="micro")


# Plot the Precision-Recall curve of class 0
plt.clf()
plt.plot(recall[0], precision[0],color='navy',
         label='Precision-Recall curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
plt.legend(loc="lower left")
plt.show()

# Plot the micro-averaged Precision-Recall curve. Axis labels, limits, legend
# and show() are added so the figure is readable on its own and its label
# (which carries the average-precision value) is actually displayed.
plt.clf()
plt.plot(recall["micro"], precision["micro"], color='gold',
         label='micro-average Precision-recall curve (area = {0:0.2f})'
               ''.format(average_precision["micro"]))
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.legend(loc="lower left")
plt.show()


# Confusion Matrix
# NOTE: a function with this name is also defined in an earlier cell; this
# later definition is the one in effect for the calls that follow.
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix on the current matplotlib figure.

    Parameters
    ----------
    cm : numpy array, rows = true labels, columns = predicted labels.
    classes : iterable of class names used as tick labels (a dict
        contributes its keys, in iteration order).
    normalize : if True, each row is divided by its sum before printing
        and plotting.
    title : figure title.
    cmap : matplotlib colormap for the image.
    """
    # Normalize BEFORE drawing so the colours and the colour bar describe the
    # same numbers that are printed and written into the cells.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    labels = list(classes)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(labels))
    plt.xticks(tick_marks, labels, rotation=45)
    plt.yticks(tick_marks, labels)

    # Integer counts are shown as-is, fractions with two decimals.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


cnf_matrix = confusion_matrix(real_vals, pred_vals)
np.set_printoptions(precision=3)

# class_indices maps class name -> class index. Pass the names ordered by
# index so the tick labels are guaranteed to match the matrix rows/columns,
# instead of relying on the dict's iteration order.
class_names = sorted(itr2.class_indices, key=itr2.class_indices.get)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')

plt.show()

In [None]:
# True labels: class index of every file under test/, in directory order.
# They only line up with the predictions below if `heldout` iterates in that
# same order (i.e. it was created with shuffle=False).
itr = valdatagen.flow_from_directory('test', batch_size=1, target_size=(32,32))
real_vals = itr.classes

# Score the WHOLE held-out set: steps=1 would predict a single batch only,
# leaving fewer predictions than true labels.
heldout.reset()
n_steps = int(np.ceil(heldout.samples / float(heldout.batch_size)))
res = model.predict_generator(heldout, steps=n_steps, max_q_size=1, workers=1, pickle_safe=False, verbose=0)

# Class 0 only when its score is strictly larger; ties go to class 1.
pred_vals = np.where(res[:, 0] > res[:, 1], 0, 1)
predicted_vals = pred_vals.tolist()



In [None]:

def precision(y_true, y_pred):
    """Precision computed with Keras backend ops.

    Both products and predictions are clipped to [0, 1] and rounded before
    being summed; the result is (rounded true-and-predicted positives) /
    (rounded predicted positives + K.epsilon()), the epsilon guarding
    against division by zero.
    """
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged_count = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hit_count / (flagged_count + K.epsilon())

# Per-class counts among predictions (clas1/clas2) and true labels
# (clas1r/clas2r): label-1 entries first, the remainder second.
clas1, clas1r = pred_vals.sum(), real_vals.sum()
clas2, clas2r = len(pred_vals) - clas1, len(real_vals) - clas1r

# Despite its name, TP counts every position where the prediction equals the
# true label (both classes), not only true positives.
matches = abs(pred_vals == real_vals)
TP = matches.sum()
print(TP)


cnf_matrix = confusion_matrix(real_vals, pred_vals)
np.set_printoptions(precision=3)

# Take the class names from `itr`, the iterator this section's real_vals come
# from, rather than from an iterator created in an earlier cell. They are
# ordered by class index so tick labels match the matrix rows/columns
# regardless of the dict's iteration order.
class_names = sorted(itr.class_indices, key=itr.class_indices.get)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')

plt.show()