In [None]:
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms
import os
import pandas as pd
from torchvision.models import densenet121
from torchvision.models import alexnet
from tqdm import tqdm
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
import math
from collections import OrderedDict

In [None]:
# Mount Google Drive (Colab only) so files stored there are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')
import sys
# Extend the import path *before* the import below: the `convlstm` module is
# expected to live in this Drive folder (verify the folder name for your setup).
sys.path.append('/content/drive/MyDrive/violence-detection')
from convlstm import ConvLSTM



In [None]:
# Directory holding the video files. The data-loading cell lists this folder
# and picks up every file ending in ".avi".
directory = '/content/drive/MyDrive/data' # modify as needed

In [None]:
# # TESTING THAT A VIDEO CAN BE READ
# # Create a VideoCapture object
# cap = cv2.VideoCapture(directory + '/' + 'Violence' + '/' + 'Violence001.avi')

# # Check if camera opened successfully
# if (cap.isOpened()== False):
#     print("Error opening video file")

# # Read until video is completed
# while(cap.isOpened()):
#     # Capture frame-by-frame
#     ret, frame = cap.read()
#     if ret == True:
#         # Display the resulting frame
#         cv2.imshow('Frame', frame)
#         # Press Q on keyboard to exit
#         if cv2.waitKey(25) & 0xFF == ord('q'):
#             break
#     # Break the loop
#     else:
#         break

# # When everything done, release the video capture object
# cap.release()

# # Closes all the frames
# cv2.destroyAllWindows()

In [None]:
def calculate_optical_flow(video_path, frame_skip=4):
    """Convert a video into a list of dense optical-flow images.

    The first frame is used as the initial reference. After it, frames are
    counted from 1 and only those whose count is a multiple of ``frame_skip``
    are sampled. Each sampled frame is resized to 224x224, converted to
    grayscale and compared with the previously *sampled* frame using Farneback
    dense optical flow. The flow field is encoded as an HSV image
    (hue from flow angle, value from min-max normalised magnitude, saturation
    fixed at 255) and converted to BGR.

    Args:
        video_path: Path of a video file to open with ``cv2.VideoCapture``.
        frame_skip: Sampling stride (see above).

    Returns:
        list[np.ndarray]: One ``float32`` array of shape ``(224, 224, 3)`` per
        sampled frame, in temporal order. The list is empty when the video has
        fewer than ``frame_skip`` frames after the first one.

    Raises:
        IOError: If no first frame can be read (missing, unreadable or empty
            video). Previously this surfaced as an opaque error inside
            ``cv2.resize``.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        ret, first_frame = cap.read()
        if not ret or first_frame is None:
            raise IOError(f"Could not read a first frame from video: {video_path}")

        prev_gray = cv2.cvtColor(cv2.resize(first_frame, (224, 224)),
                                 cv2.COLOR_BGR2GRAY)

        # Allocate the HSV buffer as float32 once; cvtColor below needs float32
        # input, so this avoids re-casting a float64 buffer for every frame.
        hsv = np.zeros((prev_gray.shape[0], prev_gray.shape[1], 3),
                       dtype=np.float32)
        hsv[..., 1] = 255

        optical_flows = []  # one BGR flow image per sampled frame
        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1
            if frame_count % frame_skip != 0:
                continue

            curr_gray = cv2.cvtColor(cv2.resize(frame, (224, 224)),
                                     cv2.COLOR_BGR2GRAY)

            flow = cv2.calcOpticalFlowFarneback(
                prev_gray, curr_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
            mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
            hsv[..., 0] = ang * 180 / np.pi / 2
            hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
            # NOTE(review): these are the 8-bit HSV conventions (H in [0, 180],
            # S = 255, V in [0, 255]) but the buffer is float32, for which
            # OpenCV documents H in [0, 360] and S, V in [0, 1]. Left unchanged
            # so that existing datasets/models stay compatible - confirm intent.
            bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
            optical_flows.append(bgr)

            # The next flow is measured against this sampled frame, not against
            # the skipped frames in between.
            prev_gray = curr_gray
    finally:
        # Always release the capture handle, even if decoding fails midway.
        cap.release()

    return optical_flows

### Loading all videos, transforming them into optical flow representations, storing in memory, and creating a training/testing data loader.

**After the cell below has run, the data loaders are ready and the model can be trained.**

In [None]:
from sklearn.model_selection import train_test_split

# Preprocessing settings shared by every video.
frame_skip = 2         # sampling stride passed to calculate_optical_flow
resultant_frames = 20  # every clip is padded / truncated to this many flow frames

Videos = []
labels = []

for filename in tqdm(os.listdir(directory)):
    if not filename.endswith(".avi"):  # videos are in .avi format
        continue

    # The class is encoded in the file-name prefix: 'fi...' -> 0, 'no...' -> 1.
    # Decide the label first so that unlabelled files are skipped *before*
    # the expensive optical-flow computation.
    if filename.startswith('fi'):
        label = 0
    elif filename.startswith('no'):
        label = 1
    else:
        continue

    video_path = os.path.join(directory, filename)
    optical_flows = calculate_optical_flow(video_path, frame_skip=frame_skip)
    if not optical_flows:
        # Nothing to pad from (the video is shorter than the sampling stride).
        tqdm.write(f'Skipping {filename}: no optical-flow frames extracted')
        continue

    if len(optical_flows) < resultant_frames:
        # Left-pad with all-zero frames so that the real motion stays at the end.
        padding = [np.zeros_like(optical_flows[0])
                   for _ in range(resultant_frames - len(optical_flows))]
        optical_flows = padding + optical_flows
    else:
        optical_flows = optical_flows[:resultant_frames]

    # (H, W, C) -> (C, H, W) per frame, stacked into (T, C, H, W).
    Videos.append(np.stack(
        [np.transpose(flow, (2, 0, 1)) for flow in optical_flows]))
    labels.append(label)

if not Videos:
    raise RuntimeError(
        f'No labelled .avi videos (prefix "fi" or "no") found in {directory}')

data = np.array(Videos, dtype=np.float32)   # (N, T, C, H, W)
labels = np.array(labels, dtype=np.int64)

# split into train and test data using sklearn
train_data, test_data, train_labels, test_labels = train_test_split(
    data, labels, test_size=0.2, random_state=42)

# train data: (N, T, C, H, W) -> (N, C, T, H, W), the layout nn.Conv3d expects
train_data = torch.from_numpy(train_data).permute(0, 2, 1, 3, 4)
train_labels = torch.from_numpy(train_labels).long()
train_dataset = TensorDataset(train_data, train_labels)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, drop_last=True)
print(len(train_loader))
print(train_loader.dataset.tensors[0].shape)

# test data
test_data = torch.from_numpy(test_data).permute(0, 2, 1, 3, 4)
test_labels = torch.from_numpy(test_labels).long()
test_dataset = TensorDataset(test_data, test_labels)
# Evaluation must see every test sample exactly once: no shuffling and no
# dropped last batch (drop_last=True would silently discard samples and yields
# zero batches when the test set has fewer than 16 videos).
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, drop_last=False)
print(len(test_loader))
print(test_loader.dataset.tensors[0].shape)

# Save
# NOTE(review): this pickles the whole DataLoader objects (including the data);
# recent PyTorch versions may need torch.load(..., weights_only=False) to read
# them back - verify with the installed version.
torch.save(train_loader, '/content/drive/My Drive/train_loader.pth')
torch.save(test_loader, '/content/drive/My Drive/test_loader.pth')

**If you already have the loaders saved, uncomment and run the cell below instead of rebuilding them**

In [None]:
# train_loader = torch.load('/content/drive/My Drive/train_loader.pth')
# test_loader = torch.load('/content/drive/My Drive/test_loader.pth')
# print(len(train_loader))
# print(len(test_loader))

### DenseNet121 model adapted for video data and ConvLSTM added

In [None]:
# Load torchvision's ImageNet-pretrained 2D DenseNet-121.
# NOTE(review): a later cell reassigns `model` to the custom 3D `DenseNet`
# defined below, so this instance (and its downloaded weights) appears to be
# unused by the rest of the notebook - confirm whether this cell is still needed.
# NOTE(review): newer torchvision releases replace `pretrained=` with `weights=`;
# verify against the installed version.
model = densenet121(pretrained=True)
# print(model)

In [None]:
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict


class _DenseLayer(nn.Sequential):

    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
        super().__init__()
        self.add_module('norm1', nn.BatchNorm3d(num_input_features))
        self.add_module('relu1', nn.ReLU(inplace=True))
        self.add_module(
            'conv1',
            nn.Conv3d(num_input_features,
                      bn_size * growth_rate,
                      kernel_size=1,
                      stride=1,
                      bias=False))
        self.add_module('norm2', nn.BatchNorm3d(bn_size * growth_rate))
        self.add_module('relu2', nn.ReLU(inplace=True))
        self.add_module(
            'conv2',
            nn.Conv3d(bn_size * growth_rate,
                      growth_rate,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=False))
        self.drop_rate = drop_rate

    def forward(self, x):
        new_features = super().forward(x)
        if self.drop_rate > 0:
            new_features = F.dropout(new_features,
                                     p=self.drop_rate,
                                     training=self.training)
        return torch.cat([x, new_features], 1)


class _DenseBlock(nn.Sequential):
    """Stack of ``num_layers`` dense layers.

    Each layer adds ``growth_rate`` channels to what it receives, so layer
    ``k`` (1-based) is built for ``num_input_features + (k - 1) * growth_rate``
    input channels. Layers are registered as ``denselayer1``, ``denselayer2``, ...
    """

    def __init__(self, num_layers, num_input_features, bn_size, growth_rate,
                 drop_rate):
        super().__init__()
        in_channels = num_input_features
        for index in range(1, num_layers + 1):
            self.add_module(
                f'denselayer{index}',
                _DenseLayer(in_channels, growth_rate, bn_size, drop_rate))
            in_channels += growth_rate


class _Transition(nn.Sequential):

    def __init__(self, num_input_features, num_output_features):
        super().__init__()
        self.add_module('norm', nn.BatchNorm3d(num_input_features))
        self.add_module('relu', nn.ReLU(inplace=True))
        self.add_module(
            'conv',
            nn.Conv3d(num_input_features,
                      num_output_features,
                      kernel_size=1,
                      stride=1,
                      bias=False))
        self.add_module('pool', nn.AvgPool3d(kernel_size=2, stride=2))


class DenseNet(nn.Module):
    """3D DenseNet-BC backbone followed by a ConvLSTM and a linear classifier.

    Args:
        n_input_channels (int) - number of channels of the input clip
        conv1_t_size (int) - temporal kernel size of the first convolution
        conv1_t_stride (int) - temporal stride of the first convolution
        no_max_pool (bool) - if True, omit the max-pooling layer after the
          first convolution
        growth_rate (int) - how many filters to add each layer (k in paper)
        block_config (sequence of ints) - how many layers in each dense block;
          a transition layer (halving channels and resolution) follows every
          block except the last
        num_init_features (int) - the number of filters to learn in the first convolution layer
        bn_size (int) - multiplicative factor for number of bottle neck layers
          (i.e. bn_size * k features in the bottleneck layer)
        drop_rate (float) - dropout rate after each dense layer
        num_classes (int) - number of classification classes

    Shape constraints:
        ``forward`` feeds ``x`` straight into ``nn.Conv3d`` layers. The
        ConvLSTM is created with ``input_dim=1`` and the classifier with a
        fixed 196 input features, so only inputs whose backbone output matches
        those sizes work.
        NOTE(review): presumably the ConvLSTM (``batch_first=True``) reads the
        backbone output ``(N, C, T', H', W')`` as ``(batch, time, channels,
        H, W)``, which requires ``T' == 1``; with the default arguments that
        is the case for 20-frame 224x224 clips (``T' = 1``, ``H' = W' = 7``,
        and ``4 * 7 * 7 = 196``) - confirm against ``convlstm.py``.
    """

    def __init__(self,
                 n_input_channels=3,
                 conv1_t_size=7,
                 conv1_t_stride=1,
                 no_max_pool=False,
                 growth_rate=32,
                 block_config=(6, 12, 24, 16),
                 num_init_features=64,
                 bn_size=4,
                 drop_rate=0,
                 num_classes=2):

        super().__init__()

        # First convolution (stem). Built as a local list so that
        # ``self.features`` is only ever assigned an nn.Module.
        stem = [('conv1',
                 nn.Conv3d(n_input_channels,
                           num_init_features,
                           kernel_size=(conv1_t_size, 7, 7),
                           stride=(conv1_t_stride, 2, 2),
                           padding=(conv1_t_size // 2, 3, 3),
                           bias=False)),
                ('norm1', nn.BatchNorm3d(num_init_features)),
                ('relu1', nn.ReLU(inplace=True))]
        if not no_max_pool:
            stem.append(
                ('pool1', nn.MaxPool3d(kernel_size=3, stride=2, padding=1)))
        self.features = nn.Sequential(OrderedDict(stem))

        # Each denseblock
        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(num_layers=num_layers,
                                num_input_features=num_features,
                                bn_size=bn_size,
                                growth_rate=growth_rate,
                                drop_rate=drop_rate)
            self.features.add_module('denseblock{}'.format(i + 1), block)
            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                trans = _Transition(num_input_features=num_features,
                                    num_output_features=num_features // 2)
                self.features.add_module('transition{}'.format(i + 1), trans)
                num_features = num_features // 2

        self.convlstm = ConvLSTM(input_dim=1,
                                 hidden_dim=[4, 4],
                                 kernel_size=(3, 3),
                                 num_layers=2,
                                 batch_first=True,
                                 bias=True,
                                 return_all_layers=False)

        # Linear layer (input size is fixed, see "Shape constraints" above)
        self.classifier = nn.Linear(196, num_classes)

        # Single initialisation pass. The earlier duplicate loop that used the
        # deprecated ``nn.init.kaiming_normal`` was removed: every value it set
        # was overwritten here anyway.
        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm3d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for clip batch ``x``."""
        features = self.features(x)
        out, _ = self.convlstm(features)
        # Take the output from the last time step of the LSTM
        out = out[0][:, -1, :, :, :]  # Shape: [batch, channels, height, width]

        # reshape (rather than view) also works if the slice is not contiguous
        flattened_output = out.reshape(out.size(0), -1)  # Shape: [batch, channels*height*width]

        return self.classifier(flattened_output)



In [None]:
# Instantiate the 3D DenseNet + ConvLSTM classifier with half the default
# stem width (32 instead of 64) and half the default growth rate (16 instead of 32).
model = DenseNet(
    num_init_features=32,
    growth_rate=16,
    block_config=(6, 12, 24, 16),
)
print(model)

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model = model.to(device)

**The cell below tests that the model is working as expected**

In [None]:
# Smoke test: push one optical-flow clip through the model and check the output shape.
# test_vid_path = directory + '/' + 'Violence' + '/' + 'Violence001.avi'
test_vid_path = directory + '/' + 'fi1_xvid.avi'
test_optical_flow = calculate_optical_flow(test_vid_path, 2)
assert test_optical_flow, f'No optical-flow frames extracted from {test_vid_path}'

# Fit the clip to the same length the data loaders use (20 flow frames,
# left-padded with zeros), so the test does not depend on this particular
# video happening to have the right number of frames.
expected_frames = 20
if len(test_optical_flow) < expected_frames:
    test_optical_flow = [
        np.zeros_like(test_optical_flow[0])
        for _ in range(expected_frames - len(test_optical_flow))
    ] + test_optical_flow
else:
    test_optical_flow = test_optical_flow[:expected_frames]

# (H, W, C) -> (C, H, W) per frame, stacked into (T, C, H, W)
test_optical_flow = np.stack([np.transpose(flow, (2, 0, 1))
                             for flow in test_optical_flow])
# test_optical_flow = torch.tensor(test_optical_flow).half().unsqueeze(0)
test_optical_flow = torch.tensor(test_optical_flow).unsqueeze(0)   # (1, T, C, H, W)
test_optical_flow = test_optical_flow.permute(0, 2, 1, 3, 4)       # (1, C, T, H, W)
test_optical_flow = test_optical_flow.to(device)
print(test_optical_flow.shape)
torch.cuda.empty_cache()

# Run in eval mode without autograd: a shape check should neither update the
# BatchNorm running statistics nor keep activations around for backprop.
was_training = model.training
model.eval()
with torch.no_grad():
    output = model(test_optical_flow)
model.train(was_training)  # restore whatever mode the model was in

print(output.shape) # [1, 2]
torch.cuda.empty_cache()

## Training the model

In [None]:
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)
lossfun = nn.CrossEntropyLoss()
# number of epochs
numepochs = 50
# per-epoch history; all three lists always end up with the same length
losses = []
trainAcc = []
testAcc = []
# loop over epochs
for epochi in range(numepochs):
  torch.cuda.empty_cache() # clear cache
  # switch on training mode
  model.train()

  # loop over training data batches
  batchAcc = []
  batchLoss = []
  for X, y in train_loader:
    X = X.to(device)
    y = y.to(device)
    # forward pass and loss
    yHat = model(X)
    loss = lossfun(yHat, y)

    # backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # loss and accuracy from this batch
    batchLoss.append(loss.item())
    batchAcc.append(
        100*torch.mean((torch.argmax(yHat, dim=1) == y).float()).item())
    torch.cuda.empty_cache()
  # end of batch loop...

  # average training accuracy and loss across the batches
  trainAcc.append(np.mean(batchAcc))
  losses.append(np.mean(batchLoss))

  # evaluate on the test set; count correct predictions over all samples so
  # the result is exact even when the last batch is smaller than the others
  model.eval()
  n_correct = 0
  n_seen = 0
  with torch.no_grad():
    # `X_test` / `y_test` (not `labels`) so the dataset's label array is not overwritten
    for X_test, y_test in test_loader:
      X_test = X_test.to(device)
      y_test = y_test.to(device)
      outputs = model(X_test)
      n_correct += (torch.argmax(outputs, dim=1) == y_test).sum().item()
      n_seen += y_test.numel()
  testAcc.append(100 * n_correct / n_seen if n_seen else float('nan'))

  # Report *before* the early-stopping check so that the final epoch is also
  # recorded in `losses` and printed.
  print(
      f'Epoch {epochi+1}/{numepochs}, Loss: {losses[-1]}, Train Accuracy: {trainAcc[-1]}, Test Accuracy: {testAcc[-1]}')

  # NOTE(review): stopping on *test* accuracy leaks test information into
  # model selection; a separate validation split would be cleaner.
  if testAcc[-1] >= 80:
    break
  # end of epoch loop...
# end epochs
# output

In [None]:
# Save the model's parameters (its state_dict, not the full module object) to Google Drive.
torch.save(model.state_dict(), '/content/drive/My Drive/model.pth')

## Testing Model Performance
**Run the cells below once the model has been trained in this session, or after loading a previously trained model from a saved `.pth` file.**

In [None]:
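# # Uncomment this cell only if you have a saved model and wish to load it.
# # Note: the `model` variable must already be initialized (run the model-definition cells above) before loading the weights.
# # Note: the save cell above writes to '/content/drive/My Drive/model.pth'; adjust the path below to wherever your model.pth is stored.
# def getSavedModel():
#     model.load_state_dict(torch.load('model.pth'))
#     return model
# model = getSavedModel()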


**Get predictions using test data**

In [None]:
# Evaluate the trained (or loaded) model on the whole test loader.
model.eval()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

class_ids = [0, 1]                    # 0 = 'fi...' videos, 1 = 'no...' videos
class_names = ['Violence', 'Normal']  # display names, in the same order

testAcc2 = []
yTrue = []
yPred = []
batchAcc = []
with torch.no_grad():
    # `targets` (not `labels`) so the dataset's label array is not overwritten
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = model(inputs)
        preds = torch.argmax(outputs, dim=1)
        batchAcc.append(100*torch.mean((preds == targets).float()).item())
        yTrue.extend(targets.cpu().numpy())
        yPred.extend(preds.cpu().numpy())
testAcc2.append(np.mean(batchAcc))

print(classification_report(yTrue, yPred))
print('Accuracy:', accuracy_score(yTrue, yPred))
print('F1:', f1_score(yTrue, yPred, average='weighted'))
print('Precision:', precision_score(yTrue, yPred, average='weighted'))
print('Recall:', recall_score(yTrue, yPred, average='weighted'))

# Pass `labels` explicitly so the matrix is always 2x2 and lines up with the
# tick labels, even if one class never occurs in yTrue / yPred.
cm = confusion_matrix(yTrue, yPred, labels=class_ids)
ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
                 xticklabels=class_names, yticklabels=class_names)
# scikit-learn puts true classes on the rows and predictions on the columns.
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
plt.show()

**Do not run the cell below unless you trained the model in this session** (it needs the `trainAcc`, `testAcc` and `losses` histories produced by the training loop).

In [None]:
# DO NOT RUN THIS CELL IF YOU LOADED THE MODEL AND DID NOT TRAIN IT AS TRAINACC & LOSSES WILL BE UNDEFINED
# Two side-by-side panels: accuracy curves on the left, training loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: per-epoch training and testing accuracy.
acc_ax.plot(trainAcc)
acc_ax.plot(testAcc)
acc_ax.set_title('Training vs Testing Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.legend(['Training', 'Testing'])

# Right panel: per-epoch mean training loss.
loss_ax.plot(losses)
loss_ax.set_title('Training Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')

plt.show()