# Intelligent Crowd Scene Analytics
## Intelligent Sensing Systems Practice Module 2022

## Analytics engine capable of providing persistent monitoring on both indoor and outdoor static CCTVs to provide higher-order insights on crowd social behaviour
## Modules include:
#### - Social Distancing Monitoring (COVID-19)
#### - Crowd Management (Crowd Counting)
#### - Crowd Anomaly (Flow Estimation/Stampede Alert)
#### - Fall Detection (Medical Emergencies)
#### - Violent Behaviour Detection (Fighting, assault)

### Team: Adriel Kuek, Chua Hao Zi, KC Lim & Yap Pow Look

In [7]:
# Limit the number of CPU threads used by high-performance libraries.
# These variables are set BEFORE torch is imported: the OpenMP / MKL / BLAS
# runtimes are expected to read them when they are first loaded, so setting
# them after the import may have no effect.
# NOTE(review): if torch was already imported earlier in this kernel session,
# restart the kernel for the limits to apply.
import os
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"

# Import
import torch
from IPython.display import Image, clear_output

# Clear any previous cell output, then report the torch build and the device
# that will be used (name of GPU 0 when CUDA is available, otherwise 'CPU').
clear_output()
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

import sys
sys.path.insert(0, './yolov5')

import argparse
import os
import platform
import shutil
import time
from pathlib import Path
import cv2
import torch
import torch.backends.cudnn as cudnn

from yolov5.models.experimental import attempt_load
from yolov5.utils.downloads import attempt_download
from yolov5.models.common import DetectMultiBackend
from yolov5.utils.datasets import LoadImages, LoadStreams
from yolov5.utils.general import (LOGGER, check_img_size, non_max_suppression, scale_coords, 
                                  check_imshow, xyxy2xywh, increment_path)
from yolov5.utils.torch_utils import select_device, time_sync
from yolov5.utils.plots import Annotator, colors
from deep_sort.utils.parser import get_config
from deep_sort.deep_sort import DeepSort

# FILE = Path(__file__).resolve()
# The absolute path of the current working directory stands in for the
# project root; it is appended to sys.path once so project-local packages
# resolve, then ROOT is re-expressed relative to the working directory.
ROOT = os.path.abspath('')
_root_entry = str(ROOT)
if _root_entry not in sys.path:
    sys.path.append(_root_entry)  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

# Use CUDA when it is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

Setup complete. Using torch 1.10.2+cu102 (TITAN Xp)


# Initialise Models
## YOLOv5 + DeepSORT

In [8]:
# Run-time switches for this notebook
evaluate = True       # when False, the output directory below is wiped and recreated
half = True           # request FP16 inference; cleared below when it cannot be used
output = 'output/'
imgsz = [640, 640]
device = select_device(device)

# Initialise DeepSORT from its YAML configuration
cfg = get_config()
cfg.merge_from_file('deep_sort/configs/deep_sort.yaml')
tracker_cfg = cfg.DEEPSORT
deepsort = DeepSort(
    'osnet_x0_25',
    device,
    max_dist=tracker_cfg.MAX_DIST,
    max_iou_distance=tracker_cfg.MAX_IOU_DISTANCE,
    max_age=tracker_cfg.MAX_AGE,
    n_init=tracker_cfg.N_INIT,
    nn_budget=tracker_cfg.NN_BUDGET,
)

# Half precision is only kept for non-CPU devices
half = half and device.type != 'cpu'

# Outside evaluation mode, start from an empty output directory
if not evaluate:
    if os.path.exists(output):
        shutil.rmtree(output)
    os.makedirs(output)

# Directory that receives this run's results
save_dir = increment_path(Path(ROOT) / 'exp', exist_ok=True)
save_dir.mkdir(parents=True, exist_ok=True)  # make dir

# Load YOLO Model
model = DetectMultiBackend(
    'yolov5/models/crowdhuman_yolov5m.pt',
    device=device,
    dnn=True,
)
stride, names, pt, jit, _ = (
    model.stride,
    model.names,
    model.pt,
    model.jit,
    model.onnx,
)
imgsz = check_img_size(imgsz, s=stride)

# FP16 additionally requires the model's `pt` flag to be set
half &= pt and device.type != 'cpu'
if pt:
    if half:
        model.model.half()
    else:
        model.model.float()

YOLOv5 🚀 2022-2-9 torch 1.10.2+cu102 CUDA:cuda (TITAN Xp, 12195MiB)



Successfully loaded imagenet pretrained weights from "/home/user/.cache/torch/checkpoints/osnet_x0_25_imagenet.pth"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']
Model: osnet_x0_25
- params: 203,568
- flops: 82,316,000


Fusing layers... 
Model Summary: 308 layers, 21041679 parameters, 0 gradients


# Video Extraction Module
## Extract video segment
## Convert video to image frames

In [9]:
# Video Source
source = '/media/user/New Volume/MTech/ITSS/WildTrack_dataset/cam7_short2.mp4'

# Live preview is requested, but only kept if check_imshow() reports that
# the environment supports image display
show_vid = True
if show_vid:
    show_vid = check_imshow()
    print(f'show_vid: {show_vid}')

# Dataloader
bs = 1  # batch_size
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit)

# One output-path slot and one writer slot per batch element
vid_path = [None] * bs
vid_writer = [None] * bs

show_vid: True


In [10]:
# Class names: taken from model.module when the model has that attribute,
# otherwise from the model itself
names = getattr(model, 'module', model).names

# Text results file: the source's last path component up to its first '.',
# placed inside save_dir with a .txt extension
txt_file_name = source.rsplit('/', 1)[-1].partition('.')[0]
txt_path = f"{Path(save_dir)}/{txt_file_name}.txt"

# Warm-up pass with an all-zero batch (only for a `pt` model on a non-CPU device);
# the dummy input is cast to the dtype of the model's parameters
if pt and device.type != 'cpu':
    model(
        torch.zeros(1, 3, *imgsz)
        .to(device)
        .type_as(next(model.model.parameters()))
    )  # warmup

# Accumulated timings (pre-process, inference, NMS, DeepSORT) and frame counter
dt = [0.0] * 4
seen = 0

# Configure Detector parameters

In [11]:
# Configuration

# -- Inference behaviour
augment, visual = True, False       # augmented inference on; feature visualisation off by default

# -- Detection filtering / NMS
conf_thres, iou_thres = 0.3, 0.5    # object confidence threshold; IOU threshold for NMS
classes = 0                         # keep class 0 only (person)
agnostic_nms = True                 # class-agnostic NMS
max_det = 1000                      # maximum number of detections per image

# -- Outputs
save_txt = save_vid = True          # write the text results and the annotated video

# Iterate across images stored in pytorch dataloader and perform detection and tracking

In [12]:
# Detection + tracking loop.
# For every frame yielded by `dataset`: pre-process, run the YOLO model,
# apply NMS, pass the detections to DeepSORT, draw the tracked boxes, then
# optionally display the frame, append MOT-format rows to `txt_path` and write
# the annotated frame to the output video.

# Start each run without an open writer so re-executing this cell creates a
# fresh output video instead of reusing a writer that was already released.
vid_path, vid_writer = None, None
vid_cap = None            # stays None if the dataset yields nothing
stop_requested = False    # set when 'q' is pressed in the preview window

try:
    with torch.no_grad():
        for frame_idx, (path, img, im0s, vid_cap, s) in enumerate(dataset):

            # Pre-process: numpy -> tensor on device, scale to [0, 1], add batch dim
            t1 = time_sync()
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            t2 = time_sync()
            dt[0] += t2 - t1

            # Inference
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visual else False
            pred = model(img, augment=augment, visualize=visualize)
            t3 = time_sync()
            dt[1] += t3 - t2

            # Apply NMS
            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
            dt[2] += time_sync() - t3

            # Process detections
            for det in pred:  # detections per image
                seen += 1
                p, im0 = Path(path), im0s.copy()
                save_path = str(save_dir / p.name)  # im.jpg, vid.mp4, ...
                s += '%gx%g ' % img.shape[2:]  # print string

                # The original passed `pil=not ascii`, which negates the builtin
                # `ascii` function and is therefore always False; say so explicitly.
                annotator = Annotator(im0, line_width=2, pil=False)

                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(
                        img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    xywhs = xyxy2xywh(det[:, 0:4])
                    confs = det[:, 4]
                    clss = det[:, 5]

                    # pass detections to deepsort
                    t4 = time_sync()
                    outputs = deepsort.update(xywhs.cpu(), confs.cpu(), clss.cpu(), im0)
                    t5 = time_sync()
                    dt[3] += t5 - t4

                    # draw boxes for visualization
                    if len(outputs) > 0:
                        mot_lines = []
                        # NOTE(review): tracks are paired with detection confidences
                        # purely by position; confirm that deepsort.update() returns
                        # tracks in detection order, otherwise the label confidence
                        # may belong to a different box.
                        # `track` (not `output`) so the global output directory
                        # string is not overwritten; `track_id` avoids shadowing `id`.
                        for track, conf in zip(outputs, confs):
                            bboxes = track[0:4]
                            track_id = track[4]
                            track_cls = track[5]

                            c = int(track_cls)  # integer class
                            label = f'ID:{track_id} ({conf:.2f})'
                            annotator.box_label(bboxes, label, color=colors(c, True))

                            if save_txt:
                                # to MOT format
                                bbox_left = track[0]
                                bbox_top = track[1]
                                bbox_w = track[2] - track[0]
                                bbox_h = track[3] - track[1]
                                mot_lines.append(('%g ' * 10 + '\n') % (frame_idx + 1, track_id, bbox_left,  # MOT format
                                                                        bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))

                        # Write MOT compliant results to file: one append per frame
                        # instead of reopening the file for every track
                        if mot_lines:
                            with open(txt_path, 'a') as f:
                                f.writelines(mot_lines)

                    LOGGER.info(f'{s}Done. YOLO:({t3 - t2:.3f}s), DeepSort:({t5 - t4:.3f}s)')

                else:
                    deepsort.increment_ages()
                    LOGGER.info('No detections')

                # Stream results
                im0 = annotator.result()
                if show_vid:
                    cv2.imshow(str(p), im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        # Leave the loops normally so the cleanup below still runs
                        stop_requested = True
                        break

                # Save results (image with detections)
                if save_vid:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]

                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer.write(im0)

            if stop_requested:
                break
finally:
    # Release resources even when the loop is interrupted or raises. Each
    # release is guarded because the capture is absent for image input or an
    # empty dataset, and the writer only exists when save_vid produced one.
    if vid_cap is not None:
        vid_cap.release()
    if isinstance(vid_writer, cv2.VideoWriter):
        vid_writer.release()
    if show_vid:
        cv2.destroyAllWindows()

video 1/1 (1/1078) /media/user/New Volume/MTech/ITSS/WildTrack_dataset/cam7_short2.mp4: 384x640 35 persons, Done. YOLO:(0.044s), DeepSort:(0.074s)
video 1/1 (2/1078) /media/user/New Volume/MTech/ITSS/WildTrack_dataset/cam7_short2.mp4: 384x640 34 persons, Done. YOLO:(0.044s), DeepSort:(0.081s)
video 1/1 (3/1078) /media/user/New Volume/MTech/ITSS/WildTrack_dataset/cam7_short2.mp4: 384x640 35 persons, Done. YOLO:(0.038s), DeepSort:(0.074s)
video 1/1 (4/1078) /media/user/New Volume/MTech/ITSS/WildTrack_dataset/cam7_short2.mp4: 384x640 40 persons, Done. YOLO:(0.035s), DeepSort:(0.085s)
video 1/1 (5/1078) /media/user/New Volume/MTech/ITSS/WildTrack_dataset/cam7_short2.mp4: 384x640 36 persons, Done. YOLO:(0.033s), DeepSort:(0.078s)
video 1/1 (6/1078) /media/user/New Volume/MTech/ITSS/WildTrack_dataset/cam7_short2.mp4: 384x640 36 persons, Done. YOLO:(0.032s), DeepSort:(0.080s)
video 1/1 (7/1078) /media/user/New Volume/MTech/ITSS/WildTrack_dataset/cam7_short2.mp4: 384x640 35 persons, Done. YOLO