In [None]:
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2
import matplotlib.pyplot as plt 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import os
from tqdm.notebook import tqdm
import logging
import pickle
import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
import subprocess as sp
from torch.utils.data import Dataset, DataLoader
import copy
import shutil

In [None]:
# Colab-only step: mount Google Drive at /content/drive so that files stored
# there can be accessed through ordinary filesystem paths.
# force_remount=True asks Colab to mount again even if a mount already exists.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# --- Notebook-wide configuration -------------------------------------------

# Name of the ffmpeg executable used to decode the videos.
FFMPEG_BIN = "ffmpeg"

# Run torch on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Project root on the mounted Google Drive and the pickle caches stored in it.
WORKING_DIR = '/content/drive/MyDrive/ai/video_surveillance/'
PEOPLE_DATA_FILE = os.path.join(WORKING_DIR, 'datasets/people_data.pkl')
FRAME_DATA_FILE = os.path.join(WORKING_DIR, 'datasets/frame_data.pkl')

In [None]:
# Videos to process, keyed by their path relative to the datasets folder.
# Each value is [frame_count, [width, height]].
datasets_videos_fast = {
    video_path: [frame_count, [1920, 1080]]
    for video_path, frame_count in (
        ('train/train1_resized_fast.mp4', 79168),
        ('train/train2_fast.mp4', 120211),
        ('test/test_fast.mp4', 74760),
    )
}

# Предсказание для картинок

In [None]:
# Load the 'yolov5l6' model from the ultralytics/yolov5 torch.hub repository.
model = torch.hub.load('ultralytics/yolov5', 'yolov5l6')
# Report only class index 0 (presumably "person" in the model's label set — confirm).
model.classes = [0]
# Detection confidence setting (presumably the minimum score a box needs to be kept — confirm).
model.conf = 0.3

In [None]:
# Run the detector on every frame of every video and cache the detections.
# Resulting structure: people_data[dataset][frame_index] -> list of detection
# rows as returned by `model(frame).xyxy[0].tolist()`. Frames without any
# detection get no entry.
people_data = {}

for dataset, (frames_count, size_img) in datasets_videos_fast.items():
    if dataset not in people_data:
        people_data[dataset] = {}

    width, height = size_img
    frame_bytes = width * height * 3  # rgb24 -> 3 bytes per pixel

    # Ask ffmpeg to decode the video and stream raw RGB frames to stdout.
    command = [FFMPEG_BIN,
            '-i', os.path.join(WORKING_DIR, f'datasets/{dataset}'),
            '-f', 'image2pipe',
            '-pix_fmt', 'rgb24',
            '-vcodec', 'rawvideo', '-']
    pipe = sp.Popen(command, stdout=sp.PIPE)
    try:
        for frame_i in tqdm(range(frames_count)):
            raw_frame = pipe.stdout.read(frame_bytes)
            # A short read means the stream ended. Checking the byte count
            # (instead of the pixel values) keeps genuinely black frames from
            # being mistaken for end-of-stream and avoids scanning the frame
            # element by element in Python.
            if len(raw_frame) < frame_bytes:
                break
            frame = np.frombuffer(raw_frame, dtype='uint8').reshape((height, width, 3))

            preds = model(frame).xyxy[0]

            # Store only frames that produced at least one detection.
            if len(preds) > 0:
                people_data[dataset][frame_i] = preds.tolist()

            # Periodic checkpoint so a crash does not lose all progress.
            if frame_i % 1000 == 0:
                with open(PEOPLE_DATA_FILE, 'wb') as cache_file:
                    pickle.dump(people_data, cache_file)
    finally:
        # Always release the pipe and reap the ffmpeg process, even on error
        # or when fewer frames than the video contains were requested.
        pipe.stdout.close()
        pipe.terminate()
        pipe.wait()

    # Final save for this video.
    with open(PEOPLE_DATA_FILE, 'wb') as cache_file:
        pickle.dump(people_data, cache_file)

# Сохранение изображений для обучения yolo модели

In [None]:
# Export every annotated frame of the training videos as a JPEG named
# '<video prefix>_<frame index>_<number of detections>.jpg'.
images_dir = os.path.join(WORKING_DIR, 'datasets/human_detect/images')
# cv2.imwrite does not create missing directories, so make sure it exists.
os.makedirs(images_dir, exist_ok=True)

for dataset, annotations in people_data.items():
    # The test video is not used for building the training set.
    if 'test' in dataset:
        continue

    frames_count, size_img = datasets_videos_fast[dataset]
    width, height = size_img
    frame_bytes = width * height * 3  # rgb24 -> 3 bytes per pixel
    # e.g. 'train/train1_resized_fast.mp4' -> 'train1'
    prefix = dataset.split("/")[-1].split("_")[0]

    # Ask ffmpeg to decode the video and stream raw RGB frames to stdout.
    command = [ FFMPEG_BIN,
            '-i', os.path.join(WORKING_DIR, f'datasets/{dataset}'),
            '-f', 'image2pipe',
            '-pix_fmt', 'rgb24',
            '-vcodec', 'rawvideo', '-']
    pipe = sp.Popen(command, stdout=sp.PIPE)
    try:
        for frame_i in tqdm(range(frames_count)):
            # The frame must be read even when it is skipped below, otherwise
            # the stream position would drift out of sync with frame_i.
            raw_frame = pipe.stdout.read(frame_bytes)
            # A short read means the stream ended; testing the byte count
            # avoids treating an all-black frame as end-of-stream.
            if len(raw_frame) < frame_bytes:
                break
            if frame_i not in annotations:
                continue

            len_annotations = len(annotations[frame_i])
            if len_annotations == 0:
                continue

            frame = np.frombuffer(raw_frame, dtype='uint8').reshape((height, width, 3))
            img_name = f'{prefix}_{frame_i}_{len_annotations}.jpg'
            img_path = os.path.join(WORKING_DIR, f'datasets/human_detect/images/{img_name}')
            # ffmpeg delivers RGB while cv2.imwrite expects BGR channel order;
            # convert so the saved image does not have red and blue swapped.
            frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # cv2.imwrite signals failure through its return value only.
            if not cv2.imwrite(img_path, frame_bgr):
                raise OSError(f'Failed to write image {img_path}')
    finally:
        # Always release the pipe and reap the ffmpeg process.
        pipe.stdout.close()
        pipe.terminate()
        pipe.wait()

In [None]:
# Gather the full paths of all exported JPEGs in name order and split them:
# the first 20000 become the validation set, the remainder the training set.
images_dir = os.path.join(WORKING_DIR, 'datasets/human_detect/images')
images = sorted(
    os.path.join(images_dir, file_name)
    for file_name in os.listdir(images_dir)
    if '.jpg' in file_name
)

val_images, train_images = images[:20000], images[20000:]

In [None]:
def move_files_to_folder(list_of_files, destination_folder):
    """Move every file in ``list_of_files`` into ``destination_folder``.

    Args:
        list_of_files: iterable of paths of the files to move.
        destination_folder: directory that receives the files. It is created
            when missing; without this, ``shutil.move`` would rename each file
            *to* that path, so every file would overwrite the previous one.

    Raises:
        Whatever ``os.makedirs`` / ``shutil.move`` raise (e.g. ``OSError``,
        ``shutil.Error``). The original error is propagated unchanged so the
        failing file and the reason are visible, and so that interrupting the
        kernel is not masked as an unrelated assertion failure.
    """
    os.makedirs(destination_folder, exist_ok=True)
    for f in tqdm(list_of_files):
        shutil.move(f, destination_folder)

# Move each split into its own sub-folder of the images directory
# (training files first, then validation files).
for split_name, split_files in (('train', train_images), ('val', val_images)):
    move_files_to_folder(split_files, os.path.join(WORKING_DIR, f'datasets/human_detect/images/{split_name}'))

In [None]:
def _xyxy_to_yolo_row(box, img_width, img_height):
    """Convert one detection row to a YOLO label row.

    ``box`` starts with the corner coordinates ``x1, y1, x2, y2`` in pixels;
    any further elements are ignored. The result is
    ``[0, x_center, y_center, width, height]`` with all four values divided by
    the image size, and 0 as the class index of the single class.
    """
    left = box[0] / img_width
    top = box[1] / img_height
    box_width = box[2] / img_width - left
    box_height = box[3] / img_height - top
    return [0, left + box_width / 2, top + box_height / 2, box_width, box_height]


# Write one label file per exported image, next to the split the image is in.
train_images = os.listdir(os.path.join(WORKING_DIR, 'datasets/human_detect/images/train'))
val_images = os.listdir(os.path.join(WORKING_DIR, 'datasets/human_detect/images/val'))
for_images = train_images + val_images

# Sets make the per-frame membership checks constant-time; scanning the lists
# for every frame is quadratic in the number of images.
train_image_names = set(train_images)
known_image_names = train_image_names | set(val_images)

labels_train_dir = os.path.join(WORKING_DIR, 'datasets/human_detect/labels/train')
labels_val_dir = os.path.join(WORKING_DIR, 'datasets/human_detect/labels/val')
# open(..., 'w') does not create missing directories.
os.makedirs(labels_train_dir, exist_ok=True)
os.makedirs(labels_val_dir, exist_ok=True)

for dataset, all_annotations in people_data.items():
    frames_count, size_img = datasets_videos_fast[dataset]
    # e.g. 'train/train1_resized_fast.mp4' -> 'train1'
    prefix = dataset.split("/")[-1].split("_")[0]
    for frame_i in tqdm(range(frames_count)):
        if frame_i not in all_annotations:
            continue
        boxes = all_annotations[frame_i]
        len_annotations = len(boxes)
        img_name = f'{prefix}_{frame_i}_{len_annotations}.jpg'
        # Only frames that were actually exported as images get a label file.
        if img_name not in known_image_names:
            continue

        annotation_name = f'{prefix}_{frame_i}_{len_annotations}.txt'
        label_rows = [_xyxy_to_yolo_row(box, size_img[0], size_img[1]) for box in boxes]
        # One detection per line, values separated by single spaces.
        text = '\n'.join(' '.join(map(str, row)) for row in label_rows)

        labels_dir = labels_train_dir if img_name in train_image_names else labels_val_dir
        with open(os.path.join(labels_dir, annotation_name), 'w') as f:
            f.write(text)

# yolov5 из github

In [None]:
# Fetch the YOLOv5 repository into ./yolov5; its train.py script and data/
# folder are used by the following cells.
!git clone https://github.com/ultralytics/yolov5.git

Cloning into 'yolov5'...
remote: Enumerating objects: 12171, done.[K
remote: Counting objects: 100% (31/31), done.[K
remote: Compressing objects: 100% (29/29), done.[K
remote: Total 12171 (delta 16), reused 6 (delta 2), pack-reused 12140[K
Receiving objects: 100% (12171/12171), 12.60 MiB | 26.66 MiB/s, done.
Resolving deltas: 100% (8373/8373), done.


In [None]:
# Dataset description consumed by yolov5/train.py: where the training and
# validation images are, how many classes there are, and what they are called.
# NOTE(review): the image paths are relative ('../datasets/...'); whether they
# resolve to the folders filled above depends on the working directory —
# confirm before training.
yaml_lines = [
    'train: ../datasets/human_detect/images/train/',
    'val:  ../datasets/human_detect/images/val/',
    'nc: 1',
    'names: ["human"]',
]
data = '\n'.join(yaml_lines)

with open('yolov5/data/human_detect.yaml', 'w') as f:
    f.write(data)

In [None]:
# Training yolov5l6 turned out to take too long, so the yolov5l config and
# weights are used here instead.
!python yolov5/train.py --img 640 --cfg yolov5l.yaml --batch 12 --epochs 1 --data human_detect.yaml --weights yolov5l.pt --name yolo_human_detect