# Import Libraries

In [1]:
import cv2
import matplotlib.pyplot as plt
import skimage.io
import numpy as np
import torch, torchvision
import PIL.Image
import os
from torchvision.transforms import functional as func
from torchvision import transforms, datasets
from torch.utils.data import Dataset, DataLoader

%matplotlib inline

In [2]:
# Root folder that holds the example inputs and receives every generated result.
results_path = 'D:/UDENAR/Electronic Engineering/Ninth Semester/SESCCA/Computer vision results/'
# Sample still image and sample clip used by the cells below.
image_path = results_path+'Example Images/1.jpg'
video_path = results_path+'Example Images/pedestrians_1.mp4'

# Run on the first CUDA GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

- Helper Functions

In [3]:
# Dataset wrapper so a list of video frames can be fed to a DataLoader
class FramesDataset(Dataset):
    """Map-style dataset over an indexable collection of items.

    Args:
        data: Object supporting ``len()`` and integer indexing.
        transform: Optional callable applied to an item before it is
            returned. A falsy value (e.g. ``None``) disables it.
    """
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __getitem__(self, idx):
        """Return item ``idx``, passed through ``transform`` when one is set."""
        # Index the backing store exactly once; the item is reused below.
        x = self.data[idx]
        if self.transform:
            x = self.transform(x)
        return x

    def __len__(self):
        """Return the number of items in the backing collection."""
        return len(self.data)

In [4]:
def get_video_frames(video_path):
    """Read every frame of a video into memory.

    Args:
        video_path: Path (or any source accepted by ``cv2.VideoCapture``).

    Returns:
        List of frames in the order ``VideoCapture.read`` produced them.
        The list is empty when the source cannot be opened or yields no
        frame (previously a single ``None`` entry was returned in that case).
    """
    vidcap = cv2.VideoCapture(video_path)
    images_list = []
    try:
        while True:
            success, image = vidcap.read()
            # Check the flag before storing: a failed read returns no frame.
            if not success:
                break
            images_list.append(image)
    finally:
        # Release the capture even if reading raises.
        vidcap.release()
    return images_list

In [5]:
def get_color_palette(num_colors):
    """Build a deterministic colour table with one RGB row per class index.

    Row ``i`` is ``i * [2**25-1, 2**15-1, 2**21-1]`` reduced modulo 255.

    Args:
        num_colors: Number of classes (rows) to generate.

    Returns:
        ``uint8`` NumPy array of shape ``(num_colors, 3)``.
    """
    base = torch.tensor([2**25-1, 2**15-1, 2**21-1])
    # Column vector of class ids so the product broadcasts to (num_colors, 3).
    class_ids = torch.arange(num_colors).unsqueeze(1)
    table = torch.remainder(class_ids * base, 255)
    return table.numpy().astype('uint8')

# Computer Vision Algorithms
## Object Detection
### YOLO
- Load the model

In [None]:
# Switch the working directory to the local yolov5 folder; the video and
# real-time cells below invoke `detect.py` by relative path.
%cd D:\UDENAR\Electronic Engineering\Ninth Semester\SESCCA\Models\Object Detection\yolov5

In [None]:
# Fetch the 'yolov5s' entry point from the ultralytics/yolov5 hub repository
# with pretrained weights.
yolo_repo, yolo_variant = 'ultralytics/yolov5', 'yolov5s'
model = torch.hub.load(yolo_repo, yolo_variant, pretrained=True)

- Apply YOLO algorithm to an image

In [None]:
# Run detection on the sample image.
results = model(image_path)
# Pass the folder by keyword: in YOLOv5 releases where `save`'s first
# positional parameter is `labels`, a positional path would be swallowed and
# the output would land in the default directory instead.
# `results_path` already ends with '/', so no extra separator is added.
results.save(save_dir=results_path+'YOLO Detections')
# Detections of the first (only) image as a DataFrame of xyxy boxes.
results.pandas().xyxy[0]

- Apply YOLO algorithm to a video

In [None]:
# Run the repository's detect.py script on the sample clip; `--project` sets the
# folder that receives the results. Requires the working directory to contain detect.py.
!python detect.py --source "D:\UDENAR\Electronic Engineering\Ninth Semester\SESCCA\Computer vision results\Example Images\pedestrians_1.mp4" --project "D:\UDENAR\Electronic Engineering\Ninth Semester\SESCCA\Computer vision results\YOLO Detections"

- Real-time detection

In [None]:
!python detect.py --source "rtsp://admin:EUISDZ@192.168.1.247" --project "D:\UDENAR\Electronic Engineering\Ninth Semester\SESCCA\Computer vision results\YOLO Detections"

## Semantic Segmentation
### DeepLabV3 model with a ResNet-101 backbone
- Load the model

In [None]:
# Release cached GPU memory held by PyTorch before loading another network.
torch.cuda.empty_cache()
model = torch.hub.load('pytorch/vision:v0.8.0', 'deeplabv3_resnet101', pretrained=True)
# Inference mode first, then move the weights to the selected device.
model = model.eval().to(device)
# Tensor conversion followed by per-channel normalisation.
preprocess_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
preprocess = transforms.Compose(preprocess_steps)
# One colour per class index, 21 classes.
colors = get_color_palette(21)

- Apply segmentation algorithm to an image

In [None]:
# Force three channels: the Normalize step is configured with 3-channel
# statistics and fails on RGBA or grayscale files.
input_image = PIL.Image.open(image_path).convert('RGB')
input_tensor = preprocess(input_image)
# The model was moved to `device`, so the batch always has to follow it.
input_batch = input_tensor.unsqueeze(0).to(device)
with torch.no_grad():
    output = model(input_batch)['out'][0]
# Per-pixel class index = argmax over the class dimension.
output_predictions = output.argmax(0)
# Class ids are categorical: nearest-neighbour is the only resampling filter
# that cannot invent labels that were never predicted.
r = PIL.Image.fromarray(output_predictions.byte().cpu().numpy()).resize(
    input_image.size, resample=PIL.Image.NEAREST)
# Map each class id to its colour, then expand to an RGB image.
r.putpalette(colors)
r = r.convert('RGB')
r.save(results_path+'Semantic Segmentation/output.jpg')
r = np.asarray(r)

# Input on top, colour-coded segmentation below.
plt.figure(figsize=(10, 15))
plt.subplot(211)
plt.imshow(input_image)
plt.subplot(212)
plt.imshow(r)
plt.show()

- Apply segmentation algorithm to a video

In [None]:
vidcap = cv2.VideoCapture(video_path)
# Fail fast: an unopened capture reports 0 frames, which would make the
# processing loop silently produce an empty output video.
if not vidcap.isOpened():
    raise IOError(f'Could not open video: {video_path}')
# Frame count as reported by the capture backend.
frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

In [None]:
# Half-precision switch: stays False here, and can only ever be True when the
# selected device is not the CPU.
half = False
half = half & (device.type != 'cpu')
half

In [None]:
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
# Size the writer from the source clip so the overlay keeps the clip's
# resolution and aspect ratio (this equals 1920x1080 for a Full-HD input).
frame_size = (int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter(results_path+'Semantic Segmentation/pedestrian_1.mp4', fourcc, 20.0, frame_size)
try:
    # Iterate over every reported frame; the `success` check below stops the
    # loop early if the reported count is larger than what can be decoded.
    for i in range(frames):
        success, img = vidcap.read()
        if not success:
            break
        if i%100==0:
            print(i)
        # OpenCV decodes to BGR, while the preprocessing statistics are given
        # in RGB order.
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input_tensor = preprocess(rgb)
        input_batch = input_tensor.unsqueeze(0).to(device)
        with torch.no_grad():
            output = model(input_batch)['out'][0]
        output_prediction = output.argmax(0)
        # Nearest-neighbour keeps class ids intact when resizing the label map.
        r = PIL.Image.fromarray(output_prediction.byte().cpu().numpy()).resize(
            frame_size, resample=PIL.Image.NEAREST)
        r.putpalette(colors)
        # VideoWriter expects BGR, so swap the channels back before writing.
        r = cv2.cvtColor(np.array(r.convert('RGB')), cv2.COLOR_RGB2BGR)
        out.write(r)
finally:
    # Close both handles even if inference fails part-way through.
    out.release()
    vidcap.release()

## Instance Segmentation
### Mask R-CNN

In [6]:
# Switch to the notebooks folder before the next cell imports `engine`, `utils`
# and `transforms` (presumably local modules that live here; confirm).
%cd C:\Users\usuario\OneDrive\Electronic Engineering\Ninth Semester\SESCCA\Codes\Me\SESCCA-Computer-vision\My Notebooks

C:\Users\usuario\OneDrive\Electronic Engineering\Ninth Semester\SESCCA\Codes\Me\SESCCA-Computer-Vision\My Notebooks


In [7]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T

- Helper functions

In [8]:
def preprocess_data(image, targets, device, output=False, th=0.5):
    """Turn a model-space image and its annotations into drawable objects.

    Args:
        image: Channels-first tensor. It is multiplied by 255 and cast to
            uint8, so values in the 0-1 range are assumed (TODO confirm).
        targets: Dict holding at least 'boxes' and 'masks'. When ``output``
            is True it must also hold 'labels' and 'scores', and 'masks' is
            indexed as a 4-D tensor whose second axis has one entry.
        device: Device on which the threshold tensor is created.
        output: True when ``targets`` is a raw prediction that still needs
            score filtering and mask binarisation.
        th: Threshold compared against both scores and mask values. Only
            read when ``output`` is True, so ``None`` is accepted otherwise.

    Returns:
        Tuple ``(image, targets, boxes, masks)``: a PIL image, the (possibly
        filtered) targets dict, the boxes as an int tensor on the CPU and the
        masks on the CPU.
    """
    image = image*255
    # Channels-first -> channels-last, the layout PIL.Image.fromarray expects.
    image = torch.movedim(image.to(torch.uint8), 0, -1)
    image = PIL.Image.fromarray(np.array(image.cpu()))
    if output:
        # Built only on this path: constructing it unconditionally raised for
        # th=None even though the threshold is never used for ground truth.
        th = torch.tensor([th], device=device)
        ids = torch.nonzero(torch.gt(targets['scores'], th))
        fields = ['boxes', 'labels', 'scores', 'masks']
        # `ids` has one column, so this indexing inserts a singleton axis
        # after the first dimension of every field.
        targets = {field: targets[field][ids] for field in fields}
        # Drop the two singleton axes and binarise the masks.
        targets['masks'] = torch.gt(targets['masks'][:, 0, 0, :, :], th)
        targets['masks'] = targets['masks'].to(torch.uint8)
        # Drop the singleton axis added by the indexing above.
        targets['boxes'] = targets['boxes'][:, 0, :]
    boxes = targets['boxes'].cpu().to(torch.int)
    masks = targets['masks'].cpu()

    return image, targets, boxes, masks

In [9]:
def get_img(images, targets, idx, device, th=None):
    """Draw the boxes and mask contours of one sample onto its image.

    Args:
        images: Indexable batch of image tensors.
        targets: Indexable batch of target/prediction dicts.
        idx: Index of the sample to render.
        device: Forwarded to ``preprocess_data``.
        th: ``None`` to draw ``targets`` as they are (ground truth); a number
            to treat them as predictions and filter them with that threshold.

    Returns:
        Tuple ``(targets, img)``: the targets dict returned by
        ``preprocess_data`` and the annotated image as a NumPy array (also
        when nothing was drawn).
    """
    output = th is not None
    img = images[idx]
    targets = targets[idx]
    if output:
        img, targets, boxes, masks = preprocess_data(img, targets, device,
                                                     output=True, th=th)
    else:
        # Leave `th` at its default: it is irrelevant for ground truth and
        # forwarding None is not accepted by every preprocess_data version.
        img, targets, boxes, masks = preprocess_data(img, targets, device)
    # Convert once so the return type does not depend on the object count.
    img = np.array(img)
    # Plain Python ints; the former CUDA tensor crashed on CPU-only machines.
    color = (255, 0, 0)
    # Add boxes and mask outlines
    for i in range(len(boxes)):
        # findContours needs an 8-bit single-channel image.
        mask = np.array(masks[i], dtype=np.uint8)
        contours,_ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                      cv2.CHAIN_APPROX_NONE)
        img = cv2.drawContours(img, contours, -1, color, 2)
        # OpenCV drawing functions expect native ints, not tensor scalars.
        x1, y1, x2, y2 = (int(v) for v in boxes[i])
        img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
    return targets, img

- Load the model

In [10]:
model_path = 'D:/UDENAR/Electronic Engineering/Ninth Semester/SESCCA/Models/Instance Segmentation/mask r-cnn 1.1'
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code; only load checkpoints from a trusted source.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only host.
model = torch.load(model_path, map_location=device)
model = model.to(device)
# Inference mode.
model.eval()
# Only tensor conversion is applied to Mask R-CNN inputs in this notebook.
mask_transforms = transforms.ToTensor()

- Apply Mask-RCNN to an image

In [None]:
# Load the sample image as 3-channel RGB and convert it to a tensor.
input_image = PIL.Image.open(image_path).convert('RGB')
input_tensor = mask_transforms(input_image)
# Add the batch dimension and move the batch to the selected device in one step.
input_batch = input_tensor.unsqueeze(0).to(device)
# Inference only: no gradients are needed.
with torch.no_grad():
    output = model(input_batch)

In [None]:
# Render the detections scoring above 0.5 on top of the first image of the batch.
_, pred_img = get_img(input_batch, output, 0, device, th=0.5)
fig, ax = plt.subplots(figsize=(10, 12))
ax.imshow(pred_img)
plt.show()

- Apply Mask-RCNN to a video

In [11]:
vidcap = cv2.VideoCapture(video_path)
# Fail fast: an unopened capture reports 0 frames, which would make the
# processing loop silently produce an empty output video.
if not vidcap.isOpened():
    raise IOError(f'Could not open video: {video_path}')
# Frame count as reported by the capture backend.
frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

In [12]:
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
# The annotated frames keep the source resolution, and VideoWriter drops
# frames whose size differs from the one it was opened with, so take the
# size from the capture instead of assuming 1920x1080.
frame_size = (int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter(results_path+'Instance Segmentation/pedestrian_1.mp4', fourcc, 20.0, frame_size)
try:
    for i in range(frames):
        if i%100==0:
            print(i)
        success, img = vidcap.read()
        if not success:
            break
        # OpenCV decodes to BGR; the single-image cell feeds the model RGB, so
        # convert here to give the network the same channel order.
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input_tensor = mask_transforms(rgb)
        input_batch = input_tensor.unsqueeze(0).to(device)
        with torch.no_grad():
            output = model(input_batch)
        _, pred_img = get_img(input_batch, output, 0, device, th=0.5)
        # Back to BGR for the writer.
        r = cv2.cvtColor(np.array(pred_img), cv2.COLOR_RGB2BGR)
        out.write(r)
finally:
    # Close both handles even if inference fails part-way through.
    out.release()
    vidcap.release()

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
