In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import copy
import random
from tqdm import tqdm
import torch
import torchvision
from torchvision import datasets
import torchvision.transforms as transforms


In [None]:
%%sh
# Download the data - only fetched when no non-empty local copy exists yet,
# so re-running the whole notebook does not hit the network again.
# (-s instead of -e: a failed wget can leave an empty file behind.)
if [ ! -s image_me.jpg ]; then
    wget --no-verbose --output-document=image_me.jpg https://github.com/chrirupp/cv_course/raw/main/data/image_me.jpg
fi

In [None]:
class Visualizer():
    """Small helper around a grid of matplotlib subplots.

    The grid is always stored as a 2-D array (``self.axs``), so every
    ``add_*`` method addresses its target axes as ``(i, j)`` = (row, column),
    even for a 1x1 figure.
    """

    def __init__(self, num_rows=1, num_cols=1, figsize=(5,5), axis_off=True, title='', tight=False, cm=None):
        """Create the figure and its ``num_rows`` x ``num_cols`` axes.

        Args:
            num_rows: number of subplot rows.
            num_cols: number of subplot columns.
            figsize: figure size forwarded to ``plt.subplots``.
            axis_off: if True, remove the x/y ticks from every subplot.
            title: figure-level title (suptitle).
            tight: if True, set the top of the subplot area to 0.88.
            cm: optional colormap passed to ``plt.set_cmap``. Note that this
                is a pyplot-level setting, not one local to this figure.
        """
        # squeeze=False keeps self.axs 2-D so axs[i, j] always works
        self.fig, self.axs = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
        # remove ticks (on this figure's axes, not on whatever is "current")
        if axis_off:
            plt.setp(self.fig.get_axes(), xticks=[], yticks=[])
        # set colormap
        if cm is not None:
            plt.set_cmap(cm)
        # set supertitle
        self.fig.suptitle(title)
        if tight:
            self.fig.subplots_adjust(top=0.88)

    def add_image_subplot(self, i, j, image, normalize=False, title_str=''):
        """Show ``image`` in subplot ``(i, j)``.

        Images with three dimensions have their last axis reversed before
        display (BGR -> RGB), so colour images must be passed in BGR order.
        With ``normalize=True`` the image is first rescaled via
        ``normalize_image``.
        """
        if normalize:
            image = self.normalize_image(image)
        if len(image.shape) == 3:
            #BGR -> RGB
            image = image[:, :, ::-1]
        ax = self.axs[i, j]
        ax.imshow(image)
        ax.set_title(title_str)

    def add_stem_subplot(self, i, j, x, y, title_str=''):
        """Draw a stem plot of ``y`` over ``x`` in subplot ``(i, j)``."""
        ax = self.axs[i, j]
        ax.stem(x, y)
        ax.set_title(title_str)

    def add_subplot(self, i, j, data, title_str=''):
        """Draw a line plot of ``data`` in subplot ``(i, j)``."""
        ax = self.axs[i, j]
        ax.plot(data)
        ax.set_title(title_str)

    def add_bar_subplot(self, i, j, x, y, title_str=''):
        """Draw a bar chart of ``y`` over ``x`` in subplot ``(i, j)``."""
        ax = self.axs[i, j]
        ax.bar(x, y)
        ax.set_title(title_str)

    def add_scatter_subplot_with_labels(self, i, j, data, labels, legend=None, title_str=''):
        """Scatter ``data[:, 0]`` vs ``data[:, 1]`` coloured by ``labels``.

        Args:
            i, j: row / column of the target subplot.
            data: array indexed as ``data[:, 0]`` (x) and ``data[:, 1]`` (y).
            labels: per-point values used as colours (``c=`` of scatter).
            legend: optional list of legend entries, one per scatter legend
                handle.
            title_str: subplot title.
        """
        ax = self.axs[i, j]
        scatter = ax.scatter(data[:,0], data[:,1], c=labels)
        scatter.set_cmap('jet')
        if legend is not None:
            # Attach the legend to the subplot that holds the scatter;
            # plt.legend would put it on pyplot's "current" axes instead.
            handles = scatter.legend_elements()[0]
            ax.legend(handles=handles, labels=legend)
        ax.set_title(title_str)

    def add_scatter_subplot_with_txt(self, i, j, data, txt, title_str=''):
        """Scatter ``data`` and annotate point ``k`` with ``txt[k]``."""
        ax = self.axs[i, j]
        ax.scatter(data[:,0], data[:,1])
        # distinct loop name so the ``txt`` argument is not shadowed
        for idx, point_text in enumerate(txt):
            ax.annotate(point_text, (data[idx,0], data[idx,1]))
        ax.set_title(title_str)

    @staticmethod
    def normalize_image(image):
        """Rescale ``image`` linearly to the range [0, 1] as float64.

        The minimum is mapped to 0 and the maximum to 1. A constant image
        (maximum equal to minimum) is returned as all zeros rather than
        being divided by zero.
        """
        img = np.asarray(image, dtype=np.float64)
        img = img - img.min()
        peak = img.max()
        if peak > 0:
            img = img / peak
        return img

In [None]:
# load image
img = cv2.imread('image_me.jpg')
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None
    raise FileNotFoundError("could not read 'image_me.jpg' - run the download cell first")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# detect people with HOG
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
boxes, weights = hog.detectMultiScale(img, winStride=(8,8), padding=(32,32), scale=1.05)
# grad / angles are only placeholder output arguments here; both names are
# rebound to the arrays returned by computeGradient
grad = gray.copy()
angles = gray.copy()
grad, angles = hog.computeGradient(img, grad, angles)
print(grad.shape)

# draw boxes
for (x, y, w, h) in boxes:
    cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)

# visualize
vis = Visualizer(1, 1, figsize=(10,10), axis_off=True)
vis.add_image_subplot(0, 0, img, title_str='HOG')

# extract crop from detection
if len(boxes) == 0:
    # fail with a clear message instead of an IndexError on boxes[0]
    raise RuntimeError('HOG detector found no person in image_me.jpg')
vis = Visualizer(1, 3, figsize=(10,10), axis_off=True)

x, y, w, h = boxes[0]
# Clamp the box to the image: a detection can reach past the border, and a
# negative start index would make the slices below wrap around.
# Assumes grad has the same height/width as gray - TODO confirm.
img_h, img_w = gray.shape
x0, y0 = max(x, 0), max(y, 0)
x1, y1 = min(x + w, img_w), min(y + h, img_h)
crop = gray[y0:y1, x0:x1]
grad = grad[y0:y1, x0:x1]
# NOTE(review): this treats the two channels of grad as (x, y) gradient
# components - confirm against the OpenCV HOGDescriptor documentation.
hue = (np.arctan2(grad[:, :, 1], grad[:, :, 0]) + np.pi) / (2 * np.pi) * 180
# convert to color hsv: hue encodes the angle, saturation and value are maxed
hsv = np.full(hue.shape + (3,), 255, dtype=np.uint8)
# clip to hsv range (8-bit OpenCV hue is 0..179)
hsv[:, :, 0] = np.clip(hue, 0, 179).astype(np.uint8)
# Convert to BGR, not RGB: add_image_subplot reverses the channel order of
# every colour image before display.
angles = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
# resize to 64x128
crop = cv2.resize(crop, (64, 128))
grad = cv2.resize(grad, (64, 128))
angles = cv2.resize(angles, (64, 128))

vis.add_image_subplot(0, 0, np.tile(crop[:,:,None], (1,1,3)), title_str='Crop')
vis.add_image_subplot(0, 1,grad[:,:,0]**2 + grad[:,:,1]**2, title_str='Gradient')
vis.add_image_subplot(0, 2, angles, title_str='Angles')



In [None]:
# selective search for object proposals
# work on the image downscaled to 50% in both directions
img = cv2.resize(cv2.imread('image_me.jpg'), (0,0), fx=0.5, fy=0.5)

# run the "fast" selective search mode on the downscaled image
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(img)
ss.switchToSelectiveSearchFast()
rects = ss.process()

# draw every proposal as a thin green box on a copy of the image
output = img.copy()
box_color = (0, 255, 0)
for rect in rects:
    x, y, w, h = rect
    top_left = (x, y)
    bottom_right = (x+w, y+h)
    cv2.rectangle(output, top_left, bottom_right, box_color, 1)

# visualize
title = f'Selective Search {len(rects)} proposals'
vis = Visualizer(1, 1, figsize=(10,10), axis_off=True)
vis.add_image_subplot(0, 0, output, title_str=title)

In [None]:
# Take the model weights and the class names from the SAME weights object,
# so the label list always belongs to the network that is loaded.
resnet_weights = torchvision.models.resnet.ResNet50_Weights.DEFAULT
model = torch.hub.load('pytorch/vision', 'resnet50', weights=resnet_weights)
class_names = resnet_weights.meta["categories"]

# half-resolution image and its selective-search proposals
img = cv2.imread('image_me.jpg')
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None
    raise FileNotFoundError("could not read 'image_me.jpg' - run the download cell first")
img = cv2.resize(img, (0,0), fx=0.5, fy=0.5)
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(img)
ss.switchToSelectiveSearchFast()
rects = ss.process()

In [None]:
# classify proposals
model.eval()
output = img.copy()
# OpenCV images are BGR; ToPILImage takes the array channels as they are and
# the normalisation constants below are in RGB order, so convert once up front.
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_h, img_w = img.shape[:2]
transform = transforms.Compose([transforms.ToPILImage(),
                                transforms.Resize((224,224)),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
for rect in tqdm(rects):
    # plain Python ints for the slicing and the OpenCV drawing calls
    x, y, w, h = (int(v) for v in rect)
    # find square crop: side s, centred on the proposal
    s = max(w, h)
    sx = x + (w - s)//2
    sy = y + (h - s)//2
    # skip proposals whose square does not fit inside the image on any side
    # (a partially outside square would be silently truncated by slicing)
    if sx < 0 or sy < 0 or sx + s > img_w or sy + s > img_h:
        continue
    # crop the full s x s square, not just w x h from the shifted origin
    crop = img_rgb[sy:sy+s, sx:sx+s]
    batch = transform(crop)[None, :, :, :]
    with torch.no_grad():
        out = model(batch)
    probs = torch.nn.functional.softmax(out[0], dim=0)
    prob, predicted = torch.max(probs, 0)
    # keep confident predictions only
    if prob.item() < 0.5:
        continue
    print("found")
    label = class_names[predicted.item()]
    # draw the original proposal box (not the shifted square origin)
    cv2.rectangle(output, (x, y), (x+w, y+h), (0, 255, 0), 1)
    cv2.putText(output, label, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

# visualize
vis = Visualizer(1, 1, figsize=(10,10), axis_off=True)
vis.add_image_subplot(0, 0, output, title_str='Classified proposals')
