In [None]:
import os, sys
import time
import copy
import math

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import torch
import torchvision
import cv2

sys.path.append('..')

In [None]:
# Collect the model names returned by torchvision's model registry.
list_models = torchvision.models.list_models()

# Bare last expression so the notebook renders the list as the cell output.
list_models

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs, device):
    """Train ``model`` and print the mean per-sample loss of every phase.

    Every epoch runs a ``'train'`` phase (gradients enabled, optimizer stepped
    after each batch) followed by a ``'val'`` phase (gradients disabled, no
    parameter update).

    Args:
        model: module called as ``model(inputs)``; switched to ``train()`` or
            ``eval()`` mode depending on the phase.
        dataloaders: mapping with the keys ``'train'`` and ``'val'``; each
            value is iterated to obtain ``(inputs, labels)`` batches.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor. The loss is weighted by the batch size below, which
            assumes the criterion averages over the batch.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` are called.
        num_epochs: number of epochs to run.
        device: device the batches are moved to before the forward pass.

    Returns:
        The ``model`` object that was passed in (trained in place).
    """
    since = time.time()

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            # Count the samples that were really iterated instead of trusting
            # len(dataset): iterable datasets may not define __len__ at all,
            # or may yield fewer items than they report.
            n_samples = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Track history only in the training phase.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                n_samples += batch_size

            # An empty loader has no meaningful loss; report NaN rather than
            # dividing by zero.
            epoch_loss = running_loss / n_samples if n_samples else float('nan')

            print('{} Loss: {:.4f}'.format(phase, epoch_loss))

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    return model

In [None]:
from src.capture import LazyCapture

def random_load(path):
    """Build a ``LazyCapture`` that serves the frames of ``path`` in random order.

    ``path`` is handed to ``cv2.VideoCapture`` unchanged; per the original
    note it can be either a '.avi' file or a wildcard for '.jpeg' images.

    The frame count, width and height are read once from the capture's
    properties (the channel count is fixed to 3), and one random permutation of
    the frame indices is drawn. Every call to the ``frames`` callable handed to
    ``LazyCapture`` replays that same permutation (or its reverse).

    Raises:
        Exception: if the first frame of ``path`` cannot be read.
    """
    probe = cv2.VideoCapture(path)
    try:
        ret, _ = probe.read()
        if not ret:
            raise Exception("Couldn't read video file: " + path)
        length = int(probe.get(cv2.CAP_PROP_FRAME_COUNT))
        W = int(probe.get(cv2.CAP_PROP_FRAME_WIDTH))
        H = int(probe.get(cv2.CAP_PROP_FRAME_HEIGHT))
        C = 3
    finally:
        # Release the probe handle on both the success and the error path.
        probe.release()

    perm = list(np.random.permutation(np.arange(length)))

    def frames(reverse=False):
        """Return a generator of ``(index, frame)`` pairs in permutation order."""
        def _read(order):
            cap = cv2.VideoCapture(path)
            try:
                for i in order:
                    cap.set(cv2.CAP_PROP_POS_FRAMES, i)
                    # NOTE(review): the read status is ignored, so a failed
                    # read yields whatever `frame` cap.read() returned.
                    _, frame = cap.read()
                    yield i, frame
            finally:
                # Also runs when the consumer stops early and the generator is
                # closed, so the capture handle is not leaked.
                cap.release()
        return _read(reversed(perm) if reverse else perm)

    return LazyCapture(length, W, H, C, frames)

In [None]:
def capture_size(cap, dtype=np.int32):
    """Return ``[cap.W(), cap.H()]`` as a 1-D NumPy array of the given dtype."""
    width = cap.W()
    height = cap.H()
    return np.asarray((width, height), dtype=dtype)

In [None]:
import itertools as it
class FramesDataset(torch.utils.data.IterableDataset):
    """Iterable dataset of ``(frame, label)`` pairs drawn from a capture.

    Args:
        capture: object whose ``frames()`` method yields ``(index, frame)``
            pairs. Each frame is transposed with axes ``(2, 0, 1)``, cast to
            float32 and divided by 255 (i.e. it is treated as a channel-last
            image with values on a 0-255 scale).
        labels: mapping from frame index to the label of that frame; frames
            whose index is not a key are skipped.
        length: optional cap on the number of pairs yielded per iteration;
            ``None`` means no cap.
    """

    def __init__(self, capture, labels, length=None):
        self.capture = capture
        self.labels = labels
        self.length = length

    def __iter__(self):
        produced = 0
        for i, frame in self.capture.frames():
            if self.length is not None and produced >= self.length:
                break
            if i not in self.labels:
                continue
            channel_first = np.transpose(frame, axes=(2, 0, 1)).astype(np.float32)
            image = torch.from_numpy(channel_first) / 255.0
            # torch.tensor copies the data and makes the float32 dtype explicit
            # (the legacy torch.Tensor constructor is ambiguous for int input).
            label = torch.tensor(self.labels[i], dtype=torch.float32)
            yield image, label
            produced += 1

    def __len__(self):
        # Upper bound on the number of yielded pairs: every labelled frame,
        # limited by `length` when a cap was requested.
        n_labelled = len(self.labels)
        if self.length is None:
            return n_labelled
        return min(self.length, n_labelled)

In [None]:
from src.labels import load_labels, l2arr, larr2i

In [None]:
# Load two recordings ("part2" and "part3") as randomly ordered captures,
# together with the label file that shares each recording's base name.
cap2 = random_load('../data/pw1_A1_0315_part2.avi')
lab2 = load_labels('../data/pw1_A1_0315_part2.txt')

cap3 = random_load('../data/pw1_A1_0315_part3.avi')
lab3 = load_labels('../data/pw1_A1_0315_part3.txt')

In [None]:
from src.cleaning import remove_outside_petri, remove_borders

In [None]:
# Same preprocessing for both captures:
#   1. remove_outside_petri, given the frame centre (W//2, H//2) and the pair
#      (W//2 - 10, H//2 - 20) — presumably the extents of the dish mask;
#      confirm against src.cleaning.
#   2. remove_borders(10, hard=True), with the resulting frame shape declared
#      as 20 px smaller in both W and H.
#   3. Labels: converted with l2arr, shifted by 10.0 and divided by
#      capture_size(cap) evaluated after the two apply calls.
# NOTE(review): this cell mutates cap2/cap3 and rebinds lab2/lab3, so running
# it twice applies the cleaning and the label scaling twice.
cap2.apply(remove_outside_petri((cap2.W() // 2, cap2.H() // 2), ((cap2.W() // 2) - 10, (cap2.H() // 2) - 20)))
cap2.apply(remove_borders(10, hard=True), shape=(cap2.W() - 20, cap2.H() - 20, 3))
lab2 = {k: (l2arr(v) - 10.0) / capture_size(cap2, np.float32) for k, v in lab2.items()}

cap3.apply(remove_outside_petri((cap3.W() // 2, cap3.H() // 2), ((cap3.W() // 2) - 10, (cap3.H() // 2) - 20)))
cap3.apply(remove_borders(10, hard=True), shape=(cap3.W() - 20, cap3.H() - 20, 3))
lab3 = {k: (l2arr(v) - 10.0) / capture_size(cap3, np.float32) for k, v in lab3.items()}

In [None]:
from src.debug import print_frame

In [None]:
# Test: show the first served frame of each capture with its label drawn on it.
# The value returned by frame(0, index=False) is indexed as [0] and [1]; [0] is
# used as the key into the label dict and [1] is passed on as the image.
# Labels are multiplied by the capture size again before being passed to larr2i.
i2 = cap2.frame(0, index=False)
print_frame()(i2[0], i2[1], (larr2i(lab2[i2[0]] * capture_size(cap2, np.float32)), None))
i3 = cap3.frame(0, index=False)
print_frame()(i3[0], i3[1], (larr2i(lab3[i3[0]] * capture_size(cap3, np.float32)), None))

### Transfer learning

In [None]:
def requires_grad_false(model):
    """Freeze ``model``: turn off gradient tracking for all of its parameters.

    The model is modified in place and returned so the call can be chained.
    """
    for weight in model.parameters():
        weight.requires_grad_(False)
    return model

In [None]:
# Train on the part-3 recording and validate on the part-2 recording, so the
# validation loss is not measured on frames the model was trained on. The
# visualisation cell further down rescales validation outputs with cap2's size.
train_loader = torch.utils.data.DataLoader(FramesDataset(cap3, lab3, length=400), batch_size=4)
val_loader = torch.utils.data.DataLoader(FramesDataset(cap2, lab2, length=100), batch_size=4)

#### Test 1: VGG19 with a 2-output regression head

In [None]:
# ImageNet-pretrained VGG19. `pretrained=True` is deprecated in favour of the
# weights enum; IMAGENET1K_V1 is the checkpoint that flag used to select.
model = torchvision.models.vgg19(weights=torchvision.models.VGG19_Weights.IMAGENET1K_V1)
# Freeze every pretrained parameter; only the new head below is trained.
model = requires_grad_false(model)

# Rebuild the classifier with its last layer replaced by a fresh 2-output
# linear layer (new layers require gradients by default).
n_feat = model.classifier[6].in_features
features = list(model.classifier.children())[:-1]
features.extend([torch.nn.Linear(n_feat, 2)])
model.classifier = torch.nn.Sequential(*features)

model = model.to(device)

criterion = torch.nn.MSELoss()
# Hand the optimizer only the parameters that are actually trainable.
optimizer = torch.optim.Adam([p for p in model.parameters() if p.requires_grad])

In [None]:
# Print the module structure to check the replaced classifier head.
print(model)

In [None]:
# Train for 2 epochs using the train/val loaders defined above.
model = train_model(model, {"train": train_loader, "val": val_loader}, criterion, optimizer, 2, device)

In [None]:
# Visual check: run the model on the first 10 validation batches and show each
# frame with the true and the predicted position.
model.eval()
with torch.no_grad():
    for inputs, labels in it.islice(val_loader, 10):
        inputs = inputs.to(device)
        labels = labels.to(device)

        pred = model(inputs)

        for i in range(inputs.size()[0]):
            # Undo the dataset's scaling/transposition: x255, cast to uint8,
            # move the channel axis back to the end.
            frame = np.transpose((inputs[i].cpu().numpy() * 255.0).astype(np.uint8), axes=(1, 2, 0))
            # NOTE(review): positions are rescaled with cap2's size; make sure
            # this matches the capture that val_loader draws its frames from.
            true_pos = larr2i(labels[i].cpu().numpy() * capture_size(cap2, dtype=np.float32))
            pred_pos = larr2i(pred[i].cpu().numpy() * capture_size(cap2, dtype=np.float32))
            print(true_pos, pred_pos)
            print_frame(False)(-1, frame, (true_pos, pred_pos))

### Tests: pretrained torchvision detection models

In [None]:
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image

#### Faster-RCNN

In [None]:
# Step 1: Initialize model with the best available weights
# NOTE: this rebinds `weights` and `model`; the VGG model trained above is no
# longer reachable through `model` after this cell.
weights = torchvision.models.detection.FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
# box_score_thresh=0.9 is forwarded to the detector constructor.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.9)
model.eval()

In [None]:
# Step 2: Initialize the inference transforms
preprocess = weights.transforms()
# `i3` is the (index, frame) pair returned by cap3.frame(0, index=False), so
# the image is i3[1]. The transforms and draw_bounding_boxes need a
# channel-first tensor; this assumes the frame is a channel-last array with
# 0-255 values, as FramesDataset also does — TODO confirm.
# NOTE(review): frames read through OpenCV are normally BGR while the
# pretrained detector expects RGB; confirm whether a channel swap is needed.
frame3 = torch.from_numpy(np.ascontiguousarray(i3[1], dtype=np.uint8)).permute(2, 0, 1).contiguous()
# Step 3: Apply inference preprocessing transforms
batch = [preprocess(frame3)]
# Step 4: Use the model and visualize the prediction
with torch.no_grad():  # inference only, no autograd graph needed
    prediction = model(batch)[0]
labels = [weights.meta["categories"][int(i)] for i in prediction["labels"]]
box = draw_bounding_boxes(frame3, boxes=prediction["boxes"],
                          labels=labels,
                          colors="red",
                          width=4, font_size=30)
im = to_pil_image(box.detach())
im.show()

#### Keypoint R-CNN

In [None]:
# Step 1: Initialize model with the best available weights
# `.DEFAULT` selects a concrete checkpoint; the enum class itself is not a
# valid `weights` value.
weights = torchvision.models.detection.KeypointRCNN_ResNet50_FPN_Weights.DEFAULT
# The original call ended in a dangling `box` (a syntax error).
# box_score_thresh=0.9 mirrors the Faster-RCNN cell — TODO confirm this is the
# argument that was intended.
model = torchvision.models.detection.keypointrcnn_resnet50_fpn(weights=weights, box_score_thresh=0.9)
model.eval()

### Old

##### Main

In [None]:
# NOTE(review): legacy cell. `cap`, `select_channel` and the initial `labels`
# are not defined in any cell visible here, and remove_outside_petri is called
# with three positional arguments, unlike the two-argument calls used earlier
# in this notebook — verify before re-running.
cap.apply(remove_outside_petri((cap.W() // 2, cap.H() //2), int(cap.W()*0.9) // 2, (int(cap.H()*0.9) // 2)))
# Keep channel 0 only, scaled by 1/255 as float32; the declared shape has one channel.
cap.apply(lambda i, f: (select_channel(0)(i, f) / 255.0).astype(np.float32), shape=(cap.W(), cap.H(), 1))

# Divide each label's first component by the capture width and its second by the height.
labels = {k: (v[0] / float(cap.W()), v[1] / float(cap.H()))  for k, v in labels.items()}

In [None]:
# Summary of the capture (length, width, height, channels) and the label count.
print("Info: ", cap.length(), cap.W(), cap.H(), cap.C())
print("Labels: ", len(labels))

In [None]:
# Legacy training loop that records the loss of every batch.
# NOTE(review): EPOCHS, loader, crit and opt are not defined in any cell
# visible here; `model` is whatever the most recently run cell bound it to.
full_loss_history = []
loss_history = []
for epoch in range(1, EPOCHS + 1):
    # Training mode for the whole epoch.
    model.train() 

    # Per-epoch batch losses; appended to full_loss_history after the epoch.
    loss_history = []
    batch_idx = 1
    for data, label in loader:
        data, label = data.to(device), label.to(device)

        pred = model(data)
        loss = crit(pred, label)

        opt.zero_grad()
        loss.backward()
        opt.step()

        loss_history.append(loss.item())

        # Progress line every 1000 batches.
        if batch_idx % 1000 == 0:
            print(
                f"Train Epoch: {epoch}-{batch_idx} batch_loss={loss.item():0.2e}"
            )
        batch_idx = batch_idx + 1

    full_loss_history.extend(loss_history)

In [None]:
# ===== Plot training curves =====
# One point per recorded batch loss, spread evenly over the range [0, EPOCHS).
n_train = len(full_loss_history)
t_train = EPOCHS * np.arange(n_train) / n_train
# NOTE(review): t_val is computed but not used in this cell; only the training
# curve is plotted.
t_val = np.arange(1, EPOCHS + 1)
plt.figure()
plt.plot(t_train, full_loss_history, label="Train")
plt.legend()
plt.xlabel("Epoch")
plt.ylabel("Loss")

##### Eval

In [None]:
from src.debug import print_frame

In [None]:
# Load the part-2 recording and its labels for evaluation, then apply the same
# steps as the legacy "Main" cell.
cap_e = LazyCapture.load('../data/pw1_A1_0315_part2.avi')
labels_e = load_labels('../data/pw1_A1_0315_part2.txt')

# NOTE(review): every dimension below is taken from `cap`, not from `cap_e`;
# this is only equivalent if both captures have the same width and height.
cap_e.apply(remove_outside_petri((cap.W() // 2, cap.H() //2), int(cap.W()*0.9) // 2, (int(cap.H()*0.9) // 2)))
cap_e.apply(lambda i, f: (select_channel(0)(i, f) / 255.0).astype(np.float32), shape=(cap.W(), cap.H(), 1))

labels_e = {k: (v[0] / float(cap.W()), v[1] / float(cap.H()))  for k, v in labels_e.items()}

In [None]:
# One frame per batch; no `length` cap is passed to the dataset.
loader_e = torch.utils.data.DataLoader(FramesDataset(cap_e, labels_e), batch_size=1)

In [None]:
# Show the first 10 evaluation frames with the true and predicted positions.
# NOTE(review): this cell does not call model.eval() itself.
with torch.no_grad():
    for data, label in it.islice(loader_e, 10):
        data, label = data.to(device), label.to(device)
        pred = model(data)

        # Undo the dataset's scaling/transposition for display.
        frame = np.transpose((data[0].cpu().numpy() * 255.0).astype(np.uint8), axes=(1, 2, 0))
        true_pos = (label[0].cpu().numpy() * np.array([cap.W(), cap.H()])).astype(np.int32)
        # NOTE(review): `label` is indexed with [0] above but `pred` is not, so
        # pred_pos keeps the batch axis while true_pos does not.
        pred_pos = (pred.cpu().numpy() * np.array([cap.W(), cap.H()])).astype(np.int32)
        print_frame(True)(0, frame, (true_pos, pred_pos))
        