In [2]:
import numpy as np
from pycocotools.coco import COCO
from project_paths import *
import project_paths
import matplotlib.image as plt_image
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
from PIL import Image
import torch
import torch.utils.data
from torchvision import transforms, utils
import torchvision.transforms as T
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [12]:
# Scratch check: stacking three (3, 4) tensors along a new leading axis
# produces a (3, 3, 4) tensor.
a = np.array([[1, 0, 0, 0]] * 3)
b = torch.Tensor(a)
torch.stack((b, b, b)).shape

torch.Size([3, 3, 4])

In [2]:
class ToTensor(object):
    """Convert an image (PIL image or ndarray) into a channel-first int32 tensor."""

    def __call__(self, image):
        """Return ``image`` as a ``C x H x W`` tensor of dtype ``torch.int32``.

        Args:
            image: anything ``np.array`` accepts; either ``H x W x C`` or a
                2-D ``H x W`` single-channel image.

        Returns:
            torch.Tensor: int32 tensor laid out as ``C x H x W`` (``C == 1``
            for 2-D input).
        """
        np_image = np.array(image)
        if np_image.ndim == 2:
            # Single-channel images carry no channel axis; add one so the
            # axis swap below applies uniformly.
            np_image = np_image[:, :, np.newaxis]

        # numpy image: H x W x C  ->  torch image: C x H x W.
        # ndarray.transpose returns a new view rather than working in place,
        # so its result has to be kept.
        chw_image = np_image.transpose((2, 0, 1))

        return torch.as_tensor(chw_image, dtype=torch.int32)
    
def get_transform(train):
    """Build the image-only transform pipeline handed to ``VideoDataset``.

    Args:
        train: kept for interface compatibility. It currently does not change
            the pipeline (see the note in the body).

    Returns:
        A ``T.Compose`` that converts the PIL image into a float tensor.
    """
    # Named `pipeline` so it does not shadow the `transforms` module imported
    # from torchvision at the top of the notebook.
    pipeline = [
        # converts the image, a PIL image, into a PyTorch Tensor
        T.ToTensor(),
    ]
    # NOTE: T.RandomHorizontalFlip is deliberately not added for train=True.
    # VideoDataset applies this pipeline to the image alone, so a random flip
    # would mirror the pixels while leaving target["boxes"] untouched, and the
    # ground truth would no longer match the image. Flip augmentation needs a
    # transform that receives (image, target) and mirrors the box
    # x-coordinates as well.
    return T.Compose(pipeline)

class VideoDataset(torch.utils.data.Dataset):
    """Object-detection dataset built from per-video ambient / intensity / range frames.

    For every video id the frame file names are listed from the ambient
    directory and the same file name is looked up in the intensity and range
    directories. Annotations come from one COCO file per video.

    Frames are paired with COCO image ids by position: the i-th frame (in
    sorted file-name order) uses the i-th key of ``coco.imgs``.
    NOTE(review): this assumes the COCO images are listed in the same order as
    the sorted frame file names -- confirm against the annotation files.

    Args:
        root: stored on the instance as ``self.root``; the class itself does
            not read it (paths come from the ``*_path`` helpers).
        video_ids: ids of the videos to include. Defaults to ``[0, 1, 2]``.
        channels: indices into the stacked (ambient, intensity, range) planes
            to keep. Defaults to ``[0, 1, 2]``.
        transforms: optional callable applied to the PIL image only.

    Raises:
        ValueError: if a video's frame count differs from the number of images
            in its COCO file (positional pairing would then attach the wrong
            annotations to frames).
    """

    def __init__(self, root, video_ids=None, channels=None, transforms=None):
        self.transform = transforms
        self.root = root
        # Fresh lists per instance: nothing is shared with a default object or
        # with the caller's own list.
        self.video_ids = [0, 1, 2] if video_ids is None else list(video_ids)
        self.channels = [0, 1, 2] if channels is None else list(channels)

        # Parallel lists, one entry per frame across all videos.
        self.cocos = []
        self.ambient_paths = []
        self.intensity_paths = []
        self.range_paths = []
        self.local_img_ids = []

        for vid_id in self.video_ids:
            amb_root = ambient_path(vid_id)
            int_root = intensity_path(vid_id)
            rng_root = range_path(vid_id)
            coco = COCO(annotation_path(vid_id))

            img_ids = list(coco.imgs.keys())
            # os.listdir returns entries in arbitrary order; sort so that the
            # positional frame <-> image-id pairing is deterministic.
            frame_names = sorted(os.listdir(amb_root))
            if len(frame_names) != len(img_ids):
                raise ValueError(
                    "video {}: found {} frames in {!r} but {} images in the "
                    "COCO annotations".format(
                        vid_id, len(frame_names), amb_root, len(img_ids)
                    )
                )

            self.local_img_ids += img_ids
            for frame_name in frame_names:
                self.ambient_paths.append(os.path.join(amb_root, frame_name))
                self.intensity_paths.append(os.path.join(int_root, frame_name))
                self.range_paths.append(os.path.join(rng_root, frame_name))
                self.cocos.append(coco)

    def _get_images(self, idx):
        """Load frame ``idx`` from all three modalities and return the selected channels.

        Returns a PIL image, or the result of ``self.transform`` when a
        transform was supplied.
        """
        ambient_frame = plt_image.imread(self.ambient_paths[idx])
        intensity_frame = plt_image.imread(self.intensity_paths[idx])
        range_frame = plt_image.imread(self.range_paths[idx])
        # Collapse each modality to one plane (sum over axis 2 divided by 3,
        # i.e. the mean when the file has three colour channels) and stack the
        # planes as H x W x 3 in the order ambient, intensity, range.
        frame = np.stack(
            [
                ambient_frame.sum(axis=2) / 3,
                intensity_frame.sum(axis=2) / 3,
                range_frame.sum(axis=2) / 3,
            ],
            axis=2,
        )
        # NOTE(review): the * 255 scaling assumes imread returned floats in
        # [0, 1] -- confirm for the frame file format in use.
        pil_frame = (frame * 255).astype("uint8")
        # A single requested channel is indexed with a scalar so the result is
        # a 2-D array; several channels keep the trailing channel axis.
        channel_filter = self.channels if len(self.channels) > 1 else self.channels[0]
        channel_filtered_frame = pil_frame[:, :, channel_filter]
        pil_image = Image.fromarray(channel_filtered_frame)
        if self.transform:
            return self.transform(pil_image)

        return pil_image

    def __len__(self):
        """Total number of frames across all videos."""
        return len(self.ambient_paths)

    def get_coco_anns(self, idx):
        """Return ``(coco, anns)``: the COCO handle and annotation dicts for frame ``idx``."""
        coco = self.cocos[idx]
        coco_img_id = self.local_img_ids[idx]
        ann_ids = coco.getAnnIds(imgIds=coco_img_id)
        anns = coco.loadAnns(ann_ids)
        return coco, anns

    def __getitem__(self, idx):
        """Return ``(img, target)`` for frame ``idx``.

        ``target`` holds ``image_id``, ``boxes`` (``[N, 4]`` float32 in
        ``xmin, ymin, xmax, ymax`` form), ``labels``, ``area`` and ``iscrowd``.
        """
        img = self._get_images(idx)
        coco, anns = self.get_coco_anns(idx)
        boxes = []
        labels = []
        areas = []
        is_crowds = []
        for ann in anns:
            # COCO boxes are [x, y, width, height]; convert to corner form.
            xmin, ymin, width, height = ann["bbox"][:4]
            boxes.append([xmin, ymin, xmin + width, ymin + height])
            areas.append(ann["area"])
            labels.append(ann["category_id"])
            is_crowds.append(ann["iscrowd"])

        target = {}
        target["image_id"] = torch.tensor(idx)
        # Reshape explicitly so a frame without annotations yields a (0, 4)
        # tensor instead of a 1-D empty one.
        target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32).reshape(len(boxes), 4)
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
        target["area"] = torch.as_tensor(areas, dtype=torch.float32)
        target["iscrowd"] = torch.as_tensor(is_crowds, dtype=torch.int64)
        return img, target


    
    
# Demo dataset restricted to stacked planes 1 and 2, using the training
# transform pipeline.
v = VideoDataset(project_paths.video_images, channels=[1, 2], transforms=get_transform(True))

# Pull one sample and its raw COCO annotations for a visual sanity check.
img, trgt = v[200]
coco, anns = v.get_coco_anns(200)

# Optional visualisation (left disabled):
#   plt.imshow(img, cmap="gray")
#   coco.showAnns(anns, draw_bbox=True)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [3]:
# Show the first sample of the demo dataset as an (image, target) pair.
v[0]

(tensor([[[0.0314, 0.0314, 0.0000,  ..., 0.0314, 0.0000, 0.0039],
          [0.0392, 0.0235, 0.0471,  ..., 0.1020, 0.1333, 0.0353],
          [0.1373, 0.0314, 0.0902,  ..., 0.0000, 0.0314, 0.1098],
          ...,
          [0.3451, 0.4902, 0.4471,  ..., 0.2471, 0.4039, 0.4667],
          [0.3922, 0.4118, 0.0392,  ..., 0.3255, 0.5059, 0.5333],
          [0.3961, 0.4471, 0.3020,  ..., 0.3412, 0.3882, 0.4784]],
 
         [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0039, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0157, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]]),
 {'image_id': tensor(0),
  'boxes': tensor([[434.6000,  79.4000, 473.2000, 108.7100],
          [459.2000,  78.6500, 484.9400,  96.4500],
          [543.0600,  75.8200, 558

In [4]:
# Load a Faster R-CNN (ResNet-50 FPN backbone) model pre-trained on COCO.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the box classifier with a new one whose number of outputs
# (num_classes) matches this project.
# NOTE(review): the previous comment said "1 class (person) + background",
# which does not match the value 8. Presumably 7 object categories plus the
# background class -- confirm against the annotation categories.
num_classes = 8  # total number of output classes passed to the new head
# Number of input features expected by the existing classification layer.
in_features = model.roi_heads.box_predictor.cls_score.in_features
# Swap the pre-trained head for a freshly initialised one.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [41]:
# List the names exposed by torchvision.models.detection in the installed version.
dir(torchvision.models.detection)

['FasterRCNN',
 'KeypointRCNN',
 'MaskRCNN',
 'RetinaNet',
 'SSD',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_utils',
 'anchor_utils',
 'backbone_utils',
 'faster_rcnn',
 'fasterrcnn_mobilenet_v3_large_320_fpn',
 'fasterrcnn_mobilenet_v3_large_fpn',
 'fasterrcnn_resnet50_fpn',
 'generalized_rcnn',
 'image_list',
 'keypoint_rcnn',
 'keypointrcnn_resnet50_fpn',
 'mask_rcnn',
 'maskrcnn_resnet50_fpn',
 'retinanet',
 'retinanet_resnet50_fpn',
 'roi_heads',
 'rpn',
 'ssd',
 'ssd300_vgg16',
 'ssdlite',
 'ssdlite320_mobilenet_v3_large',
 'transform']

In [22]:
# Full three-channel dataset using the non-training transform pipeline.
dataset = VideoDataset(
    project_paths.video_images,
    transforms=get_transform(False),
)

data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=4,
    shuffle=False,
    num_workers=0,
    # Regroup a list of (image, target) samples into a list holding one tuple
    # of images and one tuple of targets, instead of stacking them.
    collate_fn=lambda samples: list(zip(*samples)),
)

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [29]:
# Fetch the first batch and report the shape of its first image.
batch_iter = iter(data_loader)
images, targets = next(batch_iter)
print(images[0].shape)

torch.Size([3, 128, 1024])


In [31]:
# Smoke test: one training-mode forward pass followed by one inference pass
# on the same batch.
images, targets = next(iter(data_loader))
images = list(images)
targets = [dict(t) for t in targets]

# Put the model in training mode explicitly. This cell finishes in eval mode,
# so without this a re-run would call model(images, targets) in eval mode.
model.train()
print("before output")
output = model(images, targets)
print("done")

# Inference pass: eval mode, and no autograd graph is needed for predictions.
model.eval()
x = images[0]
print(x.shape)
with torch.no_grad():
    predictions = model(images)

before output
done
torch.Size([3, 128, 1024])


In [32]:
from torch.optim.lr_scheduler import StepLR

# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Optimise only the parameters that require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# Multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = StepLR(optimizer, step_size=3, gamma=0.1)

num_epochs = 10

# NOTE(review): train_one_epoch, evaluate and data_loader_test are not defined
# in the visible part of this notebook. Presumably the first two come from the
# torchvision detection reference scripts (engine.py) -- confirm they are
# imported and that a test loader is created before running this cell.
for epoch in range(num_epochs):
    # one training epoch, logging every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # advance the learning-rate schedule
    lr_scheduler.step()
    # evaluate on the test loader
    evaluate(model, data_loader_test, device=device)

[{'boxes': tensor([[3.0751e+02, 8.1508e+01, 3.1865e+02, 1.2740e+02],
          [2.9821e+02, 4.3597e+01, 3.1985e+02, 1.1972e+02],
          [2.6897e+02, 9.1959e+01, 3.4971e+02, 1.2673e+02],
          [3.0373e+02, 8.0378e+01, 3.1528e+02, 1.2434e+02],
          [3.1182e+02, 8.3331e+01, 3.2207e+02, 1.2435e+02],
          [2.8116e+02, 7.9610e+01, 3.1835e+02, 1.2454e+02],
          [3.0501e+02, 4.8063e+01, 3.2663e+02, 1.2296e+02],
          [1.0038e+02, 0.0000e+00, 1.7314e+02, 1.2436e+02],
          [2.7982e+02, 4.8642e+01, 3.2169e+02, 1.0401e+02],
          [9.9586e+02, 0.0000e+00, 1.0219e+03, 8.1097e+01],
          [2.7567e+02, 6.6898e+01, 3.1535e+02, 1.1013e+02],
          [1.8583e+02, 0.0000e+00, 3.6526e+02, 1.2800e+02],
          [2.8370e+02, 6.4390e+00, 3.3857e+02, 1.2800e+02],
          [7.7005e+01, 5.5709e+00, 2.3600e+02, 1.2800e+02],
          [2.5129e+02, 7.3543e+01, 3.4646e+02, 1.0335e+02],
          [1.2265e+02, 9.3812e+00, 1.5583e+02, 1.0099e+02],
          [2.6861e+00, 3.3626e+

In [13]:
# Scratch comparison of np.stack along the last axis versus the first axis.
a = np.arange(4 * 5).reshape(4, 5)
b = a * 10
c = a * 100

d = np.stack((a, b, c), axis=-1)  # planes on the last axis  -> (4, 5, 3)
e = np.stack((a, b, c), axis=0)   # planes on the first axis -> (3, 4, 5)
print(d.shape)
print(e.shape)
# d[:, :, [1, 2]] would keep only the last two planes.
d

(4, 5, 3)
(3, 4, 5)


array([[[   0,    0,    0],
        [   1,   10,  100],
        [   2,   20,  200],
        [   3,   30,  300],
        [   4,   40,  400]],

       [[   5,   50,  500],
        [   6,   60,  600],
        [   7,   70,  700],
        [   8,   80,  800],
        [   9,   90,  900]],

       [[  10,  100, 1000],
        [  11,  110, 1100],
        [  12,  120, 1200],
        [  13,  130, 1300],
        [  14,  140, 1400]],

       [[  15,  150, 1500],
        [  16,  160, 1600],
        [  17,  170, 1700],
        [  18,  180, 1800],
        [  19,  190, 1900]]])

In [None]:
# Move the plane axis of d to the front: (4, 5, 3) -> (3, 4, 5).
f = np.transpose(d, (2, 0, 1))
f

In [None]:
# Display e, the stack of a, b, c along the first axis (shape (3, 4, 5)).
e

In [19]:
# Convert the NumPy array a into a torch tensor.
q = torch.as_tensor(a)

In [22]:
# Index with a list to select rows 0 and 1 of q.
q[[0,1]]

tensor([[0, 1, 2, 3, 4],
        [5, 6, 7, 8, 9]], dtype=torch.int32)