In [None]:
import getpass
import os
import torch
import torchvision
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
import torch.nn as nn
import pandas as pd
import pydicom
import matplotlib.pyplot as plt
import torch.nn.functional as F
import sys
from sklearn.metrics import roc_curve, auc
import copy
import torchvision.models as models
import tarfile
from torch.autograd import Variable


In [None]:
# Detect which kind of system the notebook is running on: Google Colab, a CADE
# lab machine (hostname containing both 'lab' and '.eng.utah.edu'), or neither.
import socket

try:
    import google.colab
    from google.colab import drive
    from google.colab import files
    IN_COLAB = True
except ImportError:
    # The google.colab package could not be imported: not running in Colab.
    IN_COLAB = False

try:
    # Pure-Python replacement for the `!hostname` shell magic. `hostname` stays
    # a one-element list so code indexing `hostname[0]` keeps working.
    hostname = [socket.gethostname()]
    IN_CADE = 'lab' in hostname[0] and '.eng.utah.edu' in hostname[0]
except Exception:
    # Best effort: if the host name cannot be determined, assume not CADE.
    IN_CADE = False

# The two environments are mutually exclusive.
assert(not IN_CADE or not IN_COLAB)

In [None]:
def define_gpu_to_use(minimum_memory_mb = 3800):
    """Pick the first GPU with enough free memory and pin this process to it.

    If CUDA_VISIBLE_DEVICES is already set, the function only reports it and
    returns. Otherwise it queries `nvidia-smi` for GPU indices 0..15 and picks
    the first one whose free memory exceeds `minimum_memory_mb - 500`. It then
    exports that index through CUDA_VISIBLE_DEVICES and allocates a tensor on
    the GPU to claim the memory.

    Args:
        minimum_memory_mb: requested amount of GPU memory, in the unit reported
            by `nvidia-smi --query-gpu=memory.free` with `nounits`.

    Returns:
        None. The outcome is reported with print().
    """
    import subprocess  # local import keeps this cell self-contained

    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        print('GPU already assigned before: ' + str(os.environ['CUDA_VISIBLE_DEVICES']))
        return

    gpu_to_use = None
    torch.cuda.empty_cache()
    for i in range(16):
        try:
            # stderr is merged into stdout so diagnostics show up in the output
            # lines, as they did with the `!nvidia-smi` shell capture.
            result = subprocess.run(
                ['nvidia-smi', '--query-gpu=memory.free', '-i', str(i),
                 '--format=csv,nounits,noheader'],
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                universal_newlines=True)
        except OSError:
            # nvidia-smi is missing or not executable: there is nothing to probe.
            break
        free_memory = result.stdout.splitlines()
        if not free_memory or free_memory[0] == 'No devices were found':
            break
        try:
            free_memory = int(free_memory[0])
        except ValueError:
            # Not a number (some other diagnostic message): skip this index
            # instead of crashing.
            continue
        if free_memory>minimum_memory_mb-500:
            gpu_to_use = i
            break
    if gpu_to_use is None:
        print('Could not find any GPU available with the required free memory of ' +str(minimum_memory_mb) + 'MB. Please use a different system for this assignment.')
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_to_use)
        print('Chosen GPU: ' + str(gpu_to_use))
        # 256*1024 elements per slice; with the default float32 dtype that is
        # 1 MiB, so this tensor takes about (minimum_memory_mb-500) MiB.
        # NOTE(review): the tensor is first created on the CPU and then copied,
        # so the same amount of host memory is needed temporarily.
        x = torch.rand((256,1024,minimum_memory_mb-500)).cuda()
        # Rebinding x drops the big tensor; presumably the memory stays reserved
        # in PyTorch's allocator for later use. TODO confirm.
        x = torch.rand((1,1)).cuda()
        del x

In [None]:
# Select the GPU that will be used: check that it has enough free memory and
# reserve the needed amount (define_gpu_to_use defaults to 3800 MB).
define_gpu_to_use()

Model Functions

In [None]:
def rel_bbox(size, bbox):
    """Scale boxes by the image size so coordinates become fractions of it.

    Args:
        size: indexable pair; size[0] divides columns 0 and 2 (x values) and
            size[1] divides columns 1 and 3 (y values).
        bbox: 2-D numpy array whose first four columns are [x1, y1, x2, y2].

    Returns:
        A new float32 array (the input is not modified). Columns 2 and 3 are
        incremented by one before the division.
    """
    scaled = bbox.astype(np.float32)  # astype returns a copy
    column_divisors = ((0, size[0]), (1, size[1]), (2, size[0]), (3, size[1]))
    for col, extent in column_divisors:
        if col >= 2:
            # Far corner is shifted by one before normalising.
            scaled[:, col] += 1
        scaled[:, col] /= extent
    return scaled
def bbox_transform(ex_rois, gt_rois):
    """Compute regression targets that map each example box onto its target.

    Args:
        ex_rois: (n, 4) array of example boxes [x1, y1, x2, y2].
        gt_rois: (n, 4) array of target boxes, row-aligned with ex_rois.

    Returns:
        (n, 4) array with columns [dx, dy, dw, dh]: centre offsets divided by
        the example box size, and log ratios of widths and heights.
    """
    def _size_and_centre(rois):
        # Width/height include the +1.0 offset; the centre is corner + half size.
        w = rois[:, 2] - rois[:, 0] + 1.0
        h = rois[:, 3] - rois[:, 1] + 1.0
        return w, h, rois[:, 0] + 0.5 * w, rois[:, 1] + 0.5 * h

    ex_w, ex_h, ex_cx, ex_cy = _size_and_centre(ex_rois)
    gt_w, gt_h, gt_cx, gt_cy = _size_and_centre(gt_rois)

    deltas = [
        (gt_cx - ex_cx) / ex_w,
        (gt_cy - ex_cy) / ex_h,
        np.log(gt_w / ex_w),
        np.log(gt_h / ex_h),
    ]
    return np.array(deltas).T
def calc_ious(ex_rois, gt_rois):
    """Pairwise intersection-over-union between two sets of boxes.

    Args:
        ex_rois: (n, 4) array of boxes [x1, y1, x2, y2].
        gt_rois: (m, 4) array of boxes [x1, y1, x2, y2].

    Returns:
        (n, m) array where entry [i, j] is the IoU of ex_rois[i] and
        gt_rois[j]. Widths and heights are computed as `1 + max - min`.
    """
    def _area(rois):
        return (1. + rois[:, 2] - rois[:, 0]) * (1. + rois[:, 3] - rois[:, 1])

    def _as_col(rois, k):
        return rois[:, k].reshape((-1, 1))

    def _as_row(rois, k):
        return rois[:, k].reshape((1, -1))

    # Sum of both areas for every (ex, gt) pair.
    total_area = _area(ex_rois).reshape((-1, 1)) + _area(gt_rois).reshape((1, -1))

    # Bounds of the overlap rectangle for every pair.
    left = np.maximum(_as_col(ex_rois, 0), _as_row(gt_rois, 0))
    right = np.minimum(_as_col(ex_rois, 2), _as_row(gt_rois, 2))
    top = np.maximum(_as_col(ex_rois, 1), _as_row(gt_rois, 1))
    bottom = np.minimum(_as_col(ex_rois, 3), _as_row(gt_rois, 3))

    # Disjoint boxes are clamped to an overlap of zero.
    overlap = np.maximum(1. + right - left, 0.) * np.maximum(1. + bottom - top, 0.)
    return overlap / (total_area - overlap)
def reg_to_bbox(img_size, reg, box):
    """Apply per-class regression offsets to boxes and clip to the image.

    Args:
        img_size: (img_width, img_height) pair used as the upper clip bound.
        reg: (n, C, 4) array of [dx, dy, dw, dh] offsets for each box and class.
        box: (n, 4) array of reference boxes [x1, y1, x2, y2].

    Returns:
        (n, C, 4) array of boxes [x1, y1, x2, y2], with the lower corner
        clipped at 0 and the upper corner clipped at the image size.
    """
    img_width, img_height = img_size

    ref_w = box[:, 2] - box[:, 0] + 1.0
    ref_h = box[:, 3] - box[:, 1] + 1.0
    # A trailing axis lets per-box values broadcast over the class axis.
    ref_cx = (box[:, 0] + 0.5 * ref_w)[:, np.newaxis]
    ref_cy = (box[:, 1] + 0.5 * ref_h)[:, np.newaxis]
    ref_w = ref_w[:, np.newaxis]
    ref_h = ref_h[:, np.newaxis]

    new_cx = reg[:, :, 0] * ref_w + ref_cx
    new_cy = reg[:, :, 1] * ref_h + ref_cy
    half_w = 0.5 * (ref_w * np.exp(reg[:, :, 2]))
    half_h = 0.5 * (ref_h * np.exp(reg[:, :, 3]))

    corners = np.array([
        np.maximum(0, new_cx - half_w),
        np.maximum(0, new_cy - half_h),
        np.minimum(img_width, new_cx + half_w),
        np.minimum(img_height, new_cy + half_h)
    ])
    # (4, n, C) -> (n, C, 4)
    return corners.transpose([1, 2, 0])

def non_maximum_suppression(sc, bboxs, iou_threshold=0.7, score_threshold=0.6):
    """Filter boxes by score, then drop boxes overlapping a better-scored one.

    Args:
        sc: 1-D array of scores, one per box.
        bboxs: (n, 4) array of boxes, row-aligned with sc.
        iou_threshold: a box is dropped when its largest IoU with any
            higher-scoring candidate reaches this value.
        score_threshold: boxes scoring below this value are ignored.

    Returns:
        A list of box rows ordered by decreasing score; an empty list when no
        score reaches score_threshold. Each box is compared against every
        higher-scoring candidate, including candidates that were themselves
        dropped.
    """
    order = np.argsort(sc)[::-1]

    # Count the leading scores (best first) that reach the score threshold.
    n_candidates = 0
    for k in order:
        if not sc[k] >= score_threshold:
            break
        n_candidates += 1
    if n_candidates == 0:
        return []

    ranked = bboxs[order[:n_candidates], :]
    overlaps = calc_ious(ranked, ranked)

    # Row i of `overlaps`, restricted to columns < i, holds the IoU with every
    # higher-scoring candidate; the best box (i == 0) is always kept.
    return [
        ranked[i]
        for i in range(n_candidates)
        if i == 0 or overlaps[i, :i].max() < iou_threshold
    ]

def plot(name, title, legendx, legendy, x, y, n_epoch, frame_size = 256,labelx = 'Epoch', labely = 'Loss'):
    """Plot two series, averaged over fixed-size frames, on an epoch axis.

    Each series is flattened in column-major order and split into consecutive
    frames of `frame_size` values (the last frame may be shorter). The frame
    means are drawn at evenly spaced positions from 0 up to (but excluding)
    `n_epoch`. The two series are framed independently, so they may have
    different lengths.

    Args:
        name: path passed to plt.savefig (saved at 600 dpi).
        title: figure title.
        legendx: legend label for the series `x`.
        legendy: legend label for the series `y`.
        x: first series (anything np.array accepts).
        y: second series (anything np.array accepts).
        n_epoch: number of epochs both series span; scales the horizontal axis.
        frame_size: number of consecutive values averaged into one point.
        labelx: horizontal axis label.
        labely: vertical axis label.

    Raises:
        ValueError: if frame_size is not positive.
    """
    if frame_size <= 0:
        # The original loop would never terminate for a non-positive frame size.
        raise ValueError('frame_size must be positive')

    def _frame_means(values):
        # Returns (positions on the epoch axis, mean of each frame).
        flat = np.array(values).flatten('F')
        n_frames = -(-len(flat) // frame_size)  # ceiling division
        means = [np.mean(flat[k*frame_size:(k+1)*frame_size]) for k in range(n_frames)]
        # linspace yields exactly n_frames points; arange with a fractional
        # step can produce one more or one fewer because of rounding.
        positions = np.linspace(0, n_epoch, n_frames, endpoint=False)
        return positions, means

    a, framex = _frame_means(x)
    b, framey = _frame_means(y)

    plt.figure()
    plt.plot(a,framex)
    plt.plot(b,framey)
    plt.xlabel(labelx)
    plt.ylabel(labely)
    plt.title(title)
    plt.legend([legendx,legendy])
    plt.savefig(name,dpi=600)
    plt.show()

In [None]:
class SlowROIPool(nn.Module):
    """Region-of-interest max pooling done one region at a time.

    Each region is cropped from its feature map and reduced to `output_size`
    with nn.AdaptiveMaxPool2d.
    """

    def __init__(self, output_size):
        """Store the pooled output size and build the pooling layer."""
        super().__init__()
        self.maxpool = nn.AdaptiveMaxPool2d(output_size)
        self.size = output_size

    def forward(self, images, rois, roi_idx):
        """Pool every region to the configured output size.

        Args:
            images: 4-D tensor; dimension 2 is used as height and dimension 3
                as width.
            rois: (n, 4) numpy array of [x1, y1, x2, y2] given as fractions of
                the feature-map width/height.
            roi_idx: for each region, the index into `images` it belongs to.

        Returns:
            The n pooled crops concatenated along dimension 0.
        """
        feat_h, feat_w = images.size(2), images.size(3)

        # Round outwards: floor the near edge, ceil the far edge.
        left = np.floor(rois[:, 0] * feat_w).astype(int)
        top = np.floor(rois[:, 1] * feat_h).astype(int)
        right = np.ceil(rois[:, 2] * feat_w).astype(int)
        bottom = np.ceil(rois[:, 3] * feat_h).astype(int)

        pooled = [
            self.maxpool(
                images[roi_idx[k]].unsqueeze(0)[:, :, top[k]:bottom[k], left[k]:right[k]]
            )
            for k in range(rois.shape[0])
        ]
        return torch.cat(pooled, dim=0)

In [None]:
class RCNN(nn.Module):
    """Region-based detector built on a pretrained VGG16-BN.

    The VGG convolutional layers (all but the last child of `features`)
    produce a feature map. SlowROIPool crops and pools each region to 7x7. The
    VGG classifier without its last child turns that into a feature vector,
    which feeds two linear heads: class scores (N_CLASS+1 outputs) and one
    4-value box per class (4*(N_CLASS+1) outputs).

    Reads the module-level constant N_CLASS when constructed and when run.
    """

    def __init__(self):
        super().__init__()

        rawnet = torchvision.models.vgg16_bn(pretrained=True)
        self.seq = nn.Sequential(*list(rawnet.features.children())[:-1])
        self.roipool = SlowROIPool(output_size=(7, 7))
        self.feature = nn.Sequential(*list(rawnet.classifier.children())[:-1])

        # Read the feature width from the layers themselves. Pushing a dummy
        # (uninitialised) tensor through the network in training mode would
        # overwrite the pretrained BatchNorm running statistics.
        feature_dim = self._output_dim(self.feature)
        self.cls_score = nn.Linear(feature_dim, N_CLASS+1)
        self.bbox = nn.Linear(feature_dim, 4*(N_CLASS+1))

        self.cel = nn.CrossEntropyLoss()
        self.sl1 = nn.SmoothL1Loss()

    @staticmethod
    def _output_dim(layers):
        """Return `out_features` of the last nn.Linear among `layers`' children."""
        linear_layers = [m for m in layers.children() if isinstance(m, nn.Linear)]
        if not linear_layers:
            raise ValueError('expected at least one nn.Linear layer')
        return linear_layers[-1].out_features

    def forward(self, inp, rois, ridx):
        """Score and regress every region.

        Args:
            inp: image batch fed to the convolutional layers.
            rois: regions passed to SlowROIPool (numpy array of relative
                [x1, y1, x2, y2] coordinates).
            ridx: for each region, the index of the image in `inp` it belongs to.

        Returns:
            (cls_score, bbox): class scores with N_CLASS+1 columns, and box
            offsets viewed as (-1, N_CLASS+1, 4).
        """
        res = inp
        res = self.seq(res)
        res = self.roipool(res, rois, ridx)
        # Detaching here stops gradients from reaching the convolutional layers.
        res = res.detach()
        res = res.view(res.size(0), -1)
        feat = self.feature(res)

        cls_score = self.cls_score(feat)
        bbox = self.bbox(feat).view(-1, N_CLASS+1, 4)
        return cls_score, bbox

    def calc_loss(self, probs, bbox, labels, gt_bbox):
        """Combine the classification loss and the box-regression loss.

        Args:
            probs: class scores, one row per region.
            bbox: per-class boxes, indexed as (region, class, 4).
            labels: integer class label per region.
            gt_bbox: target box values per region, 4 columns.

        Returns:
            (loss, loss_sc, loss_loc): total loss, cross-entropy term and
            smooth-L1 term; the total is loss_sc + 1.0 * loss_loc.
        """
        loss_sc = self.cel(probs, labels)
        # Pick, for each region, the box predicted for its own label.
        lbl = labels.view(-1, 1, 1).expand(labels.size(0), 1, 4)
        # Regions labelled 0 are zeroed on both sides, so they add nothing to
        # the box loss numerator.
        mask = (labels != 0).float().view(-1, 1).expand(labels.size(0), 4)
        loss_loc = self.sl1(bbox.gather(1, lbl).squeeze(1) * mask, gt_bbox * mask)
        lmb = 1.0
        loss = loss_sc + lmb * loss_loc
        return loss, loss_sc, loss_loc

Evaluation

In [None]:
def test_image(img, img_size, rois, orig_rois):
    """Run the detector on one image and return its detections.

    Uses the module-level names `rcnn` (the model) and N_CLASS.
    NOTE(review): `rcnn` must be defined before this is called — confirm where
    it is created.

    Args:
        img: image batch holding a single image (every region is assigned to
            image index 0).
        img_size: (width, height) of the image, used to clip predicted boxes.
        rois: regions in the form the model expects.
        orig_rois: the same regions in the coordinates reg_to_bbox should
            output in — presumably pixels; TODO confirm.

    Returns:
        (boxes, classes) as numpy arrays. Classes are in 1..N_CLASS. If nothing
        scores at least 0.6, the score threshold is lowered to 0.3 and at most
        one detection (the first one found, in class order) is returned.
    """
    nroi = rois.shape[0]
    ridx = np.zeros(nroi).astype(int)
    sc, tbbox = rcnn(img, rois, ridx)
    # Normalise over the class axis; an explicit dim avoids the deprecated
    # implicit-dimension behaviour of softmax.
    sc = nn.functional.softmax(sc, dim=1)
    sc = sc.data.cpu().numpy()
    tbbox = tbbox.data.cpu().numpy()
    bboxs = reg_to_bbox(img_size, tbbox, orig_rois)

    def _detect(score_threshold):
        # Per-class non-maximum suppression over classes 1..N_CLASS.
        found_bbox = []
        found_cls = []
        for c in range(1, N_CLASS+1):
            c_sc = sc[:,c]
            c_bboxs = bboxs[:,c,:]

            boxes = non_maximum_suppression(c_sc, c_bboxs, iou_threshold=0.3, score_threshold=score_threshold)
            found_bbox.extend(boxes)
            found_cls.extend([c] * len(boxes))
        return found_bbox, found_cls

    res_bbox, res_cls = _detect(0.6)

    if len(res_cls) == 0:
        # Nothing confident enough: retry with a lower threshold and keep at
        # most one detection.
        res_bbox, res_cls = _detect(0.3)
        res_bbox = res_bbox[:1]
        res_cls = res_cls[:1]

    print(res_cls)

    return np.array(res_bbox), np.array(res_cls)

In [None]:
def test_epoch():
    """Run detection over every test image and hand the results to evaluate.

    Uses the module-level names test_imgs, test_img_info, test_roi,
    test_orig_roi and evaluate.
    NOTE(review): none of these are defined in the cells shown here — confirm
    they are created before this function is called. Also confirm the model is
    switched to eval mode by the caller; this function does not do it.
    """
    Nimg = test_imgs.size(0)
    Nc = Nimg

    # Images are visited in random order; all of them are processed.
    perm = np.random.permutation(Nimg)[:Nc]

    # Default to empty predictions for every image.
    bbox_preds = [np.ndarray((0, 4)) for _ in range(Nimg)]
    bbox_cls = [np.ndarray((0, 1)) for _ in range(Nimg)]

    # torch.no_grad() replaces `Variable(..., volatile=True)`, which no longer
    # has any effect and would leave gradient tracking on during inference.
    with torch.no_grad():
        for i in range(Nc):
            pi = perm[i]
            img = test_imgs[pi:pi+1]

            info = test_img_info[pi]
            img_size = info['img_size']
            idxs = np.array(info['idxs'])

            # Regions that belong to this image.
            rois = test_roi[idxs]
            orig_rois = test_orig_roi[idxs]

            res_bbox, res_cls = test_image(img, img_size, rois, orig_rois)
            bbox_preds[pi] = res_bbox
            bbox_cls[pi] = res_cls

    evaluate.evaluate(bbox_preds, bbox_cls)

    print('Test complete')

Read Data

In [None]:
# Root directory that contains the dataset folders; change it to match your machine.
root = "/hdd/"

# Dataset folders under `root`. Each one is read by ourDataset, which expects a
# labels.txt file and an images/ sub-folder inside it.
folders = [
    "pringles_1k_1554610789151371002",
    "black_decker_1k_1554675460418814897",
    "lego_toy_1k_1554674193206299066",
    "pringles_1k_1554677386797606945",
]


In [None]:
from torch.utils.data.dataset import Dataset
from torchvision import transforms
import torch
from PIL import Image
import numpy as np
class ourDataset(Dataset):
    """Image dataset backed by a folder with labels.txt and an images/ directory.

    Every non-blank line of labels.txt holds six whitespace-separated fields:
    an image file name, a second text field, and four integers.
    NOTE(review): the second field looks like a class name and the integers
    like a bounding box, but __getitem__ ignores both and always returns the
    label 1 — confirm the intended targets.
    """

    def __init__(self, folder):
        """Read labels.txt from `folder` and prepare the image transform."""
        labelFileName = folder + "/labels.txt"
        # `with` closes the label file even if reading fails.
        with open(labelFileName, "r") as file:
            lines = file.readlines()

        # Blank lines (for example a trailing newline) carry no example.
        self.examples = [self._parse_label_line(line) for line in lines if line.strip()]
        self.count = len(self.examples)

        self.image_folder = folder + "/images/"
        self.transformations = \
            transforms.Compose([transforms.ToTensor()])

    @staticmethod
    def _parse_label_line(line):
        """Split one labels.txt line into [str, str, int, int, int, int]."""
        # split() without arguments tolerates repeated spaces, tabs and the
        # trailing newline.
        array = line.split()
        return [array[0], array[1],
                int(array[2]), int(array[3]),
                int(array[4]), int(array[5])]

    def __getitem__(self, index):
        """Return (image tensor, label) for the example at `index`."""
        single_image_name = self.image_folder +self.examples[index][0]
        # The context manager releases the image file once it is converted.
        with Image.open(single_image_name) as img_as_img:
            img = self.transformations(img_as_img)
        label = 1

        return (img, label)

    def __len__(self):
        """Return the number of examples read from labels.txt."""
        return self.count

In [None]:
# Data-loading settings.
batch_size = 16         # samples per batch, used by both data loaders
validation_split = 0.2  # fraction of the dataset held out for validation
random_seed = 42        # NumPy seed for the shuffle that defines the split

In [None]:
from torch.utils.data.sampler import SubsetRandomSampler

# One ourDataset per folder, merged into a single dataset.
datasets = []
for f in folders:
    folder = root + f
    datasets += [ourDataset(folder)]
dataset = torch.utils.data.ConcatDataset(datasets)

# Shuffle all sample indices with a fixed seed, then hold out the leading
# `validation_split` fraction for validation.
dataset_size = len(dataset)
split = int(np.floor(validation_split * dataset_size))
indices = list(range(dataset_size))

np.random.seed(random_seed)
np.random.shuffle(indices)
val_indices = indices[:split]
train_indices = indices[split:]

# Each sampler restricts its loader to one side of the split.
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    sampler=train_sampler,
)
validation_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    sampler=valid_sampler,
)


In [None]:
# Number of object classes. RCNN reads this global and sizes its heads with
# N_CLASS+1 outputs; label 0 is excluded from the box loss.
# NOTE(review): `folders` names only three distinct objects (pringles,
# black_decker, lego_toy) — confirm that 20 is the intended value.
N_CLASS = 20
# Build the detector (loads pretrained VGG16-BN weights) and move it to the GPU.
# NOTE(review): test_image() calls a global named `rcnn`, not `model` —
# confirm which name the rest of the notebook expects.
model = RCNN().cuda()
