In [None]:
!pip install dioptra
# Restart the kernel for this to take effect
import dioptra
print('dioptra version:', dioptra.__version__)

In [None]:
!pip install datasets torchvision transformers evaluate

In [None]:
!pip install -r requirements.txt

In [None]:
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [None]:
!pip install wandb

In [None]:
import time
import torch
from torchvision import transforms

import numpy as np
import os

from dioptra.inference.torch.torch_runner import TorchInferenceRunner
from dioptra.lake.utils import select_datapoints, delete_datapoints
from dioptra.lake.datasets import Dataset as DioptraDataset
from dioptra.miners.random_miner import RandomMiner
from dioptra.miners.weighted_entropy_miner import WeightedEntropyMiner
import pandas as pd
import datetime

# import yolov7 specific things
import matplotlib.pyplot as plt
import cv2
import yaml
from utils.torch_utils import torch_distributed_zero_first
from utils.datasets import letterbox, LoadImagesAndLabels
from utils.general import non_max_suppression_mask_conf
from models.experimental import attempt_load
from detectron2.modeling.poolers import ROIPooler
from detectron2.structures import Boxes
from detectron2.utils.memory import retry_if_cuda_oom
from detectron2.layers import paste_masks_in_image

In [None]:
# Storage location of the images and the mining strategy for this run.
img_bucket = 'YOUR BUCKET NAME'
img_dir = f's3://{img_bucket}/instance_seg/cocoval'
method = 'weighted_entropy'  # any other value selects RandomMiner in the mining cells

# Dioptra SDK settings are handed over through environment variables.
# Fill in the placeholders locally; do not commit a real API key.
os.environ.update({
    'DIOPTRA_API_KEY': 'YOUR API KEY HERE',
    'DIOPTRA_UPLOAD_BUCKET': img_bucket,
    'DIOPTRA_UPLOAD_PREFIX': '/media/logs/',
})

In [None]:
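# One-time upload of the local COCO val2017 images to the S3 prefix in `img_dir`; uncomment to run.
#!aws s3 cp ./coco/images/val2017 {img_dir} --recursive --quiet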

In [None]:
# Collect every regular file (sub-directories are skipped) in the local image folder.
local_img_dir = './coco/images/val2017'
dirlist = os.listdir(local_img_dir)
files = list(filter(os.path.isfile,
                    (os.path.join(local_img_dir, entry) for entry in dirlist)))

n = len(files)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# YOLOv7-mask setup: hyper-parameters, model weights, ROI pooler and loader settings.
from utils.general import check_file, check_img_size

with open('data/hyp.scratch.mask.yaml') as hyp_file:
    hyp = yaml.load(hyp_file, Loader=yaml.FullLoader)

# Load the checkpoint, move it to the selected device and switch to eval mode.
model = attempt_load('yolov7-mask.pt')
model = model.to(device)
_ = model.eval()
names, pooler_scale = model.names, model.pooler_scale
pooler = ROIPooler(
    output_size=hyp['mask_resolution'],
    scales=(pooler_scale,),
    sampling_ratio=1,
    pooler_type='ROIAlignV2',
    canonical_level=2,
)

batch_size = 5
# Largest model stride, but never smaller than 32.
gs = max(int(model.stride.max()), 32)
imgsz = check_img_size(640, gs)
cfg = check_file('./cfg/yolov7-mask.yaml')
num_workers = 8
# Distributed settings fall back to single-process defaults when the
# WORLD_SIZE / RANK environment variables are not set.
world_size = int(os.environ.get('WORLD_SIZE', 1))
global_rank = int(os.environ.get('RANK', -1))
rank = global_rank
rect = False


In [None]:
import re

def _get_layer_by_name(model, name):
    split = name.split('.')
    current_layer = model
    for part in split:
        if re.match('\[[0-9-]+\]', part):
            index = int(part.replace('[', '').replace(']', ''))
            current_layer = current_layer[index]
        else:
            current_layer = getattr(current_layer, part)
    return current_layer

# Sanity check: show the layer that is later passed to the runner as logits_layer.
selected_layer = _get_layer_by_name(model, 'model.128')
print(selected_layer)

In [None]:
# build a modified yolov7 dataloader

# it needs to be a finite dataloader as we are inferring over a set of points
def create_dataloader(path, imgsz, batch_size, stride, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
                      rank=-1, world_size=1, workers=8, image_weights=False, quad=False, prefix='', shuffle = False, finite = False):
    """Build a ``LoadImagesAndLabels`` dataset and a data loader over it.

    Args:
        path: Passed to ``LoadImagesAndLabels`` as the data source.
        imgsz: Image size passed to the dataset.
        batch_size: Requested batch size; capped at the dataset length.
        stride: Model stride, converted to ``int`` for the dataset.
        hyp: Augmentation hyper-parameters forwarded to the dataset.
        augment, cache, pad, rect, image_weights, prefix: Forwarded to the
            dataset (``cache`` as ``cache_images``).
        rank: Passed to ``torch_distributed_zero_first``.
        world_size: Divides the CPU count when choosing the worker count.
        workers: Upper bound on the number of loader workers.
        quad: Accepted for signature compatibility; not used here.
        shuffle: Forwarded to the loader.
        finite: When true (or when ``image_weights`` is truthy) a plain
            ``torch.utils.data.DataLoader`` is used.

    Returns:
        Tuple ``(dataloader, dataset)``.
    """
    with torch_distributed_zero_first(rank):
        dataset = LoadImagesAndLabels(path, imgsz, batch_size,
                                      augment=augment,  # augment images
                                      hyp=hyp,  # augmentation hyperparameters
                                      rect=rect,  # rectangular training
                                      cache_images=cache,
                                      single_cls=False,
                                      stride=int(stride),
                                      pad=pad,
                                      image_weights=image_weights,
                                      prefix=prefix)

    batch_size = min(batch_size, len(dataset))
    # os.cpu_count() may return None; treat that as a single CPU.
    cpu_count = os.cpu_count() or 1
    nw = min([cpu_count // world_size, batch_size if batch_size > 1 else 0, workers])  # number of workers
    if finite or image_weights:
        loader = torch.utils.data.DataLoader
    else:
        # Previously `loader` was left unbound on this path, so the call below
        # raised UnboundLocalError.
        # NOTE(review): assumes utils.datasets provides InfiniteDataLoader, as
        # upstream YOLOv7 does - confirm for this checkout.
        from utils.datasets import InfiniteDataLoader
        loader = InfiniteDataLoader
    dataloader = loader(dataset,
                        batch_size=batch_size,
                        num_workers=nw,
                        shuffle=shuffle,
                        pin_memory=True,
                        collate_fn=LoadImagesAndLabels.collate_fn)
    return dataloader, dataset


In [None]:
# generate the metadata for the images
from tqdm import tqdm
test_path = './coco/val2017.txt'

dataloader, dataset = create_dataloader(test_path, imgsz, batch_size, gs,
                                            hyp=hyp, augment=False, cache=True, rect=rect, rank=rank,
                                            world_size=world_size, workers=num_workers,
                                            image_weights=[], quad=False, shuffle=False, finite = True)

# Work on the first 50 samples, or on all of them when the dataset is smaller
# (a fixed range(50) raised IndexError in that case).
num_range = list(range(min(50, len(dataset.img_files))))

_inds = num_range
if _inds is not None:
    # `dataloader.dataset` is this same `dataset` object (create_dataloader
    # builds the loader from it), so trimming the dataset once is sufficient.
    dataset.imgs = [dataset.imgs[i] for i in _inds]
    dataset.img_files = [dataset.img_files[i] for i in _inds]
    dataset.label_files = [dataset.label_files[i] for i in _inds]
    dataset.labels = [dataset.labels[i] for i in _inds]


# One metadata entry per image, in loader order: the S3 URI plus the
# letterboxed width/height.
metadata = []
for _img, _targets, paths, _shapes in tqdm(dataloader, desc='Creating metadata'):
    for image_path in paths:
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        # letterbox returns an (H, W, C) array first; only its size is needed,
        # so the former tensor round-trip is skipped.
        height, width = letterbox(image, 640, stride=64, auto=True)[0].shape[:2]

        file_name = image_path.split('/')[-1]
        uri = os.path.join(img_dir, file_name)
        metadata.append({'image_metadata': {'uri': uri, 'width': width, 'height': height}})


In [None]:
!pip install imantics

In [None]:
# Spatial size each pooled per-box feature map is resized to before flattening.
embedding_resolution = (5,5)
def logits_transformer(logits, embeddings, model, pooler, hyp, metadata=None):
    """Convert one batch of raw YOLOv7-mask outputs into per-image prediction records.

    Args:
        logits: Mapping of head outputs. The keys ``'test'``, ``'attn'``,
            ``'bases'`` and ``'sem'`` are read here.
        embeddings: Unused; accepted so the call signature stays unchanged.
        model: Model providing ``names``, indexed by the predicted class index.
        pooler: ROI pooler. It is handed to ``non_max_suppression_mask_conf``
            and also used to pool the concatenated ``bases``/``sem`` features
            for every box.
        hyp: Hyper-parameter mapping; ``hyp['mask_resolution']`` is read.
        metadata: Sequence of ``{'image_metadata': {'width': ..., 'height': ...}}``
            entries. Defaults to an empty list.

    Returns:
        A list with one dict per image holding ``'task_type'``,
        ``'model_name'``, ``'bboxes'`` (one dict per kept box) and
        ``'metrics'``.
    """
    if metadata is None:
        metadata = []

    # Assumes model being used has an MT head
    raw_prediction = logits['test']
    attn = logits['attn']
    bases = logits['bases']
    sem = logits['sem']
    sem_bases = torch.cat([bases, sem], dim=1)
    names = model.names

    conf_thres = 0.25  # shared by NMS and the per-box filter below
    # NOTE(review): 640 is hard-coded here; presumably the network input size - confirm.
    input_size = 640

    # Active-learning experiments used conf_thres=0.001332 and iou_thres=0.3 instead.
    output, output_mask, output_mask_score, output_ac, output_ab = non_max_suppression_mask_conf(
        raw_prediction, attn, bases, pooler, hyp,
        conf_thres=conf_thres, iou_thres=0.65, merge=False, mask_iou=None)

    records = []
    for image_idx in range(len(output)):  # iterate over each image
        record = {
            'task_type': 'INSTANCE_SEGMENTATION',
            'model_name': 'yolov7==0.1',  # version of the model
        }

        pred, pred_masks = output[image_idx], output_mask[image_idx]
        metrics = calc_metrics(raw_prediction[image_idx, ...])
        # NOTE(review): metadata is indexed by position within this batch;
        # assumes the caller passes the entries for exactly these images, in
        # order - confirm against the runner.
        image_meta = metadata[image_idx]['image_metadata']
        height = image_meta['height']
        width = image_meta['width']
        # calculate shift for each box dimension
        width_shift = (input_size - width) / 2
        height_shift = (input_size - height) / 2
        base = sem_bases[image_idx]
        bbox_list = []
        if pred is not None:
            boxes = pred[:, :4]
            # Cap x coordinates (columns 0, 2) at the width and y coordinates
            # (columns 1, 3) at the height, in place. Same result as the former
            # per-box loop, which also reused the outer index name `i`.
            boxes[:, 0::2].clamp_(max=width)
            boxes[:, 1::2].clamp_(max=height)
            bboxes = Boxes(boxes)
            pooled_bases = pooler([base[None]], [bboxes])
            pooled_bases = torch.nn.functional.interpolate(pooled_bases, embedding_resolution, mode="bilinear")
            pooled_bases = pooled_bases.flatten(start_dim=1).cpu().numpy()
            original_pred_masks = pred_masks.view(-1, hyp['mask_resolution'], hyp['mask_resolution'])
            pred_masks = retry_if_cuda_oom(paste_masks_in_image)(original_pred_masks, bboxes, (height, width), threshold=0.5)
            # NOTE(review): the pasted masks are computed but not written into
            # the record below.
            pred_masks_np = pred_masks.detach().cpu().numpy()
            pred_cls = pred[:, 5].detach().cpu().numpy()
            pred_conf = pred[:, 4].detach().cpu().numpy()
            nbboxes = bboxes.tensor.detach().cpu().numpy().astype(int)
            # One dict per box: position, size, class name, confidence, embedding.
            for _mask, bbox, cls_name, conf, embedding in zip(pred_masks_np, nbboxes, pred_cls, pred_conf, pooled_bases):
                if conf < conf_thres:
                    continue

                bbox_list.append({
                    'top': int(bbox[1] - height_shift),
                    'left': int(bbox[0] - width_shift),
                    'height': int(bbox[3] - bbox[1]),
                    'width': int(bbox[2] - bbox[0]),
                    'class_name': names[int(cls_name)],
                    'confidence': float(conf),
                    'embedding': embedding.tolist(),
                })
        record['bboxes'] = bbox_list
        record['metrics'] = {'weighted_entropy': float(metrics)}
        records.append(record)
    return records

def calc_metrics(prediction):
    """Sum ``torch.special.entr`` over channel-4-weighted scores.

    Channel 4 of the last axis is multiplied with channels 5 onward, the
    entropy term ``entr`` is applied to every product, and all values are
    summed.

    Args:
        prediction: Tensor whose last axis has at least six channels.
            Presumably channel 4 is objectness and channels 5+ are class
            scores - confirm against the model head.

    Returns:
        A NumPy scalar with the summed entropy.
    """
    confs = prediction[..., 4:5] * prediction[..., 5:]
    entropy = torch.special.entr(confs)
    # detach() first so tensors that track gradients can be converted too,
    # matching the other tensor-to-NumPy conversions in this notebook.
    return np.sum(entropy.detach().cpu().numpy())

def data_transformer(batch):
    """Return the first element of ``batch`` converted to float and divided by 255."""
    images = batch[0]
    return images.float() / 255.0


In [None]:
from functools import partial 

# NOTE(review): called with an empty filter list - presumably this removes the
# datapoints already stored in the lake; confirm before running against a shared org.
delete_datapoints([])
# Rebuild the finite loader over the validation list.
test_path = './coco/val2017.txt'
dataloader, dataset = create_dataloader(test_path, imgsz, batch_size, gs,
                                            hyp=hyp, augment=False, cache=True, rect=rect, rank=rank,
                                            world_size=world_size, workers=num_workers,
                                            image_weights=[], quad=False, shuffle=False, finite = True)
# Indices of the samples to keep; indexing below raises IndexError if the
# dataset holds fewer than 50 entries.
num_range = [i for i in range(50)]

# Assumes the roi function input is the ROI Pooler from Detectron 2
pooler_scale = model.pooler_scale
pooler = ROIPooler(output_size=hyp['mask_resolution'], scales=(pooler_scale,),
                   sampling_ratio=1, pooler_type='ROIAlignV2', canonical_level=2)

# Trim the dataset attributes to the selected indices.
_inds = num_range
if _inds is not None:
    dataset.imgs = [dataset.imgs[i] for i in _inds]
    dataset.img_files = [dataset.img_files[i] for i in _inds]
    dataset.label_files = [dataset.label_files[i] for i in _inds]
    dataset.labels = [dataset.labels[i] for i in _inds]
    dataloader.dataset.imgs = dataset.imgs
    dataloader.dataset.img_files = dataset.img_files
    dataloader.dataset.label_files = dataset.label_files
    dataloader.dataset.labels = dataset.labels

# Run inference through Dioptra's runner. `logits_layer` names the same layer
# that was printed earlier via _get_layer_by_name; logits_transformer is bound
# to the model, pooler and hyper-parameters with functools.partial.
runner = TorchInferenceRunner(
    model = model,
    model_type='INSTANCE_SEGMENTATION',
    model_name = 'yolov7',
    data_transform = data_transformer,
    datapoints_metadata=metadata,
    logits_layer='model.128',
    class_names=names,
    logits_transform=partial(logits_transformer, model = model, pooler=pooler, hyp = hyp),
    channel_last=False,
)
# NOTE(review): one less than the loader batch size; the reason is not evident
# from this notebook.
runner.max_batch_size = batch_size - 1
runner.run(dataloader)

In [None]:
# NOTE(review): fixed 2 s pause before mining - presumably gives the uploaded
# predictions time to become queryable; confirm whether the SDK offers a wait.
time.sleep(2)
# Only consider datapoints that carry predictions from the 'yolov7' run.
filters=[{
    'left': 'predictions.model_name',
    'op': '=',
    'right': 'yolov7'
}]
# Pick the miner for the configured strategy. The random miner is now labelled
# as such; it was previously displayed as 'entropy miner'.
if method == 'weighted_entropy':
    miner_cls, display_name = WeightedEntropyMiner, 'weighted entropy miner'
else:
    miner_cls, display_name = RandomMiner, 'random miner'
miner = miner_cls(
    select_filters=filters,
    size=3,
    display_name=display_name,
    model_name='yolov7',
)
miner.run()

miner_results = miner.get_results()

print(miner_results)

In [None]:
# Create a Dioptra dataset named 'train' and add the datapoints returned by
# the miner in the previous cell.
training_dataset = DioptraDataset()
training_dataset.create('train')
training_dataset.add_datapoints(miner_results)

In [None]:
###### Do some training ########

In [None]:
# Update the metadata now that our points have been uploaded to Dioptra
# Begin by grabbing datapoints for the current task:
filters = []
datapoints_df = select_datapoints(filters=filters)

# Map each image URI to its datapoint id once (the first row wins, as with the
# former linear scan) instead of re-scanning the frame for every image.
uri_to_id = {}
for _, row in datapoints_df.iterrows():
    row_metadata = row['metadata']
    if isinstance(row_metadata, dict) and 'uri' in row_metadata:
        uri_to_id.setdefault(row_metadata['uri'], row['id'])

metadata = []
for _img, _targets, paths, _shapes in tqdm(dataloader, desc='Creating metadata'):
    for image_path in paths:
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        # letterbox returns an (H, W, C) array first; only its size is needed.
        height, width = letterbox(image, 640, stride=64, auto=True)[0].shape[:2]

        file_name = image_path.split('/')[-1]
        uri = os.path.join(img_dir, file_name)
        # Fail loudly when an image has no datapoint; the old scan silently
        # reused the id of the previous match (or raised NameError on the first).
        if uri not in uri_to_id:
            raise LookupError(f'No Dioptra datapoint found for uri: {uri}')
        metadata.append({'id': uri_to_id[uri], 'image_metadata': {'uri': uri, 'width': width, 'height': height}})


In [None]:
# active learning loop:
# Each step runs inference under a step-specific model name, mines new points
# that are not yet in the training dataset, and adds them to it.
# NOTE(review): `model` is not retrained inside the loop as written; the
# labelling/training step at the end is a placeholder.
num_iter = 5 # number of active learning steps
for i in range(num_iter):
    step_model_name = 'yolov7_{}'.format(i)

    #Run another inference
    runner = TorchInferenceRunner(
        model = model,
        model_type='INSTANCE_SEGMENTATION',
        model_name = step_model_name,
        data_transform = data_transformer,
        datapoints_metadata=metadata,
        logits_layer='model.128',
        class_names=names,
        logits_transform=partial(logits_transformer, model = model, pooler=pooler, hyp = hyp),
        channel_last=False,
    )
    runner.max_batch_size = batch_size - 1
    runner.run(dataloader)

    # Exclude points that are already part of the training dataset.
    training_df = training_dataset.download_datapoints()
    already_selected_ids = training_df['id'].values.tolist() if len(training_df) > 0 else []
    filters=[{
        'left': 'predictions.model_name',
        'op': '=',
        'right': step_model_name
    },{
        'left': 'id',
        'op': 'not in',
        'right': already_selected_ids
    }]
    # The random miner is now labelled as such; it was previously displayed
    # as 'entropy miner'.
    if method == 'weighted_entropy':
        miner_cls, display_name = WeightedEntropyMiner, 'weighted entropy miner'
    else:
        miner_cls, display_name = RandomMiner, 'random miner'
    miner = miner_cls(
        select_filters=filters,
        size=3,
        display_name=display_name,
        model_name=step_model_name,
    )
    miner.run()

    miner_results = miner.get_results()
    training_dataset.add_datapoints(miner_results)

    ###### label points and begin training ######
