In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Imports
import torch
from utils.data import build_dataset,build_xview_dataset, unwrap_collate_fn
from attrdict import AttrDict
from utils.group_by_aspect_ratio import create_aspect_ratio_groups, GroupedBatchSampler
from utils.fcos import fcos_resnet50_fpn
# from torchvision.models.detection import fcos_resnet50_fpn
from torchvision.models.detection import ssd300_vgg16

import datetime
import time
from tqdm import tqdm

from utils.engine import train_and_eval,eval_model
import torchvision
from pycocotools import mask as coco_mask
from pycocotools.coco import COCO
import math
from lr_schedulers import WarmupWrapper
from torch.optim.lr_scheduler import MultiStepLR

from utils.model import make_custom_object_detection_model_fcos, build_frcnn_model
import matplotlib.pyplot as plt
from train import load_dataset
import numpy as np

from determined.experimental import Determined
from PIL import Image, ImageDraw
# remove warnings
import warnings
warnings.filterwarnings('ignore')
from collections import OrderedDict
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction, predict, get_prediction
from sahi.utils.file import download_from_url
from sahi.utils.cv import read_image
from IPython.display import Image as Imagey
# Set up .detignore file so the checkpoints directory is not packaged into future experiments
!echo checkpoints > .detignore

In [None]:
def define_exp(lr=None,momentum=None,epochs=None):
    '''
    Build a fresh detection model and train/evaluate it with the given hyperparameters.

    Relies on notebook globals created by the data-loading cell:
    ``dataset``, ``data_loader``, ``data_loader_test``, ``device`` and ``cpu_device``.

    Args:
        lr: learning rate passed to SGD.
        momentum: momentum passed to SGD. Nesterov momentum is enabled, which
            torch only accepts together with a positive momentum.
        epochs: forwarded to ``train_and_eval``.

    Returns:
        The ``(losses, model)`` pair returned by ``train_and_eval``.
    '''
    model = build_frcnn_model(dataset.num_classes)
    model.to(device)

    optimizer = torch.optim.SGD(
            model.parameters(),
            lr=lr,
            momentum=momentum,
            weight_decay=1e-4,
            # `nesterov` is a boolean flag; the previous string value only worked because it was truthy.
            nesterov=True,
        )

    scheduler_cls = WarmupWrapper(MultiStepLR)
    scheduler = scheduler_cls(
        'linear',  # warmup schedule
        100,  # warmup_iters
        0.001,  # warmup_ratio
        optimizer,
        [177429, 236572],  # milestones
        0.1,  # gamma
    )
    print("Start training")
    start_time = time.time()

    losses, model = train_and_eval(model,data_loader,data_loader_test,optimizer,scheduler,device,cpu_device,epochs=epochs)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print(f"Training time {total_time_str}")

    # Hand the results back so callers can compare runs instead of losing them.
    return losses, model


def load_determined_state_dict(ckpt):
    '''
    Removes module from state dict keys as determined saves model in DataParallel format:
    https://discuss.pytorch.org/t/solved-keyerror-unexpected-key-module-encoder-embedding-weight-in-state-dict/1686/4

    Args:
        ckpt: loaded checkpoint dict; the weights are read from
            ``ckpt['models_state_dict'][0]``.

    Returns:
        OrderedDict with the same values and order, where a leading ``module.``
        has been stripped from each key that has one. Keys without the prefix
        are kept unchanged (previously their first 7 characters were cut off).
    '''
    prefix = 'module.'
    new_state_dict = OrderedDict()
    for k, v in ckpt['models_state_dict'][0].items():
        # Only strip the wrapper prefix when it is actually present.
        name = k[len(prefix):] if k.startswith(prefix) else k
        new_state_dict[name] = v
    return new_state_dict
def visualize_pred(inv_tensor,res,targets_t):
    '''
    Draw ground-truth boxes (green) and predicted boxes (red) on one image and show it.

    Args:
        inv_tensor: image tensor; it is permuted with (1,2,0) and scaled by 255,
            so it is assumed to be channel-first with values in [0, 1] -- TODO confirm.
        res: dict mapping an image id to a prediction dict with 'boxes',
            'scores' and 'labels'. Only the first entry is drawn.
        targets_t: sequence of target dicts with 'boxes' and 'labels'.
            Only ``targets_t[0]`` is drawn.
    '''
    img = Image.fromarray((255.*inv_tensor.detach().cpu().permute((1,2,0)).numpy()).astype(np.uint8))
    draw = ImageDraw.Draw(img)

    def _draw_boxes(boxes, labels, color):
        # Outline each box and write its integer label just above the top-left corner.
        for b, l in zip(boxes, labels):
            # .cpu() keeps this working when the tensors still live on the GPU
            x,y,x2,y2 = b.detach().cpu().numpy()
            draw.rectangle([x,y,x2,y2],fill=None,outline=color)
            # ImageDraw.text colours text through `fill`; `outline` is not a text option.
            draw.text([x,y-10],"{}".format(l.item()),fill=color)

    # draw ground truth
    print("Num GT Boxes: ",targets_t[0]['boxes'].shape[0])
    _draw_boxes(targets_t[0]['boxes'],targets_t[0]['labels'],(0,255,0))

    # draw predictions (skipped when the model returned nothing)
    if res:
        idx = list(res.keys())[0]
        print("Num Pred Boxes: ",res[idx]['boxes'].shape[0])
        _draw_boxes(res[idx]['boxes'],res[idx]['labels'],(255,0,0))
    else:
        print("Num Pred Boxes: ",0)

    plt.imshow(img)
def predict(model,images_t,targets_t):
    '''
    Run the model on a batch, plot the first image's boxes and return the predictions.

    NOTE: this definition shadows the ``predict`` imported from ``sahi.predict``
    in the imports cell.

    Args:
        model: detection model called as ``model(list_of_images)``; it is
            expected to return one dict of tensors per image.
        images_t: iterable of image tensors.
        targets_t: sequence of target dicts; each must hold a scalar tensor
            under 'image_id'.

    Returns:
        dict mapping image id to that image's output dict, with all tensors on the CPU.
    '''
    cpu_device = torch.device('cpu')
    # Run on whatever device the model lives on; fall back to CUDA (the previous
    # hard-coded choice) for a model without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')
    images_t = [image.to(device) for image in images_t]
    # Inference only: skip autograd bookkeeping to save memory.
    with torch.no_grad():
        outputs = model(images_t)
    outputss = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
    res = {target["image_id"].item(): output for target, output in zip(targets_t, outputss)}
    visualize_pred(images_t[0],res,targets_t)
    return res

<img src="https://raw.githubusercontent.com/determined-ai/determined/master/determined-logo.png" align='right' width=150 />

# Building a Geospatial Detection Model with Determined

<img src="https://www.cis.upenn.edu/~jshi/ped_html/images/PennPed00071_1.png" width=400 />


This notebook will walk through the benefits of building a Deep Learning model with Determined.  We will build an object detection model trained on the [Penn-Fudan Database for Pedestrian Detection and Segmentation](https://www.cis.upenn.edu/~jshi/ped_html/).


# Table of Contents


<font size="3">
<ol>
  <li>What Modeling looks like Today</li>
  <li>Building a model with Determined
    <ol>
      <li>Single GPU training</li>
      <li>Cluster-scale multi-GPU training</li>
      <li>Adaptive hyperparameter search</li>
    </ol>
  </li>
</ol>
</font>

# What modeling looks like without Determined

<font size="4">First let's look at the kind of work modelers do today.  Below, we train a model we found on Github and modified, printing validation set metrics after each epoch.</font>

In [None]:
DEVICE='cuda'
#Data loading code
device = torch.device(DEVICE)
# The name says CPU, and `predict` in this notebook also uses torch.device('cpu');
# previously this was built from DEVICE and therefore pointed at CUDA.
# NOTE(review): presumably train_and_eval moves outputs here for evaluation -- confirm in utils.engine.
cpu_device = torch.device('cpu')
print("Loading data")
TRAIN_DATA_DIR='determined-ai-xview-coco-dataset/train_sliced_no_neg/train_images_300_02/'
VAL_DATA_DIR='determined-ai-xview-coco-dataset/val_sliced_no_neg/val_images_300_02/'

dataset, num_classes, dataset_test,data_loader, data_loader_test= load_dataset(TRAIN_DATA_DIR=TRAIN_DATA_DIR,VAL_DATA_DIR=VAL_DATA_DIR,train_batch_size=8,test_batch_size=8)
print("Create Model")
model = build_frcnn_model(dataset.num_classes)
model.to(device)

optimizer = torch.optim.SGD(
        model.parameters(),
        lr=0.01,
        momentum=0.9,
        weight_decay=1e-4,
        # `nesterov` is a boolean flag; the previous string value only worked because it was truthy.
        nesterov=True,
    )

# Multi-step LR decay wrapped with a linear warmup phase
scheduler_cls = WarmupWrapper(MultiStepLR)
scheduler = scheduler_cls(
    'linear',  # warmup schedule
    100,  # warmup_iters
    0.001,  # warmup_ratio
    optimizer,
    [177429, 236572],  # milestones
    0.1,  # gamma
)


In [None]:
# Train for a single epoch and report the wall-clock duration.
print("Start training")
start_time = time.time()

losses, model = train_and_eval(
    model,
    data_loader,
    data_loader_test,
    optimizer,
    scheduler,
    device,
    cpu_device,
    epochs=1,
)

# Whole seconds only, rendered as H:MM:SS by timedelta
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print(f"Training time {total_time_str}")

<font size="4">We might also roll our own simple hyperparameter tuning:</font>

```python
def hp_grid_search():
    """Naive grid search: train one model per (learning rate, momentum) pair."""
    for lr in np.logspace(-4, -2, num=10):
        for m in np.linspace(0.7, 0.95, num=10):
            print(f"Training model with learning rate {lr} and momentum {m}")
            # define_exp builds a fresh model and optimizer from these values;
            # the earlier call reused the global optimizer, so lr and m had no effect.
            define_exp(lr=float(lr), momentum=float(m), epochs=1)


try:
    hp_grid_search()
except KeyboardInterrupt:
    # The full grid is 100 runs; allow stopping it by hand without a traceback.
    pass
```

# What's Missing?

<font size="4">This approach works in theory -- we could get a good model, save it, and use it for predictions.  But we're missing a lot from the ideal state:</font>
<font size="4">
<ul style="margin-top: 15px">
  <li style="margin-bottom: 10px">Distributed training</li>
  <li style="margin-bottom: 10px">Parallel search</li>
  <li style="margin-bottom: 10px">Intelligent checkpointing</li>
  <li style="margin-bottom: 10px">Interruptibility and fault tolerance</li>
  <li                            >Logging of experiment configurations and results </li>
</ul>
</font>

<font size=6><b>Scaled Experimentation with Determined</b></font>

With less work than setting up a limited random search, you can get started with Determined.

## Our First Experiment

For our first example, we run a simple single-GPU training job with fixed hyperparameters.

<img src="https://raw.githubusercontent.com/determined-ai/public_assets/main/images/StartAnExperiment.png" align=left width=330/>

In [None]:
!det e create const-distributed.yaml ..

And evaluate its performance:

In [None]:
experiment_id = 666

In [None]:
# mAP is a higher-is-better metric, so state the sort direction explicitly instead of
# inheriting `smaller_is_better` from the experiment's searcher configuration.
checkpoint = Determined().get_experiment(experiment_id).top_checkpoint(sort_by='mAP', smaller_is_better=False)
checkpoint.uuid

Note: for this example, you need to download the checkpoint ahead of time.

In [None]:
# Rebuild the architecture, then restore the weights saved by the Determined experiment.
model = build_frcnn_model(num_classes=61)
ckpt = torch.load(
    f'/run/determined/workdir/xview-torchvision-coco/checkpoints/{checkpoint.uuid}/state_dict.pth',
    map_location='cpu',
)
# Strip the `module.` prefix from the saved keys before loading them.
m_ckpt = load_determined_state_dict(ckpt)
model.load_state_dict(m_ckpt)
model.to('cuda')
model.eval()
print("Done Loading")

Let's look at the trained model's predictions:

In [None]:
model.eval()
print("Done")

In [None]:
# 

In [None]:
# Advance to the 10th test batch. Iterating the loader once actually moves forward;
# calling next(iter(loader)) in a loop restarted the loader on every pass.
# If the loader holds fewer than 10 batches, the last one is kept.
for batch_idx, (images_t, targets_t) in enumerate(data_loader_test):
    if batch_idx == 9:
        break

In [None]:
# images_t[0]

In [None]:
result = predict(model,images_t,targets_t)

# Inference on Large Imagery

In [None]:
Imagey("/run/determined/workdir/1065.png")

In [None]:
from pycocotools.coco import COCO
coco = COCO('/run/determined/workdir/val.json')
# Every key is a string (label id -> name); the background entry used an int key
# while all other entries used strings.
cat_mapping = {'0':'background'}
# Labels are shifted by one so that 0 stays reserved for background.
cat_mapping.update({str(int(i['id'])+1):i['name'] for i in coco.cats.values()})

In [None]:
from collections import Counter
from terminaltables import AsciiTable

def report_objects_detected(result):
    '''
    Print an ASCII table with the number of detections per category name.

    result: sahi.PredictionResult
    '''
    # Counter keeps categories in order of first appearance.
    per_category = Counter(
        pred.category.name for pred in result.object_prediction_list
    )
    rows = [('Object Categories','Number of Objects Detected')]
    rows.extend(per_category.items())
    print(AsciiTable(rows).table)


In [None]:
# Wrap the already-loaded torchvision model for SAHI, using the label-id -> name
# mapping built from the COCO annotation file.
detection_model = AutoDetectionModel.from_pretrained(
    model_type='torchvision',
    model=model,
    confidence_threshold=0.5,
    image_size=800,
    device="cuda", # or "cuda:0"
    load_at_init=True,
    category_mapping=cat_mapping
)
# Run sliced prediction on the large image: 320x320 slices with 20% overlap
# in both directions.
result = get_sliced_prediction(
    "/run/determined/workdir/1065.png",
    detection_model,
    slice_height = 320,
    slice_width = 320,
    overlap_height_ratio = 0.2,
    overlap_width_ratio = 0.2,
    verbose=2
)
# Print per-category detection counts, then export the visualisation to the workdir.
report_objects_detected(result)
result.export_visuals(export_dir="/run/determined/workdir/")
# Imagey("/run/determined/workdir/prediction_visual.png")

# Compare to Ground Truth annotations, see strong recall and precision with model predictions

In [None]:
# Load ground truth coco annotations

coco = COCO('/run/determined/workdir/val.json')
# Every key is a string (label id -> name); labels are shifted by one so 0 stays background.
cat_mapping = {'0':'background'}
cat_mapping.update({str(int(i['id'])+1):i['name'] for i in coco.cats.values()})
# coco.imgs[1]
# get specific annotation ids
# NOTE(review): image id 1 is assumed to correspond to 1065.png -- confirm against val.json.
ann_ids = coco.getAnnIds([1])
anns = [coco.anns[a] for a in ann_ids]
from PIL import Image, ImageDraw
# im = Image.open('/run/determined/workdir/prediction_visual.png')
im = Image.open('/run/determined/workdir/1065.png')
draw = ImageDraw.Draw(im)
names_gt = []
for a in anns:
    # COCO boxes are stored as [x, y, width, height]
    x,y,w,h = a['bbox']
    l = a['category_id']
    # Annotations carry the raw COCO category id, while cat_mapping is keyed by id+1,
    # so look the name up in the COCO category table to avoid an off-by-one label.
    name = coco.cats[l]['name']
    names_gt.append(name)
    x2 = x+w
    y2 = y+h
    # print(x,y,x2,y2,l)
    draw.rectangle([x,y,x2,y2],outline=(255,255,255),fill=None,width=2)
    draw.text([x,y-10],"{}:{}".format(l,name))


In [None]:
# Table header first, then one (category, count) row per ground-truth category
resulting_objects_detected = [
    ('Object Categories','Number of GT Objects in Image'),
    *Counter(names_gt).items(),
]
print(AsciiTable(resulting_objects_detected).table)

In [None]:
im


In [None]:
# predict(model, 'test.jpg', 0.5)

## Scaling up to Distributed Training

Determined makes it trivial to move from single-GPU to multi-GPU (and even multi-node) training. Here we'll simply modify the config above to request 8 GPUs instead of 1, and increase the global batch size to increase the data throughput.

In [None]:
!cat const-distributed.yaml

In [None]:
!det experiment create distributed.yaml .

<img src="https://raw.githubusercontent.com/determined-ai/public_assets/main/images/4GPUexperiment.png" align=left width=530 />

## Run Distributed Hyperparameter Tuning

By simply building a config file and adapting our code to meet the Determined trial interface, we can conduct a sophisticated hyperparameter search.  Instructions for how to configure different types of experiments [can be found in the Determined documentation.](https://docs.determined.ai/latest/how-to/index.html)

In [None]:
# !cat search.yaml# Andrew(11/2021):TODO

## Create your Experiment

Now that you've described your experiment, you'll simply need to use the command line interface to submit it to the Determined Cluster.  

In [None]:
# !det experiment create search.yaml . # Andrew(11/2021):TODO

<img src="https://raw.githubusercontent.com/determined-ai/public_assets/main/images/12GPUexperiment.png" align=left width=800 />

# Model Registry

After training, we'll want to actually use our model in some sort of system.  Determined provides a model registry to version your trained models, making them easy to retrieve for inference.

In [None]:
# Experiment whose best checkpoint gets registered, and the registry entry name.
experiment_id = 666
MODEL_NAME = "satellite-imagery-detection"

In [None]:
# Get the best checkpoint from the training
checkpoint = Determined().get_experiment(experiment_id).top_checkpoint()

In [None]:
# NOTE(review): `check_model` is not defined or imported in the visible part of this
# notebook -- confirm where it comes from, otherwise this cell raises NameError.
# This also rebinds `model`, which held the torch detection model in earlier cells.
model = check_model(MODEL_NAME)

In [None]:
model.register_version(checkpoint.uuid)

# Inference

Once your model is versioned in the model registry, using that model for inference is straightforward:

In [None]:
# Retrieve latest checkpoint for a given model name
latest_version = model.get_version()

In [None]:
# Load the model checkpoint into memory
inference_model = latest_version.checkpoint.load().model

In [None]:
# Run inference as before. The notebook's predict() takes a batch of image tensors
# and their targets, not a file path.
inference_model.to(device).eval()
inference_result = predict(inference_model, images_t, targets_t)