In [21]:
import torch
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
else:
    print ("MPS device not found.")

tensor([1.], device='mps:0')


In [27]:
import urllib
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
try: urllib.URLopener().retrieve(url, filename)
except: urllib.request.urlretrieve(url, filename)

In [36]:
# sample execution (requires torchvision)
from PIL import Image
from torchvision import transforms

model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)
# model.to(mps_device)
model.eval()

input_image = Image.open(filename)
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

# move the input and model to GPU for speed if available
if torch.backends.mps.is_available():
    input_batch = input_batch.to(mps_device)
    model.to(mps_device)

with torch.no_grad():
    output = model(input_batch)
# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
print(output[0])
# The output has unnormalized scores. To get probabilities, you can run a softmax on it.
probabilities = torch.nn.functional.softmax(output[0], dim=0)
print(probabilities)

Using cache found in /Users/aryanindarapu/.cache/torch/hub/pytorch_vision_v0.10.0


tensor([-1.6531e+00, -4.3505e+00, -1.8172e+00, -4.2143e+00, -3.1914e+00,
         3.4164e-01,  1.0877e+00,  5.9350e+00,  8.0425e+00, -7.0242e-01,
        -9.4130e-01, -6.0822e-01, -2.4097e-01, -1.9946e+00, -1.5288e+00,
        -3.2656e+00, -5.5800e-01,  1.0524e+00,  1.9211e-01, -4.7202e+00,
        -3.3880e+00,  4.3048e+00, -1.0997e+00,  4.6132e+00, -5.7418e-03,
        -5.3437e+00, -4.7378e+00, -3.3974e+00, -4.1287e+00,  2.9064e-01,
        -3.2955e+00, -6.7051e+00, -4.7232e+00, -4.1778e+00, -2.1859e+00,
        -2.9469e+00,  3.0465e+00, -3.5882e+00, -6.3890e+00, -4.4203e+00,
        -3.3685e+00, -5.0983e+00, -4.9006e+00, -5.5235e+00, -3.7234e+00,
        -4.0204e+00,  2.6998e-01, -4.4702e+00, -5.6617e+00, -5.4880e+00,
        -2.6801e+00, -3.2129e+00, -1.6294e+00, -5.2289e+00, -2.7495e+00,
        -2.6286e+00, -1.8206e+00, -2.3196e+00, -5.2806e+00, -3.7652e+00,
        -3.0987e+00, -4.1421e+00, -5.2531e+00, -4.6504e+00, -3.5815e+00,
        -4.0189e+00, -4.0008e+00, -4.5512e+00, -3.2

In [47]:
import numpy as np
import matplotlib.pyplot as plt

from supervised_train import train
from unet_model import UNet
from dataset import *
from quantitative_results import *

In [32]:
config = {
  'seed': 1, # random seed
  'input_shape': (240, 360), # size of input images (height, width)
  'output_shape': (240, 360), # size of output target
  'n_train': 5, # number of training images per view
  'batch_size': 2, # number of images per optimization step
  'lr': 1e-2, # learning rate
  'n_epochs': 200, # number of passes through training data
  # optionally perform random cropping, specify integer < max(H, W) to use cropping for training
  'crop_size': 'None',
  # fraction of training epochs to lower learning rate by 1/10, e.g. [0.6, 0.8]
  # lowers learning rate at epochs 120 and 160 if we have 200 training epochs
  'milestones': [0.8],
  'views': ['Almond at Washington North'], # list of views for training
  'images_path': 'Annotations-Images', # path to RGB images
  'gt_path': 'Annotations-GT', # path to ground-truth segmentation masks
  'log_path': 'Almond at Washington North' # path to directory you make for saving results
}

In [55]:
# set up model
n_channels = 3 # RGB images
model = UNet(n_channels)

# set up datasets
seed = config['seed']
np.random.seed(seed) 
views = config['views']
logging_path = config['log_path']
if not os.path.exists(logging_path):
    os.mkdir(logging_path)

# make initial datasets for each view
print('Loading datasets...')
datasets = []
for v in views:
    datasets.append(BFSEvaluationDataset(config['images_path'],
                                         config['gt_path'],
                                         [v],
                                         config['input_shape'],
                                         config['output_shape']))
# split each view into training and validation sets
training_datasets, validation_datasets = [], []
n_train = config['n_train'] # number of training images per view, remainder are validation
save_train_indices = []
save_val_indices = []
for d in datasets:
    train_indices = np.random.choice(np.arange(len(d)), size=n_train, replace=False)
    val_indices = np.array([i for i in range(len(d)) if i not in train_indices])
    save_train_indices.append([int(i) for i in train_indices])
    save_val_indices.append([int(i) for i in val_indices])
    training_datasets.append((d, train_indices))
    validation_datasets.append((d, val_indices))
# combine into one training, one validation dataset
config['train_indices'] = save_train_indices
config['validation_indices'] = save_val_indices
print(training_datasets)
train_dataset = BFSConcatDataset(training_datasets)
val_dataset = BFSConcatDataset(validation_datasets) 
with open(os.path.join(logging_path, 'config.json'), 'w') as f:
    json.dump(config, f)


model, stat_dict = train(model, train_dataset, val_dataset, config)  
# saving and clean-up
torch.save(model, os.path.join(logging_path, 'model.pt'))

Loading datasets...
[(<dataset.BFSEvaluationDataset object at 0x29b4d43d0>, array([27, 35, 40, 38,  2]))]


  0%|          | 0/200 [00:12<?, ?it/s]


KeyboardInterrupt: 

In [None]:
plt.figure(figsize=(10, 10))
plt.subplot(221)
plt.plot(stat_dict['Training']['Precision'])
plt.xlabel('Epoch')
plt.ylabel('Precision')
plt.title('Training precision')
plt.subplot(222)
plt.plot(stat_dict['Training']['Recall'])
plt.xlabel('Epoch')
plt.ylabel('Recall')
plt.title('Training recall')
plt.subplot(223)
plt.plot(stat_dict['Training']['F-measure'])
plt.xlabel('Epoch')
plt.ylabel('F-measure')
plt.title('Training F-measure')
plt.subplot(224)
plt.plot(stat_dict['Training']['Blob recall'])
plt.xlabel('Epoch')
plt.ylabel('Blob recall')
plt.title('Training blob recall')

: 

In [None]:
plt.figure(figsize=(10, 10))
plt.subplot(221)
plt.plot(stat_dict['Validation']['Precision'])
plt.xlabel('Epoch')
plt.ylabel('Precision')
plt.title('Validation precision')
plt.subplot(222)
plt.plot(stat_dict['Validation']['Recall'])
plt.xlabel('Epoch')
plt.ylabel('Recall')
plt.title('Validation recall')
plt.subplot(223)
plt.plot(stat_dict['Validation']['F-measure'])
plt.xlabel('Epoch')
plt.ylabel('F-measure')
plt.title('Validation F-measure')
plt.subplot(224)
plt.plot(stat_dict['Validation']['Blob recall'])
plt.xlabel('Epoch')
plt.ylabel('Blob recall')
plt.title('Validation blob recall')

: 

## Evaluation

In [50]:
from quantitative_results import *

def evaluate_view(model, config, view):
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    thresholds = [0.5] # only checking one foreground probability threshold
    test_dataset = BFSEvaluationDataset(config['images_path'], config['gt_path'], [test_view], config['input_shape'], config['output_shape'])
    test_precision, test_recall, test_f_measure, test_blob_recall = model_metrics(test_dataset, model, thresholds, device)
    return test_precision[0], test_recall[0], test_f_measure[0], test_blob_recall[0]

In [51]:
# check the Annotations-GT or Annotations-Images folders for available scenes
test_views = ['Almond at Washington East', 'Buffalo Grove at Deerfield North', 'Deerfield at Saunders South', 'Delany at Sunset North']
for test_view in test_views:
    p, r, f, b = evaluate_view(model, config, test_view)
    print('View: {}'.format(test_view))
    print('Precision = {:.3f} | Recall = {:.3f} | F-measure = {:.3f} | Blob recall = {:.3f}'.format(p, r, f, b))
    print('')

View: Almond at Washington East
Precision = 0.000 | Recall = 0.000 | F-measure = 0.000 | Blob recall = 0.000



KeyboardInterrupt: 

In [44]:
def visualize_predictions(model, config, view, num_examples=5, indices=None):
    # you may optionally specify the image indices within the dataset you would like to use
    # otherwise, num_examples random images will be sampled from the dataset
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    dataset = test_dataset = BFSEvaluationDataset(config['images_path'], config['gt_path'], [view], config['input_shape'], config['output_shape'])
    if indices is not None:
        num_images = len(indices)
        image_indices = indices
    else:
        num_images = num_examples
        image_indices = np.random.choice(np.arange(len(dataset)), size=num_images, replace=False)
    with torch.no_grad():
        plt.figure(figsize=(10, 8))
        for n in range(num_images):
            image, gt = dataset[n]
            image_numpy = image.permute(1, 2, 0).numpy()
            image_tensor = image.unsqueeze(0).to(device)
            prediction_tensor = model(image_tensor)
            prediction_numpy = prediction_tensor.squeeze(0, 1).cpu().numpy()
            gt_numpy = gt.squeeze(0).numpy()
            plt.subplot(num_images, 3, 3*n+1)
            plt.imshow(image_numpy)
            plt.axis(False)
            plt.title('Original image')
            plt.subplot(num_images, 3, 3*n+2)
            plt.imshow(gt_numpy, 'gray')
            plt.axis(False)
            plt.title('Ground-truth image')
            plt.subplot(num_images, 3, 3*n+3)
            plt.imshow(prediction_numpy, vmin=0, vmax=1, cmap='gray')
            plt.axis(False)
            plt.title('Model prediction image')
        plt.tight_layout()

In [None]:
view = 'Almond at Washington East'
num_examples = 5
visualize_predictions(model, config, view, num_examples=num_examples)