In [None]:
import json
import os
from pathlib import Path
import time
import copy

import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from fastai.dataset import open_image
import json
from PIL import ImageDraw, ImageFont
import matplotlib.pyplot as plt
from matplotlib import patches, patheffects
import cv2
from tqdm import tqdm

In [None]:
# params
# NOTE(review): BATCH_SIZE and NUM_WORKERS are assigned again in the DataLoader
# cell further down, so the values set here are not the ones the loaders use.
SIZE = 224  # side length passed to cv2.resize in BiggestObjectDataset
EPOCHS = 5  # number of passes made by each training loop
BATCH_SIZE = 32
NUM_WORKERS = 4
SHOW_IMAGES = False  # not referenced by any cell visible in this notebook

# static
# Keys used to index the parsed annotation JSON.
IMAGES = 'images'
ANNOTATIONS = 'annotations'
CATEGORIES = 'categories'
ID = 'id'
NAME = 'name'
IMAGE_ID = 'image_id'
BBOX = 'bbox'
CATEGORY_ID = 'category_id'
FILE_NAME = 'file_name'
# Column names of the DataFrames built below (together with IMAGE_ID).
IMAGE = 'image'
CATEGORY = 'category'
# Split names; the loops below spell 'train' / 'val' as literals instead.
TRAIN = 'train'
VAL = 'val'
TEST = 'test'

In [None]:
# Use the first CUDA device when torch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print('device:', device)

In [None]:
# Shell escape: list the dataset directory that PATH points at in the next cell.
!ls ../input/pascal/pascal

In [None]:
# Root directory that the annotation JSON files are read from below.
PATH = Path('../input/pascal/pascal')
# Display the directory entries.
[entry for entry in PATH.iterdir()]

In [None]:
def _load_json(path):
    """Parse the JSON file at ``path`` and close the file handle afterwards.

    Args:
        path: a ``pathlib.Path`` pointing at a JSON document.

    Returns:
        The decoded JSON value.
    """
    # The context manager guarantees the handle is released even if parsing fails.
    with path.open() as f:
        return json.load(f)


# One annotation file per split.
train_data = _load_json(PATH/'pascal_train2007.json')
val_data = _load_json(PATH/'pascal_val2007.json')
test_data = _load_json(PATH/'pascal_test2007.json')

# Show the top-level keys of each split to confirm they share a layout.
print('train:', train_data.keys())
print('val:', val_data.keys())
print('test:', test_data.keys())

In [None]:
# Peek at the first training annotation record (shown as a one-element slice).
train_data[ANNOTATIONS][:1]

In [None]:
# Peek at the first two training image records.
train_data[IMAGES][:2]

In [None]:
# Number of category records in the training annotations.
len(train_data[CATEGORIES])

In [None]:
# First category record, to see which fields it carries.
next(iter(train_data[CATEGORIES]))

In [None]:
# Lookup table from category id to category name, built from the training split.
categories = dict((cat[ID], cat[NAME]) for cat in train_data[CATEGORIES])

# Build the same table for val and test and check that all three splits agree.
val_categories = dict((cat[ID], cat[NAME]) for cat in val_data[CATEGORIES])
test_categories = dict((cat[ID], cat[NAME]) for cat in test_data[CATEGORIES])
assert categories == val_categories == test_categories

print('category count:', len(categories))
print(categories)

In [None]:
# Directory that image file names from the annotation records are joined onto.
IMAGE_PATH = PATH / 'JPEGImages'
# Show two entries to confirm the directory can be read.
list(IMAGE_PATH.iterdir())[:2]

Helper functions that build the data structures behind the `pandas.DataFrame` fed to the torch `Dataset`

In [None]:
def get_filenames(data):
    """Map every image id in ``data`` to its file name.

    Args:
        data: parsed annotation dict; ``data[IMAGES]`` must be an iterable of
            records that each carry the ``ID`` and ``FILE_NAME`` keys.

    Returns:
        dict of ``{image id: file name}``.
    """
    filenames = {img[ID]: img[FILE_NAME] for img in data[IMAGES]}
    # Label the debug output with this function's actual name.
    print('get_filenames')
    # The None default keeps the preview from raising StopIteration on an empty split.
    print('length:', len(filenames), 'next item:', next(iter(filenames.items()), None))
    return filenames

In [None]:
def get_image_ids(data):
    """Collect the id of every image record in ``data``, in record order.

    Args:
        data: parsed annotation dict; ``data[IMAGES]`` must be an iterable of
            records that each carry the ``ID`` key.

    Returns:
        list of image ids.
    """
    image_ids = [img[ID] for img in data[IMAGES]]
    print('get_image_ids')
    # Guard the preview so an empty split prints None instead of raising IndexError.
    print('length:', len(image_ids), 'next item:', image_ids[0] if image_ids else None)
    return image_ids

In [None]:
def pascal_bb_hw(bb):
    """Return everything after the first two entries of a box.

    Assuming boxes are ``[x, y, width, height]`` (the layout ``draw_rect``
    hands to ``patches.Rectangle``), the result is ``[width, height]`` --
    width first, despite the ``hw`` in the name.
    """
    size = bb[2:]
    return size

# Try the helper on the box of the first training annotation.
bbox = train_data[ANNOTATIONS][0][BBOX]
pascal_bb_hw(bbox)

In [None]:
def get_image_w_area(data, image_ids):
    """Pick the largest annotated box for every image.

    Args:
        data: parsed annotation dict; each record in ``data[ANNOTATIONS]``
            must carry the ``BBOX``, ``CATEGORY_ID`` and ``IMAGE_ID`` keys.
        image_ids: ids of the images to report on.

    Returns:
        dict mapping each id in ``image_ids`` to ``(category_id, area)`` of
        its largest box, or to ``None`` when the image has no annotation.
        When two boxes have the same area, the one seen first is kept.

    Raises:
        KeyError: if an annotation refers to an image id not in ``image_ids``.
    """
    # Values are None or tuples (both immutable), so no defensive copy is needed.
    image_w_area = {i: None for i in image_ids}
    for ann in data[ANNOTATIONS]:
        image_id = ann[IMAGE_ID]
        # pascal_bb_hw yields the two trailing box entries; only their product
        # is used, so their order does not matter here.
        side_a, side_b = pascal_bb_hw(ann[BBOX])
        new_area = side_a * side_b
        best = image_w_area[image_id]
        # Strict '>' keeps the earlier annotation on ties.
        if best is None or new_area > best[1]:
            image_w_area[image_id] = (ann[CATEGORY_ID], new_area)
    print('get_image_w_area')
    # The None default keeps the preview from raising StopIteration on empty input.
    print('length:', len(image_w_area), 'next item:', next(iter(image_w_area.items()), None))
    return image_w_area

train data structs

In [None]:
# Training split: id -> file name, list of ids, id -> (category_id, area) of the largest box.
train_filenames = get_filenames(train_data)
train_image_ids = get_image_ids(train_data)
train_image_w_area = get_image_w_area(train_data, train_image_ids)

val data structs

In [None]:
# Validation split: id -> file name, list of ids, id -> (category_id, area) of the largest box.
val_filenames = get_filenames(val_data)
val_image_ids = get_image_ids(val_data)
val_image_w_area = get_image_w_area(val_data, val_image_ids)

test data structs

In [None]:
# Test split: id -> file name, list of ids, id -> (category_id, area) of the largest box.
test_filenames = get_filenames(test_data)
test_image_ids = get_image_ids(test_data)
test_image_w_area = get_image_w_area(test_data, test_image_ids)

train data structs (Legacy)

In [None]:
# Legacy inline rebuild of the training id -> file name mapping.
train_filenames = dict((img[ID], img[FILE_NAME]) for img in train_data[IMAGES])
print('length:', len(train_filenames))
# Keep the first (id, file name) pair around for the single-image demo cells below.
image1_id, image1_fn = next(iter(train_filenames.items()))
image1_id, image1_fn

In [None]:
# Legacy inline rebuild of the list of training image ids.
train_image_ids = [img[ID] for img in train_data[IMAGES]]
print('length:', len(train_image_ids))
# Preview the first BATCH_SIZE ids.
train_image_ids[:BATCH_SIZE]

In [None]:
# Display the image directory path.
IMAGE_PATH

In [None]:
# Full path of the first training image.
image1_path = IMAGE_PATH/image1_fn
image1_path

In [None]:
# Same path as a plain string, the form handed to open_image below.
str(image1_path)

In [None]:
# Load the first training image with fastai's open_image and check the returned type.
im = open_image(str(IMAGE_PATH/image1_fn))
print(type(im))

In [None]:
# Dimensions of the loaded image.
im.shape

In [None]:
# Total number of annotation records in the training split.
len(train_data[ANNOTATIONS])

In [None]:
# get the biggest object label per image

In [None]:
# Full record of the first training annotation.
train_data[ANNOTATIONS][0]

In [None]:
# Box of the first training annotation, reused by the conversion demos below.
bbox = train_data[ANNOTATIONS][0][BBOX]
bbox

In [None]:
def fastai_bb(bb):
    """Convert an ``[x, y, width, height]`` box to inclusive corners.

    Returns:
        ``np.array([y0, x0, y1, x1])`` where ``y1 = height + y0 - 1`` and
        ``x1 = width + x0 - 1``.
    """
    x0, y0, width, height = bb[0], bb[1], bb[2], bb[3]
    # The "- 1" makes the far corner inclusive.
    y1 = height + y0 - 1
    x1 = width + x0 - 1
    return np.array([y0, x0, y1, x1])

# Compare the original box with its converted corner form.
print(bbox)
print(fastai_bb(bbox))

In [None]:
# Keep the converted box for the size helper demo below.
fbb = fastai_bb(bbox)
fbb

In [None]:
def fastai_bb_hw(bb):
    """Return the two inclusive side lengths of a corner-format box.

    The first value is ``bb[3] - bb[1] + 1`` and the second is
    ``bb[2] - bb[0] + 1``.  For boxes produced by ``fastai_bb``
    (``[y0, x0, y1, x1]``) that is ``[width, height]`` -- width first,
    despite the ``hw`` in the name -- the same order ``pascal_bb_hw`` gives
    for the original box.
    """
    first_side = bb[3] - bb[1] + 1
    second_side = bb[2] - bb[0] + 1
    return [first_side, second_side]

# Side lengths recovered from the converted box.
fastai_bb_hw(fbb)

In [None]:
def pascal_bb_hw(bb):
    """Return everything after the first two entries of a box.

    This repeats the definition of the same name given earlier in the
    notebook.  Assuming boxes are ``[x, y, width, height]``, the result is
    ``[width, height]`` -- width first, despite the ``hw`` in the name.
    """
    size = bb[2:]
    return size

# Try the helper on the box of the first training annotation.
bbox = train_data[ANNOTATIONS][0][BBOX]
pascal_bb_hw(bbox)

show image training example

In [None]:
def show_img(im, figsize=None, ax=None):
    """Draw ``im`` on a matplotlib axes with both axis rulers hidden.

    Args:
        im: image data accepted by ``ax.imshow``.
        figsize: figure size forwarded to ``plt.subplots``; only used when a
            new figure has to be created.
        ax: axes to draw on; a new figure and axes are created when ``None``.

    Returns:
        The axes the image was drawn on.
    """
    # Compare against None explicitly so a caller-supplied axes is never
    # replaced just because it happens to evaluate as falsy.
    if ax is None:
        _, ax = plt.subplots(figsize=figsize)
    ax.imshow(im)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    return ax

In [None]:
def draw_rect(ax, b):
    """Add an unfilled white rectangle for box ``b`` to ``ax`` and outline it.

    ``b[:2]`` is passed as the rectangle's anchor point and the last two
    entries of ``b`` as its following positional size arguments.
    """
    anchor = b[:2]
    size = b[-2:]
    rect = patches.Rectangle(anchor, *size, fill=False, edgecolor='white', lw=2)
    # draw_outline adds a black stroke so the white edge stays visible.
    draw_outline(ax.add_patch(rect), 4)

In [None]:
def draw_outline(o, lw):
    """Give artist ``o`` a black stroke of width ``lw`` followed by its normal rendering."""
    stroke = patheffects.Stroke(linewidth=lw, foreground='black')
    o.set_path_effects([stroke, patheffects.Normal()])

In [None]:
def draw_text(ax, xy, txt, sz=14):
    """Write ``txt`` on ``ax`` at position ``xy`` in bold white with a thin outline.

    Args:
        ax: axes providing a ``text`` method.
        xy: position, unpacked into the leading positional arguments of ``ax.text``.
        txt: string to draw.
        sz: font size.
    """
    style = dict(verticalalignment='top', color='white', fontsize=sz, weight='bold')
    label = ax.text(*xy, txt, **style)
    draw_outline(label, 1)

In [None]:
# Draw the first training image, then overlay the box and category name
# taken from the first annotation record.
ax = show_img(im)
image1_ann = train_data[ANNOTATIONS][0]
b = image1_ann[BBOX]
print(b)
draw_rect(ax, b)
# The label is anchored at the first two box entries, the same point draw_rect uses.
draw_text(ax, b[:2], categories[image1_ann[CATEGORY_ID]])

Pandas DataFrames

In [None]:
# TRAIN - create a Pandas dataframe for: image_id, filename, category
train_df = pd.DataFrame({
    IMAGE_ID: image_id,
    IMAGE: str(IMAGE_PATH/image_fn),
    CATEGORY: train_image_w_area[image_id][0]
} for image_id, image_fn in train_filenames.items())

print('count:', len(train_df))
print(train_df.iloc[0])
train_df.head()

In [None]:
# VAL - create a Pandas dataframe for: image_id, filename, category
val_df = pd.DataFrame({
    IMAGE_ID: image_id,
    IMAGE: str(IMAGE_PATH/image_fn),
    CATEGORY: val_image_w_area[image_id][0]
} for image_id, image_fn in val_filenames.items())

print('count:', len(val_df))
print(val_df.iloc[0])
val_df.head()

In [None]:
# NOTE: won't work in a Kaggle Kernel because the file system is read-only
# BIGGEST_OBJECT_CSV = '../input/pascal/pascal/tmp/biggest-object.csv'
# train_df.to_csv(BIGGEST_OBJECT_CSV, index=False)

subclass Dataset

In [None]:
class BiggestObjectDataset(Dataset):
    """Torch dataset yielding ``(image, category)`` pairs from a DataFrame.

    Each row of ``df`` must provide an image path in the ``IMAGE`` column and
    a label in the ``CATEGORY`` column.
    """

    def __init__(self, df):
        self.df = df

    def __len__(self):
        # One sample per DataFrame row.
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image of row ``idx``, resize it and move its last axis first."""
        row = self.df.iloc[idx]
        pixels = open_image(row[IMAGE])
        # Resize to a SIZE x SIZE square.
        pixels = cv2.resize(pixels, (SIZE, SIZE))
        # Axis order (2, 0, 1): the last axis becomes the first (channel-first layout).
        image = np.transpose(pixels, (2, 0, 1))
        return image, row[CATEGORY]
    
# Build the training dataset and pull one sample as a smoke test.
dataset = BiggestObjectDataset(train_df)
inputs, label = dataset[0]
print('label:', label, 'shape:', inputs.shape)

# DataLoader

In [None]:
# NOTE(review): these two assignments replace the BATCH_SIZE / NUM_WORKERS values
# set in the params cell at the top; everything below uses the values given here.
BATCH_SIZE = 64
NUM_WORKERS = 0

# Shuffled loader over the training dataset.
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE,
                        shuffle=True, num_workers=NUM_WORKERS)

# Fetch a single batch to inspect in the next cells.
batch_inputs, batch_labels = next(iter(dataloader))

In [None]:
# Size of one collated input batch.
batch_inputs.size()

In [None]:
# Category labels of the sampled batch.
batch_labels

In [None]:
# Validation dataset and loader, configured like the training loader.
val_dataset = BiggestObjectDataset(val_df)

# NOTE(review): shuffle=True is not needed for evaluation -- confirm it is intended.
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                            shuffle=True, num_workers=NUM_WORKERS)

In [None]:
# Loaders keyed by phase name.
dataloaders = dict(train=dataloader, val=val_dataloader)

In [None]:
# Number of samples per phase.
dataset_sizes = dict(train=len(dataset), val=len(val_dataset))

dataset_sizes

In [None]:
# train the model

In [None]:
# Size of the classifier output: one unit per category.
NUM_CATEGORIES = len(categories)
NUM_CATEGORIES

In [None]:
# Start from a pretrained ResNet-18 and use it as a fixed feature extractor.
model_ft = models.resnet18(pretrained=True)

# Freeze every pretrained weight so no gradients are computed for the backbone.
for param in model_ft.parameters():
    param.requires_grad = False

num_ftrs = model_ft.fc.in_features
print(num_ftrs, NUM_CATEGORIES)

# Replace the final layer with a fresh head sized for our categories.  It is
# created after the freeze loop, so its parameters are trainable.
model_ft.fc = nn.Linear(num_ftrs, NUM_CATEGORIES)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Only the new head is trainable, so hand just its parameters to the optimizer
# instead of the full (mostly frozen) parameter list.
optimizer = optim.SGD(model_ft.fc.parameters(), lr=0.01, momentum=0.9)

In [None]:
# epoch - w/ train

In [None]:
# Per-epoch history for the train-only loop.
epoch_losses = []
epoch_accuracies = []

# Select training mode explicitly: the train/val cell below leaves the model in
# eval mode, so re-running this cell would otherwise train in the wrong mode.
model_ft.train()

for epoch in tqdm(range(EPOCHS)):
    print('epoch:', epoch)
    running_loss = 0.0
    running_correct = 0

    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # clear gradients
        optimizer.zero_grad()

        # forward pass
        outputs = model_ft(inputs)
        _, preds = torch.max(outputs, dim=1)
        # Shift the labels down by one so they line up with the model's 0-based outputs.
        labels_0_indexed = labels - 1
        loss = criterion(outputs, labels_0_indexed)

        # backwards pass
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; scale by batch size to sum per-sample loss.
        running_loss += loss.item() * inputs.size(0)
        # Accumulate as a plain int so the division below works even if the
        # loader yields no batches.
        running_correct += torch.sum(preds == labels_0_indexed).item()

    epoch_loss = running_loss / len(dataset)
    epoch_acc = running_correct / len(dataset)
    epoch_losses.append(epoch_loss)
    epoch_accuracies.append(epoch_acc)
    print('loss:', epoch_loss, 'acc:', epoch_acc)

In [None]:
# epoch - w/ train and val

In [None]:
# Latest per-phase metrics (overwritten every epoch).
epoch_loss = {'train': np.inf, 'val': np.inf}
epoch_acc = {'train': 0, 'val': 0}

# Per-phase history, one entry per epoch.
epoch_losses = {'train': [], 'val': []}
epoch_accuracies = {'train': [], 'val': []}

for epoch in tqdm(range(EPOCHS)):
    print('epoch:', epoch)

    for phase in ['train', 'val']:
        if phase == 'train':
            model_ft.train()
        else:
            model_ft.eval()

        running_loss = 0.0
        running_correct = 0

        # Iterate the loader that belongs to the current phase; the validation
        # phase must not be fed the training data.
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # clear gradients
            optimizer.zero_grad()

            # Gradients are only tracked while training.
            with torch.set_grad_enabled(phase == 'train'):
                # forward pass
                outputs = model_ft(inputs)
                _, preds = torch.max(outputs, dim=1)
                # Shift the labels down by one so they line up with the model's 0-based outputs.
                labels_0_indexed = labels - 1
                loss = criterion(outputs, labels_0_indexed)

                # backwards pass
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # statistics
            running_loss += loss.item() * inputs.size(0)
            running_correct += torch.sum(preds == labels_0_indexed).item()

        # Normalise by the size of the dataset used in this phase.
        epoch_acc[phase] = running_correct / dataset_sizes[phase]
        epoch_loss[phase] = running_loss / dataset_sizes[phase]
        # running sums
        epoch_losses[phase].append(epoch_loss[phase])
        epoch_accuracies[phase].append(epoch_acc[phase])
    print('phase', phase, 'train loss:', epoch_loss['train'], 'train acc:', epoch_acc['train'], 'val loss:', epoch_loss['val'], 'val acc:', epoch_acc['val'])

Graph loss and accuracy

In [None]:
# Per-epoch loss history recorded by the last training cell that ran.
epoch_losses

In [None]:
# Per-epoch accuracy history recorded by the last training cell that ran.
epoch_accuracies

Plot training and validation loss and accuracy

In [None]:
# Loss per epoch for both phases; labels and a legend make the curves distinguishable.
plt.plot(epoch_losses['train'], label='train')
plt.plot(epoch_losses['val'], label='val')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Loss per epoch')
plt.legend();

In [None]:
# Accuracy per epoch for both phases; labels and a legend make the curves distinguishable.
plt.plot(epoch_accuracies['train'], label='train')
plt.plot(epoch_accuracies['val'], label='val')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Accuracy per epoch')
plt.legend();

show predictions

In [None]:
# Show the last batch left over from the training loop, each image captioned
# with the category the model predicted for it.
preds_count = len(preds)
# squeeze=False keeps `axes` a 2-D array even when the batch holds a single
# image, so `.flat` is always available.
fig, axes = plt.subplots(1, preds_count, figsize=(16, 16), squeeze=False)
for i, ax in enumerate(axes.flat):
    # Copy the tensor to host memory before converting it to numpy (required
    # when the batch is on a CUDA device), then move axis 0 to the end for imshow.
    im = inputs[i].detach().cpu().numpy().transpose(1, 2, 0)
    ax = show_img(im, ax=ax)
    # Predictions are 0-based; add one to get back to the category ids.
    draw_text(ax, (0,0), categories[preds[i].item()+1])