In [1]:
import numpy as np
import pandas as pd
import pickle
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from skimage import io, transform
from torchvision import transforms, utils
import torch.optim as optim
import time
import torch.nn.functional as F

In [2]:
# Root folder for the image-list files and the data used by the dataset class.
data_path = "../data/"

# Load the pickled table of samples from the current working directory.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load pickles that were produced by this project.
with open("whole_data", "rb") as whole_data_file:
    whole_data = pickle.load(whole_data_file)

In [3]:
# Fix the NumPy RNG so the subsamples drawn below are reproducible.
np.random.seed(seed=1)

# Image ids belonging to each split, read from the split list files.
train_imgs = np.loadtxt(data_path + "referit_train_imlist.txt").astype("int")
valid_imgs = np.loadtxt(data_path + "referit_val_imlist.txt").astype("int")
test_imgs = np.loadtxt(data_path + "referit_test_imlist.txt").astype("int")

# Integer image id of every row, converted once and reused for all three splits.
img_column = whole_data["img"].values.astype("int")

# Training split: boolean row mask, all matching row positions, and a fixed-size
# random subsample of those positions (drawn without replacement).
# For a quick smoke test, shrink the sample size (e.g. 64).
train_ids = np.isin(img_column, train_imgs)
all_tr_ids = np.argwhere(train_ids).ravel()
tr_ids = np.random.choice(all_tr_ids, 64000, replace = False)

# Validation split: only a random subsample of the matching positions is kept.
# For a quick smoke test, shrink the sample size (e.g. 32).
valid_ids = np.isin(img_column, valid_imgs)
vld_ids = np.argwhere(valid_ids).ravel()
vld_ids = np.random.choice(vld_ids, 6400, replace = False)

# Test split: every matching row position is kept.
test_ids = np.isin(img_column, test_imgs)
tst_ids = np.argwhere(test_ids).ravel()

In [4]:
# Number of dataset rows whose image id appears in the training image list.
np.isin(whole_data["img"].values.astype("int"), train_imgs).sum()

595444

In [5]:
# Baseline error: mean absolute deviation of the sampled validation IoUs from
# the mean IoU of the sampled training rows (i.e. the error obtained by always
# predicting the training mean).
(whole_data['IoU'].iloc[vld_ids] - whole_data['IoU'].iloc[tr_ids].mean()).abs().mean()

0.23376325211488436

In [6]:
# import the model 
import model_v3dot3 as modelClass

In [7]:
# Preprocessing applied to every sample: project-defined rescale followed by
# conversion to tensors.
preprocess = transforms.Compose([
    modelClass.Rescale((224,224), (224,224)),
    modelClass.ToTensor(),
])

# One dataset object over the whole table; the samplers below select the rows.
dataset = modelClass.LocalizationDataset(
    data_pickle=whole_data,
    data_path=data_path,
    transform=preprocess,
)

batch_size = 32

# Training draws from every training row (all_tr_ids), not from the 64000-row
# subsample held in tr_ids; validation draws from the subsample vld_ids.
train_sampler = SubsetRandomSampler(all_tr_ids)
valid_sampler = SubsetRandomSampler(vld_ids)

# Settings shared by both loaders.
loader_kwargs = dict(batch_size=batch_size, num_workers=8)
train_loader = DataLoader(dataset, sampler=train_sampler, **loader_kwargs)
validation_loader = DataLoader(dataset, sampler=valid_sampler, **loader_kwargs)

In [8]:
# Mean IoU over the sampled training rows.
whole_data['IoU'].iloc[tr_ids].mean()

0.21307067824787698

In [9]:
# Variance of the IoU over the sampled validation rows.
whole_data['IoU'].iloc[vld_ids].var()

0.09504117310886573

In [10]:
import warnings
warnings.filterwarnings("ignore")

In [11]:
# Device the model is moved to and checkpoint tensors are mapped onto.
device = 'cuda'

model = modelClass.myModel()
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9)

# Loss histories; replaced by the stored histories when a checkpoint is loaded.
train_loss = []
val_loss = []

# Set load = False to start from a freshly initialised model.
load = True
load_path = 'models/model_v3.3_full_data_iter66000.pt'

if load:
    # map_location places the stored tensors on `device` regardless of which
    # device they were saved from.
    checkpoint = torch.load(load_path, map_location=device)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    model.load_state_dict(checkpoint['model_state_dict'])
    train_loss = checkpoint['train_loss']
    val_loss = checkpoint['validation_loss']

# When touch_vgg is set, requires_grad is written onto every parameter of the
# children of both feature extractors (image extractor first, then box
# extractor); requires_grad=True makes them trainable, False freezes them.
touch_vgg = True
requires_grad = True
if touch_vgg:
    for extractor in (model.img_feature_extractor, model.box_feature_extractor):
        for child in extractor.children():
            for param in child.parameters():
                param.requires_grad = requires_grad
            


In [12]:
# Number of batches the training loader yields per epoch.
len(train_loader)

18608

In [None]:
max_epochs = 10
pr_freq = 500      # run validation and print losses every pr_freq iterations
save_freq = 2000   # write a checkpoint every save_freq iterations
start_epoch = 0

# NOTE(review): checkpoints are written mid-epoch with the current epoch index,
# and resuming starts at that index + 1, so the unfinished part of the
# interrupted epoch is skipped.
if load:
    start_epoch = checkpoint['epoch'] + 1

# Global iteration counter. It is hard-coded to the iteration number embedded
# in the checkpoint file name and must be kept in sync with load_path by hand.
i=66000
for epoch in range(start_epoch, max_epochs):
    # Training
    print('at epoch: ' + str(epoch))
    tick_epoch = time.time()
    tick_pr = time.time()
    model.train()
    for sample in train_loader:
        i+=1

        image, bbox_image, loc_rel, embedding, IoU = modelClass.get_torch_data(sample)
        optimizer.zero_grad()
        outputs = model(image,bbox_image,loc_rel,embedding)
        loss = modelClass.my_loss(outputs, IoU)
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())

        # Validation
        if(i%pr_freq == 0):
            tock_pr = time.time()
            print(str(pr_freq) + ' batch in ' + str((tock_pr-tick_pr)/60) + ' minutes')
            # Evaluate in eval mode so that train-only layer behaviour (e.g.
            # dropout or batch-norm statistics updates, if the model has any)
            # does not affect, or get affected by, the validation pass.
            model.eval()
            with torch.set_grad_enabled(False):
                val_batch_losses = []
                # Separate names keep the validation batch from overwriting
                # the training batch variables of the outer loop.
                for val_sample in validation_loader:
                    val_image, val_bbox_image, val_loc_rel, val_embedding, val_IoU = \
                        modelClass.get_torch_data(val_sample)
                    val_outputs = model(val_image,val_bbox_image,val_loc_rel,val_embedding)
                    # NOTE(review): validation passes margin=0.004 while the
                    # training call above relies on my_loss's default margin —
                    # confirm the difference is intended.
                    val_batch_loss = modelClass.my_loss(val_outputs, val_IoU, margin= 0.004)
                    val_batch_losses.append(val_batch_loss.item())
                val_l = np.average(val_batch_losses)
                print('validation loss at iter ' + str(i) + ': ' + str(val_l))
                val_loss.append((i,val_l))
                print('train loss at iter ' + str(i) +': '+ str(np.average(train_loss[-pr_freq:])))
            model.train()
            # The next timing interval starts where this one ended, so it also
            # includes the time spent on this validation pass.
            tick_pr = tock_pr

        if(i%save_freq == 0):
            torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_loss,
            'validation_loss': val_loss
            }, 'models/model_v3.3_full_data_iter' + str(i) + '.pt')
    tock_epoch = time.time()
    optimizer.zero_grad()

    print('epoch in ' + str((tock_epoch - tick_epoch)/60) + ' minutes')

at epoch: 5


In [None]:
# CUDA memory currently allocated on `device`, as reported by torch.cuda.
torch.cuda.memory_allocated(device=device)

In [None]:
# Maximum CUDA memory allocated on `device`, as reported by torch.cuda.
torch.cuda.max_memory_allocated(device=device)

In [None]:
# Optionally restore a checkpoint, then collect, per validation batch, the
# cosine-based distance between the two halves of the model output, separately
# for samples with IoU == 1 and for samples with IoU == 0.
load = True
load_path = 'models/model18.pt'
if load:
    # map_location places the stored tensors on `device` regardless of which
    # device they were saved from.
    checkpoint = torch.load(load_path, map_location=device)
    start_epoch = checkpoint['epoch'] + 1
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    model.load_state_dict(checkpoint['model_state_dict'])
    train_loss = checkpoint['train_loss']
    val_loss = checkpoint['validation_loss']

# Each list receives one sub-list per batch; they are flattened in a later cell.
ground_truth_error = []
false_box_error = []

# Built once; the module is stateless across batches.
cos = nn.CosineSimilarity(dim=1, eps=1e-6)

# Evaluate in eval mode and restore the previous mode afterwards so a later
# training run is not silently left in eval mode.
was_training = model.training
model.eval()
with torch.set_grad_enabled(False):
    for sample in validation_loader:
        image, bbox_image, loc_rel, embedding, IoU = modelClass.get_torch_data(sample)
        outputs = model(image,bbox_image,loc_rel,embedding)
        # Split the output along dim 1 into chunks of 512 columns.
        # NOTE(review): unpacking into two names assumes the output has exactly
        # 1024 columns (image half, then word half) — confirm against the model.
        img_seg, word_seg = torch.split(outputs, 512, dim=1)
        cos_sim = cos(img_seg, word_seg)
        # Map cosine similarity in [-1, 1] to a distance in [0, 1].
        dist = (1-cos_sim)/2
        dist = dist.cpu().numpy()
        IoU = IoU.cpu().numpy()
        # NOTE(review): exact float comparison; assumes IoU holds exact 0/1
        # values for these cases and has the same shape as dist — confirm.
        ground_truth_error.append(list(dist[IoU==1]))
        false_box_error.append(list(dist[IoU==0]))
model.train(was_training)
print('DONE!!')



In [None]:
# Collapse the per-batch sub-lists into flat lists, rebinding the same names.
# Re-running this cell on the already-flattened lists fails, because their
# elements are then scalars rather than lists.
flat_ground_truth, flat_false_box = [], []
for batch_errors in ground_truth_error:
    flat_ground_truth.extend(batch_errors)
for batch_errors in false_box_error:
    flat_false_box.extend(batch_errors)
ground_truth_error, false_box_error = flat_ground_truth, flat_false_box

# Mean, then standard deviation, of the distances for IoU == 1 samples.
for summary in (np.average, np.std):
    print(summary(ground_truth_error))

In [None]:
# Mean, then standard deviation, of the distances for IoU == 0 samples.
for summary in (np.average, np.std):
    print(summary(false_box_error))

In [None]:
!pip install graphviz

In [None]:
import hiddenlayer as hl
import graphviz

In [None]:
# Build a hiddenlayer graph of `model`, apply the "blue" theme and display it.
# NOTE(review): in notebook order `img` is only assigned in later cells, so this
# cell relies on one of them having been executed first.
# NOTE(review): unless rebound in a cell not shown here, `transforms` is the
# torchvision.transforms module imported at the top of the notebook — confirm
# that this is what build_graph expects for its `transforms` argument.
hl_graph = hl.build_graph(model, img, transforms = transforms)
hl_graph.theme = hl.graph.THEMES["blue"].copy()
hl_graph

20.22411298751831 / 32 

10.583017110824585 / 16

8.651746273040771 / 8

In [None]:
# Random float32 dummy inputs, one tensor per shape defined below.
# The tensors are created directly with their final shape and values instead of
# allocating uninitialised legacy torch.cuda.FloatTensor buffers and filling
# them through out= (the original `box` buffer was even allocated without a
# shape). They live on the GPU when one is available, otherwise on the CPU.
batch_size = 1
dummy_device = 'cuda' if torch.cuda.is_available() else 'cpu'

img_shape = torch.Size((batch_size, 3, 240, 240))
img = torch.randn(img_shape, device=dummy_device)

box_shape = torch.Size((batch_size, 3, 240, 240))
box = torch.randn(box_shape, device=dummy_device)

box_data_shape = torch.Size((batch_size, 4))
box_data = torch.randn(box_data_shape, device=dummy_device)

embeddings_shape = torch.Size((batch_size, 512))
embedding = torch.randn(embeddings_shape, device=dummy_device)

In [None]:
# Preview the first rows only rather than rendering the whole table.
whole_data.head()

In [26]:
# Random stand-in for a single 512-channel, 7x7 feature map.
batch_size = 1
img_shape = torch.Size([batch_size, 512, 7, 7])
img = torch.randn(*img_shape)

In [38]:
# Unpadded 3x3 convolution with stride 2 that reduces 512 channels to 128.
conv1 = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, stride=2)

In [39]:
# Shape of conv1's output. Assuming `img` is the (1, 512, 7, 7) dummy tensor,
# a 3x3 kernel with stride 2 and no padding gives (7 - 3) // 2 + 1 = 3 along
# each spatial dimension.
conv1(img).shape

torch.Size([1, 128, 3, 3])

In [35]:
# Print each direct child layer of the image feature extractor, one per line.
for layer in model.img_feature_extractor.children():
    print(layer)

Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
ReLU(inplace)
Conv2d(512, 512, kernel_size=(3, 3), stride

In [33]:
# Make the image feature extractor's parameters trainable exactly when
# freeze_vgg is false.
# NOTE(review): freeze_vgg is not assigned in any cell shown in this notebook;
# it must already exist in the kernel for this cell to run.
for layer in model.img_feature_extractor.children():
    for weight in layer.parameters():
        weight.requires_grad = not freeze_vgg
