In [1]:
#Most of the code is copied from https://github.com/mangye16/Unsupervised_Embedding_Learning

from __future__ import print_function

import sys
import torch.optim as optim
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms

import os
import argparse
import time
import numpy as np
import models
import datasets
import math
from easydict import EasyDict as edict

from BatchAverage import BatchCriterion
from utils import *
from tensorboardX import SummaryWriter


ModuleNotFoundError: No module named 'models'

In [3]:
# Taken from original code

# batch-t: softmax temperature parameter (0.05-0.1)
# low-dim: the feature embedding dimension (default: 128)

args = edict({'dataset':'cifar', 'lr': .03, 'resume': '', 'log_dir': 'log/', 'model_dir': 'checkpoint/',
              'test_epoch': 1, 'low_dim': 128, 'batch_t': .1, 'batch_m': 1, 'batch_size': 128, 'gpu': '2, 3'
             }) 
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
device = 'cuda:' if torch.cuda.is_available() else 'cpu'


# device = 'cuda:'+args.gpu[0] if torch.cuda.is_available() else 'cpu'

NameError: name 'edict' is not defined

In [61]:
dataset = args.dataset
if dataset =='cifar':
    img_size = 32
    pool_len = 4
elif dataset == 'stl':
    img_size = 96
    pool_len = 7
    
    
log_dir = args.log_dir + dataset + '_log/'
test_epoch = args.test_epoch
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)
    
suffix = dataset + '_batch_0nn_{}'.format(args.batch_size)
suffix = suffix + '_temp_{}_km_{}_alr'.format(args.batch_t, args.batch_m)
    
if len(args.resume)>0:
    suffix = suffix + '_r'

# log the output
test_log_file = open(log_dir + suffix + '.txt', "w")                
vis_log_dir = log_dir + suffix + '/'
if not os.path.isdir(vis_log_dir):
    os.makedirs(vis_log_dir)
writer = SummaryWriter(vis_log_dir)  

In [62]:
# Data Preparation
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(size=img_size, scale=(0.2,1.)),
    transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

if dataset =='cifar':
    # cifar-10 dataset 
    trainset = datasets.CIFAR10Instance(root='./data', train=True, download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, 
        batch_size=args.batch_size, shuffle=True, num_workers=4,drop_last =True)

    testset = datasets.CIFAR10Instance(root='./data', train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, 
        batch_size=100, shuffle=False, num_workers=4)
elif dataset == 'stl':
    # stl-10 dataset 
    trainset = datasets.STL10Instance(root='./data', split='train+unlabeled', download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, 
        batch_size=args.batch_size, shuffle=True, num_workers=4,drop_last =True)

    valset = datasets.STL10Instance(root='./data', split='train', download=True, transform=transform_test)
    valloader = torch.utils.data.DataLoader(valset, 
        batch_size=100, shuffle=False, num_workers=4,drop_last =True)
    
    nvdata = valset.__len__()
    testset = datasets.STL10Instance(root='./data', split='test', download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, 
        batch_size=100, shuffle=False, num_workers=4)

ndata = trainset.__len__()

==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified


In [63]:
print('==> Building model..')
net = models.__dict__['ResNet18'](pool_len = pool_len, low_dim=args.low_dim)

# define leminiscate: inner product within each mini-batch (Ours)

if device == 'cuda':
#     net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    net = torch.nn.DataParallel(net, device_ids=[int(s) for s in args.gpu.split(',')])
    cudnn.benchmark = True

# define loss function: inner product loss within each mini-batch
criterion = BatchCriterion(args.batch_m, args.batch_t, args.batch_size)

net.to(device)
criterion.to(device)
# define optimizer
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)

==> Building model..


In [64]:
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed at 120, 160 and 200"""
    lr = args.lr
    if epoch >= 120 and epoch < 160:
        lr = args.lr * 0.1
    elif epoch >= 160 and epoch <200:
        lr = args.lr * 0.05
    elif epoch >= 200:
        lr = args.lr * 0.01
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr   
    writer.add_scalar('lr',  lr, epoch)
    
# Training
def train(epoch):
    print('\nEpoch: %d' % epoch)
    adjust_learning_rate(optimizer, epoch)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()

    # switch to train mode
    net.train()

    end = time.time()
    for batch_idx, (inputs1, inputs2, _, indexes) in enumerate(trainloader):
        data_time.update(time.time() - end)

        inputs1, inputs2, indexes = inputs1.to(device), inputs2.to(device), indexes.to(device)
        
        inputs = torch.cat((inputs1,inputs2), 0)
        optimizer.zero_grad()

        features = net(inputs)
        loss = criterion(features, indexes)

        loss.backward()
        optimizer.step()
        
        train_loss.update(loss.item(), inputs.size(0))         
        
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        
        if batch_idx%10 ==0:
            print('Epoch: [{}][{}/{}] '
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f})'.format(
                  epoch, batch_idx, len(trainloader), batch_time=batch_time, data_time=data_time, train_loss=train_loss))
    # add log
    writer.add_scalar('loss',  train_loss.avg, epoch)

In [None]:
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

for epoch in range(start_epoch, start_epoch+301):
    
    # training 
    train(epoch)
    
    # testing every test_epoch
    if epoch%test_epoch ==0:
        net.eval()
        print('----------Evaluation---------')
        start = time.time()
        
        if dataset == 'cifar':
            acc = kNN(epoch, net, trainloader, testloader, 200, args.batch_t, ndata, low_dim = args.low_dim)
        elif dataset == 'stl':
            acc = kNN(epoch, net, valloader, testloader, 200, args.batch_t, nvdata, low_dim = args.low_dim)
        
        print("Evaluation Time: '{}'s".format(time.time()-start))
        writer.add_scalar('nn_acc', acc, epoch)

        if acc > best_acc:
            print('Saving..')
            state = {
                'net': net.state_dict(),
                'acc': acc,
                'epoch': epoch,
            }
            if not os.path.isdir(args.model_dir):
                os.mkdir(args.model_dir)
            torch.save(state, args.model_dir + suffix + '_best.t')
            best_acc = acc
            
        print('accuracy: {}% \t (best acc: {}%)'.format(acc,best_acc))
        print('[Epoch]: {}'.format(epoch), file = test_log_file)
        print('accuracy: {}% \t (best acc: {}%)'.format(acc,best_acc), file = test_log_file)
        test_log_file.flush()

In [7]:
# class Normalize(nn.Module):

#     def __init__(self, power=2):
#         super(Normalize, self).__init__()
#         self.power = power
    
#     def forward(self, x):
#         norm = x.pow(self.power).sum(1, keepdim=True).pow(1./self.power)
#         out = x.div(norm)
#         return out

# class BasicBlock(nn.Module):
#     expansion = 1

#     def __init__(self, in_planes, planes, stride=1):
#         super(BasicBlock, self).__init__()
#         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
#         self.bn1 = nn.BatchNorm2d(planes)
#         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
#         self.bn2 = nn.BatchNorm2d(planes)

#         self.shortcut = nn.Sequential()
#         if stride != 1 or in_planes != self.expansion*planes:
#             self.shortcut = nn.Sequential(
#                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
#                 nn.BatchNorm2d(self.expansion*planes)
#             )

#     def forward(self, x):
#         out = F.relu(self.bn1(self.conv1(x)))
#         out = self.bn2(self.conv2(out))
#         out += self.shortcut(x)
#         out = F.relu(out)
#         return out


# class Bottleneck(nn.Module):
#     expansion = 4

#     def __init__(self, in_planes, planes, stride=1):
#         super(Bottleneck, self).__init__()
#         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
#         self.bn1 = nn.BatchNorm2d(planes)
#         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
#         self.bn2 = nn.BatchNorm2d(planes)
#         self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
#         self.bn3 = nn.BatchNorm2d(self.expansion*planes)

#         self.shortcut = nn.Sequential()
#         if stride != 1 or in_planes != self.expansion*planes:
#             self.shortcut = nn.Sequential(
#                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
#                 nn.BatchNorm2d(self.expansion*planes)
#             )

#     def forward(self, x):
#         out = F.relu(self.bn1(self.conv1(x)))
#         out = F.relu(self.bn2(self.conv2(out)))
#         out = self.bn3(self.conv3(out))
#         out += self.shortcut(x)
#         out = F.relu(out)
#         return out


# class ResNet(nn.Module):
#     def __init__(self, block, num_blocks, pool_len =4, low_dim=128):
#         super(ResNet, self).__init__()
#         self.in_planes = 64

#         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
#         self.bn1 = nn.BatchNorm2d(64)
#         self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
#         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
#         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
#         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
#         self.linear_embedding = nn.Linear(512*block.expansion, low_dim)
#         self.linear_class = nn.Linear(512 * block.expansion, 10)
#         self.l2norm = Normalize(2)
#         self.pool_len = pool_len
#         # for m in self.modules():
#             # if isinstance(m, nn.Conv2d):
#                 # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
#                 # m.weight.data.normal_(0, math.sqrt(2. / n))
#             # elif isinstance(m, nn.BatchNorm2d):
#                 # m.weight.data.fill_(1)
#                 # m.bias.data.zero_()
                

#     def _make_layer(self, block, planes, num_blocks, stride):
#         strides = [stride] + [1]*(num_blocks-1)
#         layers = []
#         for stride in strides:
#             layers.append(block(self.in_planes, planes, stride))
#             self.in_planes = planes * block.expansion
#         return nn.Sequential(*layers)

#     def forward(self, x):
#         out = F.relu(self.bn1(self.conv1(x)))
#         out = self.layer1(out)
#         out = self.layer2(out)
#         out = self.layer3(out)
#         out = self.layer4(out)
#         out = F.avg_pool2d(out, self.pool_len)
#         out = out.view(out.size(0), -1)
#         out_embedding = self.linear_embedding(out)
#         out_embedding = self.l2norm(out_embedding)
#         out_class = self.linear_class(out)
#         return out_embedding, out_class


# def ResNet18(pool_len = 4, low_dim=128):
#     return ResNet(BasicBlock, [2,2,2,2], pool_len, low_dim)