In [1]:
import torchvision
import torchvision.transforms as transforms
import torch
from utils import generate_training_data_set, generate_testing_data_set, TinyImageNet
from torch.utils.data.sampler import SubsetRandomSampler
import pickle

transform_train = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.RandomCrop(224, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
transform_test = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
])
#generate_training_data_set(save=True)

In [3]:
img_triplet, label_triplet = pickle.load(open("./pickle/train_1.p",'rb'))
train = TinyImageNet(img_triplet, label_triplet, train=True, transform=transform_train)
train_loader = torch.utils.data.DataLoader(train, batch_size=2,
                                           shuffle=False, sampler=SubsetRandomSampler(range(4)))

In [4]:
for idx, (data, target) in enumerate(train_loader):
    print(idx, data[0].shape, target[2])

0 torch.Size([2, 3, 224, 224]) tensor([139,  63])
1 torch.Size([2, 3, 224, 224]) tensor([166, 132])


In [4]:
img, label = generate_testing_data_set()
test = TinyImageNet(img, label, train=False, transform=transform_train)
test_loader = torch.utils.data.DataLoader(test, batch_size=2,
                                           shuffle=False, sampler=SubsetRandomSampler(range(4)))

In [8]:
net = torchvision.models.resnet.ResNet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2])
net.load_state_dict(torch.load("../data/model/resnet18-5c106cde.pth"))

In [12]:
import torchvision
import torchvision.transforms as transforms
import torch
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

import os
import argparse
import logging
import numpy as np
import pickle
import utils
transform_train = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.RandomCrop(224, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
epoch = 0
use_cuda = torch.cuda.is_available()
net =  torchvision.models.resnet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3])
net.load_state_dict(torch.load("../data/model/resnet101-5d3b4d8f.pth"))
net.fc = nn.Linear(in_features=net.fc.in_features, out_features=4096)
optimizer = optim.SGD(net.parameters(), lr=0.05, momentum=0.9, weight_decay=5e-4)

criterion = nn.TripletMarginLoss(margin=1.0, p=2, eps=1e-06)
training_loss_seq = []
testing_accuracy_seq = []
testing_loss_seq = []
testing_best_accuracy = 0
if not os.path.isfile("./pickle/train_{}.p".format(epoch)):
    img_triplet, label_triplet = utils.generate_training_data_set(save=True,epoch_idx=epoch)
else:
    img_triplet, label_triplet = pickle.load(open("./pickle/train_{}.p".format(epoch), 'rb'))
train_dataset = utils.TinyImageNet(img_triplet, label_triplet, transform=transform_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=5, num_workers=4,
                                           shuffle=False, sampler=SubsetRandomSampler(range(50)))
net.train()
loss_epoch = []
for _, (images, _) in enumerate(train_loader):
    if use_cuda:
        q, p, n = images[0].cuda(), images[1].cuda(), images[2].cuda()
    else:
        q, p, n = images[0], images[1], images[2]
    optimizer.zero_grad()
    q, p, n = Variable(q), Variable(p), Variable(n)
    f_q, f_p, f_n = net(q), net(p), net(n)
    loss = criterion(f_q, f_p, f_n)
    loss.backward()
    optimizer.step()
    if torch.__version__ == '0.4.1':
        loss_epoch.append(loss.item())
    else:
        loss_epoch.append(loss.data[0])
loss = np.mean(loss_epoch)
print("=> Epoch: [{}/{}] | Loss:[{}]".format(epoch + 1, 30, loss))

=> Epoch: [1/30] | Loss:[4.910797208547592]


In [13]:
loss_epoch

[1.0770889520645142,
 0.7435991168022156,
 1.519863486289978,
 4.663661956787109,
 3.9388535022735596,
 4.634669780731201,
 2.446793556213379,
 4.2676496505737305,
 13.153558731079102,
 12.662233352661133]

In [191]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=5, num_workers=4,
                                           shuffle=False, sampler=SubsetRandomSampler([0,503,1003,1503,2003,2503,3003,3503,4003,4503]))
for _, (imgs, labels) in enumerate(train_loader):
    print(labels[0])

tensor([8, 2, 5, 1, 6])
tensor([4, 3, 0, 9, 7])


In [192]:
topk = 2
img, label = utils.generate_testing_data_set()
test_dataset = utils.TinyImageNet(img, label, train=False, transform=transform_train)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=2,
                                          sampler=SubsetRandomSampler(range(8)))
f_img_train = []
label_train = []
for _, (imgs, labels) in enumerate(train_loader):
    if use_cuda:
        imgs, lables = imgs.cuda(), labels.cuda()
    f_img_train.append(net(imgs[0]))
    label_train.append(labels[0])
f_img_train = torch.cat(f_img_train, dim=0)
label_train = torch.cat(label_train, dim=0)

In [198]:
for _, (imgs, labels) in enumerate(test_loader):
    print(labels)

tensor([161, 140, 107,  69])
tensor([ 69, 147, 139,  73])


In [193]:
f_img_test = []
label_test = []
for idx, (imgs, labels) in enumerate(test_loader):
    if use_cuda:
        imgs, lables = imgs.cuda(), labels.cuda()
    f_img_test.append(net(imgs))
    label_test.append(labels)
f_img_test = torch.cat(f_img_test, dim=0)
label_test = torch.cat(label_test, dim=0)

In [232]:
test_accuracy = []
for fig_embedding_current, lable_current in zip(f_img_test, label_test):
    fig_embedding_current = fig_embedding_current.reshape(1, 4096)
    fig_embedding_current = fig_embedding_current.expand(f_img_train.shape[0], 4096)
    pdist = nn.PairwiseDistance(p=2)
    distance = pdist(fig_embedding_current, f_img_train)
    predicted = label_train[distance.topk(topk)[1]]
    print(predicted, lable_current)
    test_accuracy.append(float(torch.sum(torch.eq(predicted, lable_current))) / topk)
test_accuracy_epoch = np.mean(test_accuracy)

tensor([6, 9]) tensor(139)
tensor([1, 3]) tensor(140)
tensor([1, 3]) tensor(161)
tensor([6, 9]) tensor(147)
tensor([1, 9]) tensor(73)
tensor([1, 3]) tensor(69)
tensor([6, 1]) tensor(107)
tensor([6, 9]) tensor(69)


In [238]:
import torchvision
import torchvision.transforms as transforms
import torch
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

import os
import argparse
import logging
import numpy as np
import pickle

import utils
class Args:
    def __init__(self, num_epochs=2, batch_size=100, train_all=True, resume='./checkpoint.pth.tar', test_only=True):
        self.num_epochs=2
        self.batch_size=10
        self.train_all=True
        self.resume='./checkpoint.pth.tar'
        self.test_only=False
args = Args()

In [240]:
log_level = logging.INFO
logger = logging.getLogger()
logger.setLevel(log_level)
handler = logging.FileHandler("hw5.log")
handler.setLevel(log_level)
formatter = logging.Formatter('%(asctime)s - [%(levelname)s] - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.info("torch version: {}".format(torch.__version__))


# Hyper Parameters
batch_size = args.batch_size
topk = 30
pdist = nn.PairwiseDistance(p=2)
# Data Preparation

# note that mean and std is calculated channel-wise
# reference: https://discuss.pytorch.org/t/normalization-in-the-mnist-example/457/10
print("Data Preparation...")
logger.info("Data Preparation...")
transform_train = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.RandomCrop(224, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

transform_test = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


print("Loading Data...")
logger.info("Loading Data...")
img, label = utils.generate_testing_data_set()
test_dataset = utils.TinyImageNet(img, label, train=False, transform=transform_test)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=8)

if args.test_only:
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=2, shuffle=False, num_workers=2,
                                              sampler=SubsetRandomSampler(range(8)))
print("Model setting...")
logger.info("Model setting...")

use_cuda = torch.cuda.is_available()
start_epoch = 0
net = torchvision.models.resnet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3])
net.load_state_dict(torch.load("../data/model/resnet101-5d3b4d8f.pth"))

# Do not change the layers that are pre-trained with the only exception
# on the last full-connected layer.
if not args.train_all:
    for param in net.parameters():
        param.requires_grad = False
# change the last fc layer for cifar100
net.fc = nn.Linear(in_features=net.fc.in_features, out_features=4096)

#optimizer = optim.Adam(net.parameters())
optimizer = optim.SGD(net.parameters(), lr=0.05, momentum=0.9, weight_decay=5e-4)

criterion = nn.TripletMarginLoss(margin=1.0, p=2, eps=1e-06)
training_loss_seq = []
training_accuracy_seq = []
testing_accuracy_seq = []
testing_best_accuracy = 0

if args.resume:
    print("Resume from the checkpoint...")
    logger.info("Resume from the checkpoint...")
    if os.path.isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch'] + 1
        net.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        training_loss_seq = checkpoint['training_loss_seq']
        training_accuracy_seq = checkpoint['training_accuracy_seq']
        testing_accuracy_seq = checkpoint['testing_accuracy_seq']
        testing_best_accuracy = checkpoint['testing_best_accuracy']
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(args.resume, (checkpoint['epoch'] + 1)))
        logger.info("=> loaded checkpoint '{}' (epoch {})"
                    .format(args.resume, (checkpoint['epoch'] + 1)))
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))
        logger.info("=> no checkpoint found at '{}'".format(args.resume))
        print("=> Training based on the resnet-101 from scratch...")
        logger.info("=> Training based on the resnet-101 from scratch...")
else:
    print("=> Training based on the resnet-18 from scratch...")
    logger.info("=> Training based on the resnet-18 from scratch...")


print("Model Training...")
logger.info("Model Training...")

# use up-to-date learning rate; for resume purpose
for param_group in optimizer.param_groups:
    current_learningRate = param_group['lr']

if use_cuda:
    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    print(torch.cuda.device_count())
    cudnn.benchmark = True


def train(epoch,topk=30):
    if args.test_only:
        topk = 3
        img_triplet, label_triplet = pickle.load(open("./pickle/train_1.p", 'rb'))
        train_dataset = utils.TinyImageNet(img_triplet, label_triplet, transform=transform_train)
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=5, num_workers=2,
                                           shuffle=False, sampler=SubsetRandomSampler([0,503,1003,1503,2003,2503,3003,3503,4003,4503]))
    else:
        if not os.path.isfile("./pickle/train_{}.p".format(epoch)):
            img_triplet, label_triplet = utils.generate_training_data_set(save=True, epoch_idx=epoch)
        else:
            img_triplet, label_triplet = pickle.load(open("./pickle/train_{}.p".format(epoch), 'rb'))
        train_dataset = utils.TinyImageNet(img_triplet, label_triplet, train=True, transform=transform_train)
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8)

    global current_learningRate
    net.train()
    if (epoch+1) % 10 == 0:
        current_learningRate /= 2
        logger.info("=> Learning rate is updated!")
        utils.update_learning_rate(optimizer, current_learningRate)

    f_img_train = []
    label_train = []
    for _, (images, lables) in enumerate(train_loader):
        if use_cuda:
            q, p, n, q_label = images[0].cuda(), images[1].cuda(), images[2].cuda(), lables[0].cuda()
        else:
            q, p, n, q_label = images[0], images[1], images[2], lables[0]
        optimizer.zero_grad()
        q, p, n = Variable(q), Variable(p), Variable(n)
        f_q, f_p, f_n = net(q), net(p), net(n)
        loss = criterion(f_q, f_p, f_n)
        loss.backward()
        optimizer.step()
        f_img_train.append(f_q)
        label_train.append(q_label)

    if torch.__version__ == '0.4.1':
        loss_epoch = loss.item()
    else:
        loss_epoch = loss.data[0]

    f_img_train = torch.cat(f_img_train, dim=0)
    label_train = torch.cat(label_train, dim=0)

    train_accuracy = []
    # calculate train_acc so use train_loader as the test_loader
    for f_img_test_current, label_test_current in zip(f_img_train, label_train):
        f_img_test_current = f_img_test_current.reshape(1, 4096)
        f_img_test_current = f_img_test_current.expand(f_img_train.shape[0], 4096)
        distance = pdist(f_img_test_current, f_img_train)
        predicted = label_train[distance.topk(topk)[1]]
        train_accuracy.append(float(torch.sum(torch.eq(predicted, label_test_current))) / topk)
    train_accuracy_epoch = np.mean(train_accuracy)

    print("=> Epoch: [{}/{}] | Loss:[{}] | Training Accuracy: [{}]".format(epoch + 1, args.num_epochs, loss_epoch, train_accuracy_epoch))
    logger.info("=> Epoch: [{}/{}] | Loss:[{}] | Training Accuracy: [{}]".format(epoch + 1, args.num_epochs, loss_epoch, train_accuracy_epoch))
    return loss_epoch, train_accuracy_epoch, f_img_train, label_train


def test(epoch, f_img_train, label_train, topk=30):
    net.eval()
    if args.test_only:
        topk = 3
    #f_img_train = []
    #label_train = []
    # for _, (imgs_train, labels_train) in enumerate(train_loader):
    #     if use_cuda:
    #         imgs_train, labels_train = imgs_train.cuda(), labels_train.cuda()
    #     f_img_train.append(net(imgs_train[0]))
    #     label_train.append(labels_train[0])
    # f_img_train = torch.cat(f_img_train, dim=0)
    # label_train = torch.cat(label_train, dim=0)

    f_img_test = []
    label_test = []
    for _, (imgs_test, labels_test) in enumerate(test_loader):
        if use_cuda:
            imgs_test, labels_test = imgs_test.cuda(), labels_test.cuda()
        #f_img_test, label_test = Variable(f_img_test), Variable(label_test)
        f_img_test.append(net(imgs_test))
        label_test.append(labels_test)

    f_img_test = torch.cat(f_img_test, dim=0)
    label_test = torch.cat(label_test, dim=0)

    test_accuracy = []
    for f_img_test_current, label_test_current in zip(f_img_test, label_test):
        f_img_test_current = f_img_test_current.reshape(1, 4096)
        f_img_test_current = f_img_test_current.expand(f_img_train.shape[0], 4096)
        distance = pdist(f_img_test_current, f_img_train)
        predicted = label_train[distance.topk(topk)[1]]
        test_accuracy.append(float(torch.sum(torch.eq(predicted, label_test_current))) / topk)
    test_accuracy_epoch = np.mean(test_accuracy)

    print("=> Epoch: [{}/{}] | Testing Accuracy: [{}]".format(
        epoch + 1, args.num_epochs, test_accuracy_epoch))
    logger.info("=> Epoch: [{}/{}] | Testing Accuracy: [{}]".format(
        epoch + 1, args.num_epochs, test_accuracy_epoch))

    return test_accuracy_epoch


# for epoch in range(start_epoch, args.num_epochs):
#     train_loss, train_accuracy, f_img_train, label_train = train(epoch)
#     test_accuracy = test(epoch, f_img_train, label_train)

#     training_loss_seq.append(train_loss)
#     training_accuracy_seq.append(train_accuracy)
#     testing_accuracy_seq.append(test_accuracy)

#     is_best = testing_accuracy_seq[-1] > testing_best_accuracy
#     testing_best_accuracy = max(testing_best_accuracy, testing_accuracy_seq[-1])

#     state = {
#         "epoch": epoch,
#         "state_dict": net.state_dict(),  # if use_cuda else net.module.state_dict()
#         "optimizer": optimizer.state_dict(),
#         "training_loss_seq": training_loss_seq,
#         "training_accuracy_seq": training_accuracy_seq,
#         "testing_accuracy_seq": testing_accuracy_seq,
#         "testing_best_accuracy": testing_best_accuracy
#     }
#     utils.save_checkpoint(state, is_best, filename='checkpoint.pth.tar', extra="hw5_")
#     if is_best:
#         logger.info("=> Best parameters are updated")


# logger.info("=> Trained on [{}] epoch, with test accuracy [{}].\n \
#  During the training stages, historical best test accuracy is \
#  [{}]".format(args.num_epochs, testing_accuracy_seq[-1], testing_best_accuracy))
# print("=> Trained on [{}] epoch, with test accuracy [{}].\n \
#  During the training stages, historical best test accuracy is \
#  [{}]".format(args.num_epochs, testing_accuracy_seq[-1], testing_best_accuracy))


Data Preparation...
Loading Data...
Model setting...
Resume from the checkpoint...
=> no checkpoint found at './checkpoint.pth.tar'
=> Training based on the resnet-101 from scratch...
Model Training...


In [241]:
train_loss, train_accuracy, f_img_train, label_train = train(epoch)
test_accuracy = test(epoch, f_img_train, label_train)

=> Epoch: [1/2] | Loss:[0.31766223907470703] | Training Accuracy: [0.0]


In [247]:
test_accuracy = test(epoch, f_img_train, label_train)

=> Epoch: [1/2] | Testing Accuracy: [0.0]


In [246]:
def test(epoch, f_img_train, label_train):
    net.eval()
    if args.test_only:
        topk = 3
    #f_img_train = []
    #label_train = []
    # for _, (imgs_train, labels_train) in enumerate(train_loader):
    #     if use_cuda:
    #         imgs_train, labels_train = imgs_train.cuda(), labels_train.cuda()
    #     f_img_train.append(net(imgs_train[0]))
    #     label_train.append(labels_train[0])
    # f_img_train = torch.cat(f_img_train, dim=0)
    # label_train = torch.cat(label_train, dim=0)

    f_img_test = []
    label_test = []
    for _, (imgs_test, labels_test) in enumerate(test_loader):
        if use_cuda:
            imgs_test, labels_test = imgs_test.cuda(), labels_test.cuda()
        #f_img_test, label_test = Variable(f_img_test), Variable(label_test)
        f_img_test.append(net(imgs_test))
        label_test.append(labels_test)

    f_img_test = torch.cat(f_img_test, dim=0)
    label_test = torch.cat(label_test, dim=0)

    test_accuracy = []
    for f_img_test_current, label_test_current in zip(f_img_test, label_test):
        f_img_test_current = f_img_test_current.reshape(1, 4096)
        f_img_test_current = f_img_test_current.expand(f_img_train.shape[0], 4096)
        distance = pdist(f_img_test_current, f_img_train)
        predicted = label_train[distance.topk(topk)[1]]
        test_accuracy.append(float(torch.sum(torch.eq(predicted, label_test_current))) / topk)
    test_accuracy_epoch = np.mean(test_accuracy)

    print("=> Epoch: [{}/{}] | Testing Accuracy: [{}]".format(
        epoch + 1, args.num_epochs, test_accuracy_epoch))
    logger.info("=> Epoch: [{}/{}] | Testing Accuracy: [{}]".format(
        epoch + 1, args.num_epochs, test_accuracy_epoch))

    return test_accuracy_epoch

In [2]:
import torchvision
train_dataset = torchvision.datasets.CIFAR100(root='../data', train=True, download=True, transform=None)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ../data/cifar-100-python.tar.gz


In [4]:
import torchvision.models as models
resnet18 = models.resnet18(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /Users/ym/.torch/models/resnet18-5c106cde.pth
100.0%


In [5]:
resnet18 = models.resnet101(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth" to /Users/ym/.torch/models/resnet101-5d3b4d8f.pth
100.0%
