In [151]:
from __future__ import print_function

import os
import socket
import time
import sys
import subprocess
import numpy as np
import warnings
warnings.filterwarnings('ignore',category=FutureWarning)
print("parse_option")

import torch
import torch.optim as optim
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader

from models import model_pool
from models.util import create_model
from models.resnet_language import LangPuller

from dataset.mini_imagenet import ImageNet, MetaImageNet
from dataset.tiered_imagenet import TieredImageNet, MetaTieredImageNet
from dataset.transform_cfg import transforms_options, transforms_list

from util import adjust_learning_rate, create_and_save_embeds, create_and_save_descriptions
from eval.util import accuracy, AverageMeter, validate


parse_option


In [2]:
from configs import parse_option_supervised

In [3]:
import os
import pickle
from PIL import Image
import numpy as np
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import re

In [193]:
from PIL import Image

In [None]:
class Cub200(Dataset):
    def __init__(self, 
                 args, 
                 split='train',
                 phase=None,
                 is_sample=False, 
                 k=4096,
                 transform=None):
        super(Dataset, self).__init__()
        self.split = split
        self.phase = phase
        self.data_aug = args.data_aug
        self.mean = [120.39586422 / 255.0, 115.59361427 / 255.0, 104.54012653 / 255.0]
        self.std = [70.68188272 / 255.0, 68.27635443 / 255.0, 72.54505529 / 255.0]
        self.normalize = transforms.Normalize(mean=self.mean, std=self.std)
        self.unnormalize = transforms.Normalize(mean=-np.array(self.mean)/self.std, std=1/np.array(self.std))
        
        np.random.seed(args.set_seed)

        if transform is None:
            if self.split == 'train' and self.data_aug:
                self.transform = transforms.Compose([
                    lambda x: Image.fromarray(x),
                    transforms.RandomCrop(84, padding=8),
                    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                    transforms.RandomHorizontalFlip(),
                    lambda x: np.asarray(x),
                    transforms.ToTensor(),
                    self.normalize
                ])
            else:
                self.transform = transforms.Compose([
                    lambda x: Image.fromarray(x),
                    transforms.ToTensor(),
                    self.normalize
                ])
        else:
            self.transform = transform 

        if args.continual:
            file_pattern = "all.pickle" # data root should be data/continual
        else:
            if self.split == "train":
                file_pattern = 'miniImageNet_category_split_train_phase_{}.pickle'.format(phase)
            else:
                file_pattern = 'miniImageNet_category_split_{}.pickle'.format(split)

        self.data = {}
        with open(os.path.join('./cub', file_pattern), 'rb') as f:
            data = pickle.load(f, encoding='latin1')
            self.imgs = data['data']
            self.labels = data['labels']
            self.cat2label = data['catname2label']
            

In [6]:
from configs import parse_option_eval

In [11]:
args = lambda x: None
args.n_ways = 5
args.n_shots = 5
args.n_queries = 30
args.data_root = 'data'
args.data_aug = True
args.n_test_runs = 5
args.n_aug_support_samples = 1
args.set_seed = 20
args.continual = True
args.eval_mode = "few-shot-incremental-fine-tune"
args.n_base_support_samples = 1
args.n_base_aug_support_samples = 0

In [107]:

base_test_loader = DataLoader(ImageNet(args=args, split='train', phase='test'),
                                      batch_size=6000,
                                      shuffle=False,
                                      drop_last=False,
                                      num_workers=10 )

In [108]:
meta_valloader = DataLoader(MetaImageNet(args=args, split='val',
                                                 
                                                 fix_seed=True, use_episodes=False, disjoint_classes=True),
                                    batch_size=6000, shuffle=False, drop_last=False,
                                    num_workers=10)

In [183]:
type(base_test_loader.dataset.imgs)

numpy.ndarray

In [109]:
len(meta_valloader.dataset.imgs)

37523

In [176]:
type(base_test_loader.dataset.imgs)

numpy.ndarray

In [110]:
import itertools

In [111]:
base_valloader_it = itertools.cycle(iter(base_test_loader))

In [112]:
base_batch = next(base_valloader_it)

In [113]:
base_batch[1]

tensor([57, 18, 48,  ..., 30, 29,  4])

In [114]:
len(meta_valloader.dataset.imgs)

37523

In [115]:
meta_valloader_it = itertools.cycle(iter(meta_valloader))

In [116]:
meta_next = next(meta_valloader_it)

In [117]:
len(meta_next[0])

5

In [126]:
support_xs, support_ys, query_xs, query_ys = meta_next
batch_size, _, height, width, channel = support_xs.size()

In [127]:
query_ys = query_ys.view(-1).detach().numpy()

In [128]:
np.sort(np.unique(query_ys))

array([ 6, 13, 17, 18, 20, 22, 25, 26, 28, 34, 40, 43, 58, 61, 67, 71, 73,
       78, 81, 85, 86, 90, 95, 97, 99])

In [135]:
class CUB:
    #index_path -> txt_path, 
    #index -> base size
    def __init__(self, root='./cub', train=True,
                 index_path=None, index=None, base_sess=None):
        self.root = os.path.expanduser(root)
        self.train = train  # training set or test set
        self._pre_operate()

        if train:
            
            self.transform = transforms.Compose([
                    lambda x: Image.fromarray(x),
                    transforms.RandomCrop(84, padding=8),
                    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                    transforms.RandomHorizontalFlip(),
                    lambda x: np.asarray(x),
                    transforms.ToTensor(),
                    self.normalize
            ])
            # self.data, self.targets = self.SelectfromTxt(self.data2label, index_path)
            #base는 100까지 따라서 index = 100
            if base_sess:
                self.data, self.targets = self.SelectfromClasses(self.data, self.targets, index)
            #novel session에 대한 세션 정보 줘야함
            else:
                self.data, self.targets = self.SelectfromTxt(self.data2label, index_path)
        else:
            self.transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])
            if base_sess:
                
                self.data, self.targets = self.SelectfromTxt(self.data, self.targets, index)
            else:
                #modifying
                self.data, self.targets = self.SelectfromClasses(self.data, self.targets, index)
                       
            #HSJ self.labels
        self.labels = self.targets
        self.imgs = self.getImg(self.data)
            #HSJ self.imgs 

    def getImg(self,d_list):
        img_list = []
        for d_path in d_list:
            c_img = Image.open(d_path).convert('RGB')
            c_img_transformed = self.transform(c_img).reshape(84,84,3)
            img_list.append(c_img_transformed.numpy())
        img_list_np = np.array(img_list)
        return img_list_np

        
    def SelectfromTxt(self, data2label, index_path):
        index = open('./cub/CUB_200_2011/index_list/session_'+index_path + '.txt').read().splitlines()
        data_tmp = []
        targets_tmp = []
        for i in index:
            img_path = os.path.join(self.root, i)
            data_tmp.append(img_path)
            targets_tmp.append(data2label[img_path])

        return data_tmp, targets_tmp

    def SelectfromClasses(self, data, targets, index):
        data_tmp = []
        targets_tmp = []
        for i in range(1,index+1):
            ind_cl = np.where(i == targets)[0]
            for j in ind_cl:
                data_tmp.append(data[j])
                targets_tmp.append(targets[j])

        return data_tmp, targets_tmp
    def list2dict(self, list):
        dict = {}
        for l in list:
            s = l.split(' ')
            id = int(s[0])
            cls = s[1]
            if id not in dict.keys():
                dict[id] = cls
            else:
                raise EOFError('The same ID can only appear once')
        return dict
    
    def text_read(self, file):
        with open(file, 'r') as f:
            lines = f.readlines()
            for i, line in enumerate(lines):
                lines[i] = line.strip('\n')
        return lines
        
    def _pre_operate(self):
            image_file = os.path.join('./cub', 'CUB_200_2011/images.txt')
            split_file = os.path.join('./cub', 'CUB_200_2011/train_test_split.txt')
            class_file = os.path.join('./cub', 'CUB_200_2011/image_class_labels.txt')
            id2image = self.list2dict(self.text_read(image_file))
            id2train = self.list2dict(self.text_read(split_file))  # 1: train images; 0: test iamges
            id2class = self.list2dict(self.text_read(class_file))
            train_idx = []
            test_idx = []
            for k in sorted(id2train.keys()):
                if id2train[k] == '1':
                    train_idx.append(k)
                else:
                    test_idx.append(k)

            self.data = []
            self.targets = []
            self.data2label = {}
            if self.train:
                for k in train_idx:
                    image_path = os.path.join('./cub', 'CUB_200_2011/images', id2image[k])
                    self.data.append(image_path)
                    self.targets.append(int(id2class[k]) - 1)
                    self.data2label[image_path] = (int(id2class[k]) - 1)

            else:
                for k in test_idx:
                    image_path = os.path.join('./cub', 'CUB_200_2011/images', id2image[k])
                    self.data.append(image_path)
                    self.targets.append(int(id2class[k]) - 1)
                    self.data2label[image_path] = (int(id2class[k]) - 1)
                    
    def __getitem__(self, item):
        if self.base_sess:
            img = self.imgs
            target = self.labels
            
            return img, target
        else:
            if self.split == "train" and self.phase == "train" and self.n_base_support_samples > 0:
                    assert self.n_base_support_samples > 0
                    # These samples will be stored in memory for every episode.
                    support_xs = []
                    support_ys = []
                    if self.fix_seed:
                        np.random.seed(item)
                    cls_sampled = np.random.choice(self.classes, len(self.classes), False)
                    
                    for idx, cls in enumerate(np.sort(cls_sampled)):
                        imgs = np.asarray(self.data[cls]).astype('uint8')
                        support_xs_ids_sampled = np.random.choice(range(imgs.shape[0]),
                                                                  self.n_base_support_samples,
                                                                  False)
                        support_xs.append(imgs[support_xs_ids_sampled])
                        support_ys.append([cls] * self.n_base_support_samples)    
                    support_xs, support_ys = np.array(support_xs), np.array(support_ys)
                    num_ways, n_queries_per_way, height, width, channel = support_xs.shape
                    support_xs = support_xs.reshape((-1, height, width, channel))
                    if self.n_base_aug_support_samples > 1:
                        support_xs = np.tile(support_xs, (self.n_base_aug_support_samples, 1, 1, 1))
                        support_ys = np.tile(support_ys.reshape((-1, )), (self.n_base_aug_support_samples))
                    support_xs = np.split(support_xs, support_xs.shape[0], axis=0)
                    support_xs = torch.stack(list(map(lambda x: self.train_transform(x.squeeze()), support_xs)))

                    # Dummy query.
                    query_xs = support_xs
                    query_ys = support_ys
            else:
            
                if self.fix_seed:
                    np.random.seed(item)

                #몇개로 나눌지(cub는 의미 없음)
                """BytesWarning
                if self.disjoint_classes:
                    cls_sampled = self.classes[:self.n_ways] # 
                    self.classes = self.classes[self.n_ways:]
                else:
                    cls_sampled = np.random.choice(self.classes, self.n_ways, False)
                """
                cls_sampled = self.targets

                support_xs = []
                support_ys = []
                query_xs = []
                query_ys = []
                for idx, cls in enumerate(np.sort(cls_sampled)):
                    #support_xs_ids_sampled = np.random.choice(range(imgs.shape[0]), self.n_shots, False)
                    support_xs.append(self.imgs)
                    #support_xs.append(imgs[support_xs_ids_sampled])
                    lbl = idx
                    if self.eval_mode in ["few-shot-incremental-fine-tune"]:
                        lbl = cls
                    support_ys.append([lbl] * self.n_shots) #

                    #query_xs_ids = np.setxor1d(np.arange(imgs.shape[0]), support_xs_ids_sampled)
                    #query_xs_ids = np.random.choice(query_xs_ids, self.n_queries, False)
                    query_xs.append(self.imgs)
                    #query_xs.append(imgs[query_xs_ids])
                    query_ys.append([lbl] * 30) #

                support_xs, support_ys, query_xs, query_ys = np.array(support_xs), np.array(support_ys), np.array(query_xs), np.array(query_ys)
                num_ways, n_queries_per_way, height, width, channel = query_xs.shape

                query_xs = query_xs.reshape((num_ways * n_queries_per_way, height, width, channel))
                query_ys = query_ys.reshape((num_ways * n_queries_per_way, ))

                support_xs = support_xs.reshape((-1, height, width, channel))
                """
                if self.n_aug_support_samples > 1:
                    support_xs = np.tile(support_xs, (self.n_aug_support_samples, 1, 1, 1))
                    support_ys = np.tile(support_ys.reshape((-1, )), (self.n_aug_support_samples))
                """
                support_xs = np.split(support_xs, support_xs.shape[0], axis=0)
                query_xs = query_xs.reshape((-1, height, width, channel))
                query_xs = np.split(query_xs, query_xs.shape[0], axis=0)

                support_xs = torch.stack(list(map(lambda x: self.train_transform(x.squeeze()), support_xs)))
                query_xs = torch.stack(list(map(lambda x: self.test_transform(x.squeeze()), query_xs)))

        return support_xs.float(), support_ys, query_xs.float(), query_ys
            

In [272]:
type(meta_valloader.dataset.data)

dict

In [273]:
support_xs_ids_sampled = np.random.choice(range(imgs.shape[0]), 5, False)

In [264]:
support_xs = []
support_xs.append(imgs[support_xs_ids_sampled])

In [266]:
len(support_xs[0])

5

In [268]:
support_ys = []
lbl = 1
lbl = 16
support_ys.append([lbl] * 5) #

In [269]:
support_ys

[[16, 16, 16, 16, 16]]

In [260]:
support_xs_ids_sampled

array([  58,  589,  545,  457, 1126])

In [136]:
k = CUB()

In [162]:
t = CUB(train=False)

TypeError: 'NoneType' object is not iterable

In [35]:
k._pre_operate()

In [144]:
len(set(k.targets))

100

In [None]:
#data에 담긴 path로 이미지 저장
#30개씩 train이므로 이걸 원본의 data[img]에 넣고
#label에도 맞게 넣어준다
#그다음 cat2label에도 맞춰 넣어주고
#이름이랑 매칭시켜줌 -> classname label
#그럼 img에는 원래대로 이미지가 들어가 있게 됨
#원본에서는 split, phase에 따라 구분하고 있음
#split = train & train 에는 base img -> base가 댐
#split = train & val 에는 test를 넣는다..?
# => base 학습 완료(class 100까지)
#split = train & test 에도 test를 넣음?
#split = val => novel 임
#split = val 은 train/test 구분이 없는데?
#아마도 random choice 해서 그런듯
#이거 대신 val train/test를 구분할 필요가 있음
#따라서 val의 train img, test img로 코드를 바꿔야 함
#sample을 고정하는 코드를 찾아가서 수정하면 될 것
#이렇게 하면 cub 데이터 적용이 완료됨


#EfficientNetB0에 대해서는 좀더 고민해야 함
#pretrained 모델 사용에 있어 코드를 잘 살펴봐야함

In [169]:
k.data2label[k.data[35]]

1

In [177]:
k.data[0]

'./cub/CUB_200_2011/images/001.Black_footed_Albatross/Black_Footed_Albatross_0041_796108.jpg'

In [235]:
test_im = Image.open(k.data[0])

In [236]:
test_im_array = np.array(test_im)

In [237]:
test_im_array.shape

(500, 500, 3)

In [186]:
    mean_t = [120.39586422 / 255.0, 115.59361427 / 255.0, 104.54012653 / 255.0]
    std_t = [70.68188272 / 255.0, 68.27635443 / 255.0, 72.54505529 / 255.0]

In [188]:
normalize = transforms.Normalize(mean_t, std_t)

In [238]:
transform_test = transforms.Compose([
                    lambda x: Image.fromarray(x),
                    transforms.RandomCrop(84, padding=8),
                    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                    transforms.RandomHorizontalFlip(),
                    lambda x: np.asarray(x),
                    transforms.ToTensor(),
                    normalize])

In [239]:
imgs_test = transform_test(test_im_array).reshape(84,84,3)

In [240]:
imgs_test.shape

torch.Size([84, 84, 3])

In [222]:
imgs_test.reshape(84,84,3)

tensor([[[-0.4159, -0.8545, -1.0242],
         [-1.0808, -0.9677, -0.7554],
         [-0.4159, -0.1895, -0.5008],
         ...,
         [ 0.0368,  0.2774,  0.2915],
         [ 0.2066, -0.0056, -0.2603],
         [-0.4725, -0.5998, -0.6140]],

        [[-0.9394, -1.0101, -0.8403],
         [-0.4159,  0.1925, -0.0197],
         [-0.2603, -0.5149, -0.7271],
         ...,
         [-0.7979, -1.0808, -1.0950],
         [-0.9394, -0.6281, -0.2886],
         [-0.2037, -0.1895, -0.1612]],

        [[-0.3310, -0.3169, -0.3169],
         [-0.2178, -0.3452, -0.4017],
         [-0.3452, -0.3452, -0.3169],
         ...,
         [-0.2178, -0.1046,  0.0510],
         [ 0.1642,  0.2774,  0.3057],
         [ 0.2208,  0.0934,  0.0510]],

        ...,

        [[ 0.6818,  0.8196,  0.9437],
         [ 0.8334,  0.9713,  0.9850],
         [ 0.8885,  0.6680,  0.6542],
         ...,
         [ 0.9437,  1.0264,  0.9299],
         [ 0.8885,  0.8472,  0.8748],
         [ 0.9437,  0.9988,  0.9437]],

        [[

In [245]:
img_list = []
img_list.append(imgs_test.numpy())

In [246]:
img_list_np = np.array(img_list)

In [247]:
img_list_np.shape

(1, 84, 84, 3)

In [243]:
img_list_np.reshape(1,84,84,3)

array([[[[-1.7033483 , -1.7033483 , -1.7033483 ],
         [-1.7033483 , -1.7033483 , -0.9959533 ],
         [-0.64225596, -0.61396015, -0.9535096 ],
         ...,
         [-0.28855854, -0.31685433, -0.0763401 ],
         [-0.16122748, -0.30270645, -0.26026276],
         [-0.4300375 , -0.7837349 , -0.9252139 ]],

        [[-1.7033483 , -1.7033483 , -1.7033483 ],
         [-1.7033483 , -1.7033483 , -1.7033483 ],
         [-1.7033483 , -1.0949886 , -1.1657281 ],
         ...,
         [-1.1091365 , -1.0383971 , -0.98180544],
         [-1.0949886 , -1.3213551 , -1.3637987 ],
         [-1.2506155 , -1.179876  , -1.0949886 ]],

        [[-1.7033483 , -1.7033483 , -1.7033483 ],
         [-1.7033483 , -1.7033483 , -1.7033483 ],
         [-1.7033483 , -1.7033483 , -1.6326088 ],
         ...,
         [-0.3875938 , -0.37344593, -0.35929802],
         [-0.18952326, -0.1329317 , -0.090488  ],
         [ 0.06513885,  0.00854727, -0.06219221]],

        ...,

        [[-0.73802584, -0.7104568 , -0

In [270]:
(base_test_loader.dataset.data)

{}

In [250]:
base_test_loader.dataset.cat2label

{'n04509417': 33,
 'n07697537': 38,
 'n03924679': 23,
 'n02089867': 5,
 'n03888605': 22,
 'n04435653': 31,
 'n02823428': 12,
 'n03337140': 16,
 'n02687172': 10,
 'n02108089': 7,
 'n13133613': 40,
 'n01704323': 2,
 'n01532829': 0,
 'n02108915': 8,
 'n03220513': 15,
 'n03047690': 13,
 'n02457408': 9,
 'n01843383': 4,
 'n03207743': 14,
 'n02105505': 6,
 'n04251144': 26,
 'n04389033': 30,
 'n01558993': 1,
 'n04515003': 34,
 'n03998194': 24,
 'n04258138': 27,
 'n02795169': 11,
 'n03400231': 17,
 'n01770081': 3,
 'n03527444': 18,
 'n07747607': 39,
 'n04443257': 32,
 'n04275548': 28,
 'n04596742': 35,
 'n04612504': 37,
 'n04604644': 36,
 'n04243546': 25,
 'n03676483': 19,
 'n03854065': 21,
 'n04296562': 29,
 'n03838899': 20,
 'n03075370': 42,
 'n02950826': 50,
 'n03770439': 44,
 'n02091244': 45,
 'n02114548': 46,
 'n03417042': 47,
 'n03535780': 41,
 'n03773504': 49,
 'n02981792': 43,
 'n03584254': 48,
 'n02110341': 51,
 'n02219486': 52,
 'n02871525': 56,
 'n02099601': 53,
 'n03775546': 54,
 '

In [249]:
base_test_loader.dataset.label2human

['house finch',
 'robin',
 'triceratops',
 'harvestman',
 'toucan',
 'walker hound',
 'komondor',
 'boxer',
 'french bulldog',
 'three-toed sloth',
 'aircraft carrier',
 'barrel',
 'beer bottle',
 'clog',
 'dishrag',
 'dome',
 'file',
 'frying pan',
 'holster',
 'lipstick',
 'oboe',
 'organ',
 'parallel bars',
 'photocopier',
 'prayer rug',
 'slot',
 'snorkel',
 'solar dish',
 'spider web',
 'stage',
 'tank',
 'tile roof',
 'tobacco shop',
 'unicycle',
 'upright',
 'wok',
 'worm fence',
 'yawl',
 'hotdog',
 'orange',
 'ear',
 'horizontal bar',
 'combination lock',
 'catamaran',
 'miniskirt',
 'ibizan hound',
 'white wolf',
 'garbage truck',
 'ipod',
 'missile',
 'cannon',
 'dalmatian',
 'ant',
 'golden retriever',
 'mixing bowl',
 'malamute',
 'bookshop',
 'crate',
 'hourglass',
 'trifle',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '']

In [291]:
label2human =[""] *201
with open(os.path.join('./cub/CUB_200_2011/', 'classes.txt'), 'r') as f:
        for line in f.readlines():
            catname, humanname = line.strip().lower().split(' ')
            num,humanname = humanname.strip().lower().split('.')
            humanname = " ".join(humanname.split('_'))
            if int(catname) in range(1,201):
                label2human[int(catname)]= humanname

In [293]:
label2human[200]

'common yellowthroat'

In [323]:
basec = np.sort(np.arange(100))
        
# Create mapping for base classes as they are not consecutive anymore.
basec_map = dict(zip(basec, np.arange(len(basec))))

In [24]:
def list2dict(list):
    dict = {}
    for l in list:
        s = l.split(' ')
        id = int(s[0])
        cls = s[1]
        if id not in dict.keys():
            dict[id] = cls
        else:
            raise EOFError('The same ID can only appear once')
    return dict

def text_read( file):
    with open(file, 'r') as f:
        lines = f.readlines()
        for i, line in enumerate(lines):
            lines[i] = line.strip('\n')
    return lines

In [311]:
image_file = os.path.join('./cub', 'CUB_200_2011/images.txt')
split_file = os.path.join('./cub', 'CUB_200_2011/train_test_split.txt')
class_file = os.path.join('./cub', 'CUB_200_2011/image_class_labels.txt')
id2image = list2dict(text_read(image_file))
id2train = list2dict(text_read(split_file))  # 1: train images; 0: test iamges
id2class = list2dict(text_read(class_file))
train_idx = []
test_idx = []
for k in sorted(id2train.keys()):
    if id2train[k] == '1':
        train_idx.append(k)
    else:
        test_idx.append(k)

data = []
targets = []
data2label = {}
if True:
    for k in train_idx:
        image_path = os.path.join('./cub', 'CUB_200_2011/images', id2image[k])
        data.append(image_path)
        targets.append(int(id2class[k]) - 1)
        data2label[image_path] = (int(id2class[k]) - 1)

else:
    for k in test_idx:
        image_path = os.path.join('./cub', 'CUB_200_2011/images', id2image[k])
        self.data.append(image_path)
        self.targets.append(int(id2class[k]) - 1)
        self.data2label[image_path] = (int(id2class[k]) - 1)

In [2]:
image_path = './cub/'+ 'CUB_200_2011/images/'+ "test.txt"

In [4]:
index_path = 1

In [6]:
index = open('./cub/CUB_200_2011/index_list/session_'+ str(index_path) + '.txt').read().splitlines()

In [7]:
index

['CUB_200_2011/images/001.Black_footed_Albatross/Black_Footed_Albatross_0041_796108.jpg',
 'CUB_200_2011/images/001.Black_footed_Albatross/Black_Footed_Albatross_0038_212.jpg',
 'CUB_200_2011/images/001.Black_footed_Albatross/Black_Footed_Albatross_0060_796076.jpg',
 'CUB_200_2011/images/001.Black_footed_Albatross/Black_Footed_Albatross_0077_796114.jpg',
 'CUB_200_2011/images/001.Black_footed_Albatross/Black_Footed_Albatross_0017_796098.jpg',
 'CUB_200_2011/images/001.Black_footed_Albatross/Black_Footed_Albatross_0032_796115.jpg',
 'CUB_200_2011/images/001.Black_footed_Albatross/Black_Footed_Albatross_0007_796138.jpg',
 'CUB_200_2011/images/001.Black_footed_Albatross/Black_Footed_Albatross_0031_100.jpg',
 'CUB_200_2011/images/001.Black_footed_Albatross/Black_Footed_Albatross_0039_796132.jpg',
 'CUB_200_2011/images/001.Black_footed_Albatross/Black_Footed_Albatross_0079_796122.jpg',
 'CUB_200_2011/images/001.Black_footed_Albatross/Black_Footed_Albatross_0036_796127.jpg',
 'CUB_200_2011/i

In [139]:
import os
import pickle
from PIL import Image
import numpy as np
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import re

# torch.multiprocessing.set_sharing_strategy('file_system')
class cub200(Dataset):
    #index_path -> txt_path, 
    #index -> base size
    def __init__(self, args,root='./cub', train=True,
                 index_path=None, index=None, base_sess=None,transform=None,):
        super(Dataset, self).__init__()
        self.root = root
        self.base_sess = base_sess
        self.transform = transform
        self.index_path = index_path
        self.index = index

        self.train = train  # training set or test set
        self._pre_operate()
        self.mean = [120.39586422 / 255.0, 115.59361427 / 255.0, 104.54012653 / 255.0]
        self.std = [70.68188272 / 255.0, 68.27635443 / 255.0, 72.54505529 / 255.0]
        self.normalize = transforms.Normalize(mean=self.mean, std=self.std)
        self.unnormalize = transforms.Normalize(mean=-np.array(self.mean)/self.std, std=1/np.array(self.std))

        if transform is None:
            if self.base_sess == True:
                self.transform = transforms.Compose([
                    lambda x: Image.fromarray(x),transforms.Resize(256),
                    transforms.RandomCrop(224),
                    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                    transforms.RandomHorizontalFlip(),
                    lambda x: np.asarray(x),
                    transforms.ToTensor(),
                    self.normalize
                ])
            else:
                self.transform = transforms.Compose([
                    lambda x: Image.fromarray(x),
                    transforms.ToTensor(),
                    self.normalize
                ])
        else:
            self.transform = transform

        if self.train:
            # self.data, self.targets = self.SelectfromTxt(self.data2label, index_path)
            #base는 100까지 따라서 index = 100
            if base_sess:
                self.data, self.targets = self.SelectfromClasses(self.data, self.targets, index)
            #novel session에 대한 세션 정보 줘야함
            else:
                self.data, self.targets = self.SelectfromTxt(self.data2label, index_path)
        else:
            if base_sess:
                
                self.data, self.targets = self.SelectfromClasses(self.data, self.targets, index)
            else:
                #modifying
                self.data, self.targets = self.SelectfromTxt(self.data, self.targets, index_path)
                       
            #HSJ self.labels
        self.labels = self.targets
        self.imgs = self._getImg(self.data)
            #HSJ self.imgs
        #HSJ LABELTOHUMAN

        # Labels are available by codes by default. Converting them into human readable labels.
        self.label2human =[""] *200
        with open('./cub/CUB_200_2011/' +'classes.txt', 'r') as f:
            for line in f.readlines():
                catname, humanname = line.strip().lower().split(' ')
                num,humanname = humanname.strip().lower().split('.')
                humanname = " ".join(humanname.split('_'))
                if int(catname) in range(1,200):
                    self.label2human[int(catname)-1]= humanname
        #HSJ LABELTOHUMAN

        #HSJ basec_map
        basec = np.sort(np.arange(200))
                
        # Create mapping for base classes as they are not consecutive anymore.
        basec_map = dict(zip(basec, np.arange(len(basec))))
        #HSJ basec_map

    def _getImg(self,d_list):
        img_list = []
        for d_path in d_list:
            c_img = Image.open(d_path).convert('RGB')
            c_img = np.array(c_img)
            c_img_transformed = self.transform(c_img)
            img_list.append(c_img_transformed.numpy())
        img_list_np = np.array(img_list)
        return img_list_np

        
    def SelectfromTxt(self, data2label, index_path):
        index = open('./cub/CUB_200_2011/index_list/session_'+ str(index_path) + '.txt').read().splitlines()
        data_tmp = []
        targets_tmp = []
        for i in index:
            img_path = self.root + str(i)
            data_tmp.append(img_path)
            targets_tmp.append(data2label[img_path])

        return data_tmp, targets_tmp

    def SelectfromClasses(self, data, targets, index):
        data_tmp = []
        targets_tmp = []
        for i in range(1,index+1):
            ind_cl = np.where(i == targets)[0]
            for j in ind_cl:
                data_tmp.append(data[j])
                targets_tmp.append(targets[j])

        return data_tmp, targets_tmp
    def list2dict(self, list):
        dict = {}
        for l in list:
            s = l.split(' ')
            id = int(s[0])
            cls = s[1]
            if id not in dict.keys():
                dict[id] = cls
            else:
                raise EOFError('The same ID can only appear once')
        return dict
    
    def text_read(self, file):
        with open(file, 'r') as f:
            lines = f.readlines()
            for i, line in enumerate(lines):
                lines[i] = line.strip('\n')
        return lines
        
    def _pre_operate(self):
            image_file = './cub/'+ 'CUB_200_2011/images.txt'
            split_file = './cub/'+ 'CUB_200_2011/train_test_split.txt'
            class_file = './cub/'+ 'CUB_200_2011/image_class_labels.txt'
            id2image = self.list2dict(self.text_read(image_file))
            id2train = self.list2dict(self.text_read(split_file))  # 1: train images; 0: test iamges
            id2class = self.list2dict(self.text_read(class_file))
            train_idx = []
            test_idx = []
            for k in sorted(id2train.keys()):
                if id2train[k] == '1':
                    train_idx.append(k)
                else:
                    test_idx.append(k)

            self.data = []
            self.targets = []
            self.data2label = {}
            if self.train:
                for k in train_idx:
                    image_path = './cub/'+ 'CUB_200_2011/images/'+ str(id2image[k])
                    self.data.append(image_path)
                    self.targets.append(int(id2class[k]) - 1)
                    self.data2label[image_path] = (int(id2class[k]) - 1)

            else:
                for k in test_idx:
                    image_path = './cub/'+ 'CUB_200_2011/images/'+ str(id2image[k])
                    self.data.append(image_path)
                    self.targets.append(int(id2class[k]) - 1)
                    self.data2label[image_path] = (int(id2class[k]) - 1)
            self.targets = np.array(self.targets)
                    
    def __getitem__(self, item):
        if self.base_sess:
            img = self.imgs[item]
            target = self.targets[item] - min(self.labels)
            
            return img, target, item
        else:
            if self.split == "train" and self.phase == "train" and self.n_base_support_samples > 0:
                    assert self.n_base_support_samples > 0
                    # These samples will be stored in memory for every episode.
                    support_xs = []
                    support_ys = []
                    if self.fix_seed:
                        np.random.seed(item)
                    cls_sampled = np.random.choice(self.classes, len(self.classes), False)
                    
                    for idx, cls in enumerate(np.sort(cls_sampled)):
                        imgs = np.asarray(self.data[cls]).astype('uint8')
                        support_xs_ids_sampled = np.random.choice(range(imgs.shape[0]),
                                                                  self.n_base_support_samples,
                                                                  False)
                        support_xs.append(imgs[support_xs_ids_sampled])
                        support_ys.append([cls] * self.n_base_support_samples)    
                    support_xs, support_ys = np.array(support_xs), np.array(support_ys)
                    num_ways, n_queries_per_way, height, width, channel = support_xs.shape
                    support_xs = support_xs.reshape((-1, height, width, channel))
                    if self.n_base_aug_support_samples > 1:
                        support_xs = np.tile(support_xs, (self.n_base_aug_support_samples, 1, 1, 1))
                        support_ys = np.tile(support_ys.reshape((-1, )), (self.n_base_aug_support_samples))
                    support_xs = np.split(support_xs, support_xs.shape[0], axis=0)
                    support_xs = torch.stack(list(map(lambda x: self.train_transform(x.squeeze()), support_xs)))

                    # Dummy query.
                    query_xs = support_xs
                    query_ys = support_ys
            else:
            
                if self.fix_seed:
                    np.random.seed(item)

                #몇개로 나눌지(cub는 의미 없음)
                """BytesWarning
                if self.disjoint_classes:
                    cls_sampled = self.classes[:self.n_ways] # 
                    self.classes = self.classes[self.n_ways:]
                else:
                    cls_sampled = np.random.choice(self.classes, self.n_ways, False)
                """
                cls_sampled = self.targets

                support_xs = []
                support_ys = []
                query_xs = []
                query_ys = []
                for idx, cls in enumerate(np.sort(cls_sampled)):
                    #support_xs_ids_sampled = np.random.choice(range(imgs.shape[0]), self.n_shots, False)
                    support_xs.append(self.imgs)
                    #support_xs.append(imgs[support_xs_ids_sampled])
                    lbl = idx
                    if self.eval_mode in ["few-shot-incremental-fine-tune"]:
                        lbl = cls
                    support_ys.append([lbl] * self.n_shots) #

                    #query_xs_ids = np.setxor1d(np.arange(imgs.shape[0]), support_xs_ids_sampled)
                    #query_xs_ids = np.random.choice(query_xs_ids, self.n_queries, False)
                    query_xs.append(self.imgs)
                    #query_xs.append(imgs[query_xs_ids])
                    query_ys.append([lbl] * 30) #

                support_xs, support_ys, query_xs, query_ys = np.array(support_xs), np.array(support_ys), np.array(query_xs), np.array(query_ys)
                num_ways, n_queries_per_way, height, width, channel = query_xs.shape

                query_xs = query_xs.reshape((num_ways * n_queries_per_way, height, width, channel))
                query_ys = query_ys.reshape((num_ways * n_queries_per_way, ))

                support_xs = support_xs.reshape((-1, height, width, channel))
                """
                if self.n_aug_support_samples > 1:
                    support_xs = np.tile(support_xs, (self.n_aug_support_samples, 1, 1, 1))
                    support_ys = np.tile(support_ys.reshape((-1, )), (self.n_aug_support_samples))
                """
                support_xs = np.split(support_xs, support_xs.shape[0], axis=0)
                query_xs = query_xs.reshape((-1, height, width, channel))
                query_xs = np.split(query_xs, query_xs.shape[0], axis=0)

                support_xs = torch.stack(list(map(lambda x: self.train_transform(x.squeeze()), support_xs)))
                query_xs = torch.stack(list(map(lambda x: self.test_transform(x.squeeze()), query_xs)))

        return support_xs.float(), support_ys, query_xs.float(), query_ys
            

    def __len__(self):
        return len(self.targets)

In [84]:
def _getImg(d_list):
    img_list = []
    for d_path in d_list:
        c_img = Image.open(d_path).convert('RGB')
        c_img
        c_img_transformed = k.transform(c_img).reshape(84,84,3)
        img_list.append(c_img_transformed.numpy())
    img_list_np = np.array(img_list)
    return img_list_np


In [131]:
k = cub200(args=args, base_sess = True, train=False,index = 100, index_path = 1)

In [113]:
from torch.utils.data import DataLoader

In [140]:
val_loader = DataLoader(cub200(args=args, base_sess = True, train=False, index = 100, index_path = 1),
                               64, shuffle=False, drop_last=False,
                                num_workers=4)

In [126]:
val_loader.dataset

100

In [164]:
len(val_loader.dataset.targets)

2854

In [163]:
len(train_loader.dataset.labels)

30000

In [146]:
ikt = 0
for idx, (input, target, _) in enumerate(val_loader):
    print(len(input))


64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
64
38


In [167]:
len(set(val_loader.dataset.label2human))

200

In [104]:
img_list = []
for i in k.data:
    c_img = Image.open(k.data[0]).convert('RGB')
    c_img = np.array(c_img)
    c_img_transformed = k.transform(c_img).reshape(84,84,3)
    img_list.append(c_img_transformed.numpy())

In [None]:
val_loader.dataset.

In [172]:
train_loader.dataset.basec_map

{0: 0,
 1: 1,
 2: 2,
 4: 3,
 5: 4,
 8: 5,
 11: 6,
 12: 7,
 14: 8,
 19: 9,
 21: 10,
 23: 11,
 24: 12,
 27: 13,
 29: 14,
 30: 15,
 31: 16,
 33: 17,
 35: 18,
 36: 19,
 37: 20,
 38: 21,
 39: 22,
 41: 23,
 42: 24,
 44: 25,
 45: 26,
 46: 27,
 47: 28,
 48: 29,
 49: 30,
 50: 31,
 51: 32,
 52: 33,
 53: 34,
 54: 35,
 55: 36,
 56: 37,
 59: 38,
 60: 39,
 63: 40,
 64: 41,
 65: 42,
 66: 43,
 68: 44,
 69: 45,
 70: 46,
 72: 47,
 74: 48,
 76: 49,
 77: 50,
 80: 51,
 82: 52,
 87: 53,
 88: 54,
 89: 55,
 92: 56,
 93: 57,
 94: 58,
 96: 59}

In [178]:
val_loader.dataset.label2human[100:]

['white pelican',
 'western wood pewee',
 'sayornis',
 'american pipit',
 'whip poor will',
 'horned puffin',
 'common raven',
 'white necked raven',
 'american redstart',
 'geococcyx',
 'loggerhead shrike',
 'great grey shrike',
 'baird sparrow',
 'black throated sparrow',
 'brewer sparrow',
 'chipping sparrow',
 'clay colored sparrow',
 'house sparrow',
 'field sparrow',
 'fox sparrow',
 'grasshopper sparrow',
 'harris sparrow',
 'henslow sparrow',
 'le conte sparrow',
 'lincoln sparrow',
 'nelson sharp tailed sparrow',
 'savannah sparrow',
 'seaside sparrow',
 'song sparrow',
 'tree sparrow',
 'vesper sparrow',
 'white crowned sparrow',
 'white throated sparrow',
 'cape glossy starling',
 'bank swallow',
 'barn swallow',
 'cliff swallow',
 'tree swallow',
 'scarlet tanager',
 'summer tanager',
 'artic tern',
 'black tern',
 'caspian tern',
 'common tern',
 'elegant tern',
 'forsters tern',
 'least tern',
 'green tailed towhee',
 'brown thrasher',
 'sage thrasher',
 'black capped vir

In [102]:
k.transform(c_img).reshape(84,84,3)

tensor([[[-0.9677, -1.0384, -0.5715],
         [-0.8120, -0.9252, -0.7837],
         [-1.0950, -1.3214, -0.4725],
         ...,
         [-1.1374, -1.3497, -1.1374],
         [-0.7413, -1.2365, -0.9960],
         [-0.9818, -0.9252, -0.7130]],

        [[-1.2365, -0.6847, -0.8545],
         [-0.7979, -0.5715, -0.6564],
         [-0.6847, -0.6423, -0.8828],
         ...,
         [-1.1799, -1.3921, -1.4204],
         [-1.2931, -0.7554, -0.8545],
         [-1.0950, -0.7979, -0.7413]],

        [[-0.6140, -0.7413, -0.3734],
         [-0.3310, -0.7554, -0.6281],
         [-0.4725, -0.3027, -0.6847],
         ...,
         [-1.4204, -1.1233, -1.1799],
         [-1.1940, -1.2082, -1.1233],
         [-1.1657, -1.0384, -1.0808]],

        ...,

        [[ 0.0201,  0.2131, -0.7518],
         [-0.3934,  0.1580, -0.1453],
         [-0.6691, -0.4210, -0.1177],
         ...,
         [-1.3032, -1.3170, -1.3721],
         [-1.1929, -0.9172, -1.0826],
         [-1.3308, -1.2067, -0.9448]],

        [[

In [85]:
_getImg(k.data)

AttributeError: __array_interface__

In [33]:
k._pre_operate()

In [35]:
k.targets

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,


In [36]:
k.SelectfromClasses(k.data,k.targets,100)

([], [])

In [48]:
len(k.data)

5994

In [65]:
for i in range(1,101):
    k.targets = np.array(k.targets) 
    ind_cl = np.where(i==k.targets)[0]
    for j in ind_cl:
        print('catch')

catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catch
catc

In [179]:
label2human =[""] *200
with open('./cub/CUB_200_2011/' +'classes.txt', 'r') as f:
    for line in f.readlines():
        catname, humanname = line.strip().lower().split(' ')
        num,humanname = humanname.strip().lower().split('.')
        humanname = " ".join(humanname.split('_'))
        if int(catname) in range(1,201):
            label2human[int(catname)-1]= humanname

In [185]:
label2human[199]

'common yellowthroat'

numpy.ndarray

In [155]:
train_loader = DataLoader(ImageNet(args=args, split="train", phase="train"),
                                64, shuffle=True, drop_last=True,
                                num_workers=5)

In [158]:
ikt = 0
for idx, (input, target, _) in enumerate(val_loader):
    ikt = ikt + 1
print(ikt)


45


In [27]:
image_file = './cub/'+ 'CUB_200_2011/images.txt'
split_file = './cub/'+ 'CUB_200_2011/train_test_split.txt'
class_file = './cub/'+ 'CUB_200_2011/image_class_labels.txt'
id2image = list2dict(text_read(image_file))
id2train = list2dict(text_read(split_file))  # 1: train images; 0: test iamges
id2class = list2dict(text_read(class_file))
train_idx = []
test_idx = []
for k in sorted(id2train.keys()):
    if id2train[k] == '1':
        train_idx.append(k)
    else:
        test_idx.append(k)

data = []
targets = []
data2label = {}
if False:
    for k in train_idx:
        image_path = os.path.join('./cub', 'CUB_200_2011/images', id2image[k])
        data.append(image_path)
        targets.append(int(id2class[k]) - 1)
        data2label[image_path] = (int(id2class[k]) - 1)

else:
    for k in test_idx:
        image_path = os.path.join('./cub', 'CUB_200_2011/images', id2image[k])
        data.append(image_path)
        targets.append(int(id2class[k]) - 1)
        data2label[image_path] = (int(id2class[k]) - 1)

In [29]:
k

AttributeError: 'int' object has no attribute 'data'

i