In [1]:
from __future__ import print_function

import os
from syslog import LOG_MAIL
from urllib3 import Retry
import random
import numpy as np
import matplotlib.pyplot as plt

# torch
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader

# 
import time
import collections.abc
from flow_trainer import FlowTrainer
from tqdm import tqdm
from config import argumentParse

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from dataloader_cifar import cifar_dataloader as dataloader
from PreResNet_cifar import *

# NOTE(review): `wandb` is bound to None and is not referenced by any visible cell;
# presumably a placeholder that disables Weights & Biases logging — confirm.
wandb = None

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Experiment name; it is forwarded as --name and becomes part of the checkpoint folder.
# name = 'siamese_lossNll_UNICON_Tf_1.0_0.7_w_FlowSp_T_0.5_center_0.8_lamba_f_0.1_fixJSD'
name = 'twoNet_lossNLL_wo_blur'
args = argumentParse(input_args=['--gpuid', '0,1', '--ratio', '0.9', '--config', './config/cifar100.yaml', '--name', name])

## GPU Setup
# args.gpuid is a comma-separated id string; it only takes effect if CUDA has not
# been initialised yet (assumes nothing above has touched the GPU — TODO confirm).
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpuid
# torch.cuda.set_device(args.gpuid)

# Seed every RNG the notebook can draw from. NumPy was previously left unseeded,
# so any NumPy-based randomness differed from run to run.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)

start to argument parse


In [3]:

## Download the Datasets
# Anything other than 'cifar10' falls back to CIFAR-100, as before.
dataset_cls = torchvision.datasets.CIFAR10 if args.dataset == 'cifar10' else torchvision.datasets.CIFAR100
# Fetch the training split first, then the test split.
for is_train_split in (True, False):
    dataset_cls(args.data_path, train=is_train_split, download=True)


Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Checkpoint folder name: <dataset>_<noise_mode>_<ratio>_flow_<name>
# (the earlier variant stopped after <ratio>).
folder = '_'.join([args.dataset, args.noise_mode, str(args.ratio), 'flow', args.name])
model_save_loc = './checkpoint/' + folder

In [5]:
def create_model():
    """Build a ResNet18 from the notebook-level ``args`` and move it to the GPU.

    Returns:
        The network after ``.cuda()``, constructed with
        ``num_classes=args.num_class`` and ``feature_dim=args.cond_size``.
    """
    return ResNet18(num_classes=args.num_class, feature_dim=args.cond_size).cuda()

In [6]:
## Feature extraction over the test loader
def getFeature(net):
    """Collect the features and labels produced for every batch of ``test_loader``.

    Reads the notebook-level ``test_loader``, which must be defined before this
    function is called.

    Args:
        net: Model whose forward pass returns a ``(feature, _)`` pair.

    Returns:
        ``(features, labels)``: the per-batch features and targets, each
        concatenated along dimension 0.
    """
    net.eval()

    features = []
    labels = []
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.cuda(), targets.cuda()
            # Run the network that was passed in; the previous code always used
            # the global net1 and silently ignored the argument.
            feature, _ = net(inputs)
            features.append(feature)
            labels.append(targets)
    features = torch.cat(features, dim=0)
    labels = torch.cat(labels, dim=0)
    print("features list", features.size())
    return features, labels

In [7]:
## Call the dataloader
# Path of the noise file, built from the data path, noise ratio and noise mode.
noise_file_path = '%s/clean_%.4f_%s.npz' % (args.data_path, args.ratio, args.noise_mode)
loader = dataloader(
    args.dataset,
    r=args.ratio,
    noise_mode=args.noise_mode,
    batch_size=args.batch_size,
    num_workers=args.num_workers,
    root_dir=model_save_loc,
    noise_file=noise_file_path,
)

In [8]:
print('| Building net')
# Models are created in the same order as before: net1, net2, then the two flow nets.
net1, net2 = create_model(), create_model()
cudnn.benchmark = True

# flow model
flowTrainer = FlowTrainer(args)
flowNet1, flowNet2 = flowTrainer.create_model(), flowTrainer.create_model()


def _make_sgd(module, lr):
    """SGD with momentum 0.9 and the configured weight decay for ``module``."""
    return optim.SGD(module.parameters(), lr=lr, momentum=0.9, weight_decay=args.weight_decay)


def _make_cosine(optimizer, lr):
    """Cosine annealing over ``args.num_epochs`` with a floor of ``lr / 100``."""
    return optim.lr_scheduler.CosineAnnealingLR(optimizer, args.num_epochs, lr / 1e2)


# Classifier networks use args.lr, flow networks use args.lr_f.
optimizer1 = _make_sgd(net1, args.lr)
optimizer2 = _make_sgd(net2, args.lr)
optimizerFlow1 = _make_sgd(flowNet1, args.lr_f)
optimizerFlow2 = _make_sgd(flowNet2, args.lr_f)

scheduler1 = _make_cosine(optimizer1, args.lr)
schedulerFlow1 = _make_cosine(optimizerFlow1, args.lr_f)
scheduler2 = _make_cosine(optimizer2, args.lr)
schedulerFlow2 = _make_cosine(optimizerFlow2, args.lr_f)

| Building net
Training T : 1.0
Number of trainable parameters of Point CNF: 391457
Training T : 1.0
Number of trainable parameters of Point CNF: 391457


In [9]:
def load_model(path, net, optimizer, scheduler):
    """Restore ``net`` from the checkpoint at ``path`` and return its stored epoch.

    Args:
        path: Checkpoint file; it must contain the keys ``'net'`` and ``'epoch'``.
        net: Module that receives the ``'net'`` state dict.
        optimizer: Accepted but not used; restoring its state is disabled.
        scheduler: Accepted but not used; restoring its state is disabled.

    Returns:
        The value stored under ``'epoch'`` in the checkpoint.
    """
    # Map the stored tensors onto the currently selected CUDA device.
    target_device = torch.device('cuda', torch.cuda.current_device())
    checkpoint = torch.load(path, map_location=target_device)
    net.load_state_dict(checkpoint['net'])
    # Optimizer / scheduler state is intentionally left untouched:
    # optimizer.load_state_dict(checkpoint['optimizer'])
    # scheduler.load_state_dict(checkpoint['scheduler'])
    return checkpoint['epoch']

In [10]:
## load warmup model
# Each entry: checkpoint file name plus the objects handed to load_model.
warmup_checkpoints = [
    ("Net_warmup_1.pth", net1, optimizer1, scheduler1),
    ("Net_warmup_2.pth", net2, optimizer2, scheduler2),
    ("FlowNet_warmup_1.pth", flowNet1, optimizerFlow1, schedulerFlow1),
    ("FlowNet_warmup_2.pth", flowNet2, optimizerFlow2, schedulerFlow2),
]
# `epoch` ends up holding the epoch stored in the last checkpoint (FlowNet_warmup_2.pth).
for ckpt_name, ckpt_net, ckpt_optimizer, ckpt_scheduler in warmup_checkpoints:
    epoch = load_model(os.path.join(model_save_loc, ckpt_name), ckpt_net, ckpt_optimizer, ckpt_scheduler)

In [11]:
# ## load the non-warmup checkpoints (Net_*.pth / FlowNet_*.pth) instead
# _ = load_model(os.path.join(model_save_loc, "Net_1.pth"), net1, optimizer1, scheduler1)
# _ = load_model(os.path.join(model_save_loc, "Net_2.pth"), net2, optimizer2, scheduler2)
# _ = load_model(os.path.join(model_save_loc, "FlowNet_1.pth"), flowNet1, optimizerFlow1, schedulerFlow1)
# epoch = load_model(os.path.join(model_save_loc, "FlowNet_2.pth"), flowNet2, optimizerFlow2, schedulerFlow2)

In [12]:
# gpus
# args.gpuid is a comma-separated string such as '0,1'. Count the ids rather than
# the characters, so a single id like '10' is not mistaken for a multi-GPU setup.
gpu_ids = [gpu_id for gpu_id in args.gpuid.split(',') if gpu_id.strip()]
if len(gpu_ids) > 1:
    net1 = nn.DataParallel(net1)
    flowNet1 = nn.DataParallel(flowNet1)
    net2 = nn.DataParallel(net2)
    flowNet2 = nn.DataParallel(flowNet2)

In [13]:
# Build the 'val' loader; getFeature() also reads this notebook-level test_loader.
test_loader = loader.run(0, 'val')
# Evaluate both classifier/flow pairs on it; the call yields an accuracy and a confidence value.
acc, confidence = flowTrainer.testByFlow(epoch, net1, flowNet1, net2, flowNet2, test_loader)
print("epoch : ", epoch, " acc : ", acc)


====Test====



10it [00:14,  1.46s/it]


| Test Epoch #28	 Accuracy: 12.87%	 Condifence: 0.03%

epoch :  28  acc :  12.87





In [14]:
# Features and labels of net1 over test_loader, concatenated across batches.
features, labels = getFeature(net1)

features list torch.Size([10000, 512])


In [15]:
# Keep only the first `target_num` classes (labels 0 .. target_num - 1) for the plot.
# The earlier `<=` comparison kept target_num + 1 classes.
target_num = 10
# Build the mask once and reuse it so labels and features stay aligned.
target_mask = labels < target_num
labels_target = labels[target_mask]
features_target = features[target_mask]

In [16]:
from sklearn.manifold import TSNE

def tsne_reduce(data):
    """Embed ``data`` into two dimensions with t-SNE.

    Args:
        data: Samples to embed, in a form accepted by ``TSNE.fit_transform``.

    Returns:
        The result of ``TSNE(n_components=2, random_state=1).fit_transform(data)``.
    """
    # random_state is fixed so repeated runs give the same embedding.
    return TSNE(n_components=2, random_state=1).fit_transform(data)

In [17]:
# Project the selected features to 2-D; the tensor is moved to the CPU before it is handed to TSNE.
feature2D = tsne_reduce(features_target.cpu())

In [18]:
# Move the labels to the host once and reuse the array for every comparison.
labels_target_np = labels_target.cpu().numpy()
unique_labels = np.unique(labels_target_np)
# One array of 2-D points per distinct label, in the order of unique_labels.
data_by_label = [feature2D[labels_target_np == label] for label in unique_labels]

In [19]:
import matplotlib.pyplot as plt

# Scatter plot of the t-SNE embedding, one colour per class.
f, ax = plt.subplots()

colors = plt.cm.rainbow(np.linspace(0, 1, len(unique_labels)))
for label, color, data in zip(unique_labels, colors, data_by_label):
    ax.scatter(data[:, 0], data[:, 1], color=color, label=label, alpha=0.9, edgecolors='w')

# The per-class labels were passed to scatter but never shown; draw the legend
# outside the axes so it does not cover any points.
ax.legend(title='class', fontsize='small', loc='upper left', bbox_to_anchor=(1.02, 1.0))
ax.set_title('t-SNE of test features')

# Save before showing so the file is written from the fully drawn figure.
f.savefig(f"{model_save_loc}/{args.dataset}_{args.noise_mode}_{str(args.ratio)}_{epoch}_t-sne.pdf", bbox_inches='tight')

plt.show()

In [20]:
# Build the 'eval_train' loader and compute the JSD values from both classifier/flow pairs.
# NOTE(review): JSDs is later indexed per sample and compared to a threshold — presumably
# one value per training sample (args.num_samples of them); confirm in FlowTrainer.
eval_loader = loader.run(0, 'eval_train')   
JSDs = flowTrainer.Calculate_JSD(net1, flowNet1, net2, flowNet2, args.num_samples, eval_loader)

100it [00:37,  2.63it/s]


In [21]:
# The mean JSD is the cut-off; SR is the number of samples below it divided by args.num_samples.
threshold = JSDs.mean()
SR = (JSDs < threshold).sum().item() / args.num_samples

In [22]:
def logJSD(epoch, threshold, labeled_trainloader, unlabeled_trainloader):
    """Score the labeled/unlabeled split and save a histogram of per-sample JSD values.

    The labeled loader is treated as the selected "clean" set and the unlabeled
    loader as the "noisy" set. A sample counts as truly noisy when its
    ``noise_label`` differs from its ``origin_label``.

    Args:
        epoch: Epoch number, used in the plot title and the output file name.
        threshold: Cut-off value drawn as the black vertical line.
        labeled_trainloader: Loader whose dataset exposes ``pred_idx``,
            ``origin_prob``, ``origin_label`` and ``noise_label``.
        unlabeled_trainloader: Loader whose dataset exposes ``pred_idx``,
            ``origin_label`` and ``noise_label``.

    Returns:
        dict with the keys ``clean_recall``, ``clean_precision``, ``clean_f1``,
        ``noise_recall``, ``noise_precision`` and ``noise_f1``. These values were
        previously computed and discarded.

    Side effects:
        Clears the current pyplot figure and writes
        ``<model_save_loc>/<dataset>_<noise_mode>_<ratio>_<epoch>_JSD.pdf``.
    """
    labeled_set = labeled_trainloader.dataset
    unlabeled_set = unlabeled_trainloader.dataset

    labeled_idx = labeled_set.pred_idx
    unlabeled_idx = unlabeled_set.pred_idx
    origin_prob = labeled_set.origin_prob

    num_cleanset, num_noiseset = len(labeled_set), len(unlabeled_set)
    num_wholeset = num_cleanset + num_noiseset

    # 1.0 where the (possibly corrupted) label differs from the original label.
    cleanset_noise_mask = (labeled_set.origin_label != labeled_set.noise_label).astype(float)
    noiseset_noise_mask = (unlabeled_set.origin_label != unlabeled_set.noise_label).astype(float)

    num_cleanset_noise = cleanset_noise_mask.sum()
    num_noiseset_noise = noiseset_noise_mask.sum()
    num_noise = num_cleanset_noise + num_noiseset_noise

    num_cleanset_clean = num_cleanset - num_cleanset_noise
    num_clean = num_wholeset - num_noise

    # eps keeps the ratios finite when a set is empty.
    eps = 1e-20
    clean_recall = num_cleanset_clean / (num_clean + eps)
    clean_precision = num_cleanset_clean / (num_cleanset + eps)
    clean_f1 = (2 * clean_recall * clean_precision) / (clean_recall + clean_precision + eps)

    noise_recall = num_noiseset_noise / (num_noise + eps)
    noise_precision = num_noiseset_noise / (num_noiseset + eps)
    noise_f1 = (2 * noise_recall * noise_precision) / (noise_recall + noise_precision + eps)

    # Split the per-sample values by ground truth (truly clean vs truly noisy),
    # independent of which loader the sample ended up in.
    clean_prob = []
    noise_prob = []
    for indices, noise_mask in ((labeled_idx, cleanset_noise_mask), (unlabeled_idx, noiseset_noise_mask)):
        for idx, is_noise in zip(indices, noise_mask):
            bucket = noise_prob if is_noise == 1.0 else clean_prob
            bucket.append(float(origin_prob[idx]))

    # draw JSD dis
    plt.clf()
    kwargs = dict(histtype='stepfilled', alpha=0.75, density=False, bins=20)
    plt.hist(clean_prob, color='green', range=(0., 1.), label='clean', **kwargs)
    plt.hist(noise_prob, color='red'  , range=(0., 1.), label='noisy', **kwargs)

    # float() accepts Python numbers, NumPy scalars and 0-dim tensors (CPU or GPU),
    # so matplotlib never receives a tensor it cannot convert.
    plt.axvline(x=float(threshold),          color='black')
    plt.axvline(x=float(origin_prob.mean()), color='gray')
    plt.xlabel('JSD Values')
    plt.ylabel('count')
    plt.title(f'JSD Distribution of N Samples epoch :{epoch}')
    plt.xlim(0, 1)
    plt.grid(True)
    # The histogram labels are only visible once a legend is drawn.
    plt.legend()
    plt.savefig(f"{model_save_loc}/{args.dataset}_{args.noise_mode}_{str(args.ratio)}_{epoch}_JSD.pdf", bbox_inches='tight')

    return {
        'clean_recall': float(clean_recall),
        'clean_precision': float(clean_precision),
        'clean_f1': float(clean_f1),
        'noise_recall': float(noise_recall),
        'noise_precision': float(noise_precision),
        'noise_f1': float(noise_f1),
    }

In [23]:
# Split the training data into labeled/unlabeled loaders, passing the selection
# ratio SR and the per-sample JSD values as `prob`.
labeled_trainloader, unlabeled_trainloader = loader.run(SR, 'train', prob= JSDs) # Uniform Selection

In [24]:
# Save the JSD histogram for the loaded epoch using the labeled/unlabeled split.
logJSD(epoch, threshold, labeled_trainloader, unlabeled_trainloader)