In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = '2'
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as dataset
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from copy import deepcopy as d_copy
import random
from torch.utils.tensorboard import SummaryWriter

In [2]:
print("===INFO===")
print("torch ver : %s\ntorchvision ver : %s " %(torch.__version__, torchvision.__version__))
print("GPU model :",torch.cuda.get_device_name(0))
writer = SummaryWriter("./runs/fc1_retrain_5e-1")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

===INFO===
torch ver : 1.8.0
torchvision ver : 0.2.2 
GPU model : TITAN RTX


In [3]:
def set_randomness(seed=0):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

def get_dataset(num_train,batch_size,
                dataset_path,retrain_model_path):
    if os.path.isdir(retrain_model_path) is False:
        # make folder
        os.mkdir(retrain_model_path)
        print("retrain model path created :",os.listdir(retrain_model_path+"../"))
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    transforms_train = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    transforms_test = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    train_dataset = dataset.ImageFolder(root=dataset_path+"train",
                                       transform=transforms_train)
    subset_train_dataset,_ = torch.utils.data.random_split(train_dataset, 
                                        [num_train,len(train_dataset)-num_train])
    test_dataset = dataset.ImageFolder(root=dataset_path+"val",
                                       transform=transforms_test)
    
    train_dataloader = torch.utils.data.DataLoader(subset_train_dataset,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        num_workers=4) # for using subset
    test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        num_workers=4)
    print("train dataset[%d], test dataset[%d] are loaded"%(len(train_dataloader),len(test_dataloader)))
    return train_dataloader,test_dataloader

In [4]:
seed=0
dataset_path = "/media/2/Network/Imagenet_dup/"
retrain_model_path = "/media/0/Network/0821_to_fullmodels/"
batch_size = 32
num_train= 128000
error_index = 128
num_error = 64

In [5]:
set_randomness(seed)
vgg16 = torchvision.models.vgg16(pretrained=True).to(device)
#vgg16

In [6]:
optimizer = torch.optim.SGD(vgg16.parameters(),lr=5e-1,weight_decay=1e-4)
loss_fn = nn.CrossEntropyLoss()

log_file = "./fc1_retrain.txt"
f = open(log_file,"w")
f.close()

In [7]:
train_dataloader,test_dataloader = get_dataset(num_train,batch_size, dataset_path,retrain_model_path)

train dataset[4000], test dataset[1563] are loaded


In [8]:
original_features = []
error_features = []
file_idx = 0
#import h5py
F_PATH = "/media/1/Conv5-1_E128~192_Feature/"
def error_injection(name):
    def hook(model,input):
        global error_index, num_error, original_features
        start = error_index
        end = error_index + num_error
        #original_features.append(input.cpu())
        #normalize = nn.BatchNorm2d(512)
        input[0][0][:, start:end]=0
        #error_features.append(input.cpu())
        #input = normalize(input)
        
    return hook
def hook_register(model):
    for name,layers in model.named_modules():
        #print(name,layer)
        for idx,layer in enumerate(layers.features):
            #print(idx,layer)
            if idx == 24 and isinstance(layer, torch.nn.modules.conv.Conv2d) :
                print("input",name,layer) # target layer Conv5_1
                layer.register_forward_pre_hook(error_injection(name))
        break

In [9]:
# evaluation phasetraining
def eval(model,dataloader,epoch,loss_fn,batch_size,
         log_file,TensorBoardWriter):
    
    
    model.cuda()
    model.eval()
    total = 0
    correct =0
    total_loss =0.0
    with torch.no_grad():
        print("======eval start=======")
        for i, data in enumerate(dataloader):
            inputs,labels = data
            inputs,labels = inputs.cuda(), labels.cuda()
        
            y_hat = model(inputs)
            
            _, predicted = torch.max(y_hat, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            loss = loss_fn(y_hat,labels)
            total_loss +=loss.item()
            
            if(i%200 == 199):
                print("step : %d / %d acc : %.3f"
                      %(i + 1,int(len(dataloader)), correct*100/total))
                #print(".",end="")
        print("")
    acc = 100*correct/total
    #print(total_loss, len(dataloader))
    avg_loss = total_loss / (len(dataloader)*batch_size)
    print("Eval acc of model on imagenet : %.4f %%, Loss : %.4f" %(acc,avg_loss))
    f = open(log_file,"a")
    print("Eval acc of model on imagenet : %.4f %%, Loss : %.4f" %(acc,avg_loss),file=f)
    f.close()
    TensorBoardWriter.add_scalar("Fc1_Retrain/ACC_EVAL",acc,epoch)
    TensorBoardWriter.add_scalar("Fc1_Retrain/LOSS_EVAL",avg_loss,epoch)
    print("======eval  end ======")  
    return acc
def test_eval(model,dataloader,epoch,loss_fn,batch_size,
         log_file):
    
    
    model.cuda()
    model.eval()
    total = 0
    correct =0
    total_loss =0.0
    with torch.no_grad():
        print("======eval start=======")
        for i, data in enumerate(dataloader):
            inputs,labels = data
            inputs,labels = inputs.cuda(), labels.cuda()
        
            y_hat = model(inputs)
            
            _, predicted = torch.max(y_hat, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            loss = loss_fn(y_hat,labels)
            total_loss +=loss.item()
            
            if(i%200 == 199):
                print("step : %d / %d acc : %.3f"
                      %(i + 1,int(len(dataloader)), correct*100/total))
                #print(".",end="")
        print("")
    acc = 100*correct/total
    #print(total_loss, len(dataloader))
    avg_loss = total_loss / (len(dataloader)*batch_size)
    print("Eval acc of model on imagenet : %.4f %%, Loss : %.4f" %(acc,avg_loss))
    print("======eval  end ======")  
    return acc

In [10]:
acc = test_eval(vgg16,test_dataloader,-1,loss_fn,batch_size,log_file)
print("original acc : %.3f" % (acc))

step : 200 / 1563 acc : 71.938
step : 400 / 1563 acc : 71.867
step : 600 / 1563 acc : 71.776
step : 800 / 1563 acc : 71.750
step : 1000 / 1563 acc : 71.547
step : 1200 / 1563 acc : 71.516
step : 1400 / 1563 acc : 71.529

Eval acc of model on imagenet : 71.5920 %, Loss : 0.0358
original acc : 71.592


In [11]:
hook_register(vgg16)

input  Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [12]:
acc = test_eval(vgg16,test_dataloader,-1,loss_fn,batch_size,log_file)
print("original acc with pkt error : %.3f" % (acc))

step : 200 / 1563 acc : 71.484
step : 400 / 1563 acc : 71.477
step : 600 / 1563 acc : 71.464
step : 800 / 1563 acc : 71.586
step : 1000 / 1563 acc : 71.603
step : 1200 / 1563 acc : 71.669
step : 1400 / 1563 acc : 71.672

Eval acc of model on imagenet : 71.5920 %, Loss : 0.0358
original acc with pkt error : 71.592


In [13]:
for param in vgg16.parameters():
    param.requires_grad = False

In [14]:
for name,layers in vgg16.named_modules():
    for idx,layer in enumerate(layers.features):
        #print(idx,layer)
        if idx == 24:
            #print(idx,layer)
            for param in layer.parameters():
                #print(param[0])
                nn.init.normal_(param) # conv5-1 init
                #print(param[0])
                param.requires_grad = True
    break
        

In [15]:
for param in vgg16.parameters():
    print(param.size(),param.requires_grad)

torch.Size([64, 3, 3, 3]) False
torch.Size([64]) False
torch.Size([64, 64, 3, 3]) False
torch.Size([64]) False
torch.Size([128, 64, 3, 3]) False
torch.Size([128]) False
torch.Size([128, 128, 3, 3]) False
torch.Size([128]) False
torch.Size([256, 128, 3, 3]) False
torch.Size([256]) False
torch.Size([256, 256, 3, 3]) False
torch.Size([256]) False
torch.Size([256, 256, 3, 3]) False
torch.Size([256]) False
torch.Size([512, 256, 3, 3]) False
torch.Size([512]) False
torch.Size([512, 512, 3, 3]) False
torch.Size([512]) False
torch.Size([512, 512, 3, 3]) False
torch.Size([512]) False
torch.Size([512, 512, 3, 3]) True
torch.Size([512]) True
torch.Size([512, 512, 3, 3]) False
torch.Size([512]) False
torch.Size([512, 512, 3, 3]) False
torch.Size([512]) False
torch.Size([4096, 25088]) False
torch.Size([4096]) False
torch.Size([4096, 4096]) False
torch.Size([4096]) False
torch.Size([1000, 4096]) False
torch.Size([1000]) False


In [16]:
acc = test_eval(vgg16,test_dataloader,0,loss_fn,batch_size,log_file)
print("after init acc with pkt error : %.3f" % (acc))

step : 200 / 1563 acc : 0.141
step : 400 / 1563 acc : 0.141
step : 600 / 1563 acc : 0.120
step : 800 / 1563 acc : 0.145
step : 1000 / 1563 acc : 0.138
step : 1200 / 1563 acc : 0.130
step : 1400 / 1563 acc : 0.129

Eval acc of model on imagenet : 0.1320 %, Loss : 1.9768
after init acc with pkt error : 0.132


In [None]:
# training
max_epochs = 80
for epoch in range(1,max_epochs+1):
    print("=== epoch %d ==="%(epoch))
    total = 0
    correct = 0
    running_loss = 0.0
    total_loss = []
    total_avg_loss = 0.0
    vgg16.train()
    for i, data in enumerate(train_dataloader):
        inputs, labels = data
        inputs,labels = inputs.to(device), labels.to(device)
        y_hat = vgg16(inputs)
        _,predictd = torch.max(y_hat,1)
        
        #print(labels.size()[0])
        total += labels.size()[0]
        correct +=(predictd == labels).sum().item()
        
        loss = loss_fn(y_hat, labels)
        running_loss += loss.item()
        vgg16.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 200 == 199: 
            total_loss.append(running_loss/200)
            #print("")
            print('[%d, %5d] loss: %.6f' % (epoch, i+1, running_loss/(200*batch_size) )) 
            running_loss = 0.0
    #print("")
    if len(total_loss) != 0:
        total_avg_loss = sum(total_loss)/(len(train_dataloader)*batch_size)
    acc = 100*correct/total
    #avg_loss = total_loss / (len(train_dataloader)*batch_size)
    print("==epoch %d ==  train acc : %.4f" %(epoch,acc))
    f = open(log_file,"a")
    print("==epoch %d ==  train acc : %.4f" %(epoch,acc),file =f)
    f.close()
    writer.add_scalar("Fc1_Retrain/ACC_TRAIN",acc,epoch)
    writer.add_scalar("Fc1_Retrain/LOSS_TRAIN",total_avg_loss,epoch)
    acc = eval(vgg16,test_dataloader,epoch,loss_fn,batch_size,log_file,writer)

=== epoch 1 ===
[1,   200] loss: 0.295904
[1,   400] loss: 0.243193
[1,   600] loss: 0.233382
[1,   800] loss: 0.231703
[1,  1000] loss: 0.228593
[1,  1200] loss: 0.224917
[1,  1400] loss: 0.225739
[1,  1600] loss: 0.223347
[1,  1800] loss: 0.222322
[1,  2000] loss: 0.221255
[1,  2200] loss: 0.220169
[1,  2400] loss: 0.219828
[1,  2600] loss: 0.218811
[1,  2800] loss: 0.218092
[1,  3000] loss: 0.217383
[1,  3200] loss: 0.216850
[1,  3400] loss: 0.216310
[1,  3600] loss: 0.215297
[1,  3800] loss: 0.214680
[1,  4000] loss: 0.214003
==epoch 1 ==  train acc : 0.2422
step : 200 / 1563 acc : 0.641
step : 400 / 1563 acc : 0.617
step : 600 / 1563 acc : 0.583
step : 800 / 1563 acc : 0.637
step : 1000 / 1563 acc : 0.634
step : 1200 / 1563 acc : 0.628
step : 1400 / 1563 acc : 0.629

Eval acc of model on imagenet : 0.6360 %, Loss : 0.2077
=== epoch 2 ===
[2,   200] loss: 0.213250
[2,   400] loss: 0.212349
[2,   600] loss: 0.211384
[2,   800] loss: 0.211555
[2,  1000] loss: 0.210193
[2,  1200] loss

In [None]:
acc = test_eval(vgg16,test_dataloader,max_epochs+1,loss_fn,batch_size,log_file)
print("after train acc with pkt error : %.3f" % (acc))

In [None]:
from copy import deepcopy
print(original_features[0][0].size())
print(original_features[0][0][:,0:32].size())
tmp = deepcopy(original_features)

In [None]:
""" verfiying zeros
all_zeros = torch.zeros((32,32,14,14)).to(device)
#all_zeros.type(torch.cuda.FloatTensor)
tmp[0][0][:,0:32] = 0
print(all_zeros.type())
print(tmp[0][0][:,0:32].size(),all_zeros.size())
print(tmp[0][0].type())
if torch.equal(tmp[0][0][:,0:32], all_zeros):
    print("true")
else :
    print("not same ",tmp[0][0][:,0:32])
"""