In [1]:
import sys
import os
import math
import random
import heapq 
import time
import copy
import gc
import numpy as np
import pandas as pd
from functools import reduce
from scipy.spatial.distance import pdist
from PIL import Image
import matplotlib.pyplot as plt
import cv2
#import faiss
import torch
import torch.nn as nn
import torch.nn.functional as F
torch.cuda.set_device(5)
print (torch.cuda.current_device())

5


In [2]:
#1. Read data with List storage Data:[name,type],I:[img],Y[type]
root_dir = '/data/fjsdata/fundus/kaggle_DR/train/' #the path of images
trainset = pd.read_csv("/data/fjsdata/fundus/kaggle_DR/CBIR_train.csv" , sep=',')#load dataset
testset = pd.read_csv("/data/fjsdata/fundus/kaggle_DR/CBIR_test.csv" , sep=',')#load dataset
tstart = time.time()
#read train image with CV
trData, trI, trY = [],[],[]
for iname, itype in np.array(trainset).tolist():
    try:
        image_path = os.path.join(root_dir, iname+'.jpeg')
        img = cv2.resize(cv2.imread(image_path).astype(np.float32), (512, 512))#(1024,1024,3)->(512,512,3)
        trData.append([iname,itype])
        trI.append(img)
        trY.append(itype)  
    except:
        print(iname+":"+str(image_path))
    sys.stdout.write('\r{} / {} '.format(len(trData),trainset.shape[0]))
    sys.stdout.flush()
print('The length of train set is %d'%len(trData))
#read test image with CV
teData, teI, teY = [],[],[]
for iname, itype in np.array(testset).tolist():
    try:
        image_path = os.path.join(root_dir, iname+'.jpeg')
        img = cv2.resize(cv2.imread(image_path).astype(np.float32), (512, 512))#(1024,1024,3)->(512,512,3)
        teData.append([iname,itype])
        teI.append(img)
        teY.append(itype)  
    except:
        print(iname+":"+str(image_path))
    sys.stdout.write('\r{} / {} '.format(len(teData),testset.shape[0]))
    sys.stdout.flush()
print('The length of train set is %d'%len(teData))
elapsed = time.time() - tstart    
print('Completed buliding index in %d seconds' % int(elapsed))

7200 / 7200 The length of train set is 7200
800 / 800 The length of train set is 800
Completed buliding index in 2115 seconds


In [3]:
#2. define Attention-based Hashing network with pytorch
class SpatialAttention(nn.Module):#spatial attention layer
    def __init__(self):
        super(SpatialAttention, self).__init__()

        self.conv1 = nn.Conv2d(2, 1, kernel_size=3, padding=1, bias=False)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)
    
class AHNet(nn.Module): #deep Hashint Network:DHNet
    def __init__(self,inChannels=3):
        super(AHNet, self).__init__()
        #(channels, Height, Width)
        #layer1: Convolution, (3,512,512)->(8,256,256)
        self.conv1 = nn.Conv2d(in_channels=inChannels, out_channels=8, kernel_size=3, padding=1, stride=2)
        self.bn1 = nn.BatchNorm2d(8)
        self.relu1 = nn.ReLU(inplace=True)
        #layer2: max pooling,(8,256,256)->(8,128,128)
        self.maxpool = nn.MaxPool2d(kernel_size=3, padding=1, stride=2)
        self.bn2 = nn.BatchNorm2d(8)
        #layer3: Spatial Attention Layer, (8,256,256)->(8,256,256)
        self.sa = SpatialAttention()
        #layer4: Convolution, (8,128,128)->(2,64,64)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=2, kernel_size=3, padding=1, stride=2)
        self.bn3 = nn.BatchNorm2d(2)
        self.relu2 = nn.ReLU(inplace=True)
        #layer5: mean pooling, (2,64,64)->(2,32,32)
        self.avgpool = nn.AvgPool2d(kernel_size=3, padding=1, stride=2)
        self.bn4 = nn.BatchNorm2d(2)
        #layer6: fully connected, 2*32*32->512
        self.fcl1 = nn.Linear(2*32*32,512)
        self.relu3 = nn.ReLU(inplace=True)
        #layer7: Hashing layer, 512->16
        self.fcl2 = nn.Linear(512,16)#
        self.tanh = nn.Tanh() #{-1,1}
              
    def forward(self,x):
        #input: (batch_size, in_channels, Height, Width)
        #output: (batch_size, out_channels, Height, Width)
        #layer1: convolution
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        #layer2: max pooling
        x = self.maxpool(x)
        x = self.bn2(x)
        #layer3: Attention
        x = self.sa(x)*x
        #layer4: Convolution
        x = self.conv2(x)
        x = self.bn3(x)
        x = self.relu2(x)
        #layer5: mean pooling
        x = self.avgpool(x)
        x = self.bn4(x)
        #layer6:fully connected
        x = x.view(x.size(0),-1) #transfer three dims to one dim
        x = self.fcl1(x)
        x = self.relu3(x)
        #layer7: Hashing layer
        x = self.fcl2(x)
        x = self.tanh(x)
                
        return x
    
#https://pytorch-cn.readthedocs.io/zh/latest/    
#https://github.com/filipradenovic/cnnimageretrieval-pytorch/blob/master/cirtorch/layers/functional.py
class HashLossFunc(nn.Module):
    def __init__(self, margin=0.5, alpha=0.01):
        super(HashLossFunc, self).__init__()
        self.alpha = alpha #regularization
        self.margin = margin #margin threshold
    
    def forward(self,h1,h2,y): 
        #h1=h2:NxD,y:N
        dim = h1.shape[1]
        euc_dist = F.pairwise_distance(h1, h2, p=2, eps=1e-06) # Calcualte Euclidean Distance
        sim_term = 0.5*(1-y)*euc_dist #penalize the similar iamge pairs when y=0
        unsim_term = 0.5*y*torch.clamp(self.margin*dim-euc_dist,0)#penalize the unsimlar image pairs when y =1
        reg_term = self.alpha * ( torch.sum((torch.abs(h1)-1),dim=1) + torch.sum((torch.abs(h2)-1),dim=1) ) #regularization term
        #loss = torch.mean(sim_term + unsim_term + reg_term) 
        loss = torch.sum(sim_term + unsim_term+ reg_term) 
        return loss

#test network: valid
x1 = torch.rand(10,3,512,512)#.cuda()
x2 = torch.rand(10,3,512,512)#.cuda()
y = torch.FloatTensor([0,1,1,0,1,0,0,0,1,1])#.cuda()
model = AHNet()#.cuda()
criterion  = HashLossFunc(margin=0.5)#.cuda() #define loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #define optimizer
for epoch in range(10):
    optimizer.zero_grad()
    
    out1 = model(x1)#out.grad_fn
    out2 = model(x2)
    loss = criterion(out1,out2,y)
    print (loss.item())
    loss.backward()
    optimizer.step()
    #observe the variant of model.parameters
    for i in model.named_parameters():
        print(i[0])
        print(i[1][0][0][0])
        break
#output
x3 = torch.rand(10,3,512,512)#.cuda()
out3 = model(x3)
print (out3)
out3 = torch.sign(out3) #Binarization,[-1,1]->{-1,1}
print (out3)
print (out3.size())
del x1,x2,x3,out1,out2,out3,model
gc.collect()

17.003007888793945
conv1.weight
tensor([-0.0124, -0.0430, -0.1587])
15.397676467895508
conv1.weight
tensor([-0.0132, -0.0440, -0.1592])
10.940499305725098
conv1.weight
tensor([-0.0140, -0.0449, -0.1599])
6.863186359405518
conv1.weight
tensor([-0.0147, -0.0456, -0.1608])
4.300360202789307
conv1.weight
tensor([-0.0152, -0.0462, -0.1614])
3.1618640422821045
conv1.weight
tensor([-0.0154, -0.0464, -0.1620])
2.2985761165618896
conv1.weight
tensor([-0.0154, -0.0462, -0.1625])
1.3906198740005493
conv1.weight
tensor([-0.0151, -0.0464, -0.1628])
0.9467839598655701
conv1.weight
tensor([-0.0151, -0.0464, -0.1633])
0.26750442385673523
conv1.weight
tensor([-0.0153, -0.0465, -0.1637])
tensor([[-0.1012,  0.4197, -0.3445,  0.3865,  0.6688,  0.4833,  0.5186,
         -0.1486,  0.0218,  0.5793, -0.0399,  0.1491, -0.4094,  0.0993,
         -0.0377,  0.0435],
        [-0.1433, -0.1996, -0.5023,  0.6747,  0.2354,  0.3245,  0.2907,
         -0.0835,  0.0879,  0.6794, -0.3931,  0.1807, -0.7247, -0.1588,
     

0

In [4]:
#3.train and evaluate model
def onlineGenImgPairs(batchSize):
    idx_sf = random.sample(range(0, len(trY)),2*batchSize)
    trI1_sf, trI2_sf, trY1_sf, trY2_sf = [],[],[],[]
    flag = 0
    for i in idx_sf:
        if flag==0:
            trI1_sf.append(trI[i])
            trY1_sf.append(trY[i])
            flag =1
        else:
            trI2_sf.append(trI[i])
            trY2_sf.append(trY[i])
            flag =0
    trY_sf = np.where((np.array(trY1_sf)-np.array(trY2_sf))!=0,1,0)
    return np.array(trI1_sf),np.array(trI2_sf),trY_sf
        
#define model
model = AHNet().cuda()
criterion  = HashLossFunc(margin=0.5).cuda() #define loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #define optimizer
#train model
best_net, best_loss = None, float('inf')
for epoch in range(2):#iteration:len(trY)/2
    batchSize = 10
    batches = len(trY)//batchSize
    losses = []
    for batch in range(batches):
        #grad vanish
        optimizer.zero_grad() 
        #genenate training images pair
        trI1_sf, trI2_sf, trY_sf = onlineGenImgPairs(batchSize)
        I1_batch = torch.from_numpy(trI1_sf).type(torch.FloatTensor).cuda()
        I2_batch = torch.from_numpy(trI2_sf).type(torch.FloatTensor).cuda()
        Y_batch = torch.from_numpy(trY_sf).type(torch.FloatTensor).cuda()
        #forword
        X1_batch = model(I1_batch.permute(0, 3, 1, 2))#permute the dims of matrix
        X2_batch = model(I2_batch.permute(0, 3, 1, 2))
        #binary-like loss
        loss = criterion(X1_batch,X2_batch,Y_batch)
        #backward
        loss.backward()
        #update parameters
        optimizer.step()
        #show loss
        sys.stdout.write('\r {} / {} : loss = {}'.format(batch+1, batches, float('%0.6f'%loss.item())))
        sys.stdout.flush()     
        losses.append(loss.item())
    print("Eopch: %5d mean_loss = %.6f" % (epoch + 1, np.mean(losses)))
    if np.mean(losses) < best_loss:
        best_loss = np.mean(losses)
        best_net = copy.deepcopy(model)
print("best_loss = %.6f" % (best_loss))
#release gpu memory
model = model.cpu()
torch.cuda.empty_cache()
#hash code of train data from model
batchSize = 10
num_batches = len(trI) // batchSize
trF = []
for i in range(num_batches):
    min_idx = i * batchSize
    max_idx = np.min([len(trI), (i+1)*batchSize])
    I_batch = torch.from_numpy(np.array(trI[min_idx: max_idx])).type(torch.FloatTensor).cuda()
    X_batch = torch.sign(best_net(I_batch.permute(0, 3, 1, 2)))#forword
    I_batch = I_batch.cpu()
    X_batch = X_batch.cpu()
    torch.cuda.empty_cache()#release gpu memory
    trF.extend(X_batch.data.numpy().tolist())
    sys.stdout.write('\r {} / {} '.format(i, num_batches))
    sys.stdout.flush()
    
#hash code of test data from model
teF = []
num_batches = len(teI) // batchSize
for i in range(num_batches):
    min_idx = i * batchSize
    max_idx = np.min([len(teI), (i+1)*batchSize])
    I_batch = torch.from_numpy(np.array(teI[min_idx: max_idx])).type(torch.FloatTensor).cuda()
    X_batch = torch.sign(best_net(I_batch.permute(0, 3, 1, 2)))#forword
    I_batch = I_batch.cpu()
    X_batch = X_batch.cpu()
    torch.cuda.empty_cache()#release gpu memory
    teF.extend(X_batch.data.numpy().tolist())
    sys.stdout.write('\r {} / {} '.format(i, num_batches))
    sys.stdout.flush()
    
#train data with list: trData, trI, trF, trY
#test data with list: teData, teI, teF, teY
for topk in [5,10,15,20]:
    MHR = [] #mean Hit ratio 
    MAP = [] #mean average precision
    MRR = [] #mean reciprocal rank
    for i, teVal in enumerate(teF):
        stype = teY[i]
        map_item_score = {}
        for j, trVal in enumerate(trF):
            map_item_score[j] = pdist(np.vstack([teVal,trVal]),'hamming')
        ranklist = heapq.nsmallest(topk, map_item_score, key=map_item_score.get)
        #perfromance
        pos_len = 0
        rank_len = 0
        mrr_flag = 0
        for j in ranklist:
            dtype = trY[j]
            rank_len=rank_len+1
            if stype==dtype:  #hit
                MHR.append(1)
                pos_len = pos_len +1
                MAP.append(pos_len/rank_len) 
                if mrr_flag==0: 
                    MRR.append(pos_len/rank_len)
                    mrr_flag =1
            else: 
                MHR.append(0)
                MAP.append(0)   
    print("mHR@{}={:.6f}, mAP@{}={:.6f}, mRR@{}={:.6f}".format(topk,np.mean(MHR),topk,np.mean(MAP), topk, np.mean(MRR)))

 720 / 720 : loss = 17.309586Eopch:     1 mean_loss = 20.511786
 720 / 720 : loss = 23.353271Eopch:     2 mean_loss = 20.166824
best_loss = 20.166824
 79 / 80 0 mHR@5=0.423500, mAP@5=0.422763, mRR@5=0.993169
mHR@10=0.425875, mAP@10=0.422137, mRR@10=0.968943
mHR@15=0.427083, mAP@15=0.422500, mRR@15=0.966402
mHR@20=0.426750, mAP@20=0.421302, mRR@20=0.953809
