In [1]:
import sys
import os
import math
import random
import heapq 
import time
import copy
import numpy as np
import pandas as pd
from functools import reduce
from scipy.spatial.distance import pdist
from PIL import Image
import matplotlib.pyplot as plt
import cv2
#import faiss
import torch
import torch.nn as nn
import torch.nn.functional as F
torch.cuda.set_device(7)
print (torch.cuda.current_device())

7


In [2]:
#1. Read data with List storage Data:[name,type],I:[img],Y[type]
def TypetoNum(itype): #map the type into number.
    if itype =='AMD': return 0
    elif itype =='DR': return 1
    elif itype =='glaucoma': return 2
    elif itype =='myopia': return 3
    else: return 4 #norm
    
root_dir = '/data/fjsdata/fundus/iSee/iSee_multi_dataset/' #the path of images
trainset = pd.read_csv("/data/fjsdata/fundus/iSee/iSee_multi_dataset/CBIR_iSee_train.csv" , sep=',')#load trainset
testset = pd.read_csv("/data/fjsdata/fundus/iSee/iSee_multi_dataset/CBIR_iSee_test.csv" , sep=',')#load testset
tstart = time.time()
#read train image with CV
trData, trI, trY = [],[],[]
norm = 699
for iname, itype in np.array(trainset).tolist():
    if iname.endswith(".jpg"):
        try:
            image_dir = root_dir+'img_data_%s'%itype
            image_path = os.path.join(image_dir, iname)
            if itype == 'norm':
                if norm>0:
                    img = cv2.resize(cv2.imread(image_path).astype(np.float32), (1024, 1024))#(1920,1920,3)->(1024,1024,3)
                    trData.append([iname,itype])
                    trI.append(img)
                    trY.append(TypetoNum(itype))
                    norm = norm - 1
            else:
                img = cv2.resize(cv2.imread(image_path).astype(np.float32), (1024, 1024))#(1920,1920,3)->(1024,1024,3)
                trData.append([iname,itype,img])
                trI.append(img)
                trY.append(TypetoNum(itype))    
        except:
            print(iname+":"+str(image_path))
        sys.stdout.write('\r{} / {} '.format(len(trData),trainset.shape[0]))
        sys.stdout.flush()
print('The length of train set is %d'%len(trData))
#read test image with CV
teData, teI, teY = [],[],[]
norm = 77
for iname, itype in np.array(testset).tolist():
    if iname.endswith(".jpg"):
        try:
            image_dir = root_dir+'img_data_%s'%itype
            image_path = os.path.join(image_dir, iname)
            if itype == 'norm':
                if norm>0:
                    img = cv2.resize(cv2.imread(image_path).astype(np.float32), (1024, 1024))#(1920,1920,3)->(1024,1024,3)
                    teData.append([iname,itype])
                    teI.append(img)
                    teY.append(TypetoNum(itype))
                    norm = norm - 1
            else:
                img = cv2.resize(cv2.imread(image_path).astype(np.float32), (1024, 1024))#(1920,1920,3)->(1024,1024,3)
                teData.append([iname,itype,img])
                teI.append(img)
                teY.append(TypetoNum(itype)) 
        except:
            print(iname+":"+str(image_path))
        sys.stdout.write('\r{} / {} '.format(len(teData),testset.shape[0]))
        sys.stdout.flush()
print('The length of test set is %d'%len(teData))
elapsed = time.time() - tstart    
print('Completed buliding index in %d seconds' % int(elapsed))

2706 / 9000 The length of train set is 2706
300 / 1000 The length of test set is 300
Completed buliding index in 217 seconds


In [4]:
#2. define Attention-based Hashing network with pytorch
class SpatialAttention(nn.Module):#spatial attention layer
    def __init__(self):
        super(SpatialAttention, self).__init__()

        self.conv1 = nn.Conv2d(2, 1, kernel_size=3, padding=1, bias=False)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)
    
class AttHashNet(nn.Module): #Attention-based Hashint Network:AHNet
    def __init__(self,inChannels=3):
        super(AttHashNet, self).__init__()
        #(channels, Height, Width)
        #layer1: Convolution, (3,1024,1024)->(16,512,512)
        self.conv1 = nn.Conv2d(in_channels=inChannels, out_channels=16, kernel_size=3, padding=1, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU(inplace=True)
        #layer2: max pooling,(16,512,512)->(16,256,256)
        self.maxpool = nn.MaxPool2d(kernel_size=3, padding=1, stride=2)
        self.bn2 = nn.BatchNorm2d(16)
        #layer3: Spatial Attention Layer, (16,256,256)->(16,256,256)
        self.sa = SpatialAttention()
        #layer4: Convolution, (16,256,256)->(8,128,128)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding=1, stride=2)
        self.bn3 = nn.BatchNorm2d(8)
        self.relu2 = nn.ReLU(inplace=True)
        #layer5: mean pooling, (8,128,128)->(8,64,64)
        self.avgpool1 = nn.AvgPool2d(kernel_size=3, padding=1, stride=2)
        self.bn4 = nn.BatchNorm2d(8)
        #layer6: Convolution, (8,64,64)->(4,32,32)
        self.conv3 = nn.Conv2d(in_channels=8, out_channels=4, kernel_size=3, padding=1, stride=2)
        self.bn5 = nn.BatchNorm2d(4)
        self.relu3 = nn.ReLU(inplace=True)
        #layer7: mean pooling, (4,32,32)->(4,16,16)
        self.avgpool2 = nn.AvgPool2d(kernel_size=3, padding=1, stride=2)
        self.bn6 = nn.BatchNorm2d(4)
        #layer8: fully connected, 4*16*16->512
        self.fcl1 = nn.Linear(4*16*16,512)
        self.relu4 = nn.ReLU(inplace=True)
        #layer9: Hashing layer, 512->16
        self.fcl2 = nn.Linear(512,16)#
        self.tanh = nn.Tanh() #{-1,1}
              
    def forward(self,x):
        #input: (batch_size, in_channels, Height, Width)
        #output: (batch_size, out_channels, Height, Width)
        #layer1: convolution
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        #layer2: max pooling
        x = self.maxpool(x)
        x = self.bn2(x)
        #layer3: Attention
        x = self.sa(x)*x
        #layer4: Convolution
        x = self.conv2(x)
        x = self.bn3(x)
        x = self.relu2(x)
        #layer5: mean pooling
        x = self.avgpool1(x)
        x = self.bn4(x)
        #layer6: Convolution
        x = self.conv3(x)
        x = self.bn5(x)
        x = self.relu3(x)
        #layer7: mean pooling
        x = self.avgpool2(x)
        x = self.bn6(x)
        #layer8:fully connected
        x = x.view(x.size(0),-1) #transfer three dims to one dim
        x = self.fcl1(x)
        x = self.relu4(x)
        #layer9: Hashing layer
        x = self.fcl2(x)
        x = self.tanh(x)
        
        return x
    
#https://pytorch-cn.readthedocs.io/zh/latest/    
#https://github.com/filipradenovic/cnnimageretrieval-pytorch/blob/master/cirtorch/layers/functional.py
class HashLossFunc(nn.Module):
    def __init__(self, margin=0.5, alpha=0.01):
        super(HashLossFunc, self).__init__()
        self.alpha = alpha #regularization
        self.margin = margin #margin threshold
        
    def forward(self,h1,h2,y): 
        #h1=h2:NxD,y:N
        dim = h1.shape[1]
        euc_dist = F.pairwise_distance(h1, h2, p=2, eps=1e-06) # Calcualte Euclidean Distance
        sim_term = 0.5*(1-y)*euc_dist #penalize the similar iamge pairs when y=0
        unsim_term = 0.5*y*torch.clamp(self.margin*dim-euc_dist,0)#penalize the unsimlar image pairs when y =1
        reg_term = self.alpha * ( torch.sum((torch.abs(h1)-1),dim=1) + torch.sum((torch.abs(h2)-1),dim=1) ) #regularization term
        #loss = torch.mean(sim_term + unsim_term + reg_term) 
        loss = torch.sum(sim_term + unsim_term+ reg_term) 
        return loss

#test network: valid
x1 = torch.rand(10,3,1024,1024)#.cuda()
x2 = torch.rand(10,3,1024,1024)#.cuda()
y = torch.FloatTensor([0,1,1,0,1,0,0,0,1,1])#.cuda()
model = AttHashNet()#.cuda()
criterion  = HashLossFunc(margin=0.5)#.cuda() #define loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #define optimizer
for epoch in range(10):
    optimizer.zero_grad()
    
    out1 = model(x1)#out.grad_fn
    out2 = model(x2)
    loss = criterion(out1,out2,y)
    print (loss.item())
    loss.backward()
    optimizer.step()
    #observe the variant of model.parameters
    for i in model.named_parameters():
        print(i[0])
        print(i[1][0][0][0])
        break
#output
x3 = torch.rand(10,3,1024,1024)#.cuda()
out3 = model(x3)
print (out3)
out3 = torch.sign(out3) #Binarization,[-1,1]->{-1,1}
print (out3)
print (out3.size())

16.973474502563477
conv1.weight
tensor([-0.0891, -0.1904, -0.1932])
14.561971664428711
conv1.weight
tensor([-0.0900, -0.1895, -0.1939])
11.177960395812988
conv1.weight
tensor([-0.0909, -0.1885, -0.1944])
7.792870998382568
conv1.weight
tensor([-0.0918, -0.1876, -0.1943])
6.397558212280273
conv1.weight
tensor([-0.0926, -0.1869, -0.1947])
5.35432243347168
conv1.weight
tensor([-0.0933, -0.1862, -0.1953])
3.8476147651672363
conv1.weight
tensor([-0.0940, -0.1854, -0.1959])
2.394160270690918
conv1.weight
tensor([-0.0948, -0.1846, -0.1961])
2.3385796546936035
conv1.weight
tensor([-0.0956, -0.1839, -0.1963])
1.7661316394805908
conv1.weight
tensor([-0.0962, -0.1833, -0.1966])
tensor([[-0.0483,  0.1939, -0.0687, -0.0665,  0.0502, -0.1055, -0.2227,
          0.1499, -0.3042,  0.0651,  0.2311,  0.1104,  0.2581, -0.6016,
         -0.4715,  0.3705],
        [ 0.0878, -0.1001,  0.2517, -0.2332, -0.3573,  0.0679,  0.0597,
         -0.2617,  0.3333,  0.1726, -0.1621, -0.0552, -0.1382,  0.2058,
         

In [6]:
#3. Train and evaluate model 
def onlineGenImgPairs(batchSize):
    idx_sf = random.sample(range(0, len(trY)),2*batchSize)
    trI1_sf, trI2_sf, trY1_sf, trY2_sf = [],[],[],[]
    flag = 0
    for i in idx_sf:
        if flag==0:
            trI1_sf.append(trI[i])
            trY1_sf.append(trY[i])
            flag =1
        else:
            trI2_sf.append(trI[i])
            trY2_sf.append(trY[i])
            flag =0
    trY_sf = np.where((np.array(trY1_sf)-np.array(trY2_sf))!=0,1,0)
    return np.array(trI1_sf),np.array(trI2_sf),trY_sf
        
#define model
model = AttHashNet().cuda()
criterion  = HashLossFunc(margin=0.5).cuda() #define loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #define optimizer
#train model
batchSize = 10
epochSize = len(trY)//batchSize+1
losses = []
for epoch in range(2*epochSize):#iteration
    #grad vanish
    optimizer.zero_grad() 
    #genenate training images pair
    trI1_sf, trI2_sf, trY_sf = onlineGenImgPairs(batchSize)
    I1_batch = torch.from_numpy(trI1_sf).type(torch.FloatTensor).cuda()
    I2_batch = torch.from_numpy(trI2_sf).type(torch.FloatTensor).cuda()
    Y_batch = torch.from_numpy(trY_sf).type(torch.FloatTensor).cuda()
    #forword
    X1_batch = model(I1_batch.permute(0, 3, 1, 2))#permute the dims of matrix
    X2_batch = model(I2_batch.permute(0, 3, 1, 2))
    #binary-like loss
    loss = criterion(X1_batch,X2_batch,Y_batch)
    #backward
    loss.backward()
    #update parameters
    optimizer.step()
    #show loss
    sys.stdout.write('\r {} / {} : loss = {}'.format(epoch, 2*epochSize, float('%0.6f'%loss.item())))
    sys.stdout.flush()     
    losses.append(loss.item())
print("mean_loss = %.6f" % (np.mean(losses)))

#release gpu memory
#model = model.cpu()
#torch.cuda.empty_cache()
#hash code of train data from model
batchSize = 10
num_batches = len(trI) // batchSize
trF = []
for i in range(num_batches):
    min_idx = i * batchSize
    max_idx = np.min([len(trI), (i+1)*batchSize])
    I_batch = torch.from_numpy(np.array(trI[min_idx: max_idx])).type(torch.FloatTensor).cuda()
    X_batch = torch.sign(model(I_batch.permute(0, 3, 1, 2)))#forword
    I_batch = I_batch.cpu()
    X_batch = X_batch.cpu()
    torch.cuda.empty_cache()#release gpu memory
    trF.extend(X_batch.data.numpy().tolist())
    sys.stdout.write('\r {} / {} '.format(i, num_batches))
    sys.stdout.flush()
    
#hash code of test data from model
teF = []
num_batches = len(teI) // batchSize
for i in range(num_batches):
    min_idx = i * batchSize
    max_idx = np.min([len(teI), (i+1)*batchSize])
    I_batch = torch.from_numpy(np.array(teI[min_idx: max_idx])).type(torch.FloatTensor).cuda()
    X_batch = torch.sign(model(I_batch.permute(0, 3, 1, 2)))#forword
    I_batch = I_batch.cpu()
    X_batch = X_batch.cpu()
    torch.cuda.empty_cache()#release gpu memory
    teF.extend(X_batch.data.numpy().tolist())
    sys.stdout.write('\r {} / {} '.format(i, num_batches))
    sys.stdout.flush()
    
#Evaluate model
#train data with list: trData, trI, trF, trY
#test data with list: teData, teI, teF, teY
for topk in [5,10,15,20]:
    MHR = [] #mean Hit ratio 
    MAP = [] #mean average precision
    MRR = [] #mean reciprocal rank
    for i, teVal in enumerate(teF):
        stype = teY[i]
        map_item_score = {}
        for j, trVal in enumerate(trF):
            map_item_score[j] = pdist(np.vstack([teVal,trVal]),'hamming')
        ranklist = heapq.nsmallest(topk, map_item_score, key=map_item_score.get)
        #perfromance
        pos_len = 0
        rank_len = 0
        mrr_flag = 0
        for j in ranklist:
            dtype = trY[j]
            rank_len=rank_len+1
            if stype==dtype:  #hit
                MHR.append(1)
                pos_len = pos_len +1
                MAP.append(pos_len/rank_len) 
                if mrr_flag==0: 
                    MRR.append(pos_len/rank_len)
                    mrr_flag =1
            else: 
                MHR.append(0)
                MAP.append(0)   
    print("mHR@{}={:.6f}, mAP@{}={:.6f}, mRR@{}={:.6f}".format(topk,np.mean(MHR),topk,np.mean(MAP), topk, np.mean(MRR)))

 541 / 542 : loss = 12.958289mean_loss = 17.267654
 29 / 30 0 mHR@5=0.248667, mAP@5=0.231767, mRR@5=0.827257
mHR@10=0.229667, mAP@10=0.207737, mRR@10=0.721920
mHR@15=0.225556, mAP@15=0.196997, mRR@15=0.679519
mHR@20=0.221833, mAP@20=0.184072, mRR@20=0.579847
