In [6]:
import sys
import os
import math
import random
import heapq 
import time
import copy
import numpy as np
import pandas as pd
from functools import reduce
from PIL import Image
import matplotlib.pyplot as plt
import cv2
#import faiss
import torch
import torch.nn as nn
import torch.nn.functional as F
print (torch.cuda.is_available())
print (torch.version.cuda)
print (torch.cuda.get_device_name(torch.cuda.current_device()))
torch.cuda.set_device(6)
print (torch.cuda.current_device())

True
9.0.176
TITAN Xp
6


In [2]:
root_dir = '/data/fjsdata/fundus/iSee/iSee_multi_dataset/' #the path of images
trainset = pd.read_csv("/data/fjsdata/fundus/iSee/iSee_multi_dataset/CBIR_iSee_train.csv" , sep=',')#load trainset
testset = pd.read_csv("/data/fjsdata/fundus/iSee/iSee_multi_dataset/CBIR_iSee_test.csv" , sep=',')#load testset
tstart = time.time()
#read train image with CV
trData = []
for iname, itype in np.array(trainset).tolist():
    if iname.endswith(".jpg"):
        try:
            image_dir = root_dir+'img_data_%s'%itype
            image_path = os.path.join(image_dir, iname)
            img = cv2.resize(cv2.imread(image_path).astype(np.float32), (1024, 1024))#(1920,1920,3)->(1024,1024,3)
            trData.append([iname,itype,img])
        except:
            print(iname+":"+str(image_path))
        sys.stdout.write('\r{} / {} '.format(len(trData),trainset.shape[0]))
        sys.stdout.flush()
print('The length of train set is %d'%len(trData))
#read test image with CV
teData = []
for iname, itype in np.array(testset).tolist():
    if iname.endswith(".jpg"):
        try:
            image_dir = root_dir+'img_data_%s'%itype
            image_path = os.path.join(image_dir, iname)
            img = cv2.resize(cv2.imread(image_path), (1024, 1024))#(1920,1920,3)->(1024,1024,3)
            teData.append([iname,itype,img])
        except:
            print(iname+":"+str(image_path))
        sys.stdout.write('\r{} / {} '.format(len(teData),testset.shape[0]))
        sys.stdout.flush()
print('The length of test set is %d'%len(teData))
elapsed = time.time() - tstart    
print('Completed buliding index in %d seconds' % int(elapsed))

9000 / 9000 The length of train set is 9000
1000 / 1000 The length of test set is 1000
Completed buliding index in 1981 seconds


In [3]:
class SpatialAttention(nn.Module):#spatial attention layer
    def __init__(self):
        super(SpatialAttention, self).__init__()

        self.conv1 = nn.Conv2d(2, 1, kernel_size=3, padding=1, bias=False)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)
    
class AttHashNet(nn.Module): #Attention-based Hashint Network:AHNet
    def __init__(self,inChannels=3):
        super(AttHashNet, self).__init__()
        #(channels, Height, Width)
        #layer1: Convolution, (3,1024,1024)->(16,1024,1024)
        self.conv1 = nn.Conv2d(in_channels=inChannels, out_channels=16, kernel_size=3, padding=1, stride=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU(inplace=True)
        #layer2: max pooling,(16,1024,1024)->(16,512,512)
        self.maxpool = nn.MaxPool2d(kernel_size=3, padding=1, stride=2)
        self.bn2 = nn.BatchNorm2d(16)
        #layer3: Spatial Attention Layer, (16,512,512)->(16,512,512)
        self.sa = SpatialAttention()
        #layer4: Convolution, (16,512,512)->(8,256,256)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding=1, stride=2)
        self.bn3 = nn.BatchNorm2d(8)
        self.relu2 = nn.ReLU(inplace=True)
        #layer5: mean pooling, (8,256,256)->(8,128,128)
        self.avgpool = nn.AvgPool2d(kernel_size=3, padding=1, stride=2)
        self.bn4 = nn.BatchNorm2d(8)
        #layer6: fully connected, (8,128,128)->(4*64*64)
        self.fcl = nn.Conv2d(in_channels = 8, out_channels=4, kernel_size=1, stride=2)
        self.bn5 = nn.BatchNorm2d(4)
        self.relu3 = nn.ReLU(inplace=True)
        #layer7: hash layer, binary-like，(4*64*64)->(1*32*32)
        self.hl = nn.Conv2d(in_channels = 4, out_channels=1, kernel_size=1, stride=2)
        self.bn6 = nn.BatchNorm2d(1)
        self.tanh = nn.Tanh()#hyberpolic tangent activation
              
    def forward(self,x):
        #input: (batch_size, in_channels, Height, Width)
        #output: (batch_size, out_channels, Height, Width)
        #layer1: convolution
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        #layer2: max pooling
        x = self.maxpool(x)
        x = self.bn2(x)
        #layer3: Attention
        x = self.sa(x)*x
        #layer4: Convolution
        x = self.conv2(x)
        x = self.bn3(x)
        x = self.relu2(x)
        #layer5: mean pooling
        x = self.avgpool(x)
        x = self.bn4(x)
        #layer6: fully connected
        x = self.fcl(x)
        x = self.bn5(x)
        x = self.relu3(x)
        #layer7: hash layer
        x = self.hl(x)
        x = self.bn6(x)
        x = self.tanh(x) 
        
        return x
    
#https://pytorch-cn.readthedocs.io/zh/latest/    
#https://github.com/filipradenovic/cnnimageretrieval-pytorch/blob/master/cirtorch/layers/functional.py
class HashLossFunc(nn.Module):
    def __init__(self,margin=0.5, alpha=0.01):
        super(HashLossFunc, self).__init__()
        self.alpha = alpha #regularization
        self.margin = margin #margin threshold
    
    def forward(self,h1,h2,y): 
        #h1=h2:NxD,y:N
        dim = h1.shape[1]*h1.shape[2]*h1.shape[3]
        h1 = h1.reshape(h1.shape[0],dim)
        h2 = h2.reshape(h2.shape[0],dim)
        euc_dist = F.pairwise_distance(h1, h2, p=2, eps=1e-06) # Calcualte Euclidean Distance
        sim_term = 0.5*(1-y)*euc_dist #penalize the similar iamge pairs when y=0
        unsim_term = 0.5*y*torch.clamp(self.margin*dim-euc_dist,0)#penalize the unsimlar image pairs when y =1
        reg_term = self.alpha * ( torch.sum((torch.abs(h1)-1),dim=1) + torch.sum((torch.abs(h2)-1),dim=1) ) #regularization term
        #loss = torch.mean(sim_term + unsim_term + reg_term) 
        loss = torch.sum(sim_term + unsim_term + reg_term) 
        return loss

In [14]:
#test network
x1 = torch.rand(10,3,1024,1024).cuda()
x2 = torch.rand(10,3,1024,1024).cuda()
y = torch.FloatTensor([0,1,1,0,1,0,0,0,1,1]).cuda()
model = AttHashNet().cuda()
out1 = model(x1)#out.grad_fn
out2 = model(x2)
criterion  = HashLossFunc().cuda() #define loss function
loss = criterion(out1,out2,y)
print (loss.item())
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #define optimizer
optimizer.zero_grad()
loss.backward()
optimizer.step()
x3 = torch.rand(10,3,1024,1024).cuda()
out3 = model(x3)
out3 = torch.sign(out3) #Binarization,[-1,1]->{-1,1}
#print (out3)

1162.296630859375


In [7]:
#train list to numpy
trData = np.array(trData) 
#define model
model = AttHashNet().cuda()
criterion  = HashLossFunc().cuda() #define loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #define optimizer
#train parameters
batchSize = 10
for epoch in range(10):#iteration
    #prepare train data
    I1,I2,Y =[],[],[] #list of training data
    for itype in ['AMD','DR','glaucoma','myopia','norm']:
        trType = np.asarray([x for x in trData if x[1]==itype ])
        trTypeList = trType.tolist() #numpy to list
        trNType = np.asarray([x for x in trData if x[1]!=itype ])
        trNTypeList = trNType.tolist()
        for x in trType:
            sim = random.sample(trTypeList, 1) 
            I1.append(x[2])
            I2.append(sim[0][2])
            Y.append(0) #sim
            unsim = random.sample(trNTypeList,1)
            I1.append(x[2])
            I2.append(unsim[0][2])
            Y.append(1) #unsim
        
    #train model
    num_batches = len(Y) // batchSize 
    losses = []
    for i in range(num_batches):
        min_idx = i * batchSize
        max_idx = np.min([len(Y), (i+1)*batchSize])
        I1_batch = torch.from_numpy(np.array(I1[min_idx: max_idx])).type(torch.FloatTensor).cuda()
        I2_batch = torch.from_numpy(np.array(I2[min_idx: max_idx])).type(torch.FloatTensor).cuda()
        Y_batch = torch.from_numpy(np.array(Y[min_idx: max_idx])).type(torch.FloatTensor).cuda()
        optimizer.zero_grad()
        #suitable for siamese network
        out1 = model(I1_batch.permute(0, 3, 1, 2))#adjust channel to the second
        out2 = model(I2_batch.permute(0, 3, 1, 2))#(100,1024,1024,3)->(100,3,1024,1024)
        loss = criterion(out1,out2,Y_batch)
        loss.backward()
        optimizer.step()
        sys.stdout.write('\r {} / {} : loss = {}'.format(i, num_batches, float('%0.6f'%loss.item())))
        sys.stdout.flush()     
        losses.append(loss.item())
    print("Eopch: %5d mean_loss = %.6f" % (epoch + 1, np.mean(losses)))

 1799 / 1801 : loss = 1074.900391

RuntimeError: number of dims don't match in permute

In [21]:
#define loss function with torch
#https://www.zhihu.com/question/66988664
import torch
import torch.nn.functional as F
from torch.nn.modules.loss import _Loss
from torch.autograd import Function

class DiceLoss(torch.nn.Module):
    def __init__(self):
        super(DiceLoss, self).__init__()
    
    def forward(self, input, target):        
        return -dice_coef(input, target) 
 
 
def dice_coef(input, target): 
    smooth = 1
    input_flat = input.view(-1)  
    target_flat = target.view(-1)
    intersection = input_flat * target_flat
    return (2 * intersection.sum() + smooth) / (input_flat.sum() + target_flat.sum() + smooth)

torch.set_grad_enabled(True)
x= torch.tensor([1.,1.,1.,1.],requires_grad=False)
w = torch.tensor([1.],requires_grad=True)
b = torch.tensor([1.],requires_grad=True)
target=torch.tensor([1.,0.,1.,0.],requires_grad=False)

for i in range(4):
    y=w*x+b
    diceloss = DiceLoss().cuda()
    loss=diceloss(y,target)

    optimizer = torch.optim.Adam([w,b], lr = 0.001)
    loss.backward()
    optimizer.step()

    print(loss)
    print(w,b)

tensor(-0.8182, grad_fn=<NegBackward>)
tensor([1.0010], requires_grad=True) tensor([1.0010], requires_grad=True)
tensor(-0.8183, grad_fn=<NegBackward>)
tensor([1.0020], requires_grad=True) tensor([1.0020], requires_grad=True)
tensor(-0.8184, grad_fn=<NegBackward>)
tensor([1.0030], requires_grad=True) tensor([1.0030], requires_grad=True)
tensor(-0.8186, grad_fn=<NegBackward>)
tensor([1.0040], requires_grad=True) tensor([1.0040], requires_grad=True)


In [22]:
#define loss function with numpy
#https://pytorch.org/tutorials/advanced/numpy_extensions_tutorial.html
import torch
from torch.autograd import Function
from numpy.fft import rfft2, irfft2


class BadFFTFunction(Function):
    @staticmethod
    def forward(ctx, input):
        numpy_input = input.detach().numpy()
        result = abs(rfft2(numpy_input))
        return input.new(result)

    @staticmethod
    def backward(ctx, grad_output):
        numpy_go = grad_output.numpy()
        result = irfft2(numpy_go)
        return grad_output.new(result)

# since this layer does not have any parameters, we can
# simply declare this as a function, rather than as an nn.Module class

def incorrect_fft(input):
    return BadFFTFunction.apply(input)

input = torch.randn(8, 8, requires_grad=True)
result = incorrect_fft(input)
print(result)
result.backward(torch.randn(result.size()))
print(input)

tensor([[ 1.9433, 14.6670,  8.8840,  3.7459, 10.4771],
        [ 3.5248, 14.1914,  6.3522,  2.3557,  8.7670],
        [11.5300,  3.4276,  9.5391,  6.0868,  2.8697],
        [11.4902,  7.0307,  5.4385,  6.7889,  8.7239],
        [ 5.6001,  8.6130,  9.8114,  2.7259,  9.8378],
        [11.4902,  3.9720,  2.7170,  6.5256,  8.7239],
        [11.5300, 13.3017,  2.2072,  8.9868,  2.8697],
        [ 3.5248,  6.2588,  4.3148,  7.9802,  8.7670]],
       grad_fn=<BadFFTFunctionBackward>)
tensor([[ 0.1568, -1.7971,  0.6778,  0.2603, -0.0574, -0.8125, -0.4640, -1.5712],
        [ 1.7153,  0.0527,  2.1980,  1.0389, -0.6894, -1.3743,  0.2009,  0.3321],
        [ 1.0450,  2.0642,  0.1938, -0.0262, -0.9482, -1.5863,  0.5905, -0.0913],
        [-0.5643, -0.6841,  1.3027, -1.3130,  0.3162, -2.1120, -1.1172, -0.8846],
        [-0.1827,  0.3809, -0.2066,  0.8642,  1.1014,  2.0335, -0.3849, -0.2000],
        [ 1.0178, -1.3539,  0.1065,  0.6148, -0.1528, -0.7632,  0.0947,  1.4215],
        [-1.3529,  0.2224,

In [6]:
import numpy as np
from scipy.spatial.distance import pdist
x=[-1,1,1,-1,1,-1,-1]
y=[1,-1,1,-1,1,-1,1]
X=np.vstack([x,y])
d=pdist(X,'hamming')
print (d)

[0.42857143]


In [2]:
import random
import numpy as np
trI=[1,2,3,4,5,6,7,8,9,10]
trY=[10,9,8,7,6,5,4,3,2,1]
idx_sf = random.sample(range(0, 10),3)
print (idx_sf)
trI_sf = np.array(trI)[idx_sf]
print (trI_sf)
trY_sf = np.array(trY)[idx_sf]
print (trY_sf)

[9, 6, 1]
[10  7  2]
[1 4 9]


In [5]:
import random
import numpy as np
import torch
idx_sf = random.sample(range(0, 8),3)
print (idx_sf)
trI = [1,2,3,4,5,6,7,8]
trI_sf = []
for i in idx_sf:
    trI_sf.append(trI[i])
print (trI_sf)

[6, 3, 0]
[7, 4, 1]
