In [1]:
import sys
import os
import math
import random
import heapq 
import time
import copy
import numpy as np
import pandas as pd
from functools import reduce
from scipy.spatial.distance import pdist
from PIL import Image
import matplotlib.pyplot as plt
import cv2
#import faiss
import torch
import torch.nn as nn
import torch.nn.functional as F
torch.cuda.set_device(5)
print (torch.cuda.current_device())

5


In [2]:
#1. Read data with List storage Data:[name,type],I:[img],Y[type]
def TypetoNum(itype): #map the type into number.
    if itype =='AMD': return 0
    elif itype =='DR': return 1
    elif itype =='glaucoma': return 2
    elif itype =='myopia': return 3
    else: return 4 #norm
    
root_dir = '/data/fjsdata/fundus/iSee/iSee_multi_dataset/' #the path of images
trainset = pd.read_csv("/data/fjsdata/fundus/iSee/iSee_multi_dataset/CBIR_iSee_train.csv" , sep=',')#load trainset
testset = pd.read_csv("/data/fjsdata/fundus/iSee/iSee_multi_dataset/CBIR_iSee_test.csv" , sep=',')#load testset
tstart = time.time()
#read train image with CV
trData, trI, trY = [],[],[]
norm = 699
for iname, itype in np.array(trainset).tolist():
    if iname.endswith(".jpg"):
        try:
            image_dir = root_dir+'img_data_%s'%itype
            image_path = os.path.join(image_dir, iname)
            if itype == 'norm':
                if norm>0:
                    img = cv2.resize(cv2.imread(image_path).astype(np.float32), (1024, 1024))#(1920,1920,3)->(1024,1024,3)
                    trData.append([iname,itype])
                    trI.append(img)
                    trY.append(TypetoNum(itype))
                    norm = norm - 1
            else:
                img = cv2.resize(cv2.imread(image_path).astype(np.float32), (1024, 1024))#(1920,1920,3)->(1024,1024,3)
                trData.append([iname,itype,img])
                trI.append(img)
                trY.append(TypetoNum(itype))    
        except:
            print(iname+":"+str(image_path))
        sys.stdout.write('\r{} / {} '.format(len(trData),trainset.shape[0]))
        sys.stdout.flush()
print('The length of train set is %d'%len(trData))
#read test image with CV
teData, teI, teY = [],[],[]
norm = 77
for iname, itype in np.array(testset).tolist():
    if iname.endswith(".jpg"):
        try:
            image_dir = root_dir+'img_data_%s'%itype
            image_path = os.path.join(image_dir, iname)
            if itype == 'norm':
                if norm>0:
                    img = cv2.resize(cv2.imread(image_path).astype(np.float32), (1024, 1024))#(1920,1920,3)->(1024,1024,3)
                    teData.append([iname,itype])
                    teI.append(img)
                    teY.append(TypetoNum(itype))
                    norm = norm - 1
            else:
                img = cv2.resize(cv2.imread(image_path).astype(np.float32), (1024, 1024))#(1920,1920,3)->(1024,1024,3)
                teData.append([iname,itype,img])
                teI.append(img)
                teY.append(TypetoNum(itype)) 
        except:
            print(iname+":"+str(image_path))
        sys.stdout.write('\r{} / {} '.format(len(teData),testset.shape[0]))
        sys.stdout.flush()
print('The length of test set is %d'%len(teData))
elapsed = time.time() - tstart    
print('Completed buliding index in %d seconds' % int(elapsed))

2706 / 9000 The length of train set is 2706
300 / 1000 The length of test set is 300
Completed buliding index in 220 seconds


In [5]:
#2. define CNN network with pytorch
class CNN_FCL_Net(nn.Module): 
    def __init__(self,inChannels=3):
        super(CNN_FCL_Net, self).__init__()
        #(channels, Height, Width)
        #layer1: Convolution, (3,1024,1024)->(16,512,512)
        self.conv1 = nn.Conv2d(in_channels=inChannels, out_channels=16, kernel_size=3, padding=1, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU(inplace=True)
        #layer2: max pooling,(16,512,512)->(16,256,256)
        self.maxpool = nn.MaxPool2d(kernel_size=3, padding=1, stride=2)
        self.bn2 = nn.BatchNorm2d(16)
        #layer3: Convolution, (16,256,256)->(8,128,128)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding=1, stride=2)
        self.bn3 = nn.BatchNorm2d(8)
        self.relu2 = nn.ReLU(inplace=True)
        #layer4: mean pooling, (8,128,128)->(8,64,64)
        self.avgpool1 = nn.AvgPool2d(kernel_size=3, padding=1, stride=2)
        self.bn4 = nn.BatchNorm2d(8)
        #layer5: Convolution, (8,64,64)->(4*32*32)
        self.conv3 = nn.Conv2d(in_channels=8, out_channels=4, kernel_size=3, padding=1, stride=2)
        self.bn5 = nn.BatchNorm2d(4)
        self.relu3 = nn.ReLU(inplace=True)
        #layer6: mean pooling, (4,32,32)->(4,16,16)
        self.avgpool2 = nn.AvgPool2d(kernel_size=3, padding=1, stride=2)
        self.bn6 = nn.BatchNorm2d(4)
        #layer7: fully connected, 4*16*16->512
        self.fcl1 = nn.Linear(4*16*16,512)
        self.relu4 = nn.ReLU(inplace=True)
        #layer8: Hashing layer, 512->16
        self.fcl2 = nn.Linear(512,16)#
        self.tanh = nn.Tanh()
        #layer9: fully connected, 16->5
        self.fcl3 = nn.Linear(16,5)#type:5
              
    def forward(self,x):
        #input: (batch_size, in_channels, Height, Width)
        #output: (batch_size, out_channels, Height, Width)
        #layer1: convolution
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        #layer2: max pooling
        x = self.maxpool(x)
        x = self.bn2(x)
        #layer3: Convolution
        x = self.conv2(x)
        x = self.bn3(x)
        x = self.relu2(x)
        #layer4: mean pooling
        x = self.avgpool1(x)
        x = self.bn4(x)
        #layer5: Convolution
        x = self.conv3(x)
        x = self.bn5(x)
        x = self.relu3(x)
        #layer6: mean pooling
        x = self.avgpool2(x)
        x = self.bn6(x)
        #layer7:fully connected
        x = x.view(x.size(0),-1) #transfer three dims to one dim
        x = self.fcl1(x)
        x = self.relu4(x)
        #layer8: fully connected
        x = self.fcl2(x)
        x = self.tanh(x)#[-1,1]
        #layer9: fully connected
        out = self.fcl3(x)
                
        return x,out
#test network: valid
x = torch.rand(10,3,1024,1024)
y = torch.LongTensor([0,1,2,3,4,3,2,4,0,1])
model = CNN_FCL_Net()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #define optimizer
for epoch in range(10):
    optimizer.zero_grad()
    
    _,out = model(x)
    out = F.log_softmax(out)
    loss = F.nll_loss(out, y)
    print (loss.item())
    loss.backward()
    optimizer.step()
    #observe the variant of model.parameters
    for i in model.named_parameters():
        print(i[0])
        print(i[1][0][0][0])
        break
#output
x2 = torch.rand(10,3,1024,1024)#.cuda()
x2,_ = model(x2)
print (x2)
print (x2.size())



1.597808599472046
conv1.weight
tensor([-0.1370,  0.0693,  0.0283])
1.0785161256790161
conv1.weight
tensor([-0.1373,  0.0698,  0.0279])
0.7979228496551514
conv1.weight
tensor([-0.1372,  0.0705,  0.0274])
0.6365795135498047
conv1.weight
tensor([-0.1370,  0.0707,  0.0268])
0.5375972390174866
conv1.weight
tensor([-0.1369,  0.0708,  0.0261])
0.4756961762905121
conv1.weight
tensor([-0.1368,  0.0707,  0.0255])
0.4359434247016907
conv1.weight
tensor([-0.1367,  0.0705,  0.0249])
0.40972647070884705
conv1.weight
tensor([-0.1366,  0.0703,  0.0243])
0.39210301637649536
conv1.weight
tensor([-0.1365,  0.0700,  0.0238])
0.37974879145622253
conv1.weight
tensor([-0.1365,  0.0697,  0.0233])
tensor([[ 0.0126, -0.4354,  0.7358, -0.3279,  0.0964, -0.1032,  0.0251,
          0.0671, -0.3541,  0.1048, -0.2245, -0.0713,  0.0981, -0.3019,
         -0.0763, -0.3924],
        [-0.2796, -0.4190,  0.6042, -0.1088,  0.2098, -0.0519, -0.1614,
         -0.2900, -0.5231,  0.6286, -0.5180, -0.5717, -0.1499, -0.0641,
  

In [6]:
#3. Train and evaluate model 
#define model
model = CNN_FCL_Net().cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #define optimizer
#train model
batchSize = 10
num_batches = len(trY) // batchSize+1
for epoch in range(2):#iteration
    #train model 
    losses = []
    for i in range(num_batches):
        min_idx = i * batchSize
        max_idx = np.min([len(trY), (i+1)*batchSize])
        I_batch = torch.from_numpy(np.array(trI[min_idx: max_idx])).type(torch.FloatTensor).cuda()
        Y_batch = torch.from_numpy(np.array(trY[min_idx: max_idx])).type(torch.LongTensor).cuda()
        optimizer.zero_grad()
        #forward
        _, out = model(I_batch.permute(0, 3, 1, 2))#adjust channel to the second
        out = F.log_softmax(out)
        loss = F.nll_loss(out, Y_batch)
        loss.backward()
        optimizer.step()
        sys.stdout.write('\r {} / {} : loss = {}'.format(i, num_batches, float('%0.6f'%loss.item())))
        sys.stdout.flush()     
        losses.append(loss.item())
    print("Eopch: %5d mean_loss = %.6f" % (epoch + 1, np.mean(losses)))
    
#release gpu memory
#model = model.cpu()
#torch.cuda.empty_cache()
#hash code of train data from model
batchSize = 10
num_batches = len(trI) // batchSize+1
trF = []
for i in range(num_batches):
    min_idx = i * batchSize
    max_idx = np.min([len(trI), (i+1)*batchSize])
    I_batch = torch.from_numpy(np.array(trI[min_idx: max_idx])).type(torch.FloatTensor).cuda()
    X_batch,_ = model(I_batch.permute(0, 3, 1, 2))#forword
    I_batch = I_batch.cpu()
    X_batch = X_batch.cpu()
    torch.cuda.empty_cache()#release gpu memory
    trF.extend(X_batch.data.numpy().tolist())
    sys.stdout.write('\r {} / {} '.format(i, num_batches))
    sys.stdout.flush()
    
#hash code of test data from model
teF = []
num_batches = len(teI) // batchSize
for i in range(num_batches):
    min_idx = i * batchSize
    max_idx = np.min([len(teI), (i+1)*batchSize])
    I_batch = torch.from_numpy(np.array(teI[min_idx: max_idx])).type(torch.FloatTensor).cuda()
    X_batch,_ = model(I_batch.permute(0, 3, 1, 2))#forword
    I_batch = I_batch.cpu()
    X_batch = X_batch.cpu()
    torch.cuda.empty_cache()#release gpu memory
    teF.extend(X_batch.data.numpy().tolist())
    sys.stdout.write('\r {} / {} '.format(i, num_batches))
    sys.stdout.flush()
    
#Evaluate model
#train data with list: trData, trI, trF, trY
#test data with list: teData, teI, teF, teY
for topk in [5,10,15,20]:
    MHR = [] #mean Hit ratio 
    MAP = [] #mean average precision
    MRR = [] #mean reciprocal rank
    for i, teVal in enumerate(teF):
        stype = teY[i]
        map_item_score = {}
        for j, trVal in enumerate(trF):
            map_item_score[j] = pdist(np.vstack([teVal,trVal]),'cosine')
        ranklist = heapq.nsmallest(topk, map_item_score, key=map_item_score.get)
        #perfromance
        pos_len = 0
        rank_len = 0
        mrr_flag = 0
        for j in ranklist:
            dtype = trY[j]
            rank_len=rank_len+1
            if stype==dtype:  #hit
                MHR.append(1)
                pos_len = pos_len +1
                MAP.append(pos_len/rank_len) 
                if mrr_flag==0: 
                    MRR.append(pos_len/rank_len)
                    mrr_flag =1
            else: 
                MHR.append(0)
                MAP.append(0)   
    print("mHR@{}={:.6f}, mAP@{}={:.6f}, mRR@{}={:.6f}".format(topk,np.mean(MHR),topk,np.mean(MAP), topk, np.mean(MRR)))

 1 / 271 : loss = 1.238058



 270 / 271 : loss = 0.666214Eopch:     1 mean_loss = 1.406450
 270 / 271 : loss = 0.613051Eopch:     2 mean_loss = 1.731901
 29 / 30 1 mHR@5=0.242667, mAP@5=0.241122, mRR@5=0.980856
mHR@10=0.241667, mAP@10=0.240656, mRR@10=0.980856
mHR@15=0.241111, mAP@15=0.240438, mRR@15=0.980856
mHR@20=0.241333, mAP@20=0.240433, mRR@20=0.980856


In [7]:
#3. define Attention-CNN network with pytorch
class SpatialAttention(nn.Module):#spatial attention layer
    def __init__(self):
        super(SpatialAttention, self).__init__()

        self.conv1 = nn.Conv2d(2, 1, kernel_size=3, padding=1, bias=False)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)
    
class ATT_FCL_Net(nn.Module): 
    def __init__(self,inChannels=3):
        super(ATT_FCL_Net, self).__init__()
        #(channels, Height, Width)
        #layer1: Convolution, (3,1024,1024)->(16,512,512)
        self.conv1 = nn.Conv2d(in_channels=inChannels, out_channels=16, kernel_size=3, padding=1, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU(inplace=True)
        #layer2: max pooling,(16,512,512)->(16,256,256)
        self.maxpool = nn.MaxPool2d(kernel_size=3, padding=1, stride=2)
        self.bn2 = nn.BatchNorm2d(16)
        #layer3: Spatial Attention Layer, (16,256,256)->(16,256,256)
        self.sa = SpatialAttention()
        #layer4: Convolution, (16,256,256)->(8,128,128)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, padding=1, stride=2)
        self.bn3 = nn.BatchNorm2d(8)
        self.relu2 = nn.ReLU(inplace=True)
        #layer5: mean pooling, (8,128,128)->(8,64,64)
        self.avgpool1 = nn.AvgPool2d(kernel_size=3, padding=1, stride=2)
        self.bn4 = nn.BatchNorm2d(8)
        #layer6: Convolution, (8,64,64)->(4*32*32)
        self.conv3 = nn.Conv2d(in_channels=8, out_channels=4, kernel_size=3, padding=1, stride=2)
        self.bn5 = nn.BatchNorm2d(4)
        self.relu3 = nn.ReLU(inplace=True)
        #layer7: mean pooling, (4,32,32)->(4,16,16)
        self.avgpool2 = nn.AvgPool2d(kernel_size=3, padding=1, stride=2)
        self.bn6 = nn.BatchNorm2d(4)
        #layer8: fully connected, 4*16*16->512
        self.fcl1 = nn.Linear(4*16*16,512)
        self.relu4 = nn.ReLU(inplace=True)
        #layer9: Hashing layer, 512->16
        self.fcl2 = nn.Linear(512,16)#
        self.tanh = nn.Tanh()
        #layer10: fully connected, 16->5
        self.fcl3 = nn.Linear(16,5)#type:5
              
    def forward(self,x):
        #input: (batch_size, in_channels, Height, Width)
        #output: (batch_size, out_channels, Height, Width)
        #layer1: convolution
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        #layer2: max pooling
        x = self.maxpool(x)
        x = self.bn2(x)
        #layer3: Spatial Attention
        x = self.sa(x)*x
        #layer4: Convolution
        x = self.conv2(x)
        x = self.bn3(x)
        x = self.relu2(x)
        #layer5: mean pooling
        x = self.avgpool1(x)
        x = self.bn4(x)
        #layer6: Convolution
        x = self.conv3(x)
        x = self.bn5(x)
        x = self.relu3(x)
        #layer7: mean pooling
        x = self.avgpool2(x)
        x = self.bn6(x)
        #layer7:fully connected
        x = x.view(x.size(0),-1) #transfer three dims to one dim
        x = self.fcl1(x)
        x = self.relu4(x)
        #layer8: fully connected
        x = self.fcl2(x)
        x = self.tanh(x)#[-1,1]
        #layer9: fully connected
        out = self.fcl3(x)
                
        return x,out
    
#test network: valid
x = torch.rand(10,3,1024,1024)
y = torch.LongTensor([0,1,2,3,4,3,2,4,0,1])
model = ATT_FCL_Net()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #define optimizer
for epoch in range(10):
    optimizer.zero_grad()
    
    _,out = model(x)
    out = F.log_softmax(out)
    loss = F.nll_loss(out, y)
    print (loss.item())
    loss.backward()
    optimizer.step()
    #observe the variant of model.parameters
    for i in model.named_parameters():
        print(i[0])
        print(i[1][0][0][0])
        break
#output
x2 = torch.rand(10,3,1024,1024)#.cuda()
x2,_ = model(x2)
print (x2.size())
print (x2)



1.6286647319793701
conv1.weight
tensor([ 0.0681,  0.0753, -0.1499])
0.9922958612442017
conv1.weight
tensor([ 0.0688,  0.0762, -0.1497])
0.7420358657836914
conv1.weight
tensor([ 0.0695,  0.0771, -0.1497])
0.6227890253067017
conv1.weight
tensor([ 0.0700,  0.0779, -0.1498])
0.5594567060470581
conv1.weight
tensor([ 0.0702,  0.0786, -0.1500])
0.5235650539398193
conv1.weight
tensor([ 0.0704,  0.0792, -0.1502])
0.5012494325637817
conv1.weight
tensor([ 0.0706,  0.0798, -0.1505])
0.4861129820346832
conv1.weight
tensor([ 0.0707,  0.0802, -0.1507])
0.47518372535705566
conv1.weight
tensor([ 0.0708,  0.0807, -0.1508])
0.4666842818260193
conv1.weight
tensor([ 0.0709,  0.0810, -0.1510])
torch.Size([10, 16])
tensor([[-0.3691,  0.0007, -0.1708,  0.1649, -0.0504, -0.6654, -0.0322,
          0.2987,  0.2620, -0.4826, -0.0104, -0.4075,  0.0734, -0.3656,
          0.5830,  0.4572],
        [-0.1422,  0.0243, -0.2885,  0.4359, -0.4094, -0.6607, -0.4221,
          0.2962,  0.1323,  0.0828, -0.4885, -0.7189, 

In [8]:
#4.train and evaluate model
#define model
model = ATT_FCL_Net().cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #define optimizer
#train model
batchSize = 10
num_batches = len(trY) // batchSize+1
for epoch in range(2):#iteration
    #train model 
    losses = []
    for i in range(num_batches):
        min_idx = i * batchSize
        max_idx = np.min([len(trY), (i+1)*batchSize])
        I_batch = torch.from_numpy(np.array(trI[min_idx: max_idx])).type(torch.FloatTensor).cuda()
        Y_batch = torch.from_numpy(np.array(trY[min_idx: max_idx])).type(torch.LongTensor).cuda()
        optimizer.zero_grad()
        #forward
        _, out = model(I_batch.permute(0, 3, 1, 2))#adjust channel to the second
        out = F.log_softmax(out)
        loss = F.nll_loss(out, Y_batch)
        loss.backward()
        optimizer.step()
        sys.stdout.write('\r {} / {} : loss = {}'.format(i, num_batches, float('%0.6f'%loss.item())))
        sys.stdout.flush()     
        losses.append(loss.item())
    print("Eopch: %5d mean_loss = %.6f" % (epoch + 1, np.mean(losses)))
#release gpu memory
#model = model.cpu()
#torch.cuda.empty_cache()
#hash code of train data from model
batchSize = 10
num_batches = len(trI) // batchSize+1
trF = []
for i in range(num_batches):
    min_idx = i * batchSize
    max_idx = np.min([len(trI), (i+1)*batchSize])
    I_batch = torch.from_numpy(np.array(trI[min_idx: max_idx])).type(torch.FloatTensor).cuda()
    X_batch,_ = model(I_batch.permute(0, 3, 1, 2))#forword
    I_batch = I_batch.cpu()
    X_batch = X_batch.cpu()
    torch.cuda.empty_cache()#release gpu memory
    trF.extend(X_batch.data.numpy().tolist())
    sys.stdout.write('\r {} / {} '.format(i, num_batches))
    sys.stdout.flush()
    
#hash code of test data from model
teF = []
num_batches = len(teI) // batchSize
for i in range(num_batches):
    min_idx = i * batchSize
    max_idx = np.min([len(teI), (i+1)*batchSize])
    I_batch = torch.from_numpy(np.array(teI[min_idx: max_idx])).type(torch.FloatTensor).cuda()
    X_batch,_ = model(I_batch.permute(0, 3, 1, 2))#forword
    I_batch = I_batch.cpu()
    X_batch = X_batch.cpu()
    torch.cuda.empty_cache()#release gpu memory
    teF.extend(X_batch.data.numpy().tolist())
    sys.stdout.write('\r {} / {} '.format(i, num_batches))
    sys.stdout.flush()
#train data with list: trData, trI, trF, trY
#test data with list: teData, teI, teF, teY
for topk in [5,10,15,20]:
    MHR = [] #mean Hit ratio 
    MAP = [] #mean average precision
    MRR = [] #mean reciprocal rank
    for i, teVal in enumerate(teF):
        stype = teY[i]
        map_item_score = {}
        for j, trVal in enumerate(trF):
            map_item_score[j] = pdist(np.vstack([teVal,trVal]),'cosine')
        ranklist = heapq.nsmallest(topk, map_item_score, key=map_item_score.get)
        #perfromance
        pos_len = 0
        rank_len = 0
        mrr_flag = 0
        for j in ranklist:
            dtype = trY[j]
            rank_len=rank_len+1
            if stype==dtype:  #hit
                MHR.append(1)
                pos_len = pos_len +1
                MAP.append(pos_len/rank_len) 
                if mrr_flag==0: 
                    MRR.append(pos_len/rank_len)
                    mrr_flag =1
            else: 
                MHR.append(0)
                MAP.append(0)   
    print("mHR@{}={:.6f}, mAP@{}={:.6f}, mRR@{}={:.6f}".format(topk,np.mean(MHR),topk,np.mean(MAP), topk, np.mean(MRR)))

 1 / 271 : loss = 0.945197



 270 / 271 : loss = 1.491649Eopch:     1 mean_loss = 2.066111
 270 / 271 : loss = 0.721831Eopch:     2 mean_loss = 1.548523
 29 / 30 1 mHR@5=0.244000, mAP@5=0.242489, mRR@5=0.970175
mHR@10=0.246000, mAP@10=0.242817, mRR@10=0.948962
mHR@15=0.246889, mAP@15=0.243340, mRR@15=0.948962
mHR@20=0.246167, mAP@20=0.242868, mRR@20=0.948962
