In [16]:
import numpy as np
import cv2
import random
import glob
import time
 
# calculate means and std
train_txt_path = './train_val_list.txt'
image_path = '/media/scw4750/disk/test/skin_detect/isic2018-master/task3/images/HAM10000/'

img_dir = glob.glob(image_path+"*.jpg")
print(len(img_dir))

CNum = 100      # 挑选多少图片进行计算
 
img_h, img_w = 600, 450
imgs = np.zeros([img_w, img_h, 3, 1])
means, stdevs = [], []

start = time.time()
for i in range(CNum):
    img = cv2.imread(img_dir[i])
    img = img[:, :, :, np.newaxis]
    imgs = np.concatenate((imgs, img), axis=3)
    
imgs = imgs.astype(np.float32)/255.

for i in range(3):
    pixels = imgs[:,:,i,:].ravel()  # 拉成一行
    means.append(np.mean(pixels))
    stdevs.append(np.std(pixels))

# cv2 读取的图像格式为BGR，PIL/Skimage读取到的都是RGB不用转
means.reverse() # BGR --> RGB
stdevs.reverse()
 
print("normMean = {}".format(means))
print("normStd = {}".format(stdevs))
print('transforms.Normalize(normMean = {}, normStd = {})'.format(means, stdevs))
end = time.time()
print(end-start)

10015
normMean = [0.76608187, 0.5597207, 0.58785397]
normStd = [0.15595692, 0.1533187, 0.17299013]
transforms.Normalize(normMean = [0.76608187, 0.5597207, 0.58785397], normStd = [0.15595692, 0.1533187, 0.17299013])
14.439172506332397


In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.optim.lr_scheduler import *
import pretrainedmodels
import numpy as np
import os
import sys
import csv
import time
import logging
import errno
import random
import pickle
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from multiprocessing import Process
import torch.utils.model_zoo as model_zoo
import math
from torchvision import models, transforms, utils
from sklearn.metrics import confusion_matrix, auc, roc_curve, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from decimal import Decimal
from PIL import Image
from pathlib import Path

#填写cuda使用范围[0,1,2,3]
numGPUs = [1]
cuda_str = ""
for i in range(len(numGPUs)):
    cuda_str = cuda_str + str(numGPUs[i])
    if i is not len(numGPUs)-1:
        cuda_str = cuda_str + ","
print("Devices to use:",cuda_str)
os.environ["CUDA_VISIBLE_DEVICES"] = cuda_str
#数据类，特别重要
class ISICdataset(Dataset):
    def __init__(self, setInd, im_paths, labels_array, train=True):
        self.train = train
        self.same_sized_crop = True
        self.full_color_distort = False
        self.input_size = (np.int32([224, 224, 3][0]),np.int32([224, 224, 3][1]))
        self.setMean = np.array([0, 0, 0]).astype(np.float32)
        self.indices = setInd
        self.im_paths = im_paths
        self.labels_array = labels_array

        if self.train:
            if self.same_sized_crop:
                cropping = transforms.RandomCrop(self.input_size)
            else:
                cropping = transforms.RandomResizedCrop(self.input_size[0])
                # Color distortion
            if self.full_color_distort:
                color_distort = transforms.ColorJitter(brightness=32. / 255., saturation=0.5, contrast=0.5, hue=0.2)
            else:
                color_distort = transforms.ColorJitter(brightness=32. / 255., saturation=0.5)
                # All transforms
            self.composed = transforms.Compose([
                cropping,
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                color_distort,
                transforms.ToTensor(),
                transforms.Normalize(torch.from_numpy(self.setMean).float(),
                                     torch.from_numpy(np.array([1., 1., 1.])).float())
            ])
            self.labels = labels_array[setInd, :]
            self.im_paths = np.array(im_paths)[setInd].tolist()
        else:
            cropping = transforms.RandomResizedCrop(self.input_size[0])
            self.composed = transforms.Compose([
                cropping,
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(torch.from_numpy(self.setMean).float(),
                                     torch.from_numpy(np.array([1., 1., 1.])).float())
            ])

            self.labels = labels_array[setInd, :]
            self.im_paths = np.array(im_paths)[setInd].tolist()

    def __len__(self):
        return self.labels.shape[0]

    def __getitem__(self, idx):
        x = Image.open(self.im_paths[idx])
        y = self.labels[idx, :]
        x = self.composed(x)
        y = np.argmax(y)
        y = np.int64(y)
        return x, y, idx

#磁盘，选用第一块作为基底
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#标签及图片路径，5折交叉分割文件
# root = '/home/deeplearning/wwt/pymodel/model/skin_detect/isic2018-master/task3/labels/HAM10000/'
# img_dir = '/home/deeplearning/wwt/pymodel/model/skin_detect/isic2018-master/task3/images/HAM10000/'
# file = '/home/deeplearning/wwt/weight_test/pytorch_test/model_data/indices_new.pkl'


root = '/media/scw4750/disk/test/skin_detect/isic2018-master/task3/labels/HAM10000/'
img_dir = '/media/scw4750/disk/test/skin_detect/isic2018-master/task3/images/HAM10000/'
file = '/media/scw4750/disk/test/skin_detect/isic2018-master/saved_model/indices_new.pkl'
#图片路径列表
im_paths = [os.path.join(img_dir, img) for img in os.listdir(img_dir)]
# print("im_paths", np.array(im_paths).shape)

#打开标签文件，保存到字典中
labels_dict = {}
with open(root+'label.csv', newline='') as csvfile:
    labels_str = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in labels_str:
        if 'ISIC' not in row[0]:
            continue
        labels_dict[row[0]] = np.array(
            [int(float(row[1])), int(float(row[2])), int(float(row[3])), int(float(row[4])),
             int(float(row[5])), int(float(row[6])), int(float(row[7]))])
# print("labels_dict:", labels_dict)

#图片id列表以及标签列表
img_ids_list = []
labels_list = []
for img in im_paths:
    id = img[img.rindex("/") + 1:img.rindex(".")]
    array = labels_dict.get(id)
    img_ids_list.append(id)
    labels_list.append(array)
# print("img_ids_list:", img_ids_list)
# print("labels_list:", labels_list)

#标签数组以及计算各类所占比例
labels_array = np.zeros([len(labels_list), 7], dtype=np.float32)
for i in range(len(labels_list)):
    labels_array[i, :] = labels_list[i]
# print("labels_array:", labels_array)
# print(np.mean(labels_array, axis=0))

#保存5折交叉验证的标签
with open(file, 'rb') as f:
    indices = pickle.load(f)
trainIndCV = indices['trainIndCV']
valIndCV = indices['valIndCV']
#最终得到的最优值
f1Best = {}
sensBest = {}
specBest = {}
accBest = {}
waccBest = {}
aucBest = {}
convergeTime = {}
bestPred = {}
target = {}

#用到的几个常数
cv = 0
lr = 0.000025*len(numGPUs)
lastBestInd = -1
batchSize = 20 * len(numGPUs)
# numBatchesTrain = int(math.floor(len(trainInd) / batchSize))
# print("Train batches", numBatchesTrain)
start_epoch = 1
display_step = 5
training_steps = 150
eval_set = 'valInd'


def get_mean_std(dataloader):
    """Get mean and std by sample ratio
    """
    train = iter(dataloader).next()[0]   # 一个batch的数据
    mean = np.mean(train.numpy(), axis=(0,2,3))
    std = np.std(train.numpy(), axis=(0,2,3))
    return mean, std


print('Train')
trainInd = trainIndCV[0]
print(trainInd.shape)
print('val')
valInd = valIndCV[0]
print(valInd.shape)

#不平衡类加权方法
indices_ham = trainInd[trainInd < 10015]
class_weights = 1.0 / np.mean(labels_array[indices_ham, :], axis=0)
print("Current class weights", class_weights)


#训练集准备
trainset = ISICdataset(trainInd, im_paths, labels_array, train=True)
#测试集准备
valset = ISICdataset(valInd, im_paths, labels_array, train=False)
#数据加载
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batchSize, shuffle=True, num_workers=2, pin_memory=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=batchSize, shuffle=False, num_workers=2, pin_memory=True)

train_mean, train_std = get_mean_std(trainloader)

test_mean, test_std = get_mean_std(valloader)

print(train_mean, train_std)
print(test_mean,test_std)

Devices to use: 1
Train
(8009,)
val
(2006,)
Current class weights [ 9.019144   1.5012184 19.7266    29.553505   8.938616  84.30526
 67.87288  ]
[0.7811761  0.5264199  0.54028153] [0.13406543 0.17649162 0.1900084 ]
[0.754263   0.50416595 0.5183167 ] [0.13232674 0.15433034 0.1807971 ]
