In [1]:
import glob
import os
from random import shuffle

import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.utils.data as data
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from torch.autograd import Variable

制作数据集

In [2]:
# Build the train / validation / test file lists.
# Every entry is a (path, label) tuple with label 0 = cancer, 1 = normal.


def label_from_path(path):
    """Return the class label of a patch file: 0 for cancer, 1 for normal.

    The class is read from the name of the directory that directly contains
    the file, so a 'cancer' substring elsewhere in the path (for example in
    the file name) cannot change the label.
    """
    class_dir = os.path.basename(os.path.dirname(path))
    return 0 if 'cancer' in class_dir else 1


files = glob.glob('../data/patches/train/*/*.tiff')
# NOTE(review): the shuffle is unseeded, so train/val membership differs
# between runs.
shuffle(files)

train_imgs = []
val_imgs = []
test_imgs = []

# Label every file exactly once so that the per-class totals and the split
# below always agree on which class a file belongs to.
labelled_files = [(path, label_from_path(path)) for path in files]

# First pass: per-class totals.
cancer_sum = sum(1 for _, label in labelled_files if label == 0)
normal_sum = len(labelled_files) - cancer_sum

# Overall 80% mark; the per-class split below does not use it.
p80 = len(files) * 4 // 5

# Second pass: the first 80% of each class (in shuffled order) goes to the
# training list, the remainder to the validation list.
train_quota = {0: cancer_sum * 4 // 5, 1: normal_sum * 4 // 5}
train_count = {0: 0, 1: 0}
for file, label in labelled_files:
    if train_count[label] < train_quota[label]:
        train_imgs.append((file, label))
        train_count[label] += 1
    else:
        val_imgs.append((file, label))

train_cancer = train_count[0]
train_normal = train_count[1]

print('cancer_sum: %5d, normal_sum: %5d, train_cancer: %5d, train_normal: %5d, ' %
      (cancer_sum, normal_sum, train_cancer, train_normal))

# The test set has its own directory tree and is used as-is (no split).
test_files = glob.glob('../data/patches/test/*/*.tiff')
for file in test_files:
    test_imgs.append((file, label_from_path(file)))

cancer_sum: 56123, normal_sum: 57033, train_cancer: 44898, train_normal: 45626, 


In [3]:
# Dataset loading
def default_loader(path):
    """Open the image file at `path` and return it converted to RGB.

    The file is opened through a `with` block so the handle is closed when
    the function returns, instead of being left to the garbage collector.
    This follows the same open-then-convert pattern as torchvision's
    `pil_loader`.
    """
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')


class MyDataset(data.Dataset):
    """Dataset over one of the module-level (path, label) lists.

    The list is selected by the `train` / `val` / `test` flags, checked in
    that order: `train_imgs`, `val_imgs` or `test_imgs`.

    Args:
        train, val, test: select which list backs the dataset; the first
            true flag wins.
        transform: optional callable applied to the loaded image.
        target_transform: optional callable applied to the label.
        loader: callable that takes a path and returns the image.

    Raises:
        ValueError: if none of `train`, `val`, `test` is true.
    """

    def __init__(self, train=False, val=False, test=False, transform=None, target_transform=None, loader=default_loader):
        if train:
            self.imgs = train_imgs
        elif val:
            self.imgs = val_imgs
        elif test:
            self.imgs = test_imgs
        else:
            # Without this check the object would be created without `imgs`
            # and only fail later, on the first len() or index access.
            raise ValueError('one of train, val or test must be True')
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        """Return the (image, label) pair at `index`, with transforms applied."""
        fn, label = self.imgs[index]
        img = self.loader(fn)
        if self.transform is not None:
            img = self.transform(img)
        # target_transform was accepted by __init__ but never used before.
        if self.target_transform is not None:
            label = self.target_transform(label)
        return img, label

    def __len__(self):
        """Return the number of (path, label) entries in the selected list."""
        return len(self.imgs)

数据集的预处理

In [9]:

# Training-time preprocessing: random horizontal flip as augmentation, then
# conversion to a tensor.
transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)

# Evaluation-time preprocessing: no random augmentation, so validation and
# test metrics are computed on the same pixels every time they are measured.
eval_transform = transforms.Compose(
    [
        transforms.ToTensor(),
    ]
)

trainset = MyDataset(train=True, transform=transform)
valset = MyDataset(val=True, transform=eval_transform)
testset = MyDataset(test=True, transform=eval_transform)

# NOTE(review): the run recorded at the end of this notebook stopped with a
# CUDA out-of-memory error during the first epoch; the training batch size
# is the first thing to lower if that happens again.
trainloader = data.DataLoader(trainset, batch_size=200,
                              shuffle=True, num_workers=4)
valloader = data.DataLoader(valset, batch_size=50,
                            shuffle=False, num_workers=4)
testloader = data.DataLoader(testset, batch_size=50,
                             shuffle=False, num_workers=4)

# Class names indexed by label: 0 -> 'cancer', 1 -> 'normal'.
classes = ('cancer', 'normal')

定义net

In [5]:
use_cuda = torch.cuda.is_available()

best_acc = 0  # best validation accuracy seen so far (updated by validation())
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

# The task has two labels (0 = cancer, 1 = normal). Without `num_classes`
# torchvision builds ResNet-18 with its default 1000-way output layer, which
# wastes parameters and lets the argmax land on classes that do not exist.
net = models.resnet18(num_classes=2)

In [6]:
# When CUDA is available, move the model to the GPU and replicate it across
# every visible device.
if use_cuda:
    gpu_ids = range(torch.cuda.device_count())
    net = torch.nn.DataParallel(net.cuda(), device_ids=gpu_ids)
    cudnn.benchmark = True

# To resume from previously saved parameters:
# net.load_state_dict( torch.load('../net_state/resnet18_cifar10_epoch30_params.pkl') )

# print('net', net)

In [7]:
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

# Loss and optimizer: cross-entropy trained with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    net.parameters(),
    lr=0.1,
    momentum=0.9,
)

# Learning-rate schedule driven by the value passed to scheduler.step().
# Fixed-step alternative:
# scheduler = lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)

定义训练和验证

In [8]:
import time

def train(epoch):
    """Run one training pass over `trainloader` and print running statistics.

    Uses the module-level `net`, `criterion`, `optimizer`, `trainloader` and
    `use_cuda`. Every 400 mini-batches it prints the mean loss per batch so
    far, the running accuracy, the current learning rate and elapsed time.

    Args:
        epoch: epoch number, used only for the header line that is printed.
    """
    print('\nEpoch: %d' % epoch)
    t1 = time.time()
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # NOTE(review): `loss.data[0]` is the pre-0.4 PyTorch way to read a
        # scalar loss; on 0.4+ this should become `loss.item()`.
        train_loss += loss.data[0]
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        if batch_idx % 400 == 399:    # print every 400 mini-batches
            # Mean loss over the batch_idx + 1 batches seen so far. The
            # parentheses matter: `train_loss/batch_idx+1` would divide by
            # batch_idx and then add 1 to the result.
            print('m-b %4d loss: %.3f | Acc: %.3f%% | lr: %.4f | time: %.2f' %
                ( batch_idx+1, train_loss/(batch_idx+1), 100.*correct/total, optimizer.param_groups[0]['lr'], time.time() - t1 ) )
            
def validation(epoch):
    """Evaluate `net` on `valloader`, checkpoint on improvement, return the loss.

    Uses the module-level `net`, `criterion`, `optimizer`, `valloader` and
    `use_cuda`, and updates the global `best_acc`. When the accuracy beats
    `best_acc`, the model, accuracy and epoch are saved to
    './checkpoint/ckpt.t7'.

    Args:
        epoch: epoch number stored in the checkpoint.

    Returns:
        The mean validation loss per batch.
    """
    global best_acc
    net.eval()
    t1 = time.time()
    val_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(valloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        # NOTE(review): `volatile=True` and `loss.data[0]` are pre-0.4
        # PyTorch idioms; on 0.4+ use `with torch.no_grad():` and
        # `loss.item()` instead.
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)

        # Accumulate into val_loss; the former `test_loss` was never defined.
        val_loss += loss.data[0]
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

    # Mean loss per batch; the same value is printed and returned.
    avg_loss = val_loss / len(valloader)
    acc = 100.*correct/total

    print('val loss: %.3f | Acc: %.3f%% | lr: %.4f | time: %.2f' %
        ( avg_loss, acc, optimizer.param_groups[0]['lr'], time.time() - t1 ) )

    # Save checkpoint.
    if acc > best_acc:
        print('Saving..')
        state = {
            # net is wrapped in DataParallel when CUDA is in use, so store
            # the underlying module rather than the wrapper.
            'net': net.module if use_cuda else net,
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt.t7')
        best_acc = acc

    return avg_loss


In [10]:
# Train for three epochs from start_epoch; after each one, the validation
# loss is handed to the scheduler so it can adjust the learning rate.
stop_epoch = start_epoch + 3
for epoch in range(start_epoch, stop_epoch):
    train(epoch)
    val_loss = validation(epoch)
    scheduler.step(val_loss)


Epoch: 0


RuntimeError: cuda runtime error (2) : out of memory at /pytorch/torch/lib/THC/generic/THCStorage.cu:66