In [1]:
import os, sys, glob, argparse
import pandas as pd
import numpy as np
from tqdm import tqdm

import cv2
from PIL import Image
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold

import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

#Check if GPU is available
if torch.cuda.is_available():
   device = torch.device("cuda")
else:
   device = torch.device("cpu")

In [2]:
train_path = glob.glob('./train_data/*/*')
np.random.shuffle(train_path)

In [3]:
len(train_path)

3380

In [4]:
class XunFeiDataset(Dataset):
    def __init__(self, img_path, transform=None):
        self.img_path = img_path
        if transform is not None:
            self.transform = transform
        else:
            self.transform = None
    
    def __getitem__(self, index):
        img = Image.open(self.img_path[index])
        if self.transform is not None:
            img = self.transform(img)
        
        if 'powdery_mildew' in self.img_path[index]:
            label = 0
        elif 'healthy' in self.img_path[index]:
            label = 1
        elif 'rust' in self.img_path[index]:
            label = 2
        elif 'scab' in self.img_path[index]:
            label = 3
        
        return img, torch.from_numpy(np.array(label).astype(int))
    
    def __len__(self):
        return len(self.img_path)

In [15]:
class XunFeiNet(nn.Module):
    def __init__(self):
        super(XunFeiNet, self).__init__()
        model = models.resnet50(True)
        model.avgpool = nn.AdaptiveAvgPool2d(2)
        model.fc = nn.Linear(8192, 4)
        self.resnet = model
    
    def forward(self, img):
        out = self.resnet(img)
        return out

In [20]:
def train(train_loader, model, criterion, optimizer):
    model.train()
    print("start train")
    train_loss = 0.0
    for i, (input, target) in enumerate(train_loader):
        input = input.to(device)
        target = target.to(device)
       

        # compute output
        output = model(input)
        
        loss = criterion(output, target)
     
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            print('Train loss', loss.item())
            
        train_loss += loss.item()
    
    return train_loss/len(train_loader)
            
def validate(val_loader, model, criterion):
    model.eval()
    
    val_acc = 0.0
    
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            input = input.to(device)
            target = target.to(device)

            # compute output
            output = model(input)
            loss = criterion(output, target)
            
            val_acc += (output.argmax(1) == target).sum().item()
            
    return val_acc / len(val_loader.dataset)

def predict(test_loader, model, criterion):
    model.eval()
    val_acc = 0.0
    
    test_pred = []
    with torch.no_grad():
        for i, (input, target) in enumerate(test_loader):
            input = input.to(device)
            target = target.to(device)

            # compute output
            output = model(input)
            test_pred.append(output.data.cpu().numpy())
            
    return np.vstack(test_pred)

In [21]:
train_loader = torch.utils.data.DataLoader(
    XunFeiDataset(train_path[:-500],
    transforms.Compose([
                transforms.Resize(256),
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    ])), batch_size=15, shuffle=True, num_workers=4, pin_memory=False
)

val_loader = torch.utils.data.DataLoader(
    XunFeiDataset(train_path[-500:],
    transforms.Compose([
                transforms.Resize(256),
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    ])), batch_size=30, shuffle=False, num_workers=1, pin_memory=False
)

In [22]:
model = XunFeiNet()
model = model.to(device)
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), 0.001)

In [23]:
for _  in range(10):
    train_loss = train(train_loader, model, criterion, optimizer)
    val_acc  = validate(val_loader, model, criterion)
    
    print(train_loss, val_acc)

start train
Train loss 1.4678303003311157
Train loss 0.8444724678993225
0.946241787324349 0.788
start train
Train loss 0.6010607481002808
Train loss 0.34975355863571167
0.5841932684027901 0.836
start train
Train loss 0.32969680428504944
Train loss 0.43956664204597473
0.4683838080770026 0.89
start train
Train loss 0.5608193874359131
Train loss 0.2709323763847351
0.42548741854261607 0.89
start train
Train loss 0.6641634702682495
Train loss 0.4734472632408142
0.3683372457938579 0.866
start train
Train loss 0.44831058382987976
Train loss 0.23358970880508423
0.3430396023322828 0.882
start train
Train loss 0.27207428216934204
Train loss 0.16894394159317017
0.32976046002780396 0.906
start train
Train loss 0.3281022608280182
Train loss 0.1318625658750534
0.30089254657893133 0.928
start train
Train loss 0.23887018859386444
Train loss 0.3641754686832428
0.31933121049466234 0.9
start train
Train loss 0.68653804063797
Train loss 0.4926101565361023
0.2896609886471803 0.906


In [24]:
model = model.to('cpu')
torch.save(model.state_dict(), 'model.pt')

In [26]:
torch.cuda.empty_cache() 

run.py 内容如下：

```python
import os, sys, glob, argparse
import pandas as pd
import numpy as np
from tqdm import tqdm

import cv2
from PIL import Image
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold

import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

# Check if GPU is available
device = torch.device("cpu")

class XunFeiDataset(Dataset):
    def __init__(self, img_path, transform=None):
        self.img_path = img_path
        if transform is not None:
            self.transform = transform
        else:
            self.transform = None
    
    def __getitem__(self, index):
        img = Image.open(self.img_path[index])
        if self.transform is not None:
            img = self.transform(img)
        
        if 'powdery_mildew' in self.img_path[index]:
            label = 0
        elif 'healthy' in self.img_path[index]:
            label = 1
        elif 'rust' in self.img_path[index]:
            label = 2
        elif 'scab' in self.img_path[index]:
            label = 3
        else:
            label = 0
        
        return img, torch.from_numpy(np.array(label).astype(int))
    
    def __len__(self):
        return len(self.img_path)
    
class XunFeiNet(nn.Module):
    def __init__(self):
        super(XunFeiNet, self).__init__()
        model = models.resnet50(False)
        model.avgpool = nn.AdaptiveAvgPool2d(1)
        model.fc = nn.Linear(2048, 4)
        self.resnet = model
    
    def forward(self, img):
        out = self.resnet(img)
        return out
    
def predict(test_loader, model):
    model.eval()    
    test_pred = []
    with torch.no_grad():
        for i, (input, target) in enumerate(test_loader):
            input = input.to(device)
            target = target.to(device)
            output = model(input)
            test_pred.append(output.data.cpu().numpy())
            
    return np.vstack(test_pred)


test_path = glob.glob('/work/data/leafs-test-dataset/*')
test_path.sort()

test_loader = torch.utils.data.DataLoader(
    XunFeiDataset(test_path[:],
    transforms.Compose([
                transforms.Resize(224),
                # transforms.RandomResizedCrop(224),
                # transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    ])), batch_size=30, shuffle=False, num_workers=1, pin_memory=False
)
model = XunFeiNet()
model.load_state_dict(torch.load('./model/model.pt'))

test_pred = predict(test_loader, model)
test_pred = test_pred.argmax(1)
class_names = np.array(['powdery_mildew', 'healthy', 'rust', 'scab'])
test_pred = class_names[test_pred]

pd.DataFrame({
    'uuid': [x.split('/')[-1] for x in test_path],
    'label': test_pred
}).to_csv('/work/output/result.csv', index=None)
```