In [5]:
import os, sys, glob, argparse
import pandas as pd
import numpy as np
from tqdm import tqdm

import cv2
from PIL import Image
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold

import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [10]:
# Collect every training image (one sub-directory level below the dataset
# root). glob returns paths in a filesystem-dependent order and only torch's
# RNG is seeded in this notebook, so sort first and shuffle with a dedicated
# fixed-seed generator: any split sliced from this list is then identical on
# every fresh run, and the global NumPy RNG state is left untouched.
train_path = sorted(glob.glob('./叶片病害识别挑战赛训练集/*/*'))
np.random.RandomState(0).shuffle(train_path)

In [14]:
# Sanity check: how many training image paths were collected.
len(train_path)

3400

In [38]:
class XunFeiDataset(Dataset):
    """Image dataset whose class label is derived from each file path.

    Args:
        img_path: sequence of image file paths.
        transform: optional callable applied to the loaded PIL image
            (e.g. a torchvision ``Compose``); ``None`` returns the PIL image.

    Each item is ``(image, label)`` where ``label`` is a 0-d int64 tensor.
    """

    # (substring, class index) pairs, tested in this order against the path.
    _LABEL_KEYWORDS = (
        ('powdery_mildew', 0),
        ('healthy', 1),
        ('rust', 2),
        ('scab', 3),
    )

    def __init__(self, img_path, transform=None):
        self.img_path = img_path
        self.transform = transform

    @classmethod
    def _label_from_path(cls, path):
        """Return the class index of the first keyword contained in ``path``.

        Raises:
            ValueError: if no class keyword occurs in ``path``. Failing
                loudly avoids training on a silently mislabelled sample.
        """
        for keyword, label in cls._LABEL_KEYWORDS:
            if keyword in path:
                return label
        raise ValueError('cannot infer class label from path: %r' % (path,))

    def __getitem__(self, index):
        path = self.img_path[index]
        label = self._label_from_path(path)

        # Close the file handle promptly and force three channels so that
        # grayscale / RGBA / palette images still match a 3-channel Normalize.
        with Image.open(path) as raw:
            img = raw.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)

        # Explicit int64: the platform default int is 32-bit on Windows,
        # which CrossEntropyLoss rejects as a target dtype.
        return img, torch.tensor(label, dtype=torch.long)

    def __len__(self):
        return len(self.img_path)

In [None]:
class XunFeiNet(nn.Module):
    """ResNet-50 backbone with a freshly initialised linear classification head.

    Args:
        num_classes: number of output logits (default 4, matching the four
            labels produced by the dataset).
        pretrained: forwarded to ``torchvision.models.resnet50``; ``True``
            (the default) loads torchvision's pretrained weights.

    The backbone is stored as ``self.resnet`` so state_dict keys keep the
    ``resnet.`` prefix.
    """

    def __init__(self, num_classes=4, pretrained=True):
        super(XunFeiNet, self).__init__()
        model = models.resnet50(pretrained=pretrained)
        model.avgpool = nn.AdaptiveAvgPool2d(1)
        # Size the new head from the backbone rather than hard-coding 2048.
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        self.resnet = model

    def forward(self, img):
        """Return the raw class logits for a batch of images."""
        return self.resnet(img)

In [54]:
def train(train_loader, model, criterion, optimizer):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        train_loader: iterable yielding ``(inputs, targets)`` batches.
        model: ``nn.Module`` to optimise; it is switched to train mode.
        criterion: callable ``criterion(output, target)`` returning a scalar loss.
        optimizer: optimizer that updates ``model``'s parameters.

    Returns:
        float: sum of the batch losses divided by the number of batches.

    Raises:
        ValueError: if ``train_loader`` yields no batches.
    """
    model.train()

    # Send batches to wherever the model actually lives, so the function keeps
    # working after the model has been moved independently of the global
    # `device`. The global is only the fallback for parameter-less models.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    train_loss = 0.0
    num_batches = 0
    for step, (images, target) in enumerate(train_loader):
        images = images.to(run_device)
        target = target.to(run_device)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() synchronises with the device; read it once per batch.
        batch_loss = loss.item()
        if step % 100 == 0:
            print('Train loss', batch_loss)

        train_loss += batch_loss
        num_batches += 1

    if num_batches == 0:
        raise ValueError('train_loader yielded no batches')
    return train_loss / num_batches
            
def validate(val_loader, model, criterion=None):
    """Return the top-1 accuracy of ``model`` over ``val_loader.dataset``.

    Args:
        val_loader: loader yielding ``(inputs, targets)`` batches; must expose
            a sized ``.dataset`` attribute.
        model: ``nn.Module`` to evaluate; it is switched to eval mode.
        criterion: unused. Kept (now optional) so existing three-argument
            call sites keep working.

    Returns:
        float: number of correct predictions divided by ``len(val_loader.dataset)``.

    Raises:
        ValueError: if ``val_loader.dataset`` is empty.
    """
    model.eval()

    num_samples = len(val_loader.dataset)
    if num_samples == 0:
        raise ValueError('val_loader.dataset is empty')

    # Follow the model's own device; the global `device` is only a fallback
    # for models without parameters.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    correct = 0
    with torch.no_grad():
        for images, target in val_loader:
            images = images.to(run_device)
            target = target.to(run_device)

            # Only the arg-max class is needed; no loss is computed here.
            output = model(images)
            correct += (output.argmax(1) == target).sum().item()

    return correct / num_samples

def predict(test_loader, model, criterion=None):
    """Run ``model`` over ``test_loader`` and return all outputs stacked row-wise.

    Args:
        test_loader: iterable yielding ``(inputs, targets)`` batches; the
            targets are ignored.
        model: ``nn.Module`` to evaluate; it is switched to eval mode.
        criterion: unused. Kept (now optional) for backward compatibility.

    Returns:
        numpy.ndarray: the per-batch model outputs joined with ``np.vstack``.

    Raises:
        ValueError: raised by ``np.vstack`` when the loader yields no batches.
    """
    model.eval()

    # Follow the model's own device; the global `device` is only a fallback
    # for models without parameters.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    test_pred = []
    with torch.no_grad():
        for images, _ in test_loader:
            output = model(images.to(run_device))
            # Under no_grad the output carries no graph, so it can be moved
            # to host memory and converted directly.
            test_pred.append(output.cpu().numpy())

    return np.vstack(test_pred)

In [55]:
# Training split: every path except the last 500, with random crop/flip
# augmentation applied on the fly.
train_loader = torch.utils.data.DataLoader(
    XunFeiDataset(
        train_path[:-500],
        transforms.Compose([
            transforms.Resize(256),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])),
    batch_size=15, shuffle=True, num_workers=4, pin_memory=False
)

# Validation split: the last 500 paths. The pipeline is deterministic
# (resize + centre crop, no random augmentation) so the reported accuracy is
# repeatable and comparable from epoch to epoch.
val_loader = torch.utils.data.DataLoader(
    XunFeiDataset(
        train_path[-500:],
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])),
    batch_size=30, shuffle=False, num_workers=1, pin_memory=False
)

In [56]:
model = XunFeiNet()
model = model.to(device)
# Place the loss on the selected device instead of calling .cuda(), which
# raises on machines without CUDA even though `device` falls back to the CPU.
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

In [57]:
# Train for 10 epochs; after each one print the mean training loss
# followed by the accuracy on the validation loader.
for _  in range(10):
    train_loss = train(train_loader, model, criterion, optimizer)
    val_acc  = validate(val_loader, model, criterion)
    
    print(train_loss, val_acc)

Train loss 1.5940135717391968
Train loss 1.1077581644058228
1.1705250399014384 0.716
Train loss 0.9211582541465759
Train loss 0.7091454267501831
0.8290592757696958 0.83
Train loss 0.6219263076782227
Train loss 0.47454729676246643
0.6132681077903079 0.828
Train loss 0.45259857177734375
Train loss 0.5992544293403625
0.5223359434106916 0.864
Train loss 0.560684084892273
Train loss 0.24091745913028717
0.4466889486792161 0.87
Train loss 0.471619188785553
Train loss 0.3848036527633667
0.3995219959502982 0.864
Train loss 0.3464297354221344
Train loss 0.33798450231552124
0.3911478723754588 0.886
Train loss 0.3078201711177826
Train loss 0.36544033885002136
0.3732020866855518 0.908
Train loss 0.8368951082229614
Train loss 0.23293063044548035
0.35171481525314224 0.898
Train loss 0.245680570602417
Train loss 0.29640594124794006
0.3521484049785997 0.916


In [59]:
# Move the model to the CPU before saving, so the state_dict written to
# 'model.pt' holds CPU tensors (presumably so it can be loaded by the
# CPU-only inference script).
model = model.to('cpu')
torch.save(model.state_dict(), 'model.pt')

run.py 内容如下：

```python
import os, sys, glob, argparse
import pandas as pd
import numpy as np
from tqdm import tqdm

import cv2
from PIL import Image
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold

import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

# Inference always runs on the CPU; no GPU availability check is done here.
device = torch.device("cpu")

class XunFeiDataset(Dataset):
    """Image dataset whose class label is derived from each file path.

    Args:
        img_path: sequence of image file paths.
        transform: optional callable applied to the loaded PIL image
            (e.g. a torchvision ``Compose``); ``None`` returns the PIL image.

    Each item is ``(image, label)`` where ``label`` is a 0-d int64 tensor.
    Paths containing no class keyword (e.g. unlabelled test images) get the
    placeholder label 0.
    """

    # (substring, class index) pairs, tested in this order against the path.
    _LABEL_KEYWORDS = (
        ('powdery_mildew', 0),
        ('healthy', 1),
        ('rust', 2),
        ('scab', 3),
    )

    def __init__(self, img_path, transform=None):
        self.img_path = img_path
        self.transform = transform

    @classmethod
    def _label_from_path(cls, path):
        """Return the index of the first keyword found in ``path``, else 0."""
        for keyword, label in cls._LABEL_KEYWORDS:
            if keyword in path:
                return label
        return 0

    def __getitem__(self, index):
        path = self.img_path[index]

        # Close the file handle promptly and force three channels so that
        # grayscale / RGBA / palette images still match a 3-channel Normalize.
        with Image.open(path) as raw:
            img = raw.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)

        # Explicit int64 instead of the platform-dependent default int.
        label = self._label_from_path(path)
        return img, torch.tensor(label, dtype=torch.long)

    def __len__(self):
        return len(self.img_path)
    
class XunFeiNet(nn.Module):
    """ResNet-50 backbone with a linear classification head.

    Args:
        num_classes: number of output logits (default 4, matching the four
            class names used for the predictions).
        pretrained: forwarded to ``torchvision.models.resnet50``. Defaults to
            ``False`` because the weights are expected to come from a saved
            state_dict loaded after construction.

    The backbone is stored as ``self.resnet`` so state_dict keys keep the
    ``resnet.`` prefix.
    """

    def __init__(self, num_classes=4, pretrained=False):
        super(XunFeiNet, self).__init__()
        model = models.resnet50(pretrained=pretrained)
        model.avgpool = nn.AdaptiveAvgPool2d(1)
        # Size the head from the backbone rather than hard-coding 2048.
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        self.resnet = model

    def forward(self, img):
        """Return the raw class logits for a batch of images."""
        return self.resnet(img)
    
def predict(test_loader, model):
    """Run ``model`` over ``test_loader`` and return all outputs stacked row-wise.

    Args:
        test_loader: iterable yielding ``(inputs, targets)`` batches; the
            targets are ignored.
        model: ``nn.Module`` to evaluate; it is switched to eval mode.

    Returns:
        numpy.ndarray: the per-batch model outputs joined with ``np.vstack``.

    Raises:
        ValueError: raised by ``np.vstack`` when the loader yields no batches.
    """
    model.eval()

    # Follow the model's own device; the global `device` is only a fallback
    # for models without parameters.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    test_pred = []
    with torch.no_grad():
        for images, _ in test_loader:
            output = model(images.to(run_device))
            # Under no_grad the output carries no graph, so it can be moved
            # to host memory and converted directly.
            test_pred.append(output.cpu().numpy())

    return np.vstack(test_pred)


# Sorted so that the rows of the result file come out in a stable order.
test_path = sorted(glob.glob('/work/data/leafs-test-dataset/*'))

test_loader = torch.utils.data.DataLoader(
    XunFeiDataset(
        test_path,
        transforms.Compose([
            transforms.Resize(224),
            # Resize(224) only fixes the shorter side. Crop to a square so
            # every tensor has the same shape and a batch can be stacked even
            # when the images have different aspect ratios.
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])),
    batch_size=30, shuffle=False, num_workers=1, pin_memory=False
)

model = XunFeiNet()
# map_location makes loading independent of the device the checkpoint's
# tensors were saved from.
model.load_state_dict(torch.load('./model/model.pt', map_location=device))
model = model.to(device)

# Logits -> arg-max class index -> class name.
test_pred = predict(test_loader, model)
test_pred = test_pred.argmax(1)
class_names = np.array(['powdery_mildew', 'healthy', 'rust', 'scab'])
test_pred = class_names[test_pred]

pd.DataFrame({
    # basename rather than split('/') so the file name is extracted
    # correctly regardless of the platform's path separator.
    'uuid': [os.path.basename(x) for x in test_path],
    'label': test_pred
}).to_csv('/work/output/result.csv', index=False)
```