In [1]:
import os, sys, glob, argparse
import pandas as pd
import numpy as np
from tqdm import tqdm

%pylab inline

import cv2
from PIL import Image
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold

import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

Populating the interactive namespace from numpy and matplotlib


In [19]:
# Load the training annotations (space-separated, with `file` and `label` columns).
train_df = pd.read_csv('./LED_part1_train/train.csv', sep=' ')
# Turn the file id into the on-disk image name, zero-padded to six characters
# (e.g. 12 -> '000012.bmp').
train_df['file'] = train_df['file'].apply(lambda x: str(x).zfill(6) + '.bmp')
# Shuffle with a fixed seed: the validation set is later taken from the last
# 500 rows, so an unseeded shuffle would change the split on every run.
train_df = train_df.sample(frac=1.0, random_state=0)

train_path = './LED_part1_train/imgs/' + train_df['file']
train_label = train_df['label']

In [52]:
def _submission_id_to_image_name(sample_id):
    """Map a submission id of the form '<prefix>_<number>' to '<number, zero-padded to 6>.bmp'."""
    numeric_part = sample_id.split('_')[1]
    return str(numeric_part).zfill(6) + '.bmp'


# Read the sample-submission CSV ('提交示例' = 'submission example').
test_df = pd.read_csv('提交示例.csv')
test_df['path'] = test_df['file'].apply(_submission_id_to_image_name)
# NOTE(review): test images are looked up in the *train* image directory -- confirm this is intended.
test_path = './LED_part1_train/imgs/' + test_df['path']

In [60]:
# Leakage check: image paths that appear in both the test and the training set.
# Paths must be compared with paths; intersecting with `train_label` (class ids)
# can never match a path string and therefore always yields an empty set.
set(test_path) & set(train_path)

set()

In [13]:
class XunFeiDataset(Dataset):
    """Dataset that reads images from disk with OpenCV and pairs them with labels.

    Args:
        img_path: indexable collection of image file paths.
        img_label: indexable collection of labels, aligned with ``img_path``.
        transform: optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``'image'`` entry; ``None`` disables it.
    """

    def __init__(self, img_path, img_label, transform=None):
        self.img_path = img_path
        self.img_label = img_label
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        The image is a float32 numpy array in channel-first layout; the label
        is an int64 torch tensor.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        path = self.img_path[index]
        img = cv2.imread(path)
        if img is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or unreadable; fail here with the offending path.
            raise FileNotFoundError('Could not read image: {!r}'.format(path))
        img = img.astype(np.float32)

        # Scale by 1/255, then shift by -1.
        # NOTE(review): for 8-bit inputs this yields values in [-1, 0], and the
        # transform below receives those negative floats -- confirm intended.
        img /= 255.0
        img -= 1

        if self.transform is not None:
            img = self.transform(image=img)['image']
        # Move the channel axis first: (H, W, C) -> (C, H, W).
        img = img.transpose([2, 0, 1])
        # Force int64 so the label dtype does not depend on the platform's
        # default integer width (CrossEntropyLoss expects long class indices).
        label = torch.as_tensor(np.asarray(self.img_label[index]), dtype=torch.long)
        return img, label

    def __len__(self):
        """Number of samples, i.e. the number of image paths."""
        return len(self.img_path)

In [14]:
class XunFeiNet(nn.Module):
    """ResNet-18 classifier whose final layer is replaced for this task.

    Args:
        num_classes: width of the output layer. Defaults to 4, the value that
            was previously hard-coded, so ``XunFeiNet()`` behaves as before.
    """

    def __init__(self, num_classes=4):
        super(XunFeiNet, self).__init__()

        # Start from torchvision's ResNet-18 with pretrained weights.
        backbone = models.resnet18(pretrained=True)
        # Adaptive pooling to a 1x1 map, so the head does not depend on the
        # spatial size of the input crops.
        backbone.avgpool = nn.AdaptiveAvgPool2d(1)
        # Read the feature width from the layer being replaced rather than
        # hard-coding 512.
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
        self.resnet = backbone

    def forward(self, img):
        """Return the backbone's output for the image batch ``img``."""
        return self.resnet(img)

In [15]:
def train(train_loader, model, criterion, optimizer):
    """Run one pass over ``train_loader`` and return the mean per-batch loss.

    Each batch is moved to the GPU, the loss is back-propagated and the
    optimizer takes one step. The batch loss is printed every 20th iteration.
    """
    model.train()
    running_loss = 0.0
    for step, (images, labels) in enumerate(train_loader):
        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)

        # forward pass
        batch_loss = criterion(model(images), labels)

        # backward pass and parameter update
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        if step % 20 == 0:
            print(loss_value)
        running_loss += loss_value

    return running_loss / len(train_loader)
            
def validate(val_loader, model, criterion):
    """Return the top-1 accuracy of ``model`` over ``val_loader``.

    Args:
        val_loader: iterable of ``(input, target)`` batches that also exposes
            a ``dataset`` attribute supporting ``len()``.
        model: callable network; switched to eval mode.
        criterion: unused, kept so existing call sites keep working.

    Returns:
        Number of correct predictions divided by ``len(val_loader.dataset)``.
    """
    model.eval()

    correct = 0
    # The former `end = time.time()` was never read and relied on a `time`
    # module that is not imported; it has been removed, along with the loss
    # value that was computed but never used.
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.cuda()
            labels = labels.cuda()

            logits = model(images)
            # argmax over dim 1 gives the predicted class index per sample
            correct += (logits.argmax(1) == labels).sum().item()

    return correct / len(val_loader.dataset)

def predict(test_loader, model, criterion):
    """Run ``model`` over ``test_loader`` and return the stacked raw outputs.

    Args:
        test_loader: iterable of ``(input, target)`` batches; targets are ignored.
        model: callable network; switched to eval mode.
        criterion: unused, kept so existing call sites keep working.

    Returns:
        A numpy array with the per-batch outputs stacked along axis 0, in
        loader order.
    """
    model.eval()

    batch_outputs = []
    # The former `end = time.time()` relied on an unimported `time` module and
    # was never read; the unused accuracy counter and the GPU transfer of the
    # ignored targets were dropped as well.
    with torch.no_grad():
        for images, _ in test_loader:
            output = model(images.cuda())
            batch_outputs.append(output.detach().cpu().numpy())

    return np.vstack(batch_outputs)

In [55]:
import albumentations as A

# Training split: every row except the last 500, with augmentation.
train_transform = A.Compose([
    A.RandomCrop(130, 450),
    A.HorizontalFlip(p=0.5),
    # Contrast-only jitter. `A.RandomContrast` is deprecated in albumentations;
    # RandomBrightnessContrast with brightness_limit=0 is its documented equivalent.
    A.RandomBrightnessContrast(brightness_limit=0, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
])

train_loader = torch.utils.data.DataLoader(
    XunFeiDataset(train_path.values[:-500], train_label.values[:-500], train_transform),
    batch_size=10, shuffle=True, num_workers=1, pin_memory=False
)

# Validation split: the last 500 rows, cropped but otherwise un-augmented.
# NOTE(review): the crop is random, so validation accuracy can vary between runs.
val_transform = A.Compose([A.RandomCrop(130, 450)])
val_dataset = XunFeiDataset(
    train_path.values[-500:],
    train_label.values[-500:],
    val_transform,
)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=1,
    pin_memory=False,
)

# Test set: labels are unknown, so a dummy label of 0 is attached to every image.
# The random crop means each pass over this loader sees different crops.
test_transform = A.Compose([A.RandomCrop(130, 450)])
test_dataset = XunFeiDataset(
    test_path,
    [0] * len(test_path),
    test_transform,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=1,
    pin_memory=False,
)


In [33]:
# Build the network, the loss and a plain SGD optimizer (learning rate 0.01) on the GPU.
model = XunFeiNet().to('cuda')
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [34]:
# Three epochs; after each one report the mean training loss and validation accuracy.
for epoch in range(3):
    train_loss = train(train_loader, model, criterion, optimizer)
    val_acc = validate(val_loader, model, criterion)
    print(train_loss, val_acc)

2.1781413555145264
1.0419951677322388
1.0792276859283447
0.30832964181900024
0.8367191553115845
1.3866803646087646
1.5160852670669556
1.6955101490020752
0.7909185290336609
0.27210813760757446
0.1143868938088417
0.45553913712501526
0.5953348278999329
0.45054784417152405
0.33823224902153015
0.7402018904685974
0.3970138430595398
0.6925562620162964
1.2593271732330322
0.721782386302948
0.8106012187607876 0.882
0.862005352973938
0.7307425737380981
0.5787581205368042
0.5375865697860718
0.962967038154602
0.49011683464050293
0.13052770495414734
1.891213059425354
0.8977954983711243
0.5088403820991516
0.6014495491981506
1.0218145847320557
0.5123348832130432
0.5912672281265259
1.1143639087677002
0.24420467019081116
1.0526559352874756
0.4181271195411682
0.12599065899848938
1.1317453384399414
0.6991669833079561 0.828
1.0638439655303955
0.9436470866203308
0.2173367291688919
0.715546727180481
0.6696562767028809
0.5196341276168823
0.4729689061641693
1.4259179830551147
0.7534366250038147
1.1594340801239

In [56]:
# Sum the raw model outputs of 10 passes over the test loader; the first pass
# initialises the accumulator and the remaining ones are added in place.
tta_rounds = 10
pred = predict(test_loader, model, criterion)
for _ in range(tta_rounds - 1):
    pred += predict(test_loader, model, criterion)

In [57]:
# Pair each test image's file name (the last path component) with its predicted class.
# NOTE(review): `submit` is not used by the later cells visible here.
image_names = [path.rsplit('/', 1)[-1] for path in test_path]
predicted_labels = pred.argmax(1)
submit = pd.DataFrame({'image': image_names, 'label': predicted_labels})

In [59]:
# Start from a fresh copy of the sample submission (without the helper `path`
# column added earlier), fill in the predicted classes and write it out
# without the index column.
test_df = pd.read_csv('提交示例.csv')
test_df['label'] = pred.argmax(1)

test_df.to_csv('submit2.csv', index=False)