In [1]:
# -*- coding: utf-8 -*-
import os, sys, glob, argparse
import pandas as pd
import numpy as np
from tqdm import tqdm

import time, datetime
import pdb, traceback

import cv2
from PIL import Image
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold

import torch
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

In [36]:
# Collect every training image (one sub-folder per class under train/).
# glob returns files in filesystem order, which varies between machines, so the
# list is sorted first and then shuffled with a fixed seed. That makes the
# resulting order - and therefore the K-fold membership built from it -
# reproducible across runs.
train_jpg = sorted(glob.glob('./Datawhale_人脸情绪识别_数据集/train/*/*'))
np.random.RandomState(233).shuffle(train_jpg)
train_jpg = np.array(train_jpg)
    
class QRDataset(Dataset):
    """Image dataset whose label is derived from each file's parent folder name.

    Args:
        img_path: Indexable collection of image file paths.
        transform: Optional callable applied to the loaded RGB PIL image.

    Each item is a tuple ``(img, label)`` where ``label`` is a 0-dim int64
    tensor. Files that do not live in a known class folder but whose path
    contains ``'test'`` get the placeholder label 0.
    """

    # Class-folder name -> integer class index. Built once instead of on
    # every __getitem__ call.
    LBL_DICT = {'angry': 0,
                'disgusted': 1,
                'fearful': 2,
                'happy': 3,
                'neutral': 4,
                'sad': 5,
                'surprised': 6}

    def __init__(self, img_path, transform=None):
        self.img_path = img_path
        self.transform = transform

    def __getitem__(self, index):
        """Load image ``index`` and return ``(img, label_tensor)``.

        Raises:
            KeyError: if the parent folder is not a known class and the path
                does not look like a test file.
        """
        path = str(self.img_path[index])
        img = Image.open(path).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)

        # os.path handles the platform's separators, unlike split('/').
        folder = os.path.basename(os.path.dirname(path))
        if folder in self.LBL_DICT:
            # Look at the class folder first, so a training path that merely
            # contains the substring "test" (e.g. ".../latest/...") is not
            # silently labelled 0.
            lbl_int = self.LBL_DICT[folder]
        elif 'test' in path:
            # Unlabelled test image: dummy label so batches keep the same shape.
            lbl_int = 0
        else:
            raise KeyError(folder)

        return img, torch.tensor(lbl_int, dtype=torch.long)

    def __len__(self):
        return len(self.img_path)

In [29]:
class XunFeiNet(nn.Module):
    """ResNet-18 backbone with its final fully connected layer replaced.

    Args:
        num_classes: Number of output classes of the new head. Defaults to 7,
            the value that was previously hard-coded.
    """

    def __init__(self, num_classes=7):
        super(XunFeiNet, self).__init__()

        # NOTE(review): the positional True is presumably torchvision's
        # `pretrained` flag; newer releases prefer `weights=...` - confirm
        # against the installed version.
        model = models.resnet18(True)
        # Global pooling to 1x1 regardless of the input's spatial size.
        model.avgpool = nn.AdaptiveAvgPool2d(1)
        # Read the feature width from the existing head instead of assuming 512.
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        self.resnet = model

    def forward(self, img):
        """Return the backbone's output for the batch ``img``."""
        return self.resnet(img)

In [38]:
def validate(val_loader, model, criterion):
    """Evaluate top-1 accuracy of ``model`` over ``val_loader``.

    Args:
        val_loader: Iterable yielding ``(images, target)`` batches.
        model: Network to evaluate; it is switched to eval mode.
        criterion: Unused. Kept so existing call sites keep working.

    Returns:
        Fraction of correctly classified samples as a float, or 0.0 when the
        loader yields no samples.
    """
    model.eval()

    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    correct = 0
    total = 0
    with torch.no_grad():
        for images, target in val_loader:
            images = images.to(device)
            target = target.to(device)

            output = model(images)
            # Count samples rather than averaging per-batch accuracies, so a
            # smaller final batch does not get the same weight as a full one.
            correct += (output.argmax(1) == target).sum().item()
            total += target.numel()

    val_acc = correct / total if total else 0.0
    print(' * Val Acc@1 {0}'.format(val_acc))
    return val_acc

def predict(test_loader, model, tta=10):
    """Run ``model`` over ``test_loader`` ``tta`` times and sum the outputs.

    Args:
        test_loader: Iterable yielding ``(images, target)`` batches; the
            target is ignored.
        model: Network used for inference; it is switched to eval mode.
        tta: Number of passes over the loader. With random transforms in the
            loader's dataset each pass sees differently augmented images.

    Returns:
        NumPy array with one row per sample holding the element-wise SUM (not
        the mean) of the model outputs over all passes, or ``None`` when
        ``tta`` is 0.
    """
    model.eval()

    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    test_pred_tta = None
    with torch.no_grad():
        for _ in range(tta):
            # The dummy target is never used, so it is not moved to the device.
            batch_outputs = [
                model(images.to(device)).detach().cpu().numpy()
                for images, _target in test_loader
            ]
            test_pred = np.vstack(batch_outputs)

            if test_pred_tta is None:
                test_pred_tta = test_pred
            else:
                test_pred_tta += test_pred

    return test_pred_tta

def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        train_loader: Iterable yielding ``(images, target)`` batches.
        model: Network to optimise; it is switched to train mode.
        criterion: Loss called as ``criterion(output, target)``.
        optimizer: Optimizer stepped once per batch.
        epoch: Unused. Kept so existing call sites keep working.

    Prints the running mean of the per-batch accuracies every 100 batches.
    """
    model.train()

    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    acc1 = []
    for i, (images, target) in enumerate(train_loader):
        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)
        output = model(images)
        loss = criterion(output, target)

        acc1.append((output.argmax(1) == target).float().mean().item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            print('Train: {0}'.format(np.mean(acc1)))

In [34]:
# 5-fold split of the training paths. Only the first fold is actually trained
# (see the `break` at the bottom of the loop).
skf = KFold(n_splits=5, random_state=233, shuffle=True)
for flod_idx, (train_idx, val_idx) in enumerate(skf.split(train_jpg, train_jpg)):

    # Training images get light geometric/colour augmentation before normalisation.
    train_transform = transforms.Compose([
        transforms.RandomAffine(10),
        transforms.ColorJitter(hue=.05, saturation=.05),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    train_set = QRDataset(train_jpg[train_idx], train_transform)
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=50, shuffle=True, num_workers=5, pin_memory=True
    )

    # Validation images are only converted and normalised (no resize or crop),
    # and at most the first 1000 paths of the fold are evaluated.
    val_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    val_set = QRDataset(train_jpg[val_idx][:1000], val_transform)
    val_loader = torch.utils.data.DataLoader(
        val_set, batch_size=10, shuffle=False, num_workers=5, pin_memory=True
    )

    model = XunFeiNet().cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), 0.003)

    best_acc = 0.0
    for epoch in range(10):
        print('\nEpoch: ', epoch)

        train(train_loader, model, criterion, optimizer, epoch)
        val_acc = validate(val_loader, model, criterion)

        # Keep only the checkpoint with the best validation accuracy so far.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), './resnet18_fold{0}.pt'.format(flod_idx))

    # Stop after the first fold.
    break


Epoch:  0
Train: 0.07999999821186066
Train: 0.22099009348024237
Train: 0.24338307917414614
Train: 0.2624584656716185
Train: 0.27855360942737123
 * Val Acc@1 0.34900000631809236

Epoch:  1
Train: 0.41999998688697815
Train: 0.36831682035238433
Train: 0.3699502394122271
Train: 0.37614616974445675
Train: 0.38334163566331314
 * Val Acc@1 0.4400000077486038

Epoch:  2
Train: 0.3400000035762787
Train: 0.41999998924755816
Train: 0.4163183962824333
Train: 0.42019932412625943
Train: 0.4243890156323773
 * Val Acc@1 0.48600000627338885

Epoch:  3
Train: 0.4399999976158142
Train: 0.45108909506608946
Train: 0.44935322059920774
Train: 0.45202656481353154
Train: 0.4526184410079756
 * Val Acc@1 0.510000008046627

Epoch:  4
Train: 0.3799999952316284
Train: 0.47148513587394564
Train: 0.47343282201396886
Train: 0.47362124895138596
Train: 0.4766084647312426
 * Val Acc@1 0.5310000072419644

Epoch:  5
Train: 0.3999999761581421
Train: 0.498613845888931
Train: 0.4941293383712199
Train: 0.49508304176140466
Tra

In [39]:
# Test image paths in sorted order, so predictions line up with file names.
test_jpg = np.sort(np.array(glob.glob('./Datawhale_人脸情绪识别_数据集/test/*')))

# Random flips are kept at test time: predict() runs several passes over the
# loader, so each pass sees a differently flipped version of every image.
test_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
test_set = QRDataset(test_jpg, test_transform)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=50, shuffle=False, num_workers=5, pin_memory=True
)

# Restore the weights saved for fold 0 and run 5 passes over the test set.
model = XunFeiNet().cuda()
fold0_state = torch.load('resnet18_fold0.pt')
model.load_state_dict(fold0_state)
test_pred = predict(test_loader, model, 5)


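# NOTE: disabled leftover export; it appears to come from a different, binary
# (pos/neg) task and does not match the 7 emotion classes used in this notebook.
# The submission is built from `submit_df` in the following cells instead.
# test_csv = pd.DataFrame()
# test_csv['ID'] = list(range(0, 3082))
# test_csv['Label'] = np.argmax(test_pred, 1)
# test_csv['Label'] = test_csv['Label'].map({1:'pos', 0:'neg'})
# test_csv.to_csv('tmp.csv', index=None)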

In [41]:
# Class names indexed by class id; the order must match the folder -> index
# mapping used by QRDataset when the model was trained.
cls_name = np.array(['angry', 'disgusted', 'fearful', 'happy','neutral', 'sad', 'surprised'])
# One row per test file: its path and the name of the highest-scoring class.
submit_df = pd.DataFrame({'name': test_jpg, 'label': cls_name[test_pred.argmax(1)]})
# Keep only the file name. os.path.basename honours the platform's path
# separator, whereas split('/') leaves directory parts in the name when glob
# returns backslash-separated paths on Windows.
submit_df['name'] = submit_df['name'].apply(os.path.basename)

In [42]:
# Order the rows by file name, then write the submission without the index column.
submit_df = submit_df.sort_values('name')
submit_df.to_csv(
    'pytorch_submit.csv',
    index=None,
)