In [1]:
import pandas as pd
from torch import np # Torch wrapper for Numpy

import os
import time
import copy
from PIL import Image

import torch
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import KFold

In [2]:
# Root folder of the dataset. Set the AMAZON_PLANET_DATA_DIR environment
# variable to point somewhere else without editing the notebook; the default
# is the location this notebook was originally run from.
DATA_DIR = os.environ.get('AMAZON_PLANET_DATA_DIR', '/home/user/data/amazon_planet')
# Sub-folder of DATA_DIR that holds the training images.
TRAIN_DIR = 'train-jpg'
# CSV (inside DATA_DIR) with the `image_name` and `tags` columns.
TRAIN_DATA = 'train_v2.csv'
# Sub-folder of DATA_DIR for the test images (not used in the cells shown here).
TEST_DIR = 'test-jpg'
# Extension appended to every `image_name` to obtain the file name on disk.
IMG_EXT = '.jpg'

In [3]:
# class KaggleAmazonDataset(Dataset):

#     def __init__(self, df_csv, img_path, img_ext, transform=None):
#         self.mlb = MultiLabelBinarizer()
#         self.img_path = img_path
#         self.img_ext = img_ext
#         self.transform = transform

#         self.X_train = df_csv['image_name']
#         self.y_train = self.mlb.fit_transform(df_csv['tags'].str.split()).astype(np.float32)

#     def __getitem__(self, index):
#         img = Image.open(os.path.join(self.img_path, self.X_train.iloc[index] + self.img_ext))
#         img = img.convert('RGB')
#         if self.transform is not None:
#             img = self.transform(img)
        
#         label = torch.from_numpy(self.y_train[index])
#         return img, label

#     def __len__(self):
#         return len(self.X_train.index)

In [4]:
class KaggleAmazonDatasetInMem(Dataset):
    """Dataset over images and label arrays that are already held in memory.

    Args:
        imgs: indexable collection of samples; each one is handed to
            ``transform`` when a transform is given.
        labels: indexable collection of NumPy arrays, aligned with ``imgs``
            (``labels[i]`` belongs to ``imgs[i]``).
        transform: optional callable applied to a sample each time it is
            fetched; ``None`` returns the sample untouched.
    """

    def __init__(self, imgs, labels, transform=None):
        self.imgs, self.labels, self.transform = imgs, labels, transform

    def __len__(self):
        """Number of samples, taken from ``imgs``."""
        return len(self.imgs)

    def __getitem__(self, index):
        """Return ``(sample, label_tensor)`` for position ``index``."""
        sample = self.imgs[index]
        target = self.labels[index]

        transformed = sample if self.transform is None else self.transform(sample)

        # from_numpy wraps the label array as a tensor without copying it.
        return transformed, torch.from_numpy(target)

In [5]:
# Per-channel mean / std used to normalise tensors in both pipelines.
# NOTE(review): these look like the usual ImageNet statistics expected by
# torchvision's pretrained models - confirm.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

# Training: random crop + random horizontal flip for augmentation.
_train_steps = [
    transforms.RandomSizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
]

# Validation: deterministic resize followed by a centre crop.
_val_steps = [
    transforms.Scale(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
]

# Keyed by phase name so a dataset can pick its pipeline with data_transforms[phase].
data_transforms = {
    'train': transforms.Compose(_train_steps),
    'val': transforms.Compose(_val_steps),
}

In [6]:
train_img_dir = os.path.join(DATA_DIR, TRAIN_DIR)
csv_df = pd.read_csv(os.path.join(DATA_DIR, TRAIN_DATA))

# Fail fast if the CSV references an image that is not on disk, rather than
# discovering it later while the images are being loaded.
all_images_present = all(
    os.path.isfile(os.path.join(train_img_dir, name + IMG_EXT))
    for name in csv_df['image_name']
)
assert all_images_present, "Some images referenced in the CSV file were not found"
csv_df.head()

Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road


In [7]:
# kf = KFold(10, shuffle=True, random_state=1024)

# for train, val in kf.split(csv_df):
#     split = {'train': train, 'val': val}
#     dsets = {x: KaggleAmazonDataset(csv_df.iloc[split[x]], os.path.join(DATA_DIR, TRAIN_DIR),
#         IMG_EXT, data_transforms[x]) for x in ['train', 'val']}
    
#     dset_loaders = {x: torch.utils.data.DataLoader(dsets[x], batch_size=256, shuffle=True, num_workers=1, pin_memory=True)
#         for x in ['train', 'val']}
    
#     dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']}
    
#     for x in ['train', 'val']:
#         print len(dsets[x]) 
# #     dset_classes = dsets['train'].classes
#     break

In [8]:
# Fit the tag encoder ONCE on the whole CSV. Fitting it separately per split
# (fit_transform inside the loop) lets each split derive its own tag set, so a
# tag missing from one split would change the number/order of label columns
# and the train and validation targets would no longer line up.
mlb = MultiLabelBinarizer()
mlb.fit(csv_df['tags'].str.split())

kf = KFold(10, shuffle=True, random_state=1024)

# Only the first of the 10 folds is used: ~90% of rows for training and ~10%
# for validation.
train, val = next(iter(kf.split(csv_df)))
split = {'train': train, 'val': val}

dsets = {}
for fold in ['train', 'val']:
    data = csv_df.iloc[split[fold]]

    # Decode every image of the split up front so the datasets serve samples
    # straight from memory.
    imgs = []
    for image_name in data['image_name']:
        img = Image.open(os.path.join(DATA_DIR, TRAIN_DIR, image_name + IMG_EXT))
        imgs.append(img.convert('RGB'))

    # Multi-hot float32 matrix, one row per image and one column per tag in
    # mlb.classes_.
    y = mlb.transform(data['tags'].str.split()).astype(np.float32)

    dsets[fold] = KaggleAmazonDatasetInMem(imgs, y, data_transforms[fold])

# Shuffle only the training loader; the order of validation batches does not
# affect the aggregated metrics.
dset_loaders = {
    x: torch.utils.data.DataLoader(dsets[x], batch_size=128, shuffle=(x == 'train'),
                                   num_workers=16, pin_memory=True)
    for x in ['train', 'val']
}

dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']}

for x in ['train', 'val']:
    print(len(dsets[x]))

36431
4048


In [9]:
# Whether CUDA is usable; later cells read this flag to decide if the model
# and each batch are moved to the GPU.
use_gpu = torch.cuda.is_available()
# Single-argument call form prints the same text under Python 2 and Python 3.
print(use_gpu)

True


In [10]:
def train_model(model, criterion, optimizer, lr_scheduler, num_epochs=25):
    """Train ``model`` and return the version that scored best on validation.

    Every epoch runs a training pass over ``dset_loaders['train']`` (forward,
    backward, optimizer step) followed by a forward-only pass over
    ``dset_loaders['val']``. The function reads the notebook globals
    ``dset_loaders`` and ``use_gpu``.

    The loaders yield multi-hot float label vectors (one column per tag), so
    accuracy is computed element-wise: a logit above 0 (i.e. sigmoid above
    0.5) counts as "tag present", and the score is the fraction of
    (sample, tag) slots predicted correctly.

    Args:
        model: network to train; called as ``model(inputs)``.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch (the default
            reduction of the ``torch.nn`` losses); the epoch loss is the
            sample-weighted average of those batch means.
        optimizer: optimizer holding the parameters of ``model``.
        lr_scheduler: callable ``(optimizer, epoch) -> optimizer`` invoked at
            the start of every training phase.
        num_epochs: number of epochs to run.

    Returns:
        A deep copy of ``model`` taken after the epoch with the highest
        validation accuracy, or ``model`` itself if validation accuracy never
        rose above 0.
    """
    since = time.time()

    best_model = model
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                optimizer = lr_scheduler(optimizer, epoch)
            # Switches layers such as batch-norm/dropout between train and eval behaviour.
            model.train(is_train)

            running_loss = 0.0
            running_corrects = 0
            n_samples = 0
            n_label_slots = 0

            for inputs, labels in dset_loaders[phase]:
                if use_gpu:
                    inputs, labels = inputs.cuda(), labels.cuda()
                inputs, labels = Variable(inputs), Variable(labels)

                # Gradients accumulate across backward() calls, so clear them per batch.
                optimizer.zero_grad()

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                # Weights are only updated during the training phase.
                if is_train:
                    loss.backward()
                    optimizer.step()

                # Scalar extraction differs between PyTorch generations:
                # `.item()` where available, otherwise the older `.data[0]`.
                loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]

                # Weight the batch-mean loss by the batch size so a smaller
                # final batch does not skew the epoch average.
                batch_size = inputs.size(0)
                running_loss += loss_value * batch_size
                n_samples += batch_size

                # Threshold the logits at 0 and compare tag by tag.
                preds = (outputs.data > 0).type_as(labels.data)
                running_corrects += torch.sum(preds == labels.data)
                n_label_slots += labels.data.numel()

            # max(..., 1) guards against an empty loader; float() forces true
            # division under Python 2 and unwraps a 0-dim tensor if sum returned one.
            epoch_loss = running_loss / max(n_samples, 1)
            epoch_acc = float(running_corrects) / max(n_label_slots, 1)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Snapshot the model whenever validation accuracy improves.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model = copy.deepcopy(model)

        # Blank separator line; print('') behaves the same on Python 2 and 3.
        print('')

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))
    return best_model

In [11]:
def exp_lr_scheduler(optimizer, epoch, init_lr=0.001, lr_decay_epoch=7):
    """Step-decay schedule: divide the learning rate by 10 every ``lr_decay_epoch`` epochs.

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts); the
            ``'lr'`` entry of every group is overwritten.
        epoch: zero-based index of the current epoch.
        init_lr: learning rate used during the first ``lr_decay_epoch`` epochs.
        lr_decay_epoch: number of epochs between two consecutive decays.

    Returns:
        The same ``optimizer`` object, updated in place.
    """
    decays_applied, epochs_since_decay = divmod(epoch, lr_decay_epoch)
    new_lr = init_lr * (0.1 ** decays_applied)

    # Announce the rate only on the first epoch of each plateau.
    if epochs_since_decay == 0:
        print('LR is set to {}'.format(new_lr))

    for group in optimizer.param_groups:
        group['lr'] = new_lr

    return optimizer

In [12]:
# Width of the replacement output layer; it must equal the length of the label
# vectors fed to the loss.
# NOTE(review): presumably the number of distinct tags in the training CSV - confirm.
NUM_LABELS = 17

# Start from a pretrained ResNet-18 and swap its final fully-connected layer
# for one with NUM_LABELS outputs.
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, NUM_LABELS)

if use_gpu:
    model_ft = model_ft.cuda()

# The targets are float multi-hot vectors (several tags can be active for one
# image). CrossEntropyLoss expects exactly one class index per sample and
# cannot consume them; MultiLabelSoftMarginLoss scores each tag independently
# from the raw logits.
criterion = nn.MultiLabelSoftMarginLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Fine-tune for 25 epochs. `model_ft` is rebound to whatever train_model
# returns (its best-on-validation snapshot), so `optimizer_ft` may no longer
# refer to the parameters of the object this name points to afterwards.
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    lr_scheduler=exp_lr_scheduler,
    num_epochs=25,
)

Epoch 0/24
----------
LR is set to 0.001
