In [1]:
import os
import glob
import shutil
import time
import argparse

In [2]:
import numpy as np
import pandas as pd
from PIL import Image

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torchnet.meter import AverageValueMeter, ClassErrorMeter

# Load data

## Global parameters

In [4]:
# Experiment configuration: every path and hyperparameter used by the
# cells below is read from this mapping.
args = dict(
    arch="resnet50",      # alternatives: resnet101, resnet152, inception_v3
    pretrained=True,      # start from pre-trained weights
    datadir="../data",    # folder holding the Kaggle train/test data
    cuda=False,
    optim="adam",         # alternatives: sgd, rmsprop
    epochs=90,
    batch_size=256,
    lr=1e-3,
    momentum=0.9,         # only read by the SGD optimizer
    weight_decay=1e-4,
    seed=7,
    workers=4,            # DataLoader worker processes
    nb_vals=450,          # number of images moved to the validation split
    nb_augs=10,           # number of augmented passes averaged at test time
)

In [5]:
# Expose the settings as attributes (args.lr, args.arch, ...).
# The isinstance guard keeps this cell re-runnable: once `args` is already a
# Namespace, unpacking it with ** again would raise a TypeError.
if isinstance(args, dict):
    args = argparse.Namespace(**args)

## Make dir

In [6]:
# Ensure the data directory exists; nothing happens when it is already there.
os.makedirs(args.datadir, exist_ok=True)

```python
# Seed for cuda
args.cuda = args.cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
```

In [7]:
# Data provided by Kaggle must be placed in datadir: the full training set
# under "train" and the stage-1 test images under "test_stg1".
traindir_full, testdir = (
    os.path.join(args.datadir, subdir) for subdir in ("train", "test_stg1")
)

In [8]:
# Everything this notebook generates (train/val split, checkpoints,
# submissions) is written below ../intermediate.
intermediate_path = os.path.join("..", "intermediate")

# The split folders carry the validation-set size in their name, so splits of
# different sizes live side by side.
traindir = os.path.join(intermediate_path, "train{}".format(args.nb_vals))
valdir = os.path.join(intermediate_path, "val{}".format(args.nb_vals))
submission_path = os.path.join(intermediate_path, "submissions")

In [9]:
# Sanity check: show the validation folder derived from intermediate_path
# and args.nb_vals.
print(valdir)

../intermediate/val450


In [10]:
# Path of the best checkpoint; the name records the split size, the number of
# test-time augmentations and the architecture.
model_best_filename = "model_best_{0}vals_{1}augs_{2}.pth.tar".format(args.nb_vals, args.nb_augs, args.arch)
model_best_filepath = os.path.join(intermediate_path, model_best_filename)

# Class names are the sub-folder names of the full training set. Only
# directories are kept so a stray file in traindir_full cannot become a class,
# and os.path.basename replaces the manual split on "/".
classes = sorted(
    os.path.basename(path)
    for path in glob.glob(os.path.join(traindir_full, "*"))
    if os.path.isdir(path)
)

In [11]:
# Create the folders that must exist up front.
# Deliberately NOT listed here:
#   * traindir / valdir -- the cells below populate them only when they do not
#     exist yet (`if not os.path.isdir(...)`); creating them empty here would
#     make those cells skip the copy and the validation split.
#   * model_best_filepath -- it is a checkpoint *file*; creating a directory at
#     that path breaks shutil.copyfile in save_checkpoint and the
#     os.path.isfile check before testing.
dir_list = [traindir_full, testdir, intermediate_path, submission_path]

for path in dir_list:
    os.makedirs(path, exist_ok=True)

In [12]:
# Work on a copy of the full training set so the original Kaggle data stays
# untouched when images are later moved into the validation folder.
# traindir_full is used instead of a hard-coded "../data/train" so that a
# different args.datadir is honoured.
if not os.path.isdir(traindir):
    shutil.copytree(traindir_full, traindir)

```python
#Use this to check the number of picture in folder
print(len(glob.glob(traindir_full+ '/ALB/*')))
print(len(glob.glob(traindir+ '/ALB/*')))
#return 1719
```

In [13]:
# Build the validation split once by moving args.nb_vals randomly chosen
# images from traindir to the same class sub-folder under valdir.
if not os.path.isdir(valdir):
    np.random.seed(args.seed)
    # glob order depends on the filesystem; sort first so the seeded shuffle
    # selects the same images on every machine.
    g = sorted(glob.glob(traindir + "/*/*.jpg"))
    shuf = np.random.permutation(g)  # randomly permute g
    # Slicing (instead of indexing up to nb_vals) tolerates nb_vals being
    # larger than the number of available images.
    for src in shuf[:args.nb_vals]:
        src = str(src)
        # Re-root the path under valdir. A plain str.replace("train", "val")
        # would also rewrite any other "train" in the path or file name.
        dst = os.path.join(valdir, os.path.relpath(src, traindir))
        os.renames(src, dst)  # creates the missing destination folders

```python
#Use this to check the number of picture in folder
print(len(glob.glob(traindir_full+ '/ALB/*'))) #return 1719
print(len(glob.glob(traindir+ '/ALB/*'))) #return 1511
print(len(glob.glob(valdir+ '/ALB/*'))) #return 208
```

## Load data

```torch.utils.data.dataloader```

Read about DataLoader [Pytorch](http://pytorch.org/docs/_modules/torch/utils/data/dataloader.html)

num_workers (int, optional): how many subprocesses to use for data
            loading. 0 means that the data will be loaded in the main process
            (default: 0)

```torchvision.datasets```

Read about datasets [Pytorch](http://pytorch.org/docs/torchvision/datasets.html?highlight=dataset)

dset.ImageFolder(root="root folder path", [transform, target_transform])

```torchvision.transforms```

Read about transforms [pytorch](http://pytorch.org/docs/torchvision/transforms.html?highlight=transform)

class ```torchvision.transforms.Compose(transforms)```: Composes several transforms together.

In [14]:
# Training data pipeline.
# Channel-wise normalisation shared by the train, validation and test loaders.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Rescale, take a random 224 crop, randomly flip, then convert and normalise.
# NOTE(review): Scale / RandomSizedCrop are the transform names of the
# torchvision release this notebook targets; confirm before upgrading.
train_loader = DataLoader(
    dataset=datasets.ImageFolder(
        root=traindir,
        transform=transforms.Compose([
            transforms.Scale(400),
            transforms.RandomSizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
    ),
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.workers,
)

In [15]:
# Validation data pipeline: same preprocessing as training minus the
# horizontal flip, read in a fixed order.
# NOTE(review): the crop is still random, so validation accuracy can vary
# between passes over the same images -- confirm this is intended.
val_loader = DataLoader(
    dataset=datasets.ImageFolder(
        root=valdir,
        transform=transforms.Compose([
            transforms.Scale(400),
            transforms.RandomSizedCrop(224),
            transforms.ToTensor(),
            normalize,
        ]),
    ),
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=args.workers,
)

In [27]:
class TestImageFolder(Dataset):
    """Dataset over the ``*.jpg`` files located directly inside ``root``.

    Unlike ``ImageFolder`` there are no class sub-folders and no labels:
    each item is ``(image, filename)`` so that predictions can be matched
    back to the file they belong to.

    Args:
        root: directory containing the test images.
        transform: optional callable applied to every loaded PIL image.
    """

    def __init__(self, root, transform=None):
        self.root = root
        # Store bare file names in sorted order for a stable item order.
        self.imgs = [
            os.path.basename(filepath)
            for filepath in sorted(glob.glob(os.path.join(root, "*.jpg")))
        ]
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, filename)`` for the image at position ``index``."""
        filename = self.imgs[index]
        # Open through a context manager so the file handle is released, and
        # force three channels: a grayscale or CMYK JPEG would otherwise break
        # a 3-channel Normalize further down the transform pipeline.
        with open(os.path.join(self.root, filename), "rb") as handle:
            img = Image.open(handle).convert("RGB")
        if self.transform is not None:
            img = self.transform(img)
        return img, filename

    def __len__(self):
        """Number of images found in ``root``."""
        return len(self.imgs)

In [28]:
# Test data pipeline. The random crop and flip are kept on purpose: the test
# set is read several times and the predictions are averaged. Batch size 1 and
# a fixed order keep every prediction aligned with its file name.
test_loader = DataLoader(
    dataset=TestImageFolder(
        root=testdir,
        transform=transforms.Compose([
            transforms.Scale(400),
            transforms.RandomSizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
    ),
    batch_size=1,
    shuffle=False,
    num_workers=args.workers,
)

# Build model

## Load model

How to load a pretrained model in PyTorch: [torchvision.models](http://pytorch.org/docs/torchvision/models.html?highlight=models)

```torchvision.models.alexnet(pretrained=False, **kwargs)```

In [16]:
# Build the network: the constructor is looked up by name in
# torchvision.models, with or without pre-trained weights.
if not args.pretrained:
    print("=> Creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch]()
else:
    print("=> Using pre-trained model '{}'".format(args.arch))
    model = models.__dict__[args.arch](pretrained=True)

=> Using pre-trained model 'resnet50'


```python
for param in model.parameters():
    print("Data {0} \n Type {1} \n Size {2}".format(param.data, type(param.data), param.size()))
    break
```

In [18]:
# Freeze the backbone: only the new classifier head will be trained.
for param in model.parameters():
    param.requires_grad = False

# Replace the last fully-connected layer with one sized for our classes.
# Parameters of newly constructed modules have requires_grad=True by default.
# The input width is read from the layer being replaced rather than
# hard-coded to 2048, so architectures with a different width also work.
model.fc = nn.Linear(model.fc.in_features, len(classes))

# For 1 GPU, it is unnecessary to use DataParallel.
if args.cuda:
    model.cuda()

## Define loss function and optimizer

In [19]:
# Loss function: cross-entropy over the class scores.
criterion = nn.CrossEntropyLoss()
if args.cuda:
    criterion.cuda()

# Optimizer: only the parameters of the new classifier head (model.fc) are
# optimised; the rest of the network is frozen.
if args.optim == "sgd":
    optimizer = optim.SGD(model.fc.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

elif args.optim == "adam":
    optimizer = optim.Adam(model.fc.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)

elif args.optim == "rmsprop":
    optimizer = optim.RMSprop(model.fc.parameters(),
                              lr=args.lr,
                              weight_decay=args.weight_decay)

else:
    # Fail here with a clear message instead of leaving `optimizer` undefined
    # (or stale from a previous run) and crashing later in training.
    raise ValueError(
        "Unsupported optimizer '{}': expected 'sgd', 'adam' or 'rmsprop'"
        .format(args.optim))

## Utility function

In [20]:
def save_checkpoint(state, is_best, filename="checkpoint.pth.tar"):
    """Write ``state`` to ``intermediate_path/filename``.

    When ``is_best`` is true the saved file is additionally copied to
    ``model_best_filepath``.
    """
    target = os.path.join(intermediate_path, filename)
    torch.save(state, target)
    if not is_best:
        return
    shutil.copyfile(target, model_best_filepath)

In [21]:
def adjust_learning_rate(args, optimizer, epoch):
    """Apply step decay to the learning rate of every parameter group.

    The rate is ``args.lr`` multiplied by 0.1 once for each completed block
    of 30 epochs (``epoch // 30``). The optimizer is updated in place and
    nothing is returned.
    """
    decayed_lr = args.lr * (0.1 ** (epoch // 30))
    for group in optimizer.param_groups:
        group.update(lr=decayed_lr)

In [22]:
# Inspect the optimizer's parameter groups: the tensors being optimised
# together with the hyperparameters (lr, weight_decay, ...) attached to them.
print(optimizer.param_groups)

[{'eps': 1e-08, 'params': [Parameter containing:
1.00000e-02 *
-1.4219  1.3843 -1.1213  ...   0.2236 -2.1476  0.9708
-1.3754  0.4443  0.1720  ...  -0.6882  1.4378 -0.4721
 2.1038 -1.8498 -0.8843  ...  -0.2508 -1.7673 -0.8141
          ...             ⋱             ...          
-2.1799  1.9730 -2.0085  ...  -0.6145  1.4237  1.2395
-0.8596  2.1436 -1.1804  ...   1.3579  2.0011 -1.1211
 1.2626  2.1991  0.6957  ...  -0.7204 -2.1124 -0.9735
[torch.FloatTensor of size 8x2048]
, Parameter containing:
1.00000e-02 *
 -0.5990
  2.1007
  2.1749
  0.7953
 -1.6991
 -0.0100
 -1.2440
  0.2237
[torch.FloatTensor of size 8]
], 'lr': 0.001, 'weight_decay': 0.0001, 'betas': (0.9, 0.999)}]


## Define train/validate function

In [23]:
# train function
def train(args, train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one epoch and print accuracy, mean loss and time.

    Args:
        args: settings namespace; only ``args.cuda`` is read here.
        train_loader: iterable yielding ``(images, target)`` batches.
        model: network to optimise (switched to train mode).
        criterion: loss applied to ``(output, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the printed summary.
    """
    model.train()  # turn on train mode

    losses = AverageValueMeter()
    top1 = ClassErrorMeter(accuracy=True)  # accuracy instead of error
    start = time.time()

    for images, target in train_loader:
        # The model is not wrapped in DataParallel, so the batch has to be
        # moved to the GPU explicitly.
        if args.cuda:
            images = images.cuda()
            target = target.cuda()

        # Wrap the batch for autograd under separate names: `target` must stay
        # a plain tensor because the accuracy meter below is fed tensors
        # (output.data), not Variables.
        input_var = Variable(images)
        target_var = Variable(target)

        # compute output and loss
        output = model(input_var)
        loss = criterion(output, target_var)

        # compute gradient and do backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the meter reports a
        # per-sample average even when the last batch is smaller.
        batch_size = images.size(0)
        losses.add(loss.data[0] * batch_size, batch_size)
        top1.add(output.data, target)

    print("==> EPOCH {0} | Time: {1:.3f} | Accuracy: {2:.3f} | Loss: {3:.4f}"
          .format(epoch, time.time()-start,
                  top1.value()[0], losses.value()[0]))

In [24]:
# validate function
def validate(args, val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` and return its accuracy.

    Args:
        args: settings namespace; only ``args.cuda`` is read here.
        val_loader: iterable yielding ``(images, target)`` batches.
        model: network to evaluate (switched out of train mode).
        criterion: loss applied to ``(output, target)``.

    Returns:
        The first element of ``ClassErrorMeter.value()``, i.e. the accuracy
        accumulated over the whole loader.
    """
    model.train(False)  # turn off train mode

    losses = AverageValueMeter()
    top1 = ClassErrorMeter(accuracy=True)
    start = time.time()

    for images, target in val_loader:
        # Plain .cuda(), as in train(): the loaders in this notebook are built
        # without pin_memory, so an asynchronous copy brings nothing, and the
        # `async=` keyword is a syntax error on Python 3.7+.
        if args.cuda:
            images = images.cuda()
            target = target.cuda()

        # volatile=True: no gradient bookkeeping during evaluation.
        # `target` itself stays a plain tensor for the accuracy meter.
        input_var = Variable(images, volatile=True)
        target_var = Variable(target, volatile=True)

        # compute output and loss
        output = model(input_var)
        loss = criterion(output, target_var)

        # Weight by batch size so the meter yields a per-sample average.
        batch_size = images.size(0)
        losses.add(loss.data[0] * batch_size, batch_size)
        top1.add(output.data, target)

    accuracy = top1.value()[0]
    print("==> VALIDATE | Time: {0:.3f} | Accuracy: {1:.3f} | Loss: {2:.4f}"
          .format(time.time()-start, accuracy, losses.value()[0]))
    return accuracy

# Train model

In [25]:
# Full training run: per epoch, decay the learning rate, train, validate and
# checkpoint; the checkpoint with the best validation accuracy so far is also
# kept as the "best" model.
print("=> Starting to train on '{}' model".format(args.arch))
best_prec1 = 0
for epoch in range(1, args.epochs+1):
    adjust_learning_rate(args, optimizer, epoch)

    # one pass over the training set, then score on the validation set
    train(args, train_loader, model, criterion, optimizer, epoch)
    prec1 = validate(args, val_loader, model, criterion)

    # track the best precision@1 seen so far
    is_best = prec1 > best_prec1
    best_prec1 = max(prec1, best_prec1)

    save_checkpoint(
        {
            "epoch": epoch,
            "arch": args.arch,
            "state_dict": model.state_dict(),
            "best_prec1": best_prec1,
        },
        is_best,
    )

=> Starting to train on 'resnet50' model


Process Process-2:
Process Process-3:
Process Process-1:
Process Process-4:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/hoangnguyen/miniconda3/envs/pydata/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 32, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/Use

KeyboardInterrupt: 

# Submit

In [None]:
def test(args, test_loader, model):
    """Predict class probabilities for the test set, averaged over augmentations.

    The loader is traversed ``args.nb_augs`` times; because its transform is
    random, every pass sees a different augmentation of each image. The
    softmax outputs are summed per image and divided by ``args.nb_augs``.

    Args:
        args: settings namespace; ``args.nb_augs`` and ``args.cuda`` are read.
        test_loader: loader yielding ``(images, filenames)`` batches in a fixed
            order (it must not shuffle, otherwise the passes do not line up).
        model: trained network (switched out of train mode).

    Returns:
        pandas.DataFrame with one probability column per entry of ``classes``
        plus an ``image`` column holding the file names.
    """
    # Size the accumulator by the number of images rather than the number of
    # batches, so any batch size works (the original assumed batch_size == 1).
    nb_images = len(test_loader.dataset)
    preds = np.zeros(shape=(nb_images, len(classes)))
    id_col = []

    # turn off train mode
    model.train(False)

    # average predictions across several different augmentations
    for aug in range(args.nb_augs):
        print("   * Predicting on test augmentation {}".format(aug + 1))

        offset = 0  # row of the first image of the current batch
        for images, filenames in test_loader:
            if args.cuda:
                images = images.cuda()
            input_var = Variable(images, volatile=True)  # no gradient
            output = model(input_var)
            probs = F.softmax(output).data.cpu().numpy()

            # accumulate the class probabilities of this batch
            batch_size = probs.shape[0]
            preds[offset:offset + batch_size] += probs
            if aug == 0:
                # file names are collected once, on the first pass
                id_col.extend(filenames)
            offset += batch_size

    # convert averaged prediction array to pandas dataframe
    preds /= args.nb_augs
    # columns=classes, not [classes]: a nested list makes pandas build a
    # MultiIndex instead of plain column labels.
    pred = pd.DataFrame(preds, columns=classes)
    pred["image"] = id_col
    return pred

In [None]:
# Predict on the test set with the best checkpoint and write a submission CSV.
print("=> Starting to test on '{}' model".format(args.arch))
if not os.path.isfile(model_best_filepath):
    print("=> No checkpoint found at '{}'".format(model_best_filepath))
else:
    # restore the best weights saved during training
    print("=> Loading checkpoint '{}'".format(model_best_filename))
    checkpoint = torch.load(model_best_filepath)
    best_prec1 = checkpoint["best_prec1"]
    model.load_state_dict(checkpoint["state_dict"])
    print("=> Loaded checkpoint '{}' (epoch {})"
          .format(model_best_filename, checkpoint["epoch"]))

    pred = test(args, test_loader, model)

    # the submission file name records the settings of this test run
    sub_fn = "{0}epoches_{1}vals_{2}augs_{3}.csv".format(
        checkpoint["epoch"], args.nb_vals, args.nb_augs, args.arch)
    pred.to_csv(os.path.join(submission_path, sub_fn), index=False)