In [1]:
from __future__ import division

import random
import argparse
import glob
import re
import csv
import time
from os.path import join
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tqdm import tqdm
from PIL import Image
import jpeg4py as jpeg
from conditional import conditional

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, sampler
from torch.optim import lr_scheduler

from utils import *
from train_utils import *
from custom_dataset import IEEECameraDataset, preprocess_image
from custom_scheduler import ReduceLROnPlateau
from custom_models import ResNet, DenseNet201

from multiprocessing import *

import logging
logging.basicConfig(filename='training.log', level=logging.INFO, format='%(asctime)s %(message)s')

In [2]:
SEED = 42

# Seed every RNG the notebook draws from. The WeightedRandomSampler objects
# built in the data cell come from torch, so the torch generator must be seeded
# as well as numpy / random for a run to be repeatable.
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Run settings. They stand in for the command-line flags of the script version
# of this notebook (--max-epoch, -b/--batch-size, -l/--learning_rate, -m/--model,
# -t/--test, -tt/--test-train, -cs/--crop-size, -w/--workers,
# -uiw/--use-imagenet-weights, -x/--extra-dataset, -e/--ensembling, -tta).
args_workers = 28                 # number of DataLoader workers
args_crop_size = 512              # crop size
args_use_imagenet_weights = True  # use imagenet weights (transfer learning)
args_model = False                # checkpoint to load and continue training from; False = start fresh
args_test = False                 # test the model and generate the CSV submission file
args_test_train = False           # test the model on the training set
args_extra_dataset = True         # use the extra dataset from
                                  # https://www.kaggle.com/c/sp-society-camera-model-identification/discussion/47235
args_tta = True                   # enable test time augmentation
args_batch_size = 18              # batch size during training
args_learning_rate = 1e-4         # initial learning rate
args_max_epoch = 150              # epochs to run
args_ensembling = 'arithmetic'    # type of ensembling for TTA: arithmetic|geometric

num_workers = args_workers

TRAIN_FOLDER       = '../../data/train/'
EXTRA_TRAIN_FOLDER = 'flickr_images' #not used -> ./good_imgs_train.txt
EXTRA_VAL_FOLDER   = 'val_images' #not used -> ./good_imgs_val.txt
TEST_FOLDER        = '../../data/test'

CROP_SIZE = args_crop_size

In [3]:
# Label for this run; every training cell passes it to train_and_validate().
# NOTE(review): presumably also the prefix of the saved checkpoint names
# (e.g. 'densenet201-2_9_...pth') -- confirm in train_utils.
experiment_name = 'densenet201-2'

In [4]:
# 2nd round
# Checkpoint to resume from: the model-building cell loads 'models/' + args_model.
# Earlier choice, kept for reference:
# args_model='densenet201-1_60_0.0822663608986.pth'
# 0.07
args_model= 'densenet201-2_9_0.0707190001471.pth'
# NOTE: every active training-round cell reassigns args_learning_rate and
# args_max_epoch before using them, so the two values below are only defaults.
args_learning_rate = 1e-6
args_max_epoch = 10

In [5]:
# MAIN
# Build the network, move it to the GPU(s) when available, and optionally
# restore weights from the checkpoint named by args_model.
# model = ResNet(len(CLASSES), pretrained=args_use_imagenet_weights)
# NOTE(review): the imagenet-weights flag is passed as `freeze` -- confirm that
# this is what DenseNet201 expects.
model = DenseNet201(len(CLASSES), freeze=args_use_imagenet_weights)
if cuda_is_available:
    print('GPU on')
    model = nn.DataParallel(model).cuda()
if args_model:
    print("Loading model " + args_model)
    # Deserialize onto the CPU first so a checkpoint written on a GPU machine
    # can also be opened on a CPU-only one; load_state_dict copies the values
    # into the model's own (possibly CUDA) parameters afterwards.
    checkpoint = torch.load(join('models', args_model),
                            map_location=lambda storage, location: storage)
    state_dict = checkpoint['state_dict']
    if not isinstance(model, nn.DataParallel):
        # Checkpoints saved from a DataParallel model prefix every key with
        # 'module.'; drop the prefix when the model is not wrapped.
        state_dict = dict(
            (key[len('module.'):] if key.startswith('module.') else key, value)
            for key, value in state_dict.items()
        )
    model.load_state_dict(state_dict)

GPU on
Loading model densenet201-2_9_0.0707190001471.pth


In [9]:
# ids_train = [line.rstrip('\n') for line in open('good_imgs_train.txt')]
# ids_val   = [line.rstrip('\n') for line in open('good_imgs_val.txt')]

# def check_remove_broken(img_path):
#     try:
#         x = jpeg.JPEG(img_path).decode()
#     except Exception:
#         print('Decoding error:', img_path)
#         os.remove(img_path)
#     if x.ndim != 3:
#         print('BROKEN: {}'.format(img_path))
        
# p = Pool(cpu_count() - 2)
# p.map(check_remove_broken, tqdm(ids_train))

In [6]:
# TRAINING
# Builds the train/validation id lists, the class-balancing samplers and the
# two DataLoaders used by every training-round cell.

def _read_id_list(list_path):
    """Return one entry per line of `list_path`, trailing newline removed.

    The file is closed before returning.
    """
    with open(list_path) as list_file:
        return [line.rstrip('\n') for line in list_file]


def _class_of(img_path):
    """Return get_class() of the image's parent directory name."""
    return get_class(img_path.split('/')[-2])


ids = glob.glob(join(TRAIN_FOLDER, '*/*.jpg'))
print(len(ids))
ids.sort()

if not args_extra_dataset:
    ids_train, ids_val = train_test_split(ids, test_size=0.1, random_state=SEED)
else:
    ids_train = _read_id_list('good_imgs_train.txt')
    ids_val   = _read_id_list('good_imgs_val.txt')

    # minlength keeps one slot per class even when the highest-numbered classes
    # have no validation image at all.
    classes_val_count = np.bincount([_class_of(idx) for idx in ids_val], minlength=N_CLASSES)
    max_classes_val_count = max(classes_val_count)

    # Balance validation dataset by filling up classes with less items from
    # training set (and removing those from there). One ordered pass over the
    # training list: the first missing-count items of each class (in file order)
    # move to validation, everything else stays in training in its original
    # order, so the split is deterministic.
    still_needed = dict(
        (class_idx, int(max_classes_val_count - classes_val_count[class_idx]))
        for class_idx in range(N_CLASSES)
    )
    transfer_by_class = dict((class_idx, []) for class_idx in range(N_CLASSES))
    kept_for_train = []
    already_seen = set()
    for idx in ids_train:
        if idx in already_seen:
            # keep the training ids unique even if the list file repeats a line
            continue
        already_seen.add(idx)
        class_idx = _class_of(idx)
        if still_needed.get(class_idx, 0) > 0:
            transfer_by_class[class_idx].append(idx)
            still_needed[class_idx] -= 1
        else:
            kept_for_train.append(idx)

    ids_train = kept_for_train
    for class_idx in range(N_CLASSES):
        ids_val.extend(transfer_by_class[class_idx])

print("Training set distribution:")
print_distribution(ids_train)

print("Validation set distribution:")
print_distribution(ids_val)

classes_train = [_class_of(idx) for idx in ids_train]
classes_val = [_class_of(idx) for idx in ids_val]

# Keyword arguments: recent scikit-learn releases reject the positional form.
present_classes = np.unique(classes_train)
comp_class_weight = class_weight.compute_class_weight('balanced', classes=present_classes, y=classes_train)
# comp_class_weight is ordered like present_classes, which only coincides with
# the class index when every class occurs in training -- look up by label.
weight_of_class = dict(zip(present_classes.tolist(), comp_class_weight))

weights = [weight_of_class[i_class] for i_class in classes_train]
weights = torch.DoubleTensor(weights)
train_sampler = sampler.WeightedRandomSampler(weights, len(weights))

# NOTE(review): validation is also drawn by a weighted *random* sampler that
# uses the training class weights, so the validation images (and therefore
# valid_loss / valid_acc) differ from epoch to epoch. Kept as is so results stay
# comparable with the recorded best_val_loss values; consider a sequential
# sampler for a stable metric.
weights = [weight_of_class[i_class] for i_class in classes_val]
weights = torch.DoubleTensor(weights)
val_sampler = sampler.WeightedRandomSampler(weights, len(weights))

train_dataset = IEEECameraDataset(ids_train, crop_size=CROP_SIZE, training=True)
val_dataset = IEEECameraDataset(ids_val, crop_size=CROP_SIZE, training=False)

train_loader = DataLoader(train_dataset, batch_size=args_batch_size, sampler=train_sampler, num_workers=num_workers, pin_memory=True)
valid_loader = DataLoader(val_dataset, batch_size=args_batch_size // 4, sampler=val_sampler, num_workers=num_workers, pin_memory=True, collate_fn=default_collate_unsqueeze)

2750
Training set distribution:
              HTC-1-M7:  1010 (14.0%)
              iPhone-6:   769 (10.7%)
   Motorola-Droid-Maxx:   707 (09.8%)
            Motorola-X:   252 (03.5%)
     Samsung-Galaxy-S4:  1355 (18.8%)
             iPhone-4s:   719 (10.0%)
           LG-Nexus-5x:   606 (08.4%)
      Motorola-Nexus-6:   747 (10.4%)
  Samsung-Galaxy-Note3:   317 (04.4%)
            Sony-NEX-7:   726 (10.1%)
Validation set distribution:
              HTC-1-M7:    48 (10.0%)
              iPhone-6:    48 (10.0%)
   Motorola-Droid-Maxx:    48 (10.0%)
            Motorola-X:    48 (10.0%)
     Samsung-Galaxy-S4:    48 (10.0%)
             iPhone-4s:    48 (10.0%)
           LG-Nexus-5x:    48 (10.0%)
      Motorola-Nexus-6:    48 (10.0%)
  Samsung-Galaxy-Note3:    48 (10.0%)
            Sony-NEX-7:    48 (10.0%)


### 1st training round

In [None]:
# optimizer = optim.Adam(model.parameters(), lr=args_learning_rate)
# scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=10, min_lr=1e-9, epsilon=1e-5, verbose=1, mode='min')

# criterion = nn.CrossEntropyLoss()

# print('Starting training...')

# best_val_loss = None

# train_and_validate(
#     train_loader,
#     valid_loader,
#     model,
#     optimizer,
#     scheduler,
#     criterion,
#     args_max_epoch,
#     1,
#     best_val_loss,
#     experiment_name
# )

Starting training...
Epochs: 150
Epoch: 1
Step: 0, train_loss: 2.28059053421
Step: 50, train_loss: 2.19810493946
Step: 100, train_loss: 1.83483487606
Step: 150, train_loss: 1.51969740391
Step: 200, train_loss: 1.31870098829
Step: 250, train_loss: 1.24865574241
Step: 300, train_loss: 1.06386986613
Step: 350, train_loss: 1.08772423387
train_loss: 1.42791003931 | finished in 5m 15s
valid_loss: 0.961100642756, valid_acc: 0.719749652295
Epoch: 2
Step: 0, train_loss: 1.11211490631
Step: 50, train_loss: 0.859526391625
Step: 100, train_loss: 0.825671378374
Step: 150, train_loss: 0.756042047143
Step: 200, train_loss: 0.830646370053
Step: 250, train_loss: 0.776709423661
Step: 300, train_loss: 0.74910466373
Step: 350, train_loss: 0.692155454159
train_loss: 0.782373164756 | finished in 5m 8s
valid_loss: 0.615030290792, valid_acc: 0.837001375516
Epoch: 3
Step: 0, train_loss: 0.510388076305
Step: 50, train_loss: 0.680197889805
Step: 100, train_loss: 0.630369685292
train_loss: 0.652524473792 | finish

### 2nd training round

In [7]:
# 2nd round: continue training the loaded model with Adam and a
# plateau-driven learning-rate scheduler.

args_learning_rate = 1e-5
args_max_epoch = 10

# The loss does not depend on the optimizer or the scheduler, so set it up first.
criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(model.parameters(), lr=args_learning_rate)
# Alternatives left disabled:
# optimizer = optim.SGD(model.parameters(), lr=args_learning_rate, momentum=0.9)
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=20.0, eta_min=1e-9, last_epoch=-1)
plateau_settings = dict(factor=0.5, patience=5, min_lr=1e-9, epsilon=1e-5, verbose=1, mode='min')
scheduler = ReduceLROnPlateau(optimizer, **plateau_settings)

print('2nd round training...')

# Validation loss embedded in the name of the round-1 checkpoint
# ('densenet201-1_60_0.0822663608986.pth').
best_val_loss = 0.0822663608986

round_args = (
    train_loader, valid_loader,
    model, optimizer, scheduler, criterion,
    args_max_epoch,
    1,  # NOTE(review): meaning of this positional argument is defined in train_utils
    best_val_loss,
    experiment_name,
)
train_and_validate(*round_args)

2nd round training...
Epochs: 10
Epoch: 1
Step: 0, train_loss: 0.241622552276
Step: 50, train_loss: 0.0862768395018
Step: 100, train_loss: 0.0352550423401
Step: 150, train_loss: 0.0448324072629
Step: 200, train_loss: 0.0434064711013
Step: 250, train_loss: 0.0778075150517
Step: 300, train_loss: 0.0490284431324
Step: 350, train_loss: 0.0350991055666
Step: 400, train_loss: 0.0389803798473
train_loss: 0.051810306091 | finished in 5m 27s
valid_loss: 0.148186890459, valid_acc: 0.974530831099
Epoch: 2
Step: 0, train_loss: 0.042412918061
Step: 50, train_loss: 0.0511174240957
Step: 100, train_loss: 0.0400912842457
Step: 150, train_loss: 0.0662704072252
Step: 200, train_loss: 0.068745732113
Step: 250, train_loss: 0.0461700825091
Step: 300, train_loss: 0.0602067746126
Step: 350, train_loss: 0.0469876486424
Step: 400, train_loss: 0.0558752386703
train_loss: 0.054403098563 | finished in 5m 21s
valid_loss: 0.107089819846, valid_acc: 0.964864864865
Epoch: 3
Step: 0, train_loss: 0.000207000310184
Step

Process Process-472:
Traceback (most recent call last):
Process Process-470:
Process Process-465:
Process Process-466:
Process Process-453:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process Process-456:
Process Process-475:
Process Process-469:
  File "/home/user/software/miniconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/home/user/software/miniconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
Process Process-474:
  File "/home/user/software/miniconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
Process Process-459:
Process Process-473:
Process Process-463:
  File "/home/user/software/miniconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process Process-471:
Traceback (most recent call last):
    self.run()
Process 

  File "/home/user/software/miniconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
  File "/home/user/software/miniconda2/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 36, in _worker_loop
  File "/home/user/software/miniconda2/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 36, in _worker_loop
  File "/home/user/software/miniconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
    r = index_queue.get()
    self._target(*self._args, **self._kwargs)
  File "/home/user/software/miniconda2/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 36, in _worker_loop
  File "/home/user/software/miniconda2/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 36, in _worker_loop
  File "/home/user/software/miniconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
  File "/home/user/software/miniconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
    r = index_queue.get()
  File

KeyboardInterrupt: 

Process Process-467:
Process Process-462:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/user/software/miniconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
  File "/home/user/software/miniconda2/lib/python2.7/multiprocessing/process.py", line 267, in _bootstrap
    self.run()
    self.run()
  File "/home/user/software/miniconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
  File "/home/user/software/miniconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._target(*self._args, **self._kwargs)
  File "/home/user/software/miniconda2/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 36, in _worker_loop
    self._target(*self._args, **self._kwargs)
  File "/home/user/software/miniconda2/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 36, in _worker_loop
    r = index_queue.get()
  File "/home/user/software/miniconda2/lib/python2.7/multiprocessing/queues.py",

In [18]:
# 3rd round: a single epoch at a lower learning rate.

third_round_lr, third_round_epochs = 0.5e-6, 1
args_learning_rate = third_round_lr
args_max_epoch = third_round_epochs

adam = optim.Adam(model.parameters(), lr=args_learning_rate)
optimizer = adam
# Disabled alternatives:
# optimizer = optim.SGD(model.parameters(), lr=args_learning_rate, momentum=0.9)
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=20.0, eta_min=1e-9, last_epoch=-1)
scheduler = ReduceLROnPlateau(
    optimizer,
    factor=0.1,
    patience=3,
    min_lr=1e-9,
    epsilon=1e-5,
    verbose=1,
    mode='min',
)

criterion = nn.CrossEntropyLoss()

print('3rd round training...')

# Same reference loss as the 2nd round (value from the round-1 checkpoint name).
best_val_loss = 0.0822663608986

train_and_validate(
    train_loader, valid_loader, model,
    optimizer, scheduler, criterion,
    args_max_epoch, 1, best_val_loss, experiment_name
)

2nd round training...
Epochs: 1
Epoch: 1
Step: 0, train_loss: 0.0293534472585
Step: 50, train_loss: 0.0550505582721
Step: 100, train_loss: 0.037839101305
Step: 150, train_loss: 0.0481259056204
Step: 200, train_loss: 0.0269528799743
Step: 250, train_loss: 0.0450508926611
Step: 300, train_loss: 0.037365114541
Step: 350, train_loss: 0.0442239881237
train_loss: 0.0457191242454 | finished in 5m 6s
valid_loss: 0.173170685933, valid_acc: 0.958695652174


DataParallel(
  (module): DenseNet201(
    (features): Sequential(
      (0): Sequential(
        (conv0): Conv2d (3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (relu0): ReLU(inplace)
        (pool0): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1))
        (denseblock1): _DenseBlock(
          (denselayer1): _DenseLayer(
            (norm.1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
            (relu.1): ReLU(inplace)
            (conv.1): Conv2d (64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (norm.2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
            (relu.2): ReLU(inplace)
            (conv.2): Conv2d (128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          )
          (denselayer2): _DenseLayer(
            (norm.1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True)
     

### 4th training round

In [9]:
# 4th round: two back-to-back stages that differ only in learning rate and
# epoch count. Each stage gets a fresh optimizer, scheduler and loss, and starts
# from the same reference validation loss.
# Disabled alternatives for the optimizer / scheduler:
# optimizer = optim.SGD(model.parameters(), lr=args_learning_rate, momentum=0.9)
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=20.0, eta_min=1e-9, last_epoch=-1)

fourth_round_stages = [
    (1e-5, 1),   # (learning rate, epochs)
    (1e-6, 10),
]

for stage_lr, stage_epochs in fourth_round_stages:
    args_learning_rate = stage_lr
    args_max_epoch = stage_epochs

    optimizer = optim.Adam(model.parameters(), lr=args_learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=2, min_lr=1e-9, epsilon=1e-5, verbose=1, mode='min')

    criterion = nn.CrossEntropyLoss()

    print('4th round training...')

    best_val_loss = 0.0822663608986

    stage_result = train_and_validate(
        train_loader,
        valid_loader,
        model,
        optimizer,
        scheduler,
        criterion,
        args_max_epoch,
        1,
        best_val_loss,
        experiment_name
    )

# The cell still evaluates to the return value of the last stage.
stage_result

4th round training...
Epochs: 1
Epoch: 1
Step: 0, train_loss: 0.0176224708557
Step: 50, train_loss: 0.0706716242398
Step: 100, train_loss: 0.0319110168878
Step: 150, train_loss: 0.0423186913435
Step: 200, train_loss: 0.0547658321442
Step: 250, train_loss: 0.0672576120088
Step: 300, train_loss: 0.0305543840464
Step: 350, train_loss: 0.0357684952818
Step: 400, train_loss: 0.0455144800071
train_loss: 0.0472711453088 | finished in 5m 24s
valid_loss: 0.112763713071, valid_acc: 0.973648648649
4th round training...
Epochs: 10
Epoch: 1
Step: 0, train_loss: 0.0278077926487
Step: 50, train_loss: 0.0452075613843
Step: 100, train_loss: 0.054338614943
Step: 150, train_loss: 0.0365166312354
Step: 200, train_loss: 0.0391302232537
Step: 250, train_loss: 0.0408454790257
Step: 300, train_loss: 0.0442106221399
Step: 350, train_loss: 0.0383682003489
Step: 400, train_loss: 0.0518743347187
train_loss: 0.0437715489904 | finished in 5m 20s
valid_loss: 0.0820695397796, valid_acc: 0.977970627503
Epoch: 2
Step: 

DataParallel(
  (module): DenseNet201(
    (features): Sequential(
      (0): Sequential(
        (conv0): Conv2d (3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (relu0): ReLU(inplace)
        (pool0): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1))
        (denseblock1): _DenseBlock(
          (denselayer1): _DenseLayer(
            (norm.1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
            (relu.1): ReLU(inplace)
            (conv.1): Conv2d (64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (norm.2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
            (relu.2): ReLU(inplace)
            (conv.2): Conv2d (128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          )
          (denselayer2): _DenseLayer(
            (norm.1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True)
     

### 5th training round

In [7]:
# 5th round -- starts from the checkpoint whose validation loss is ~0.07
# ('densenet201-2_9_0.0707190001471.pth').
# Three stages with decreasing learning rate. Each stage rebuilds the optimizer,
# scheduler and loss, and compares against the same reference validation loss.
# Disabled alternatives for the optimizer / scheduler:
# optimizer = optim.SGD(model.parameters(), lr=args_learning_rate, momentum=0.9)
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=20.0, eta_min=1e-9, last_epoch=-1)

fifth_round_stages = [
    (1e-7, 1),   # (learning rate, epochs)
    (1e-8, 1),
    (1e-9, 2),   # equals the scheduler's min_lr
]

for stage_lr, stage_epochs in fifth_round_stages:
    args_learning_rate = stage_lr
    args_max_epoch = stage_epochs

    optimizer = optim.Adam(model.parameters(), lr=args_learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=2, min_lr=1e-9, epsilon=1e-5, verbose=1, mode='min')

    criterion = nn.CrossEntropyLoss()

    # Report the round and the actual learning rate; the previous messages
    # labelled two of the stages "4th round" and printed "LR 0.7/0.8/0.9".
    print('5th round training - LR {:g}...'.format(args_learning_rate))

    best_val_loss = 0.0707190001471

    stage_result = train_and_validate(
        train_loader,
        valid_loader,
        model,
        optimizer,
        scheduler,
        criterion,
        args_max_epoch,
        1,
        best_val_loss,
        experiment_name
    )

# The cell still evaluates to the return value of the last stage.
stage_result

5th round training - LR 0.7...
Epochs: 1
Epoch: 1
Step: 0, train_loss: 0.0133131872863
Step: 50, train_loss: 0.0418347371643
Step: 100, train_loss: 0.0292643586011
Step: 150, train_loss: 0.0389960063202
Step: 200, train_loss: 0.0434930672392
Step: 250, train_loss: 0.0508610804216
Step: 300, train_loss: 0.0394811853638
Step: 350, train_loss: 0.042606440353
Step: 400, train_loss: 0.0403372801305
train_loss: 0.0407905759775 | finished in 5m 24s
valid_loss: 0.151987171543, valid_acc: 0.962466487936
4th round training - LR 0.8...
Epochs: 1
Epoch: 1
Step: 0, train_loss: 0.0811273530126
Step: 50, train_loss: 0.0429838733189
Step: 100, train_loss: 0.0440733458183
Step: 150, train_loss: 0.0501740705021
Step: 200, train_loss: 0.0453472169291
Step: 250, train_loss: 0.0329068872303
Step: 300, train_loss: 0.0421424988611
Step: 350, train_loss: 0.0420074904105
Step: 400, train_loss: 0.0478239693493
train_loss: 0.0435264213815 | finished in 5m 19s
valid_loss: 0.215178594165, valid_acc: 0.97654155496


DataParallel(
  (module): DenseNet201(
    (features): Sequential(
      (0): Sequential(
        (conv0): Conv2d (3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (relu0): ReLU(inplace)
        (pool0): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1))
        (denseblock1): _DenseBlock(
          (denselayer1): _DenseLayer(
            (norm.1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
            (relu.1): ReLU(inplace)
            (conv.1): Conv2d (64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (norm.2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
            (relu.2): ReLU(inplace)
            (conv.2): Conv2d (128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          )
          (denselayer2): _DenseLayer(
            (norm.1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True)
     

In [None]:
# for i_batch, sample in enumerate(train_loader):
#     print(i_batch, sample['image'].size())
#     print(i_batch, sample['image'].mean())
# #     for i in xrange(sample['image'].size()[0]):
# #         img = sample['image'][i,0,:,:].numpy()
# #         plt.figure()
# #         plt.imshow(img, cmap=plt.cm.gray)
#     if i_batch == 100:
#         break

# i = 0
# for ib, b in enumerate(train_loader):
# #     print(i)
#     print(i*16)
#     print(len(b))
#     i+=1
#     if i>150:
#         break

### Testing (generate the submission CSV)

In [10]:
# Switch the notebook to test mode and rebuild the model from the checkpoint
# that will generate the submission.
args_test = True
# args_model='resnet50_first_14_0.45544702479.pth'
# args_model='densenet201-1_60_0.0822663608986.pth'
args_model='densenet201-2_9_0.0707190001471.pth'

# Small constant added before log() in the geometric ensembling of the test cell.
epsilon = 1e-7

# model = ResNet(len(CLASSES), pretrained=args_use_imagenet_weights)
model = DenseNet201(len(CLASSES), freeze=args_use_imagenet_weights)
if cuda_is_available:
    print('GPU on')
    model = nn.DataParallel(model).cuda()
if args_model:
    print("Loading model " + args_model)
    # Deserialize onto the CPU first so a checkpoint written on a GPU machine
    # can also be opened on a CPU-only one; load_state_dict copies the values
    # into the model's own (possibly CUDA) parameters afterwards.
    checkpoint = torch.load(join('models', args_model),
                            map_location=lambda storage, location: storage)
    state_dict = checkpoint['state_dict']
    if not isinstance(model, nn.DataParallel):
        # Checkpoints saved from a DataParallel model prefix every key with
        # 'module.'; drop the prefix when the model is not wrapped.
        state_dict = dict(
            (key[len('module.'):] if key.startswith('module.') else key, value)
            for key, value in state_dict.items()
        )
    model.load_state_dict(state_dict)

GPU on
Loading model densenet201-2_9_0.0707190001471.pth


In [11]:
# TEST
# Runs the model over every image, tiling it into CROP_SIZE x CROP_SIZE crops
# (optionally for several test-time transforms), combines the per-crop
# predictions into one class per image and either writes the submission CSV
# (args_test) or reports accuracy on the training images (args_test_train).
if args_test:
    ids = glob.glob(join(TEST_FOLDER, '*.tif'))
    print(len(ids))
elif args_test_train:
    ids = glob.glob(join(TRAIN_FOLDER, '*/*.jpg'))
else:
    # An explicit exception: `assert False` disappears under `python -O`.
    raise ValueError('Nothing to evaluate: set args_test or args_test_train')

ids.sort()

match = re.search(r'([^/]*)\.pth', args_model)
model_name = match.group(1) + ('_tta_' + args_ensembling if args_tta else '')
csv_name   = 'submission3_' + model_name + '.csv'

# The set of test-time transforms depends only on the flags, not on the image.
if args_test and args_tta:
    transforms = [[], ['orientation']]
elif args_test_train:
    transforms = [[], ['orientation'], ['manipulation'], ['manipulation', 'orientation']]
else:
    transforms = [[]]

model.eval()

# Only create (and truncate) the submission file when one is actually written.
csvfile = open(csv_name, 'w') if args_test else None
try:
    # Always defined, so running with both flags set cannot hit a NameError.
    correct_predictions = 0

    if args_test:
        csv_writer = csv.writer(csvfile, delimiter=',',quotechar='|', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(['fname', 'camera'])
        classes = []

    for idx in tqdm(ids):

        img = np.array(Image.open(idx))

        if args_test_train:
            img = get_crop(img, 512*2, random_crop=False)

        original_img = img

        original_manipulated = np.float32([1. if idx.find('manip') != -1 else 0.])

        # Upper bound on the number of tiles; the buffers are trimmed to the
        # number actually filled before they reach the model.
        max_tiles = len(transforms) * (img.shape[1] // CROP_SIZE) * (img.shape[0] // CROP_SIZE)

        img_batch         = np.zeros((max_tiles, CROP_SIZE, CROP_SIZE, 3), dtype=np.float32)
        manipulated_batch = np.zeros((max_tiles, 1),  dtype=np.float32)

        n_tiles = 0
        for transform in transforms:
            img = np.copy(original_img)
            manipulated = np.copy(original_manipulated)

            if 'orientation' in transform:
                img = np.rot90(img, 1, (0,1))
                # aggressive
                # img = np.rot90(img, [1,2,3], (0,1))
            if 'manipulation' in transform and not original_manipulated:
                img = random_manipulation(img)
                manipulated = np.float32([1.])

            if args_test_train:
                img = get_crop(img, 512, random_crop=False)

            sx = img.shape[1] // CROP_SIZE
            sy = img.shape[0] // CROP_SIZE

            for x in range(sx):
                for y in range(sy):
                    _img = np.copy(img[y*CROP_SIZE:(y+1)*CROP_SIZE, x*CROP_SIZE:(x+1)*CROP_SIZE])
                    img_batch[n_tiles]         = preprocess_image(_img)
                    manipulated_batch[n_tiles] = manipulated
                    n_tiles += 1

        # Drop unused all-zero rows (the args_test_train path crops to 512 after
        # the buffers were sized for the 1024 crop) so they do not vote.
        img_batch         = img_batch[:n_tiles]
        manipulated_batch = manipulated_batch[:n_tiles]

        img_batch, manipulated_batch = variable(torch.from_numpy(img_batch)), variable(torch.from_numpy(manipulated_batch))
        prediction = model(img_batch, manipulated_batch).data.cpu().numpy()
        if prediction.shape[0] != 1: # TTA
            if args_ensembling == 'geometric':
                # Product of the per-crop scores, computed in log space.
                # NOTE(review): assumes the model outputs non-negative scores
                # (e.g. probabilities); log() of raw logits would give NaNs.
                prediction = np.log(prediction + epsilon) # avoid numerical instability log(0)
                prediction = np.sum(prediction, axis=0, keepdims=True)
                prediction = np.exp(prediction) - epsilon
            else:
                prediction = np.sum(prediction, axis=0, keepdims=True)

        prediction_class_idx = np.argmax(prediction)

        if args_test_train:
            class_idx = get_class(idx.split('/')[-2])
            if class_idx == prediction_class_idx:
                correct_predictions += 1

        if args_test:
            csv_writer.writerow([idx.split('/')[-1], CLASSES[prediction_class_idx]])
            classes.append(prediction_class_idx)
finally:
    if csvfile is not None:
        csvfile.close()

if args_test_train:
    # One prediction is made per image, so accuracy is over the image count.
    accuracy = correct_predictions / float(len(ids)) if ids else 0.0
    print("Accuracy: " + str(accuracy))

if args_test:
    print("Test set predictions distribution:")
    print_distribution(None, classes=classes)
    print("Now you are ready to:")
    print("kg submit {}".format(csv_name))

  0%|          | 0/2640 [00:00<?, ?it/s]

2640


100%|██████████| 2640/2640 [11:19<00:00,  3.88it/s]

Test set predictions distribution:
              HTC-1-M7:   274 (10.4%)
              iPhone-6:   267 (10.1%)
   Motorola-Droid-Maxx:   266 (10.1%)
            Motorola-X:   258 (09.8%)
     Samsung-Galaxy-S4:   271 (10.3%)
             iPhone-4s:   267 (10.1%)
           LG-Nexus-5x:   204 (07.7%)
      Motorola-Nexus-6:   275 (10.4%)
  Samsung-Galaxy-Note3:   294 (11.1%)
            Sony-NEX-7:   264 (10.0%)
Now you are ready to:
kg submit submission3_densenet201-2_9_0.0707190001471_tta_arithmetic.csv



