In [1]:
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("dark")
plt.rcParams['figure.figsize'] = 16, 12
import pandas as pd
from tqdm import tqdm_notebook
import io
from PIL import Image
from glob import glob
from collections import defaultdict
import os
import pickle
from io import BytesIO
import random

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.datasets import ImageFolder

from kaggle_camera_model_id_lib.utils import PechkaBot, ImageList, NpzFolder, NCrops, TifFolderExFiles
from kaggle_camera_model_id_lib.models import VggHead, StyleVggHead, IEEEfcn, ResNetFC, FatNet1,InceptionResNetV2
from kaggle_camera_model_id_lib.utils import jpg_compress, equalize_v_hist, hsv_convert
from kaggle_camera_model_id_lib.utils import scale_crop_pad, gamma_correction
from kaggle_camera_model_id_lib.utils import patch_quality_dich, n_random_crops, n_pseudorandom_crops
from kaggle_camera_model_id_lib.models import ResNetDense, ResNetDenseFC

import imgaug as ia
from imgaug import augmenters as iaa

In [2]:
val_path = '/home/mephistopheies/storage2/data/camera-model-id/val/'
test_path = '/home/mephistopheies/storage2/data/camera-model-id/raw/test/'
model_path = '/home/mephistopheies/storage2/data/camera-model-id/models/ResNetDenseFC34/256_random_aug_kaggle_10_pretrained_zfc_flickr_noval_nocenter/pseudo_phase_1/checkpoint.tar'
out_dir = '/home/mephistopheies/storage2/data/camera-model-id/submit/'
model_type = 'ResNetDenseFC34_pretrained_zfc'
n_classes = 10
crop_size = 256
step = 128
num_workers = 1

do_random_aug_kaggle = True
p_random_aug_kaggle = 0.5
do_hard_aug = False
p_hard_aug = 0.5

to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)

scale_05 = lambda img: scale_crop_pad(img, 0.5)
scale_08 = lambda img: scale_crop_pad(img, 0.8)
scale_15 = lambda img: scale_crop_pad(img, 1.5)
scale_20 = lambda img: scale_crop_pad(img, 2.0)
gamma_08 = lambda img: gamma_correction(img, 0.8)
gamma_12 = lambda img: gamma_correction(img, 1.2)
jpg_70 = lambda img: jpg_compress(img, (70, 71))
jpg_90 = lambda img: jpg_compress(img, (90, 91))
augs = [scale_05, scale_08, scale_15, scale_20, gamma_08, gamma_12, jpg_70, jpg_90]

blur = iaa.GaussianBlur(sigma=(0, 2))
sharpen = iaa.Sharpen(alpha=(0, 1), lightness=(0.5, 2))
emboss = iaa.Emboss(alpha=(0, 1), strength=(0, 2))
contrast_normalization = iaa.ContrastNormalization(alpha=(0.7, 1.3))
hard_aug = iaa.OneOf([blur, sharpen, emboss, contrast_normalization])
sometimes = iaa.Sometimes(p_hard_aug, hard_aug)


def random_aug_kaggle(img, p=0.5):
    if np.random.rand() < p:
        return random.choice(augs)(img)
    return img

def aug_train(img):
    if min(img.size) > crop_center_size:
        return random_flip(random_crop(center_crop(img)))
    return random_flip(random_crop(img))

def aug_optional(img):
    if do_hard_aug:
        img = Image.fromarray(sometimes.augment_image(np.array(img)))

    if do_random_aug_kaggle:
        img = random_aug_kaggle(img, p_random_aug_kaggle)
    return img

In [3]:
model_factory = {
    'Vgg19Head_E_2b_bn': lambda n_classes: VggHead(num_classes=n_classes, vgg_key='E_2b', load_vgg_bn=True, batch_norm=True),
    'Vgg19Head_E_3b_bn': lambda n_classes: VggHead(num_classes=n_classes, vgg_key='E_3b', load_vgg_bn=True, batch_norm=True),
    'Vgg19Head_E_bn': lambda n_classes: VggHead(num_classes=n_classes, load_vgg_bn=True, vgg_key='E', batch_norm=True),
    'Vgg11Head_A_bn': lambda n_classes: VggHead(num_classes=n_classes, load_vgg_bn=True, vgg_key='A', batch_norm=True),
    'Vgg11Head_A': lambda n_classes: VggHead(num_classes=n_classes, load_vgg_bn=True, vgg_key='A', batch_norm=False),
    'StyleVggHead_bn': lambda n_classes: StyleVggHead(num_classes=n_classes, load_vgg_bn=True),
    'IEEEfcn': lambda n_classes: IEEEfcn(n_classes),
    'resnet18fc_pretrained': lambda n_classes: ResNetFC(
        models.resnet.BasicBlock, [2, 2, 2, 2], num_classes=n_classes, load_resnet='resnet18'),
    'resnet18fc': lambda n_classes: ResNetFC(
        models.resnet.BasicBlock, [2, 2, 2, 2], num_classes=n_classes, load_resnet=None),
    'resnet18X_pretrained': lambda n_classes: ResNetX(
        models.resnet.BasicBlock, [2, 2, 2, 2], num_classes=n_classes, load_resnet='resnet18'),
    'InceptionResNetV2fc_5_10_4': lambda n_classes: InceptionResNetV2fc(
        num_classes=n_classes, nun_block35=5, num_block17=10, num_block8=4),
    'InceptionResNetV2fcSmall_5_10': lambda n_classes: InceptionResNetV2fcSmall(
        num_classes=n_classes, nun_block35=5, num_block17=10),
    'resnet34fc_pretrained': lambda n_classes: ResNetFC(
        models.resnet.BasicBlock, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet34'),
    'resnet34fc_pretrained_maxpool': lambda n_classes: ResNetFC(
        models.resnet.BasicBlock, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet34', pool_type='max'),
    'resnet50fc_pretrained': lambda n_classes: ResNetFC(
        models.resnet.Bottleneck, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet50'),
    'FatNet1': lambda n_classes: FatNet1(n_classes),
    'resnet34X_pretrained_maxpool': lambda n_classes: ResNetX(
        models.resnet.BasicBlock, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet34', pool_type='max'),
    'resnet50X_pretrained_maxpool': lambda n_classes: ResNetX(
        models.resnet.Bottleneck, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet50', pool_type='max'),
    'InceptionResNetV2': lambda n_classes: InceptionResNetV2(num_classes=n_classes),
    'ResNetDense34_pretrained': lambda n_classes: ResNetDense(
        models.resnet.BasicBlock, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet34'),
    'ResNetDenseFC34_pretrained': lambda n_classes: ResNetDenseFC(
        models.resnet.BasicBlock, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet34', 
        zero_first_center=False),
    'ResNetDenseFC34_pretrained_zfc': lambda n_classes: ResNetDenseFC(
        models.resnet.BasicBlock, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet34', 
        zero_first_center=True)
}

model = model_factory[model_type](n_classes)
checkpoint = torch.load(model_path)
model.load_state_dict(checkpoint['model'])
loss_train = checkpoint['loss_train']
acc_train = checkpoint['acc_train']
loss_val = checkpoint['loss_val']
acc_val = checkpoint['acc_val']
class_to_idx = checkpoint['class_to_idx']
idx2class = dict([(v, k) for (k, v) in class_to_idx.items()])
print('Last state:\n  TLoss: %0.6f\n  TAcc:  %0.4f\n  VLoss: %0.6f\n  VAcc:  %0.4f' % 
    (loss_train[-1], acc_train[-1], loss_val[-1], acc_val[-1]))
del(checkpoint)
model = model.cuda()
model = model.eval()

Last state:
  TLoss: 0.031235
  TAcc:  0.9896
  VLoss: 0.000000
  VAcc:  0.0000


In [37]:
holdout_path = '/home/mephistopheies/projects/kaggle/camera-model-identification/validation/'

y_map = pd.read_csv(os.path.join(holdout_path, 'external_validation.csv')).set_index('fname')['camera'].to_dict()
g_map = pd.read_csv(os.path.join(holdout_path, 'external_validation.csv')).set_index('fname')['is_altered'].to_dict()
holdout_files = glob(os.path.join(holdout_path, '*.jpg'))

def loader(path):
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')


acc = 0
acc_manip = 0
acc_unalt = 0

def predict_image(img):
    crops = NCrops(img, crop_size=crop_size, step=step)
    X = torch.stack([normalize(to_tensor(aug_optional(Image.fromarray(crop)))) for crop in crops])
    X = X.view(1, 9, 3, 256, 256)
    bs, ncrops, c, h, w = X.shape
    X = X.view(-1, c, h, w)
    X_var = Variable(X.cuda(), volatile=True)
    log_p = model(X_var)
    log_p = log_p.view(bs, ncrops, -1)
    p = F.softmax(log_p, dim=2)
    p = p.prod(dim=1).pow(1/p.shape[1])
    
    return p

for fname in tqdm_notebook(holdout_files):
    img = loader(fname)
    
    p0 = predict_image(np.array(img))
    p90 = predict_image(np.array(img.rotate(90)))
    p180 = predict_image(np.array(img.rotate(180)))
    p270 = predict_image(np.array(img.rotate(270)))
    
    p = torch.stack([p0.squeeze(), p90.squeeze(), p180.squeeze(), p270.squeeze()]).max(dim=0)[0]
    
    
    y_pred = idx2class[p.max(0)[1].data[0]]
    y_true = y_map[os.path.basename(fname)]
    
    acc += float(y_pred == y_true)
    if g_map[os.path.basename(fname)] == 0:
        acc_unalt += float(y_pred == y_true)
    else:
        acc_manip += float(y_pred == y_true)
        
acc /= len(holdout_files)
acc_unalt /= (np.array(list(g_map.values())) == 0).sum()
acc_manip /= (np.array(list(g_map.values())) == 1).sum()

print(acc, acc_unalt, acc_manip)


0.8707692307692307 0.890243902439 0.850931677019


In [5]:
batch_size = 15

ds_val = NpzFolder(
    val_path,
    transform=transforms.Compose([
        transforms.Lambda(lambda img: NCrops(img, crop_size=crop_size, step=step)),
        transforms.Lambda(lambda crops: torch.stack([normalize(to_tensor(aug_optional(Image.fromarray(crop)))) 
                                                     for crop in crops]))
    ]))
val_loader = torch.utils.data.DataLoader(    
    ds_val,
    batch_size=batch_size, 
    shuffle=False,
    num_workers=1, 
    pin_memory=True)


p_val = []
acc_val_batch = 0
for ix_batch, (X, Y) in tqdm_notebook(enumerate(val_loader), total=int(len(ds_val.imgs)/batch_size)):
    bs, ncrops, c, h, w = X.shape
    X = X.view(-1, c, h, w)
    X_var = Variable(X.cuda(), volatile=True)
    Y_var = Variable(Y.cuda(), volatile=True)
    log_p = model(X_var)
    log_p = log_p.view(bs, ncrops, -1)
    p = F.softmax(log_p, dim=2)
    p = p.prod(dim=1).pow(1/p.shape[1])
    acc_val_batch += ((p.max(1)[1] == Y_var).float().sum()/Y_var.shape[0]).data[0]
    p_val.append(p.cpu().data.numpy())

p_val = np.vstack(p_val)
acc_val_batch /= ix_batch + 1
print(acc_val_batch)


0.9853333363930384


In [6]:
batch_size = 1

ds_test = TifFolderExFiles(
    test_path,
    transform=transforms.Compose([
        transforms.Lambda(lambda img: NCrops(np.array(img), crop_size=crop_size, step=step)),
        transforms.Lambda(lambda crops: torch.stack([normalize(to_tensor(crop)) for crop in crops]))
    ]))

test_loader = torch.utils.data.DataLoader(    
    ds_test,
    batch_size=batch_size, 
    shuffle=False,
    num_workers=num_workers, 
    pin_memory=True)

res = []
p_test = {}
for X, Y, files in tqdm_notebook(test_loader, total=int(len(ds_test.imgs)/batch_size)):
    files = list(map(lambda s: os.path.basename(s), files))
    bs, ncrops, c, h, w = X.shape
    X = X.view(-1, c, h, w)
    X_var = Variable(X.cuda(), volatile=True)
    log_p = model(X_var)
    log_p = log_p.view(bs, ncrops, -1)
    p = F.softmax(log_p, dim=2) #.mean(dim=1)
    p = p.prod(dim=1).pow(1/p.shape[1])
    ix_argmax = p.max(1)[1].cpu().data.numpy()
    res.extend(list(zip(files, [idx2class[ix] for ix in ix_argmax])))

    for ix in range(len(files)):
        p_test[files[ix]] = [(idx2class[i], x) for (i, x) in enumerate(p[ix, :].cpu().data.numpy())]




In [1]:
# # scales = [(512 - 32*i, 64, int((512/(12 - i))/16) - 1) for i in range(5, 12)]
# scales = [(512 - 32*i, 64, 5) for i in range(5, 12)]
# for t in scales:
#     print(t)

# res_scales = defaultdict(list)
# for crop_size, step, batch_size in scales:
#     ds_test = TifFolderExFiles(
#         test_path,
#         transform=transforms.Compose([
#             transforms.Lambda(lambda img: NCrops(np.array(img), crop_size=crop_size, step=step)),
#             transforms.Lambda(lambda crops: torch.stack([normalize(to_tensor(crop)) for crop in crops]))
#         ]))

#     test_loader = torch.utils.data.DataLoader(    
#         ds_test,
#         batch_size=batch_size, 
#         shuffle=False,
#         num_workers=num_workers, 
#         pin_memory=True)
    
#     for X, Y, files in tqdm_notebook(test_loader, total=int(len(ds_test.imgs)/batch_size), desc=str(crop_size)):
#         files = list(map(lambda s: os.path.basename(s), files))
#         bs, ncrops, c, h, w = X.shape
#         X = X.view(-1, c, h, w)
#         X_var = Variable(X.cuda(), volatile=True)
#         log_p = model(X_var)
#         log_p = log_p.view(bs, ncrops, -1)
#         p = F.softmax(log_p, dim=2)

#         for ix, fname in enumerate(files):
#             res_scales[fname].append(p[ix, :].cpu().data.numpy())
            
            
# res = []
# for fname, p in res_scales.items():
#     p = np.vstack(p)
#     if (p == 0).sum() > 0:
#         p[p == 0] += 1e-16
#     c_id = np.exp(np.log(p).mean(axis=0)).argmax()
#     res.append((fname, idx2class[c_id]))

In [7]:
# crop_size = 256
# step = 64
# batch_size = 5

# ds_test = TifFolderExFiles(
#     test_path,
#     transform=transforms.Compose([
#         transforms.Lambda(lambda img: NCrops(np.array(img), crop_size=crop_size, step=step)),
#         transforms.Lambda(lambda crops: torch.stack([normalize(to_tensor(crop)) for crop in crops]))
#     ]))

# test_loader = torch.utils.data.DataLoader(    
#     ds_test,
#     batch_size=batch_size, 
#     shuffle=False,
#     num_workers=num_workers, 
#     pin_memory=True)

# res = []
# p_test = {}
# for X, Y, files in tqdm_notebook(test_loader, total=int(len(ds_test.imgs)/batch_size)):
#     files = list(map(lambda s: os.path.basename(s), files))
#     bs, ncrops, c, h, w = X.shape
#     X = X.view(-1, c, h, w)
#     X_var = Variable(X.cuda(), volatile=True)
#     log_p = model(X_var)
#     log_p = log_p.view(bs, ncrops, -1)
#     p = F.softmax(log_p, dim=2) #.mean(dim=1)
#     p = p.prod(dim=1).pow(1/p.shape[1])
#     ix_argmax = p.max(1)[1].cpu().data.numpy()
#     res.extend(list(zip(files, [idx2class[ix] for ix in ix_argmax])))

#     for ix in range(len(files)):
#         p_test[files[ix]] = [(idx2class[i], x) for (i, x) in enumerate(p[ix, :].cpu().data.numpy())]

In [9]:
# test_files = glob(os.path.join(test_path, '*/*.tif'))

# res = []
# for fname in tqdm_notebook(test_files):
#     crops = sorted([(c, patch_quality_dich(c/255.0)**2) for c in NCrops(np.array(Image.open(fname)), 256, 64)], 
#                    key=lambda t: t[1], reverse=True)
    
#     n = sum([v for k, v in crops])
#     crops = [(k, v/n) for k, v in crops]
#     batch = torch.stack([normalize(to_tensor(k)) for (k, v) in crops])
#     X_var = Variable(batch.cuda(), volatile=True)
#     log_p = model(X_var)
#     p = F.softmax(log_p, dim=1).cpu().data.numpy()
#     q = np.array([v for k, v in crops])
#     p = (p*q.reshape(-1, 1)).sum(axis=0)
#     ix = p.argmax()
#     res.append((os.path.basename(fname), idx2class[ix]))

In [8]:
with open(os.path.join(out_dir, 'submit__unalt.csv'.lower()), 'w') as f:
    f.write('fname,camera\n')
    for fname, c in res:
        if '_unalt' in fname:
            f.write('%s,%s\n' % (fname, c))
        else:
            f.write('%s,%s\n' % (fname, 'no_class'))
            
            
with open(os.path.join(out_dir, 'submit__manip.csv'.lower()), 'w') as f:
    f.write('fname,camera\n')
    for fname, c in res:
        if '_manip' in fname:
            f.write('%s,%s\n' % (fname, c))
        else:
            f.write('%s,%s\n' % (fname, 'no_class'))
            
            
with open(os.path.join(out_dir, 'submit.csv'.lower()), 'w') as f:
    f.write('fname,camera\n')
    for fname, c in res:
        f.write('%s,%s\n' % (fname, c))

In [9]:
pd.Series([v for (k, v) in res]).value_counts()

Samsung-Galaxy-Note3    336
Motorola-Nexus-6        285
iPhone-4s               278
iPhone-6                271
Samsung-Galaxy-S4       269
Sony-NEX-7              267
Motorola-Droid-Maxx     260
HTC-1-M7                259
Motorola-X              238
LG-Nexus-5x             177
dtype: int64

In [10]:
manip = [(f, c) for (f, c) in p_test.items() if '_manip' in f]
print(len(p_test), len(manip))

unalt = [(f, c) for (f, c) in p_test.items() if '_unalt' in f]
print(len(p_test), len(unalt))

unalt_tmp = unalt.copy()
manip_tmp = manip.copy()
print(len(unalt_tmp), len(manip_tmp))

t = int(0.1*1320)
print(t)

res = []

c_counts = defaultdict(int)
for _ in range(1320):
    skip_f = set([k for (k, _) in res])
    unalt_tmp = [(f, sorted(p, key=lambda t: t[-1], reverse=True)) for (f, p) in unalt_tmp if f not in skip_f]
    unalt_tmp = sorted(unalt_tmp, key=lambda t: t[-1][0], reverse=True)
    f, p = unalt_tmp[0]
    c, _ = p[0]
    res.append((f, c))
    c_counts[c] += 1
    
    if c_counts[c] == t:
        unalt_tmp = [(f, [(k, v) for (k, v) in p if k != c]) for (f, p) in unalt_tmp]
        
c_counts = defaultdict(int)
for _ in range(1320):
    skip_f = set([k for (k, _) in res])
    manip_tmp = [(f, sorted(p, key=lambda t: t[-1], reverse=True)) for (f, p) in manip_tmp if f not in skip_f]
    manip_tmp = sorted(manip_tmp, key=lambda t: t[-1][0], reverse=True)
    f, p = manip_tmp[0]
    c, _ = p[0]
    res.append((f, c))
    c_counts[c] += 1
    
    if c_counts[c] == t:
        manip_tmp = [(f, [(k, v) for (k, v) in p if k != c]) for (f, p) in manip_tmp]
        
print(len(res))

2640 1320
2640 1320
1320 1320
132
2640


In [11]:
pd.Series([v for (k, v) in res]).value_counts()

Sony-NEX-7              264
LG-Nexus-5x             264
Motorola-X              264
Samsung-Galaxy-Note3    264
Motorola-Nexus-6        264
iPhone-4s               264
HTC-1-M7                264
Motorola-Droid-Maxx     264
iPhone-6                264
Samsung-Galaxy-S4       264
dtype: int64

In [12]:
with open(os.path.join(out_dir, 'submit__unalt.csv'.lower()), 'w') as f:
    f.write('fname,camera\n')
    for fname, c in res:
        if '_unalt' in fname:
            f.write('%s,%s\n' % (fname, c))
        else:
            f.write('%s,%s\n' % (fname, 'no_class'))
            
            
with open(os.path.join(out_dir, 'submit__manip.csv'.lower()), 'w') as f:
    f.write('fname,camera\n')
    for fname, c in res:
        if '_manip' in fname:
            f.write('%s,%s\n' % (fname, c))
        else:
            f.write('%s,%s\n' % (fname, 'no_class'))
            
            
with open(os.path.join(out_dir, 'submit.csv'.lower()), 'w') as f:
    f.write('fname,camera\n')
    for fname, c in res:
        f.write('%s,%s\n' % (fname, c))