In [1]:
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("dark")
plt.rcParams['figure.figsize'] = 16, 12
import pandas as pd
from tqdm import tqdm_notebook
import io
from PIL import Image
from glob import glob
from collections import defaultdict
import os
import pickle
from io import BytesIO
import random

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.datasets import ImageFolder

from kaggle_camera_model_id_lib.utils import PechkaBot, ImageList, NpzFolder, NCrops, TifFolderExFiles
from kaggle_camera_model_id_lib.models import VggHead, StyleVggHead, IEEEfcn, ResNetFC, FatNet1,InceptionResNetV2
from kaggle_camera_model_id_lib.utils import jpg_compress, equalize_v_hist, hsv_convert
from kaggle_camera_model_id_lib.utils import scale_crop_pad, gamma_correction
from kaggle_camera_model_id_lib.utils import patch_quality_dich, n_random_crops, n_pseudorandom_crops
from kaggle_camera_model_id_lib.models import ResNetDense, ResNetDenseFC

import imgaug as ia
from imgaug import augmenters as iaa

In [2]:
val_path = '/home/mephistopheies/storage2/data/camera-model-id/val/'
test_path = '/home/mephistopheies/storage2/data/camera-model-id/raw/test/'
model_path = '/home/mephistopheies/storage2/data/camera-model-id/models/ResNetDenseFC34/256_random_aug_kaggle_10_pretrained_zfc_flickr_noval_nocenter/pseudo_phase_3/checkpoint.tar'
out_dir = '/home/mephistopheies/storage2/data/camera-model-id/submit/'
model_type = 'ResNetDenseFC34_pretrained_zfc'
n_classes = 10
crop_size = 256
step = 128
num_workers = 1

do_random_aug_kaggle = True
p_random_aug_kaggle = 0.5
do_hard_aug = False
p_hard_aug = 0.5

to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)

scale_05 = lambda img: scale_crop_pad(img, 0.5)
scale_08 = lambda img: scale_crop_pad(img, 0.8)
scale_15 = lambda img: scale_crop_pad(img, 1.5)
scale_20 = lambda img: scale_crop_pad(img, 2.0)
gamma_08 = lambda img: gamma_correction(img, 0.8)
gamma_12 = lambda img: gamma_correction(img, 1.2)
jpg_70 = lambda img: jpg_compress(img, (70, 71))
jpg_90 = lambda img: jpg_compress(img, (90, 91))
augs = [scale_05, scale_08, scale_15, scale_20, gamma_08, gamma_12, jpg_70, jpg_90]

blur = iaa.GaussianBlur(sigma=(0, 2))
sharpen = iaa.Sharpen(alpha=(0, 1), lightness=(0.5, 2))
emboss = iaa.Emboss(alpha=(0, 1), strength=(0, 2))
contrast_normalization = iaa.ContrastNormalization(alpha=(0.7, 1.3))
hard_aug = iaa.OneOf([blur, sharpen, emboss, contrast_normalization])
sometimes = iaa.Sometimes(p_hard_aug, hard_aug)


def random_aug_kaggle(img, p=0.5):
    if np.random.rand() < p:
        return random.choice(augs)(img)
    return img

def aug_train(img):
    if min(img.size) > crop_center_size:
        return random_flip(random_crop(center_crop(img)))
    return random_flip(random_crop(img))

def aug_optional(img):
    if do_hard_aug:
        img = Image.fromarray(sometimes.augment_image(np.array(img)))

    if do_random_aug_kaggle:
        img = random_aug_kaggle(img, p_random_aug_kaggle)
    return img

In [3]:
model_factory = {
    'Vgg19Head_E_2b_bn': lambda n_classes: VggHead(num_classes=n_classes, vgg_key='E_2b', load_vgg_bn=True, batch_norm=True),
    'Vgg19Head_E_3b_bn': lambda n_classes: VggHead(num_classes=n_classes, vgg_key='E_3b', load_vgg_bn=True, batch_norm=True),
    'Vgg19Head_E_bn': lambda n_classes: VggHead(num_classes=n_classes, load_vgg_bn=True, vgg_key='E', batch_norm=True),
    'Vgg11Head_A_bn': lambda n_classes: VggHead(num_classes=n_classes, load_vgg_bn=True, vgg_key='A', batch_norm=True),
    'Vgg11Head_A': lambda n_classes: VggHead(num_classes=n_classes, load_vgg_bn=True, vgg_key='A', batch_norm=False),
    'StyleVggHead_bn': lambda n_classes: StyleVggHead(num_classes=n_classes, load_vgg_bn=True),
    'IEEEfcn': lambda n_classes: IEEEfcn(n_classes),
    'resnet18fc_pretrained': lambda n_classes: ResNetFC(
        models.resnet.BasicBlock, [2, 2, 2, 2], num_classes=n_classes, load_resnet='resnet18'),
    'resnet18fc': lambda n_classes: ResNetFC(
        models.resnet.BasicBlock, [2, 2, 2, 2], num_classes=n_classes, load_resnet=None),
    'resnet18X_pretrained': lambda n_classes: ResNetX(
        models.resnet.BasicBlock, [2, 2, 2, 2], num_classes=n_classes, load_resnet='resnet18'),
    'InceptionResNetV2fc_5_10_4': lambda n_classes: InceptionResNetV2fc(
        num_classes=n_classes, nun_block35=5, num_block17=10, num_block8=4),
    'InceptionResNetV2fcSmall_5_10': lambda n_classes: InceptionResNetV2fcSmall(
        num_classes=n_classes, nun_block35=5, num_block17=10),
    'resnet34fc_pretrained': lambda n_classes: ResNetFC(
        models.resnet.BasicBlock, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet34'),
    'resnet34fc_pretrained_maxpool': lambda n_classes: ResNetFC(
        models.resnet.BasicBlock, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet34', pool_type='max'),
    'resnet50fc_pretrained': lambda n_classes: ResNetFC(
        models.resnet.Bottleneck, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet50'),
    'FatNet1': lambda n_classes: FatNet1(n_classes),
    'resnet34X_pretrained_maxpool': lambda n_classes: ResNetX(
        models.resnet.BasicBlock, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet34', pool_type='max'),
    'resnet50X_pretrained_maxpool': lambda n_classes: ResNetX(
        models.resnet.Bottleneck, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet50', pool_type='max'),
    'InceptionResNetV2': lambda n_classes: InceptionResNetV2(num_classes=n_classes),
    'ResNetDense34_pretrained': lambda n_classes: ResNetDense(
        models.resnet.BasicBlock, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet34'),
    'ResNetDenseFC34_pretrained': lambda n_classes: ResNetDenseFC(
        models.resnet.BasicBlock, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet34', 
        zero_first_center=False),
    'ResNetDenseFC34_pretrained_zfc': lambda n_classes: ResNetDenseFC(
        models.resnet.BasicBlock, [3, 4, 6, 3], num_classes=n_classes, load_resnet='resnet34', 
        zero_first_center=True)
}

model = model_factory[model_type](n_classes)
checkpoint = torch.load(model_path)
model.load_state_dict(checkpoint['model'])
loss_train = checkpoint['loss_train']
acc_train = checkpoint['acc_train']
loss_val = checkpoint['loss_val']
acc_val = checkpoint['acc_val']
class_to_idx = checkpoint['class_to_idx']
idx2class = dict([(v, k) for (k, v) in class_to_idx.items()])
print('Last state:\n  TLoss: %0.6f\n  TAcc:  %0.4f\n  VLoss: %0.6f\n  VAcc:  %0.4f' % 
    (loss_train[-1], acc_train[-1], loss_val[-1], acc_val[-1]))
del(checkpoint)
model = model.cuda()
model = model.eval()

Last state:
  TLoss: 0.023318
  TAcc:  0.9920
  VLoss: 0.000000
  VAcc:  0.0000


In [4]:
batch_size = 1

ds_test_0 = TifFolderExFiles(
    test_path,
    transform=transforms.Compose([
        transforms.Lambda(lambda img: NCrops(np.array(img), crop_size=crop_size, step=step)),
        transforms.Lambda(lambda crops: torch.stack([normalize(to_tensor(crop)) for crop in crops]))
    ]))

ds_test_90 = TifFolderExFiles(
    test_path,
    transform=transforms.Compose([
        transforms.Lambda(lambda img: NCrops(np.array(img.rotate(90)), crop_size=crop_size, step=step)),
        transforms.Lambda(lambda crops: torch.stack([normalize(to_tensor(crop)) for crop in crops]))
    ]))

ds_test_180 = TifFolderExFiles(
    test_path,
    transform=transforms.Compose([
        transforms.Lambda(lambda img: NCrops(np.array(img.rotate(180)), crop_size=crop_size, step=step)),
        transforms.Lambda(lambda crops: torch.stack([normalize(to_tensor(crop)) for crop in crops]))
    ]))

ds_test_270 = TifFolderExFiles(
    test_path,
    transform=transforms.Compose([
        transforms.Lambda(lambda img: NCrops(np.array(img.rotate(270)), crop_size=crop_size, step=step)),
        transforms.Lambda(lambda crops: torch.stack([normalize(to_tensor(crop)) for crop in crops]))
    ]))

ds_test_list = [ds_test_0, ds_test_90, ds_test_180, ds_test_270]


p_test_list = []
for ds_test in ds_test_list:

    test_loader = torch.utils.data.DataLoader(    
        ds_test,
        batch_size=batch_size, 
        shuffle=False,
        num_workers=num_workers, 
        pin_memory=True)

    res = []
    p_test = {}
    for X, Y, files in tqdm_notebook(test_loader, total=int(len(ds_test.imgs)/batch_size)):
        files = list(map(lambda s: os.path.basename(s), files))
        bs, ncrops, c, h, w = X.shape
        X = X.view(-1, c, h, w)
        X_var = Variable(X.cuda(), volatile=True)
        log_p = model(X_var)
        log_p = log_p.view(bs, ncrops, -1)
        p = F.softmax(log_p, dim=2) #.mean(dim=1)
        p = p.prod(dim=1).pow(1/p.shape[1])
        ix_argmax = p.max(1)[1].cpu().data.numpy()
        res.extend(list(zip(files, [idx2class[ix] for ix in ix_argmax])))

        for ix in range(len(files)):
            p_test[files[ix]] = [(idx2class[i], x) for (i, x) in enumerate(p[ix, :].cpu().data.numpy())]

    p_test_list.append(p_test)
    
    
p_test_manip = {}

for p_test_tmp in p_test_list:
    for fname, plist in p_test_tmp.items():
        if fname not in p_test_manip:
            p_test_manip[fname] = {}
            
        for c, p in plist:
            if c not in p_test_manip[fname]:
                p_test_manip[fname][c] = p
            else:
                p_test_manip[fname][c] = max(p_test_manip[fname][c], p)
                
for fname, plist in p_test.items():
    n = sum(p_test_manip[fname].values())
    p_test_manip[fname] = [(k, v/n) for (k, v) in p_test_manip[fname].items()]













In [5]:
del(model)

In [6]:
model_path = '/home/mephistopheies/storage2/data/camera-model-id/models/ResNetDenseFC34/256_pretrained_zfc_flickr_noval_nocenter/pseudo_phase_1/checkpoint.tar'
model_type = 'ResNetDenseFC34_pretrained_zfc'

model = model_factory[model_type](n_classes)
checkpoint = torch.load(model_path)
model.load_state_dict(checkpoint['model'])
loss_train = checkpoint['loss_train']
acc_train = checkpoint['acc_train']
loss_val = checkpoint['loss_val']
acc_val = checkpoint['acc_val']
class_to_idx = checkpoint['class_to_idx']
idx2class = dict([(v, k) for (k, v) in class_to_idx.items()])
print('Last state:\n  TLoss: %0.6f\n  TAcc:  %0.4f\n  VLoss: %0.6f\n  VAcc:  %0.4f' % 
    (loss_train[-1], acc_train[-1], loss_val[-1], acc_val[-1]))
del(checkpoint)
model = model.cuda()
model = model.eval()

Last state:
  TLoss: 0.010486
  TAcc:  0.9967
  VLoss: 0.000000
  VAcc:  0.0000


In [7]:
batch_size = 1

ds_test_0 = TifFolderExFiles(
    test_path,
    transform=transforms.Compose([
        transforms.Lambda(lambda img: NCrops(np.array(img), crop_size=crop_size, step=step)),
        transforms.Lambda(lambda crops: torch.stack([normalize(to_tensor(crop)) for crop in crops]))
    ]))

ds_test_90 = TifFolderExFiles(
    test_path,
    transform=transforms.Compose([
        transforms.Lambda(lambda img: NCrops(np.array(img.rotate(90)), crop_size=crop_size, step=step)),
        transforms.Lambda(lambda crops: torch.stack([normalize(to_tensor(crop)) for crop in crops]))
    ]))

ds_test_180 = TifFolderExFiles(
    test_path,
    transform=transforms.Compose([
        transforms.Lambda(lambda img: NCrops(np.array(img.rotate(180)), crop_size=crop_size, step=step)),
        transforms.Lambda(lambda crops: torch.stack([normalize(to_tensor(crop)) for crop in crops]))
    ]))

ds_test_270 = TifFolderExFiles(
    test_path,
    transform=transforms.Compose([
        transforms.Lambda(lambda img: NCrops(np.array(img.rotate(270)), crop_size=crop_size, step=step)),
        transforms.Lambda(lambda crops: torch.stack([normalize(to_tensor(crop)) for crop in crops]))
    ]))

ds_test_list = [ds_test_0, ds_test_90, ds_test_180, ds_test_270]


p_test_list = []
for ds_test in ds_test_list:

    test_loader = torch.utils.data.DataLoader(    
        ds_test,
        batch_size=batch_size, 
        shuffle=False,
        num_workers=num_workers, 
        pin_memory=True)

    res = []
    p_test = {}
    for X, Y, files in tqdm_notebook(test_loader, total=int(len(ds_test.imgs)/batch_size)):
        files = list(map(lambda s: os.path.basename(s), files))
        bs, ncrops, c, h, w = X.shape
        X = X.view(-1, c, h, w)
        X_var = Variable(X.cuda(), volatile=True)
        log_p = model(X_var)
        log_p = log_p.view(bs, ncrops, -1)
        p = F.softmax(log_p, dim=2) #.mean(dim=1)
        p = p.prod(dim=1).pow(1/p.shape[1])
        ix_argmax = p.max(1)[1].cpu().data.numpy()
        res.extend(list(zip(files, [idx2class[ix] for ix in ix_argmax])))

        for ix in range(len(files)):
            p_test[files[ix]] = [(idx2class[i], x) for (i, x) in enumerate(p[ix, :].cpu().data.numpy())]

    p_test_list.append(p_test)
    
    
p_test_unalt = {}

for p_test_tmp in p_test_list:
    for fname, plist in p_test_tmp.items():
        if fname not in p_test_unalt:
            p_test_unalt[fname] = {}
            
        for c, p in plist:
            if c not in p_test_unalt[fname]:
                p_test_unalt[fname][c] = p
            else:
                p_test_unalt[fname][c] = max(p_test_unalt[fname][c], p)
                
for fname, plist in p_test.items():
    n = sum(p_test_unalt[fname].values())
    p_test_unalt[fname] = [(k, v/n) for (k, v) in p_test_unalt[fname].items()]













In [8]:
files = list(p_test_manip.keys())

In [9]:
res = {}
for fname in files:
    if '_unalt' not in fname:
        continue
    res[fname] = (
        sorted(p_test_manip[fname], key=lambda t: t[1], reverse=True)[0][0],
        sorted(p_test_unalt[fname], key=lambda t: t[1], reverse=True)[0][0]
    )

In [10]:
n = 0
for fname, (cu, cm) in res.items():
    if cu != cm:
        print(fname, cu, cm)
        n += 1

img_30d223c_unalt.tif Sony-NEX-7 Motorola-Nexus-6
img_35a8d3f_unalt.tif Motorola-Droid-Maxx Motorola-Nexus-6
img_c65cb5e_unalt.tif Samsung-Galaxy-S4 Motorola-Droid-Maxx
img_c2ba760_unalt.tif iPhone-4s Samsung-Galaxy-Note3
img_cfab198_unalt.tif Samsung-Galaxy-S4 Motorola-Droid-Maxx
img_38bad84_unalt.tif Samsung-Galaxy-Note3 Motorola-Nexus-6
img_d369cd1_unalt.tif Motorola-Droid-Maxx Motorola-Nexus-6
img_f29f2a4_unalt.tif Motorola-X Samsung-Galaxy-Note3
img_4554b75_unalt.tif Samsung-Galaxy-S4 Motorola-Droid-Maxx
img_3b45a7c_unalt.tif Samsung-Galaxy-S4 Motorola-Droid-Maxx
img_176f838_unalt.tif Samsung-Galaxy-Note3 Motorola-Nexus-6
img_4f6e72b_unalt.tif Samsung-Galaxy-S4 Motorola-Droid-Maxx
img_19e85e5_unalt.tif Samsung-Galaxy-S4 Motorola-Droid-Maxx
img_c3a5247_unalt.tif Samsung-Galaxy-S4 Motorola-Droid-Maxx
img_38aecf4_unalt.tif Samsung-Galaxy-S4 Motorola-Droid-Maxx
img_380d69f_unalt.tif Motorola-Droid-Maxx Motorola-Nexus-6
img_7a5e2ff_unalt.tif Samsung-Galaxy-Note3 Motorola-Droid-Maxx
img

In [11]:
n

36

In [12]:
p_test = {}
res = []
for fname in files:
    if '_unalt' in fname:
        p_test[fname] = p_test_unalt[fname]
    else:
        p_test[fname] = p_test_manip[fname]
    res.append((fname, sorted(p_test[fname], key=lambda t: t[1], reverse=True)[0][0]))

In [13]:
pd.Series([v for (k, v) in res]).value_counts()

Sony-NEX-7              279
Samsung-Galaxy-Note3    272
iPhone-4s               270
iPhone-6                269
Samsung-Galaxy-S4       267
HTC-1-M7                267
Motorola-Nexus-6        259
Motorola-Droid-Maxx     255
LG-Nexus-5x             254
Motorola-X              248
dtype: int64

In [14]:
pd.Series([np.max([p for (_, p) in d]) for (fname, d) in p_test.items()]).describe(percentiles=[.05, 0.1, 0.15, 0.2, 0.25])

count    2640.000000
mean        0.967518
std         0.106615
min         0.269772
5%          0.750968
10%         0.932526
15%         0.983017
20%         0.995278
25%         0.998285
50%         0.999978
max         1.000000
dtype: float64

In [15]:
tmp = [(fname, np.sum([p for (_, p) in d])) for (fname, d) in p_test.items()]

In [17]:
[(k, v) for (k, v) in tmp if v < 0.99]

[]

In [18]:
df = []
for fname, pred in p_test.items():
    df.append([fname] + [dict(pred)[k] for (k, _) in sorted(class_to_idx.items(), key=lambda t: t[1])])
    
df = pd.DataFrame(
    df, 
    columns=['fname'] + [k for (k, _) in 
                         sorted(class_to_idx.items(), key=lambda t: t[1])])

df.to_csv('/home/mephistopheies/storage2/data/camera-model-id/tmp/p_test_m3_u1__lbXXX.csv', index=False)

df.set_index(['fname']).apply(lambda r: idx2class[np.argmax(r.values)], axis=1).value_counts()

Sony-NEX-7              279
Samsung-Galaxy-Note3    272
iPhone-4s               270
iPhone-6                269
Samsung-Galaxy-S4       267
HTC-1-M7                267
Motorola-Nexus-6        259
Motorola-Droid-Maxx     255
LG-Nexus-5x             254
Motorola-X              248
dtype: int64

In [12]:
pd.Series([np.max([p for (_, p) in d]) for (fname, d) in p_test.items()]).describe(percentiles=[.05, 0.1, 0.15, 0.2, 0.25])

count    2640.000000
mean        0.873223
std         0.194776
min         0.202297
5%          0.439338
10%         0.525856
15%         0.614119
20%         0.708488
25%         0.796914
50%         0.992835
max         1.000000
dtype: float64

In [13]:
uncertain = [fname for (fname, x) in [(fname, np.max([p for (_, p) in d])) for (fname, d) in p_test.items()] if x < 0.9]

In [14]:
len(uncertain)

864

In [19]:
for c, p in p_test['img_4b25b5e_unalt.tif']:
    print(c, '%0.6f' % p)

HTC-1-M7 0.000000
LG-Nexus-5x 0.174085
Motorola-Droid-Maxx 0.000001
Motorola-Nexus-6 0.824398
Motorola-X 0.000012
Samsung-Galaxy-Note3 0.001446
Samsung-Galaxy-S4 0.000001
Sony-NEX-7 0.000002
iPhone-4s 0.000015
iPhone-6 0.000041


In [15]:
tmp = [(fname, np.max([p for (_, p) in d])) for (fname, d) in p_test.items()]

In [23]:
[(c, p) for (c, p) in tmp if p < 0.06]

[('img_cfeded2_unalt.tif', 0.043961879)]

In [24]:
[p for (_, p) in p_test['img_cfeded2_unalt.tif']]

# p_test['img_cfeded2_unalt.tif']

[0.0,
 6.2477942e-05,
 1.7279393e-05,
 0.0,
 0.0,
 0.0,
 0.0,
 0.036598668,
 0.0,
 0.043961879]

In [19]:
tmp = [(fname, np.sum([p for (_, p) in d])) for (fname, d) in p_test.items()]

In [21]:
# [(c, p) for (c, p) in tmp if p < 0.999]

In [15]:
# [(fname, np.max([p for (_, p) in d])) for (fname, d) in p_test.items()]

In [15]:
p_test = {}
res = []
for fname in files:
    if '_unalt' in fname:
        p_test[fname] = p_test_unalt[fname]
    else:
        p_test[fname] = p_test_manip[fname]
    res.append((fname, sorted(p_test[fname], key=lambda t: t[1], reverse=True)[0][0]))

In [26]:
pd.Series([v for (k, v) in res]).value_counts()

ValueError: too many values to unpack (expected 2)

In [24]:
# pd.Series([v for (k, v) in res]).value_counts()

In [21]:
# -(pd.Series([v for (k, v) in res]).value_counts(normalize=True)*np.log(pd.Series([v for (k, v) in res]).value_counts(normalize=True))).sum()

In [22]:
# x = np.array([270, 270, 268, 267, 265, 264, 261, 260, 259, 256])
# x = x/x.sum()

In [23]:
# -(x*np.log(x)).sum()

In [18]:
# with open('/home/mephistopheies/storage2/data/camera-model-id/tmp/p_test_985_m1_u0.pkl', 'wb') as f:
#     pickle.dump(p_test, f)

In [30]:
df = []
for fname, pred in p_test.items():
    df.append([fname] + [dict(pred)[k] for (k, _) in sorted(class_to_idx.items(), key=lambda t: t[1])])
    
df = pd.DataFrame(
    df, 
    columns=['fname'] + [k for (k, _) in 
                         sorted(class_to_idx.items(), key=lambda t: t[1])])

# df.to_csv('/home/mephistopheies/storage2/data/camera-model-id/tmp/p_test_985_m1_u0__lb985.csv', index=False)

df.set_index(['fname']).apply(lambda r: idx2class[np.argmax(r.values)], axis=1).value_counts()

Sony-NEX-7              271
iPhone-6                268
Samsung-Galaxy-Note3    268
HTC-1-M7                267
iPhone-4s               267
Motorola-Droid-Maxx     264
Samsung-Galaxy-S4       262
LG-Nexus-5x             260
Motorola-Nexus-6        259
Motorola-X              254
dtype: int64

In [31]:
manip = [(f, c) for (f, c) in p_test.items() if '_manip' in f]
print(len(p_test), len(manip))

unalt = [(f, c) for (f, c) in p_test.items() if '_unalt' in f]
print(len(p_test), len(unalt))

unalt_tmp = unalt.copy()
manip_tmp = manip.copy()
print(len(unalt_tmp), len(manip_tmp))

t = int(0.1*1320)
print(t)

res = []

c_counts = defaultdict(int)
for _ in range(1320):
    skip_f = set([k for (k, _) in res])
    unalt_tmp = [(f, sorted(p, key=lambda t: t[-1], reverse=True)) for (f, p) in unalt_tmp if f not in skip_f]
    unalt_tmp = sorted(unalt_tmp, key=lambda t: t[-1][0], reverse=True)
    f, p = unalt_tmp[0]
    c, _ = p[0]
    res.append((f, c))
    c_counts[c] += 1
    
    if c_counts[c] == t:
        unalt_tmp = [(f, [(k, v) for (k, v) in p if k != c]) for (f, p) in unalt_tmp]
        
c_counts = defaultdict(int)
for _ in range(1320):
    skip_f = set([k for (k, _) in res])
    manip_tmp = [(f, sorted(p, key=lambda t: t[-1], reverse=True)) for (f, p) in manip_tmp if f not in skip_f]
    manip_tmp = sorted(manip_tmp, key=lambda t: t[-1][0], reverse=True)
    f, p = manip_tmp[0]
    c, _ = p[0]
    res.append((f, c))
    c_counts[c] += 1
    
    if c_counts[c] == t:
        manip_tmp = [(f, [(k, v) for (k, v) in p if k != c]) for (f, p) in manip_tmp]
        
print(len(res))

2640 1320
2640 1320
1320 1320
132
2640


In [32]:
pd.Series([v for (k, v) in res]).value_counts()

Motorola-Droid-Maxx     264
iPhone-6                264
HTC-1-M7                264
Motorola-Nexus-6        264
Sony-NEX-7              264
Motorola-X              264
iPhone-4s               264
Samsung-Galaxy-S4       264
LG-Nexus-5x             264
Samsung-Galaxy-Note3    264
dtype: int64

In [35]:
with open(os.path.join(out_dir, 'submit__unalt.csv'.lower()), 'w') as f:
    f.write('fname,camera\n')
    for fname, c in res:
        if '_unalt' in fname:
            f.write('%s,%s\n' % (fname, c))
        else:
            f.write('%s,%s\n' % (fname, 'no_class'))
            
            
with open(os.path.join(out_dir, 'submit__manip.csv'.lower()), 'w') as f:
    f.write('fname,camera\n')
    for fname, c in res:
        if '_manip' in fname:
            f.write('%s,%s\n' % (fname, c))
        else:
            f.write('%s,%s\n' % (fname, 'no_class'))
            
            
with open(os.path.join(out_dir, 'submit.csv'.lower()), 'w') as f:
    f.write('fname,camera\n')
    for fname, c in res:
        f.write('%s,%s\n' % (fname, c))

In [37]:
res

[('img_3c3e717_unalt.tif', 'iPhone-6'),
 ('img_bb2d84a_unalt.tif', 'iPhone-6'),
 ('img_30d0426_unalt.tif', 'iPhone-6'),
 ('img_f5e2df6_unalt.tif', 'iPhone-6'),
 ('img_6286b86_unalt.tif', 'iPhone-6'),
 ('img_919d93e_unalt.tif', 'iPhone-6'),
 ('img_68bcdc5_unalt.tif', 'iPhone-6'),
 ('img_67faf71_unalt.tif', 'iPhone-6'),
 ('img_d08131e_unalt.tif', 'iPhone-6'),
 ('img_83491b7_unalt.tif', 'iPhone-6'),
 ('img_2af26d1_unalt.tif', 'iPhone-6'),
 ('img_6ce4249_unalt.tif', 'iPhone-6'),
 ('img_06a4a46_unalt.tif', 'iPhone-6'),
 ('img_cef577b_unalt.tif', 'iPhone-6'),
 ('img_1f3f9fa_unalt.tif', 'iPhone-6'),
 ('img_de0c0c3_unalt.tif', 'iPhone-6'),
 ('img_79e7de8_unalt.tif', 'iPhone-6'),
 ('img_0ea63b1_unalt.tif', 'iPhone-6'),
 ('img_f388a9d_unalt.tif', 'iPhone-6'),
 ('img_e529e0b_unalt.tif', 'iPhone-6'),
 ('img_699e499_unalt.tif', 'iPhone-6'),
 ('img_2b1d005_unalt.tif', 'iPhone-6'),
 ('img_f0acf39_unalt.tif', 'iPhone-6'),
 ('img_de5a87f_unalt.tif', 'iPhone-6'),
 ('img_3346671_unalt.tif', 'iPhone-6'),


In [39]:
with open('/home/mephistopheies/storage2/data/camera-model-id/tmp/p_test_985_m1_u0.pkl', 'rb') as f:
    res1 = pickle.load(f)

In [42]:
res1 = dict([(fname, sorted(v, key=lambda t: t[1], reverse=True)[0][0]) for (fname, v) in res1.items()])

In [43]:
res = dict(res)

In [46]:
n = 0
for fname in res.keys():
    if res[fname] != res1[fname]:
        print(fname, res[fname], res1[fname])
        n += 1
print(n)

img_c1bf475_unalt.tif Sony-NEX-7 Motorola-X
img_eae4783_manip.tif Motorola-Droid-Maxx LG-Nexus-5x
img_8d0d869_manip.tif LG-Nexus-5x Samsung-Galaxy-S4
img_29be29f_manip.tif Motorola-Nexus-6 LG-Nexus-5x
img_e247e1f_manip.tif Motorola-X iPhone-6
img_38bad84_unalt.tif Motorola-X Motorola-Nexus-6
img_4581af5_manip.tif Motorola-X iPhone-6
img_69fdd9f_manip.tif Samsung-Galaxy-Note3 Samsung-Galaxy-S4
img_d1b249e_unalt.tif LG-Nexus-5x Motorola-Nexus-6
img_0ac09ab_manip.tif Motorola-Nexus-6 Samsung-Galaxy-Note3
img_e1c9b03_unalt.tif LG-Nexus-5x Motorola-Nexus-6
img_cf68c4b_manip.tif Samsung-Galaxy-S4 iPhone-6
img_bc5dd45_manip.tif Motorola-X Samsung-Galaxy-Note3
img_2753367_manip.tif Motorola-Droid-Maxx LG-Nexus-5x
img_31d5743_manip.tif Motorola-Nexus-6 HTC-1-M7
img_36a3d54_manip.tif Motorola-X Samsung-Galaxy-Note3
img_c94755c_manip.tif Motorola-Nexus-6 Sony-NEX-7
img_1c59671_manip.tif Motorola-Nexus-6 HTC-1-M7
img_ee9d0cb_manip.tif Motorola-X Sony-NEX-7
img_1073b1f_manip.tif Motorola-Nexus-6 So