In [1]:
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Global plotting defaults for the notebook: seaborn's "dark" style.
sns.set_style("dark")
# Default matplotlib figure size, given as (width, height) in inches.
plt.rcParams['figure.figsize'] = 16, 12
import pandas as pd
from tqdm import tqdm_notebook
import io
from PIL import Image
from glob import glob
from collections import defaultdict
import os
import pickle
from io import BytesIO
import random
import shutil

In [2]:
def loader(path):
    """Read the image stored at ``path`` and return it converted to RGB.

    The file is opened in binary mode and handed to ``Image.open``; the
    ``convert('RGB')`` call runs while the file handle is still open.
    """
    with open(path, 'rb') as handle:
        return Image.open(handle).convert('RGB')

In [3]:
# Gather every voter's prediction for each test file:
#   votes[<fname>] -> list of predicted camera labels, one entry per CSV row
#   that mentions that file, appended in the order the CSVs are read.
# sorted() makes the voter order reproducible; glob() on its own returns the
# paths in arbitrary, filesystem-dependent order.
files = sorted(glob('/home/mephistopheies/storage2/data/camera-model-id/pseudo_labels/voters_phase_3/*.csv'))

votes = defaultdict(list)
for fname in tqdm_notebook(files):
    df = pd.read_csv(fname)
    # Walk the two needed columns directly instead of df.iterrows(), which
    # builds a full Series object for every row.
    for image_name, camera in zip(df['fname'], df['camera']):
        votes[image_name].append(camera)




In [4]:
# conf[<fname>] = number of distinct camera labels the voters assigned to that
# file (1 means every vote recorded for the file agrees).
conf = {image_name: len(set(labels)) for image_name, labels in votes.items()}

In [5]:
# How many files received 1, 2, 3, ... distinct labels from the voters.
pd.Series(conf).value_counts()

1    2477
2     111
3      40
4      11
5       1
dtype: int64

In [27]:
# For files whose most common label received exactly 4 votes, find the voter
# CSV that produced the runner-up label and count how often each CSV ends up
# in that dissenting role.
# NOTE(review): the constant 4 presumably means "all but one of 5 voters
# agree" -- confirm against len(files).
mdiff = defaultdict(int)
for fname, v in votes.items():
    # Compute the label histogram once; value_counts() sorts by count, descending.
    counts = pd.Series(v).value_counts()

    # .iloc[0] reads the top count by position. The index of `counts` holds
    # camera labels, so `counts[0]` would rely on the deprecated
    # integer-key-as-position fallback of Series.__getitem__.
    if counts.iloc[0] != 4:
        continue

    # No runner-up label exists when every vote agrees; index[1] would raise.
    if len(counts) < 2:
        continue

    # zip(files, v) pairs the i-th vote with the i-th CSV, which is only
    # meaningful when this file has one vote per CSV. Equal length is a
    # necessary check; it still assumes each CSV lists each file exactly once.
    if len(v) != len(files):
        continue

    diff = counts.index[1]

    m = [mname for (mname, c) in zip(files, v) if c == diff][0]

    mdiff[m] += 1

mdiff

In [29]:
# Keep only files on which the voters produced at most `t` distinct labels,
# and label each kept file with its most frequent vote.
t = 1

confident_files = [image_name for image_name, n_labels in conf.items() if n_labels <= t]
pseudo_labels = [
    (image_name, pd.Series(votes[image_name]).value_counts().index[0])
    for image_name in confident_files
]
print(len(pseudo_labels))

2477


In [10]:
# Disabled alternative selection rule, kept for reference: accept a file when
# the normalized share of its most common label is at least `t`, instead of
# thresholding on the number of distinct labels.
# NOTE(review): the number printed under this cell presumably comes from an
# earlier run made while this code was still active -- confirm before citing it.
# t = 0.8

# pseudo_labels = []
# for fname, v in votes.items():
#     s = pd.Series(v).value_counts(normalize=True)
#     c, p = list(s.items())[0]
#     if p >= t:
#         pseudo_labels.append((fname, c))

# print(len(pseudo_labels))

2565


In [30]:
# Number of selected pseudo-labelled files per camera label.
assigned_labels = [label for _, label in pseudo_labels]
pd.Series(assigned_labels).value_counts()

iPhone-4s               260
HTC-1-M7                257
Samsung-Galaxy-S4       256
iPhone-6                254
Samsung-Galaxy-Note3    252
Motorola-X              252
Sony-NEX-7              247
Motorola-Droid-Maxx     241
Motorola-Nexus-6        232
LG-Nexus-5x             226
dtype: int64

In [31]:
# Export the pseudo-labelled test images as one .npz archive per image, grouped
# into one sub-directory per predicted camera label.
# NOTE(review): the target directory is named "manip", but no filename filter
# is applied here, so every entry of pseudo_labels is written -- confirm that
# this is intended.
test_dir = '/home/mephistopheies/storage2/data/camera-model-id/raw/test/no_class/'
output_dir = '/home/mephistopheies/storage2/data/camera-model-id/pseudo_labels/manip/phase_3/'

# Start from an empty output tree so archives from an earlier run cannot linger.
if os.path.isdir(output_dir):
    shutil.rmtree(output_dir)

os.makedirs(output_dir)

for fname, c in tqdm_notebook(pseudo_labels):
    output_dir_tmp = os.path.join(output_dir, c)
    # exist_ok=True replaces the separate isdir() check: the per-label
    # directory is created on first use and silently reused afterwards.
    os.makedirs(output_dir_tmp, exist_ok=True)
    img = np.array(loader(os.path.join(test_dir, fname)))
    # np.savez appends ".npz" to the name when it is missing, so the archive
    # is stored as "<fname>.npz" with the pixel array under the key "data".
    np.savez(os.path.join(output_dir_tmp, fname), data=img)




In [32]:
# Export only the pseudo-labelled test images whose file name contains
# "_unalt", as one .npz archive per image, grouped into one sub-directory per
# predicted camera label.
# NOTE(review): the target directory says "phase_1" while the votes were read
# from "voters_phase_3" -- confirm the phase number is not a leftover.
test_dir = '/home/mephistopheies/storage2/data/camera-model-id/raw/test/no_class/'
output_dir = '/home/mephistopheies/storage2/data/camera-model-id/pseudo_labels/unalt/phase_1/'

# Start from an empty output tree so archives from an earlier run cannot linger.
if os.path.isdir(output_dir):
    shutil.rmtree(output_dir)

os.makedirs(output_dir)

for fname, c in tqdm_notebook(pseudo_labels):
    if '_unalt' not in fname:
        continue
    output_dir_tmp = os.path.join(output_dir, c)
    # exist_ok=True replaces the separate isdir() check: the per-label
    # directory is created on first use and silently reused afterwards.
    os.makedirs(output_dir_tmp, exist_ok=True)
    img = np.array(loader(os.path.join(test_dir, fname)))
    # np.savez appends ".npz" to the name when it is missing, so the archive
    # is stored as "<fname>.npz" with the pixel array under the key "data".
    np.savez(os.path.join(output_dir_tmp, fname), data=img)


