In [18]:
%load_ext autoreload
%autoreload 2

from repo import *
import numpy as np
import seaborn as sns
from sklearn.metrics import roc_curve, auc

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
# Fetch the train/test file listings, then keep the training labels as a sorted array.
train_files, test_files = train_test_all_ids('/data/home/shruti/voxceleb/vgg/leaders/')
all_lbls = np.array(sorted(train_files.keys()))

In [20]:

# repo with leaders + FaceForensics (FF) originals + imposters + Google (GG) + steve_b + jen_l
def get_train_repo(params):
    """Create a ``Repo``, register every training identity on it and build it.

    Parameters
    ----------
    params : dict
        Must contain ``'bsfldr'``, ``'frames'`` and ``'steps'``; these are
        forwarded positionally to ``Repo`` together with ``np.mean`` and 5000.
        NOTE(review): callers also pass ``'istrain'`` and ``'N_comp'``, but
        this function never reads them -- confirm whether that is intended.

    Returns
    -------
    The ``Repo`` instance after ``build_repo_noKDD()`` has been called on it.

    The file lists are looked up in the module-level ``train_files`` mapping.
    """
    leader_lbls = ['bo', 'br', 'bs', 'cb', 'dt', 'ew', 'hc', 'jb', 'kh', 'pb']
    imposter_lbls = ['bo_imposter', 'bs_imposter', 'ew_imposter', 'dt_imposter',
                     'hc_imposter', 'jb_imposter']
    ff_lbls = ['FF_{0:03d}'.format(idx) for idx in range(1000)]
    gg_lbls = ['GG_{0:02d}'.format(idx + 1) for idx in range(28)]

    # (labels, last add_ids argument) in registration order. The two numeric
    # add_ids arguments are presumably a fractional range of each file -- TODO confirm.
    label_groups = [
        (leader_lbls + imposter_lbls, 1),
        (ff_lbls, 0.5),
        (gg_lbls, 1),
        (['steve_b'], 1),
        (['jen_l'], 1),
    ]

    out_repo = Repo(params['bsfldr'], params['frames'], params['steps'], np.mean, 5000)
    for lbls, upper in label_groups:
        out_repo.add_ids({lbl: train_files[lbl] for lbl in lbls}, 0, upper)

    out_repo.build_repo_noKDD()
    return out_repo


def save_results(vgg_repo, fab_repo, test_ids, r_or_f, N_f, out_file, n1, n2, parallel=False):
    """Score the same test files against both repos and write the paired labels to CSV.

    Parameters
    ----------
    vgg_repo, fab_repo : objects exposing ``dist_using_dict``.
    test_ids : iterable of identity labels, looked up in ``test_files[r_or_f]``.
    r_or_f : key into the module-level ``test_files`` mapping; it is also
        stored in the ``RealFake`` column of both result frames.
    N_f : when positive, at most the first ``N_f`` files of each identity are
        used; any other value keeps all of them.
    out_file : destination passed to ``DataFrame.to_csv``.
    n1, n2, parallel : forwarded unchanged to ``dist_using_dict``.

    Returns
    -------
    None. The CSV is indexed by ``fileName`` and holds ``predLabel_vgg``,
    ``actualLabel_vgg``, ``RealFake_vgg`` and ``predLabel_fab``.
    """
    # Per-identity file lists, copied so the shared test_files entries stay untouched.
    test_dict = {}
    for label in test_ids:
        candidates = test_files[r_or_f][label]
        truncate = N_f > 0 and len(candidates) > N_f
        test_dict[label] = (candidates[:N_f] if truncate else candidates).copy()

    # Run both repos on the identical file selection and tag each result frame.
    scored = []
    for repo in (vgg_repo, fab_repo):
        frame = repo.dist_using_dict(test_dict, n1, n2, parallel=parallel, dist='cosine')
        frame['RealFake'] = r_or_f
        scored.append(frame)

    # Inner join keeps only the files that both repos produced a row for.
    vgg_indexed, fab_indexed = [frame.set_index('fileName') for frame in scored]
    paired = vgg_indexed.join(fab_indexed, how='inner', lsuffix='_vgg', rsuffix='_fab')

    kept_columns = ['predLabel_vgg', 'actualLabel_vgg', 'RealFake_vgg', 'predLabel_fab']
    paired[kept_columns].copy().to_csv(out_file)

# No Time

### Repo of Leaders + FaceForensics Originals + Imposters + Google

In [4]:
# One repo per feature folder (VGG and FabNet), both with frames=1 and steps=1.
vgg_repo = get_train_repo({
    'istrain': True,
    'bsfldr': '/data/home/shruti/voxceleb/vgg/leaders/',
    'frames': 1,
    'steps': 1,
    'N_comp': 512,
})

fab_repo = get_train_repo({
    'istrain': True,
    'bsfldr': '/data/home/shruti/voxceleb/fabnet/leaders/',
    'frames': 1,
    'steps': 1,
    'N_comp': -1,
})


Number of labels 1046
pca components: 512, explained variance 0.9971816861629749
Build repo time: 156.539
Number of labels 1046
Build repo time: 1.842


In [5]:
# Score the FF_000..FF_999 ids against both repos, once per real/fake split.
ff_ids = ['FF_{0:03d}'.format(f) for f in range(1000)]
for ff_split, ff_csv in (('real', 'FF_real_notime.csv'),
                         ('fake', 'FF_fake_notime.csv')):
    save_results(vgg_repo, fab_repo, ff_ids, ff_split, -1, ff_csv, 0.5, 1, parallel=True)

[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   38.2s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:  3.1min
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:  7.3min
[Parallel(n_jobs=16)]: Done 768 tasks      | elapsed: 13.0min
[Parallel(n_jobs=16)]: Done 1000 out of 1000 | elapsed: 16.5min finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   16.2s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:  2.0min
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:  4.8min
[Parallel(n_jobs=16)]: Done 768 tasks      | elapsed:  8.6min
[Parallel(n_jobs=16)]: Done 1000 out of 1000 | elapsed: 11.0min finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   22.5s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:  2.6min
[Pa

In [6]:
# change the location of the base folder in the repo
vgg_repo.bs_fldr = '/data/home/shruti/voxceleb/vgg/leaders/'
fab_repo.bs_fldr = '/data/home/shruti/voxceleb/fabnet/leaders/'

gg_ids = ['GG_{0:02d}'.format(f+1) for f in range(28)]
ldr_ids = ['bo','bs','dt','ew','hc','jb']

# (test ids, real/fake key, max files per id, output csv) -- executed in this order.
notime_runs = [
    (gg_ids, 'real', -1, 'GG_real_notime.csv'),
    (gg_ids, 'fake', -1, 'GG_fake_notime.csv'),
    (ldr_ids, 'real', 10, 'ldr_real_notime.csv'),
    (ldr_ids, 'fake', 10, 'ldr_fake_notime.csv'),
    (['jen_l'], 'real', -1, 'SJ_real_notime.csv'),
    (['steve_b'], 'fake', -1, 'SJ_fake_notime.csv'),
]
for run_ids, run_split, run_limit, run_csv in notime_runs:
    save_results(vgg_repo, fab_repo, run_ids, run_split, run_limit, run_csv, 0, 1, parallel=True)

[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:  1.1min
[Parallel(n_jobs=16)]: Done  55 out of  55 | elapsed:  2.1min finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   46.9s
[Parallel(n_jobs=16)]: Done  55 out of  55 | elapsed:  1.5min finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:  1.3min
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:  8.0min
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed: 19.2min
[Parallel(n_jobs=16)]: Done 768 tasks      | elapsed: 33.9min
[Parallel(n_jobs=16)]: Done 1218 tasks      | elapsed: 53.7min
[Parallel(n_jobs=16)]: Done 1378 out of 1378 | elapsed: 60.4min finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks     

In [7]:
# Fraction of rows on which the VGG and FabNet predicted labels are equal (GG, no-time run).
real_df = pd.read_csv('GG_real_notime.csv')
print(np.count_nonzero(np.asarray(real_df['predLabel_vgg']) == np.asarray(real_df['predLabel_fab'])) / len(real_df))

fake_df = pd.read_csv('GG_fake_notime.csv')
print(np.count_nonzero(np.asarray(fake_df['predLabel_vgg']) == np.asarray(fake_df['predLabel_fab'])) / len(fake_df))

0.8689181895288766
0.3688259422775572


In [8]:
# Fraction of rows on which the VGG and FabNet predicted labels are equal (leaders, no-time run).
real_df = pd.read_csv('ldr_real_notime.csv')
print(np.count_nonzero(np.asarray(real_df['predLabel_vgg']) == np.asarray(real_df['predLabel_fab'])) / len(real_df))

fake_df = pd.read_csv('ldr_fake_notime.csv')
print(np.count_nonzero(np.asarray(fake_df['predLabel_vgg']) == np.asarray(fake_df['predLabel_fab'])) / len(fake_df))

0.9980912924465488
0.10455858521806524


In [9]:
# Fraction of rows on which the VGG and FabNet predicted labels are equal (FF, no-time run).
real_df = pd.read_csv('FF_real_notime.csv')
print(np.count_nonzero(np.asarray(real_df['predLabel_vgg']) == np.asarray(real_df['predLabel_fab'])) / len(real_df))

fake_df = pd.read_csv('FF_fake_notime.csv')
print(np.count_nonzero(np.asarray(fake_df['predLabel_vgg']) == np.asarray(fake_df['predLabel_fab'])) / len(fake_df))

0.9567479836843941
0.03477880190721823


In [10]:
# Fraction of rows on which the VGG and FabNet predicted labels are equal (SJ, no-time run).
real_df = pd.read_csv('SJ_real_notime.csv')
print(np.count_nonzero(np.asarray(real_df['predLabel_vgg']) == np.asarray(real_df['predLabel_fab'])) / len(real_df))

fake_df = pd.read_csv('SJ_fake_notime.csv')
print(np.count_nonzero(np.asarray(fake_df['predLabel_vgg']) == np.asarray(fake_df['predLabel_fab'])) / len(fake_df))

0.7718133017923756
0.023508137432188065


### Compressed Repo of Leaders + FaceForensics Originals + Imposters + Google

In [11]:
# change the location of the base folder in the repo
vgg_repo.bs_fldr = '/data/home/shruti/voxceleb/vgg/compression/'
fab_repo.bs_fldr = '/data/home/shruti/voxceleb/fabnet/compression/'

# Same leader ids for both splits, capped at 10 files per id.
ldr_ids = ['bo','bs','dt','ew','hc','jb']
for ldr_split, ldr_csv in (('real', 'ldr_real_notime_comp.csv'),
                           ('fake', 'ldr_fake_notime_comp.csv')):
    save_results(vgg_repo, fab_repo, ldr_ids, ldr_split, 10, ldr_csv, 0, 1, parallel=True)



[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:  1.4min
[Parallel(n_jobs=16)]: Done  60 out of  60 | elapsed:  3.1min finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   57.0s
[Parallel(n_jobs=16)]: Done  60 out of  60 | elapsed:  2.3min finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   56.3s
[Parallel(n_jobs=16)]: Done  57 out of  57 | elapsed:  2.0min finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   42.5s
[Parallel(n_jobs=16)]: Done  57 out of  57 | elapsed:  1.5min finished


# Metric Learning

### Repo of Leaders + FaceForensics Originals + Imposters + Google

In [22]:

# VGG repo with frames=100 / steps=5; FabNet repo read from the fabnet_metric folder with frames=1 / steps=1.
vgg_repo_metric = get_train_repo({
    'istrain': True,
    'bsfldr': '/data/home/shruti/voxceleb/vgg/leaders/',
    'frames': 100,
    'steps': 5,
    'N_comp': 512,
})
fab_repo_metric = get_train_repo({
    'istrain': True,
    'bsfldr': '/data/home/shruti/voxceleb/fabnet_metric/',
    'frames': 1,
    'steps': 1,
    'N_comp': -1,
})


Number of labels 1046
Build repo time: 8.468
Number of labels 1046
Build repo time: 2.134


In [14]:
# Leader ids against the metric repos, capped at 10 files per id, once per real/fake split.
ldr_ids = ['bo','bs','dt','ew','hc','jb']
for ldr_split, ldr_csv in (('real', 'ldr_real_100.csv'),
                           ('fake', 'ldr_fake_100.csv')):
    save_results(vgg_repo_metric, fab_repo_metric, ldr_ids, ldr_split, 10, ldr_csv, 0, 1, parallel=True)

[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   36.3s
[Parallel(n_jobs=16)]: Done  60 out of  60 | elapsed:  1.3min finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    8.7s
[Parallel(n_jobs=16)]: Done  60 out of  60 | elapsed:   24.0s finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   25.0s
[Parallel(n_jobs=16)]: Done  57 out of  57 | elapsed:   46.5s finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    8.3s
[Parallel(n_jobs=16)]: Done  57 out of  57 | elapsed:   16.2s finished


In [15]:
# Fraction of rows on which the VGG and FabNet predicted labels are equal (leaders, metric run).
real_df = pd.read_csv('ldr_real_100.csv')
print(np.count_nonzero(np.asarray(real_df['predLabel_vgg']) == np.asarray(real_df['predLabel_fab'])) / len(real_df))

fake_df = pd.read_csv('ldr_fake_100.csv')
print(np.count_nonzero(np.asarray(fake_df['predLabel_vgg']) == np.asarray(fake_df['predLabel_fab'])) / len(fake_df))


0.9999189364461738
0.05114328237045055


In [16]:
# Score the FF_000..FF_999 ids against the metric repos, once per real/fake split.
ff_ids = ['FF_{0:03d}'.format(f) for f in range(1000)]
for ff_split, ff_csv in (('real', 'FF_real_100.csv'),
                         ('fake', 'FF_fake_100.csv')):
    save_results(vgg_repo_metric, fab_repo_metric, ff_ids, ff_split, -1, ff_csv, 0.5, 1, parallel=True)

[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   15.1s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:  1.4min
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:  3.5min
[Parallel(n_jobs=16)]: Done 768 tasks      | elapsed:  6.3min
[Parallel(n_jobs=16)]: Done 1000 out of 1000 | elapsed:  8.1min finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    4.3s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:   28.5s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:  1.1min
[Parallel(n_jobs=16)]: Done 768 tasks      | elapsed:  2.1min
[Parallel(n_jobs=16)]: Done 1000 out of 1000 | elapsed:  2.7min finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   13.5s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:  1.4min
[Pa

In [None]:
# Point both metric repos at their base folders before scoring the GG ids.
vgg_repo_metric.bs_fldr = '/data/home/shruti/voxceleb/vgg/leaders/'
fab_repo_metric.bs_fldr = '/data/home/shruti/voxceleb/fabnet_metric/'


gg_ids = ['GG_{0:02d}'.format(f+1) for f in range(28)]
for gg_split, gg_csv in (('real', 'GG_real_100.csv'),
                         ('fake', 'GG_fake_100.csv')):
    save_results(vgg_repo_metric, fab_repo_metric, gg_ids, gg_split, -1, gg_csv, 0, 1, parallel=True)

In [23]:
# 'jen_l' is scored from the real split and 'steve_b' from the fake split.
for sj_ids, sj_split, sj_csv in ((['jen_l'], 'real', 'SJ_real_100.csv'),
                                 (['steve_b'], 'fake', 'SJ_fake_100.csv')):
    save_results(vgg_repo_metric, fab_repo_metric, sj_ids, sj_split, -1, sj_csv, 0, 1, parallel=True)

[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   1 out of   1 | elapsed:   19.4s finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   1 out of   1 | elapsed:   12.4s finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   1 out of   1 | elapsed:   11.9s finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done   1 out of   1 | elapsed:    8.3s finished


In [24]:

# Fraction of rows on which the VGG and FabNet predicted labels are equal (GG, metric run).
real_df = pd.read_csv('GG_real_100.csv')
print(np.count_nonzero(np.asarray(real_df['predLabel_vgg']) == np.asarray(real_df['predLabel_fab'])) / len(real_df))

fake_df = pd.read_csv('GG_fake_100.csv')
print(np.count_nonzero(np.asarray(fake_df['predLabel_vgg']) == np.asarray(fake_df['predLabel_fab'])) / len(fake_df))


0.9328758949880668
0.370629271003006


In [25]:

# Fraction of rows on which the VGG and FabNet predicted labels are equal (FF, metric run).
real_df = pd.read_csv('FF_real_100.csv')
print(np.count_nonzero(np.asarray(real_df['predLabel_vgg']) == np.asarray(real_df['predLabel_fab'])) / len(real_df))

fake_df = pd.read_csv('FF_fake_100.csv')
print(np.count_nonzero(np.asarray(fake_df['predLabel_vgg']) == np.asarray(fake_df['predLabel_fab'])) / len(fake_df))


0.9936903101191115
0.04180426114821066


In [26]:
# Fraction of rows on which the VGG and FabNet predicted labels are equal (SJ, metric run).
real_df = pd.read_csv('SJ_real_100.csv')
print(np.count_nonzero(np.asarray(real_df['predLabel_vgg']) == np.asarray(real_df['predLabel_fab'])) / len(real_df))

fake_df = pd.read_csv('SJ_fake_100.csv')
print(np.count_nonzero(np.asarray(fake_df['predLabel_vgg']) == np.asarray(fake_df['predLabel_fab'])) / len(fake_df))

1.0
0.054373522458628844


### Compressed Repo of Leaders + FaceForensics Originals + Imposters + Google

In [11]:
# Switch both metric repos to the compression folders, then score the leader ids.
vgg_repo_metric.bs_fldr = '/data/home/shruti/voxceleb/vgg/compression/'
fab_repo_metric.bs_fldr = '/data/home/shruti/voxceleb/fabnet_metric_compression/'

ldr_ids = ['bo','bs','dt','ew','hc','jb']
for ldr_split, ldr_csv in (('real', 'ldr_real_100_comp.csv'),
                           ('fake', 'ldr_fake_100_comp.csv')):
    save_results(vgg_repo_metric, fab_repo_metric, ldr_ids, ldr_split, 10, ldr_csv, 0, 1, parallel=True)




[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:  1.2min
[Parallel(n_jobs=16)]: Done  60 out of  60 | elapsed:  2.9min finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    8.2s
[Parallel(n_jobs=16)]: Done  60 out of  60 | elapsed:   20.5s finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   58.0s
[Parallel(n_jobs=16)]: Done  57 out of  57 | elapsed:  2.0min finished
[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    7.0s
[Parallel(n_jobs=16)]: Done  57 out of  57 | elapsed:   14.6s finished
