In [1]:
%load_ext autoreload
%autoreload 2

from test_repo_biometric import *
import numpy as np
import seaborn as sns
from sklearn.metrics import roc_curve, auc

In [3]:
# Collect the per-identity train/test file listings, then keep the sorted
# train identities as a NumPy array of labels.
train_files, test_files = train_test_all_ids('/data/home/shruti/voxceleb/vgg/leaders/')
all_lbls = np.array(sorted(train_files.keys()))

In [8]:
# repo for only leaders
def get_train_repo1(params):
    """Build the leaders-only repository through ``build_repo``.

    Reads the notebook-level ``train_files`` / ``test_files`` globals.

    Parameters
    ----------
    params : dict
        Uses the keys 'istrain', 'bsfldr', 'frames' and 'steps'.

    Returns
    -------
    dict
        When ``params['istrain']`` is truthy: the result of ``build_repo`` for
        the ten training identities. Otherwise a dict with the keys 'real' and
        'fake', each holding the ``build_repo`` result for the six test
        identities of that split.
    """
    def pool_opts():
        # A new options dict for every build_repo call, as in the original code.
        return {'frames': params['frames'], 'step': params['steps'],
                'pool_func': np.mean, 'N': 5000, 'n1': 0, 'n2': 1}

    if params['istrain']:
        train_ids = ['bo', 'br', 'bs', 'cb', 'dt', 'ew', 'hc', 'jb', 'kh', 'pb']
        return build_repo(params['bsfldr'],
                          {name: train_files[name] for name in train_ids},
                          pool_opts(),
                          in_dict={})

    test_ids = ['bo', 'bs', 'dt', 'ew', 'hc', 'jb']
    out_repo = {'real': {}, 'fake': {}}
    for split in ('real', 'fake'):
        out_repo[split] = build_repo(params['bsfldr'],
                                     {name: test_files[split][name] for name in test_ids},
                                     pool_opts(),
                                     in_dict=out_repo[split])
    return out_repo

# repo with leaders + ff original 
def get_train_repo2(params):
    """Build the repository of leaders plus the FF_000..FF_999 identities.

    Reads the notebook-level ``train_files`` / ``test_files`` globals.

    Parameters
    ----------
    params : dict
        Uses the keys 'istrain', 'bsfldr', 'frames' and 'steps'.

    Returns
    -------
    dict
        Training mode: the ``build_repo`` result after adding the FF ids
        (n1=0, n2=0.5) and then the ten leaders (n1=0, n2=1).
        Test mode: ``{'real': ..., 'fake': ...}`` where each split is built
        from the six test leaders (n1=0, n2=1) followed by the FF ids
        (n1=0.5, n2=1).
    """
    ff_ids = ['FF_{0:03d}'.format(idx) for idx in range(1000)]

    def pool_opts(n1, n2):
        # A new options dict for every build_repo call, as in the original code.
        return {'frames': params['frames'], 'step': params['steps'],
                'pool_func': np.mean, 'N': 5000, 'n1': n1, 'n2': n2}

    if params['istrain']:
        train_leaders = ['bo', 'br', 'bs', 'cb', 'dt', 'ew', 'hc', 'jb', 'kh', 'pb']
        out_repo = {}
        # ff original first, then leaders
        for ids, n2 in ((ff_ids, 0.5), (train_leaders, 1)):
            out_repo = build_repo(params['bsfldr'],
                                  {name: train_files[name] for name in ids},
                                  pool_opts(0, n2),
                                  in_dict=out_repo)
        return out_repo

    test_leaders = ['bo', 'bs', 'dt', 'ew', 'hc', 'jb']
    out_repo = {'real': {}, 'fake': {}}
    for split in ('real', 'fake'):
        # leaders first, then ff original
        for ids, n1 in ((test_leaders, 0), (ff_ids, 0.5)):
            out_repo[split] = build_repo(params['bsfldr'],
                                         {name: test_files[split][name] for name in ids},
                                         pool_opts(n1, 1),
                                         in_dict=out_repo[split])
    return out_repo


# repo with leaders + ff original + imposters 
def get_train_repo3(params):
    """Build the repository of leaders, their imposters and the FF identities.

    Reads the notebook-level ``train_files`` / ``test_files`` globals.

    Parameters
    ----------
    params : dict
        Uses the keys 'istrain', 'bsfldr', 'frames' and 'steps'.

    Returns
    -------
    dict
        Training mode: the ``build_repo`` result after adding the leaders and
        '*_imposter' ids (n1=0, n2=1) and then FF_000..FF_999 (n1=0, n2=0.5).
        Test mode: ``{'real': ..., 'fake': ...}`` where each split is built
        from the six test leaders (n1=0, n2=1) followed by the FF ids
        (n1=0.5, n2=1).
    """
    ff_ids = ['FF_{0:03d}'.format(idx) for idx in range(1000)]

    def pool_opts(n1, n2):
        # A new options dict for every build_repo call, as in the original code.
        return {'frames': params['frames'], 'step': params['steps'],
                'pool_func': np.mean, 'N': 5000, 'n1': n1, 'n2': n2}

    if params['istrain']:
        leaders_and_imposters = ['bo', 'br', 'bs', 'cb', 'dt', 'ew', 'hc', 'jb', 'kh', 'pb',
                                 'bo_imposter', 'bs_imposter', 'ew_imposter', 'dt_imposter',
                                 'hc_imposter', 'jb_imposter']
        out_repo = {}
        # leaders (+ imposters) first, then ff original
        for ids, n2 in ((leaders_and_imposters, 1), (ff_ids, 0.5)):
            out_repo = build_repo(params['bsfldr'],
                                  {name: train_files[name] for name in ids},
                                  pool_opts(0, n2),
                                  in_dict=out_repo)
        return out_repo

    test_leaders = ['bo', 'bs', 'dt', 'ew', 'hc', 'jb']
    out_repo = {'real': {}, 'fake': {}}
    for split in ('real', 'fake'):
        # leaders first, then ff original
        for ids, n1 in ((test_leaders, 0), (ff_ids, 0.5)):
            out_repo[split] = build_repo(params['bsfldr'],
                                         {name: test_files[split][name] for name in ids},
                                         pool_opts(n1, 1),
                                         in_dict=out_repo[split])
    return out_repo

# repo with leaders + ff original + imposters (training repo only: no 'istrain' switch, no test branch)
def get_train_repo3_notest(params):
    """Build only the training repository of leaders, imposters and FF ids.

    Reads the notebook-level ``train_files`` global. ``params['istrain']`` is
    never consulted here.

    Parameters
    ----------
    params : dict
        Uses the keys 'bsfldr', 'frames' and 'steps'.

    Returns
    -------
    The ``build_repo`` result after adding the leaders and '*_imposter' ids
    (n1=0, n2=1) and then FF_000..FF_999 (n1=0, n2=0.5).
    """
    leaders_and_imposters = ['bo', 'br', 'bs', 'cb', 'dt', 'ew', 'hc', 'jb', 'kh', 'pb',
                             'bo_imposter', 'bs_imposter', 'ew_imposter', 'dt_imposter',
                             'hc_imposter', 'jb_imposter']
    ff_ids = ['FF_{0:03d}'.format(idx) for idx in range(1000)]

    out_repo = {}
    # leaders (+ imposters) first, then ff original
    for ids, n2 in ((leaders_and_imposters, 1), (ff_ids, 0.5)):
        out_repo = build_repo(params['bsfldr'],
                              {name: train_files[name] for name in ids},
                              {'frames': params['frames'], 'step': params['steps'],
                               'pool_func': np.mean, 'N': 5000, 'n1': 0, 'n2': n2},
                              in_dict=out_repo)
    return out_repo

# repo with leaders + ff original + imposters + Google
def get_train_repo4(params):
    """Build the largest training repository and wrap it with ``build_kdd``.

    Identities are added through ``build_repo`` in this order: leaders and
    '*_imposter' ids, FF_000..FF_999 (n2=0.5), GG_01..GG_28, 'steve_b', 'jen_l'
    (all others with n2=1; n1 is always 0). Reads the notebook-level
    ``train_files`` and ``all_lbls`` globals. ``params['istrain']`` is never
    consulted here.

    Parameters
    ----------
    params : dict
        Uses the keys 'bsfldr', 'frames' and 'steps'.

    Returns
    -------
    The value of ``build_kdd(out_repo, all_lbls, k=1)``.
    """
    stages = [
        # leaders (+ imposters)
        (['bo', 'br', 'bs', 'cb', 'dt', 'ew', 'hc', 'jb', 'kh', 'pb',
          'bo_imposter', 'bs_imposter', 'ew_imposter', 'dt_imposter',
          'hc_imposter', 'jb_imposter'], 1),
        # ff original
        (['FF_{0:03d}'.format(idx) for idx in range(1000)], 0.5),
        # Google
        (['GG_{0:02d}'.format(idx) for idx in range(1, 29)], 1),
        # steve + jennifer, added in two separate build_repo calls
        (['steve_b'], 1),
        (['jen_l'], 1),
    ]

    out_repo = {}
    for ids, n2 in stages:
        out_repo = build_repo(params['bsfldr'],
                              {name: train_files[name] for name in ids},
                              {'frames': params['frames'], 'step': params['steps'],
                               'pool_func': np.mean, 'N': 5000, 'n1': 0, 'n2': n2},
                              in_dict=out_repo)

    return build_kdd(out_repo, all_lbls, k=1)

def get_result_df(in_train_repo, in_test_repo):
    """Score a prebuilt test repository against a training repository.

    For each split ('real', then 'fake') and each identity key of
    ``in_test_repo[split]`` in sorted order, ``get_repo_dist`` is called and
    its two outputs become the 'dist' and 'predLabel' columns.

    Parameters
    ----------
    in_train_repo : passed through unchanged to ``get_repo_dist``.
    in_test_repo : dict
        ``{'real': {id: ...}, 'fake': {id: ...}}``.

    Returns
    -------
    pandas.DataFrame
        Columns 'dist', 'predLabel', 'actualLabel', 'RealFake'; real rows
        first, index renumbered from 0.
    """
    per_split = []
    for split in ('real', 'fake'):
        split_repo = in_test_repo[split]
        frames = []
        for cur_id in np.sort(list(split_repo.keys())):
            best_dist, best_label = get_repo_dist(in_train_repo, split_repo[cur_id])
            cur_df = pd.DataFrame(data=best_dist, columns=['dist'])
            cur_df['predLabel'] = best_label
            cur_df['actualLabel'] = cur_id
            cur_df['RealFake'] = split
            frames.append(cur_df)
        per_split.append(pd.concat(frames, ignore_index=True, sort=False))

    return pd.concat(per_split, ignore_index=True, sort=False)


def get_result_df_withnotest(in_train_repo, params):
    """Score every test identity, building its test repo one id at a time.

    For each split ('real', then 'fake') and each key of the notebook-level
    ``test_files[split]`` in sorted order, a one-identity repo is built with
    ``build_repo`` (n1=0.5 when the id contains 'FF_', else 0; n2=1) and
    passed to ``get_kdd_dist``.

    Parameters
    ----------
    in_train_repo : passed through unchanged to ``get_kdd_dist``.
    params : dict
        Uses the keys 'bsfldr', 'frames' and 'steps'; 'istrain' is not read.

    Returns
    -------
    pandas.DataFrame
        Columns 'predLabel', 'actualLabel', 'RealFake'; real rows first,
        index renumbered from 0.
    """
    per_split = []
    for split in ('real', 'fake'):
        frames = []
        for cur_id in np.sort(list(test_files[split].keys())):
            single_repo = build_repo(params['bsfldr'],
                                     {cur_id: test_files[split][cur_id]},
                                     {'frames': params['frames'], 'step': params['steps'],
                                      'pool_func': np.mean, 'N': 5000,
                                      'n1': 0.5 if 'FF_' in cur_id else 0, 'n2': 1},
                                     in_dict={})

            cur_df = pd.DataFrame(data=get_kdd_dist(in_train_repo, single_repo[cur_id]),
                                  columns=['predLabel'])
            cur_df['actualLabel'] = cur_id
            cur_df['RealFake'] = split
            frames.append(cur_df)
        per_split.append(pd.concat(frames, ignore_index=True, sort=False))

    return pd.concat(per_split, ignore_index=True, sort=False)


def get_result_compression(in_train_repo, params):
    """Score the six test leaders, building each test repo on the fly.

    Works like ``get_result_df_withnotest`` but is restricted to the ids
    'bo', 'bs', 'dt', 'ew', 'hc', 'jb' and scores with ``get_repo_dist``
    (which also yields the 'dist' column) instead of ``get_kdd_dist``.

    Parameters
    ----------
    in_train_repo : passed through unchanged to ``get_repo_dist``.
    params : dict
        Uses the keys 'bsfldr', 'frames' and 'steps'; 'istrain' is not read.

    Returns
    -------
    pandas.DataFrame
        Columns 'dist', 'predLabel', 'actualLabel', 'RealFake'; real rows
        first, index renumbered from 0.
    """
    per_split = []
    for split in ('real', 'fake'):
        frames = []
        for cur_id in np.sort(['bo', 'bs', 'dt', 'ew', 'hc', 'jb']):
            single_repo = build_repo(params['bsfldr'],
                                     {cur_id: test_files[split][cur_id]},
                                     {'frames': params['frames'], 'step': params['steps'],
                                      'pool_func': np.mean, 'N': 5000,
                                      # none of the six ids contains 'FF_', so n1 is 0 here
                                      'n1': 0.5 if 'FF_' in cur_id else 0, 'n2': 1},
                                     in_dict={})

            best_dist, best_label = get_repo_dist(in_train_repo, single_repo[cur_id])
            cur_df = pd.DataFrame(data=best_dist, columns=['dist'])
            cur_df['predLabel'] = best_label
            cur_df['actualLabel'] = cur_id
            cur_df['RealFake'] = split
            frames.append(cur_df)
        per_split.append(pd.concat(frames, ignore_index=True, sort=False))

    return pd.concat(per_split, ignore_index=True, sort=False)


# VGG

### Repo with only leaders

In [None]:
# Leaders-only experiment on the vgg/leaders folder (frames=100, steps=5):
# build the train repo and the real/fake test repos, then score them.
repo_leaders = get_train_repo1({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':100, 'steps':5})
test_emb = get_train_repo1({'istrain':False, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':100, 'steps':5})
vgg_repo1 = get_result_df(repo_leaders, test_emb)


In [None]:
# Rebind both names to empty lists so the repos built above are no longer referenced.
repo_leaders = []; test_emb = []

### Repo of Leaders plus Face Forensics Original

In [None]:
# Leaders + FF-original experiment on the vgg/leaders folder (frames=100, steps=5).
repo_leaders_ff = get_train_repo2({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':100, 'steps':5})
test_emb_ff = get_train_repo2({'istrain':False, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':100, 'steps':5})
vgg_repo2 = get_result_df(repo_leaders_ff, test_emb_ff)


In [None]:
# Rebind both names to empty lists so the repos built above are no longer referenced.
repo_leaders_ff = []; test_emb_ff = []

### Repo of Leaders + Face Forensics Original + Imposters

In [None]:
# Leaders + FF-original + imposters experiment on the vgg/leaders folder (frames=100, steps=5).
repo = get_train_repo3({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':100, 'steps':5})
test_emb = get_train_repo3({'istrain':False, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':100, 'steps':5})
vgg_repo3 = get_result_df(repo, test_emb)


### Repo of Leaders + Face Forensics Original + Imposter + Google

In [None]:

# Leaders + FF-original + imposters + Google repo (frames=100, steps=5), saved to CSV.
# Test ids are built one at a time inside get_result_df_withnotest.
# NOTE: neither get_train_repo4 nor get_result_df_withnotest reads 'istrain',
# so the True passed to the second call has no effect.
repo = get_train_repo4({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':100, 'steps':5})
vgg_repo4 = get_result_df_withnotest(repo, {'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':100, 'steps':5})
vgg_repo4.to_csv('results100_vgg.csv')


### Compressed Repo of Leaders + Face Forensics Original + Imposter + Google

In [None]:

# Train repo from vgg/leaders, test ids read from vgg/compression (frames=100, steps=5).
# NOTE(review): get_train_repo4 returns the build_kdd output, but
# get_result_compression scores with get_repo_dist while the other
# get_train_repo4 cells score with get_kdd_dist -- confirm get_repo_dist
# accepts this repo type.
repo = get_train_repo4({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':100, 'steps':5})
vgg_results_comp = get_result_compression(repo, {'istrain':False, 'bsfldr':'/data/home/shruti/voxceleb/vgg/compression/', 
                                'frames':100, 'steps':5})
vgg_results_comp.to_csv('results100_vgg_comp.csv')


### 25 Repo of Leaders + Face Forensics Original + Imposter + Google

In [None]:

# Same repo/test pipeline as the frames=100 run, with frames=25 (steps=5); saved to CSV.
repo = get_train_repo4({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':25, 'steps':5})
vgg_results_25 = get_result_df_withnotest(repo, {'istrain':False, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':25, 'steps':5})
vgg_results_25.to_csv('results25_vgg.csv')


### 50 Repo of Leaders + Face Forensics Original + Imposter + Google

In [None]:

# Same repo/test pipeline as the frames=100 run, with frames=50 (steps=5); saved to CSV.
repo = get_train_repo4({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':50, 'steps':5})
vgg_results_50 = get_result_df_withnotest(repo, {'istrain':False, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':50, 'steps':5})
vgg_results_50.to_csv('results50_vgg.csv')


### 75 Repo of Leaders + Face Forensics Original + Imposter + Google

In [None]:

# Same repo/test pipeline as the frames=100 run, with frames=75 (steps=5); saved to CSV.
repo = get_train_repo4({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':75, 'steps':5})
vgg_results_75 = get_result_df_withnotest(repo, {'istrain':False, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':75, 'steps':5})
vgg_results_75.to_csv('results75_vgg.csv')


### No Time Repo of Leaders + Face Forensics Original + Imposter + Google

In [None]:

# Same repo/test pipeline as the frames=100 run, with frames=1 (steps=5); saved to CSV.
repo = get_train_repo4({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':1, 'steps':5})
vgg_results_1 = get_result_df_withnotest(repo, {'istrain':False, 'bsfldr':'/data/home/shruti/voxceleb/vgg/leaders/', 
                                'frames':1, 'steps':5})
vgg_results_1.to_csv('results1_vgg.csv')


# Fabnet Metric

### Repo with only leaders

In [None]:
# Leaders-only experiment on the fabnet_metric folder (frames=1, steps=1).
repo_leaders = get_train_repo1({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric/', 
                                'frames':1, 'steps':1})
test_emb = get_train_repo1({'istrain':False, 'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric/', 
                                'frames':1, 'steps':1})
fabnet_repo1 = get_result_df(repo_leaders, test_emb)


In [None]:
# Rebind both names to empty lists so the repos built above are no longer referenced.
repo_leaders = []; test_emb = []

### Repo of Leaders plus Face Forensics Original

In [None]:
# Leaders + FF-original experiment on the fabnet_metric folder (frames=1, steps=1).
repo_leaders_ff = get_train_repo2({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric/', 
                                'frames':1, 'steps':1})
test_emb_ff = get_train_repo2({'istrain':False, 'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric/', 
                                'frames':1, 'steps':1})
fabnet_repo2 = get_result_df(repo_leaders_ff, test_emb_ff)


In [None]:
# Rebind both names to empty lists so the repos built above are no longer referenced.
repo_leaders_ff = []; test_emb_ff = []

### Repo of Leaders + Face Forensics Original + Imposters

In [None]:
# Leaders + FF-original + imposters train repo on the fabnet_metric folder
# (frames=1, steps=1); test ids are built one at a time inside
# get_result_df_withnotest.
# NOTE(review): this repo comes straight from build_repo (get_train_repo3_notest
# does not call build_kdd) yet is scored with get_kdd_dist -- confirm that
# get_kdd_dist accepts a plain build_repo result.
repo = get_train_repo3_notest({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric/', 
                                'frames':1, 'steps':1})
fabnet_repo3 = get_result_df_withnotest(repo, {'istrain':False, 'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric/', 
                                'frames':1, 'steps':1})


In [None]:
# Rebind both names to empty lists so earlier repos are no longer referenced.
repo = []; test_emb = []

### Repo of Leaders + Face Forensics Original + Imposters + Google

In [None]:
# Leaders + FF-original + imposters + Google repo on the fabnet_metric folder
# (frames=1, steps=1); results saved to CSV.
repo = get_train_repo4({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric/', 
                                'frames':1, 'steps':1})
fabnet_repo4 = get_result_df_withnotest(repo, {'istrain':False, 'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric/', 
                                'frames':1, 'steps':1})
fabnet_repo4.to_csv('results100_fabnet.csv')


### Compressed Repo of Leaders + Face Forensics Original + Imposter + Google

In [None]:

# Train repo from fabnet_metric, test ids read from fabnet_metric_compression
# (frames=1, steps=1); results saved to CSV.
# NOTE(review): get_train_repo4 returns the build_kdd output, but
# get_result_compression scores with get_repo_dist while the other
# get_train_repo4 cells score with get_kdd_dist -- confirm get_repo_dist
# accepts this repo type.
repo = get_train_repo4({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric/', 
                                'frames':1, 'steps':1})
fabnet_results_comp = get_result_compression(repo, {'istrain':False, 'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric_compression/', 
                                'frames':1, 'steps':1})

fabnet_results_comp.to_csv('results100_fabnet_comp.csv')


### 25 Repo of Leaders + Face Forensics Original + Imposter + Google

In [None]:

# Same pipeline as the fabnet_metric run, reading from the fabnet_metric25 folder
# (frames=1, steps=1); results saved to CSV.
repo = get_train_repo4({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric25/', 
                                'frames':1, 'steps':1})
fabnet_results_25 = get_result_df_withnotest(repo, {'istrain':False, 
                                                    'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric25/', 
                                'frames':1, 'steps':1})

fabnet_results_25.to_csv('results25_fabnet.csv')


### 50 Repo of Leaders + Face Forensics Original + Imposter + Google

In [None]:

# Same pipeline as the fabnet_metric run, reading from the fabnet_metric50 folder
# (frames=1, steps=1); results saved to CSV.
repo = get_train_repo4({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric50/', 
                                'frames':1, 'steps':1})
fabnet_results_50 = get_result_df_withnotest(repo, {'istrain':False, 
                                                    'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric50/', 
                                'frames':1, 'steps':1})

fabnet_results_50.to_csv('results50_fabnet.csv')


### 75 Repo of Leaders + Face Forensics Original + Imposter + Google

In [None]:

# Same pipeline as the fabnet_metric run, reading from the fabnet_metric75 folder
# (frames=1, steps=1); results saved to CSV.
repo = get_train_repo4({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric75/', 
                                'frames':1, 'steps':1})
fabnet_results_75 = get_result_df_withnotest(repo, {'istrain':False, 
                                                    'bsfldr':'/data/home/shruti/voxceleb/fabnet_metric75/', 
                                'frames':1, 'steps':1})

fabnet_results_75.to_csv('results75_fabnet.csv')


### No Time Repo of Leaders + Face Forensics Original + Imposter + Google

In [None]:

# Same pipeline, reading from the fabnet/leaders folder with frames=1 and steps=5
# (the other Fabnet cells use steps=1); results saved to CSV.
# NOTE: neither get_train_repo4 nor get_result_df_withnotest reads 'istrain',
# so the True passed to the second call has no effect.
repo = get_train_repo4({'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/fabnet/leaders/', 
                                'frames':1, 'steps':5})
fabnet_results_1 = get_result_df_withnotest(repo, {'istrain':True, 'bsfldr':'/data/home/shruti/voxceleb/fabnet/leaders/', 
                                'frames':1, 'steps':5})
fabnet_results_1.to_csv('results1_fabnet.csv')


# results

In [None]:
# with only leaders in repository
# if we had leaders in the repo and we get the face-swap of leaders dist
# how we do on Leader Faceswap
def get_auc(in_result_df, real_nm, fake_nm):
    """Area under the ROC curve for separating real from fake rows by 'dist'.

    Parameters
    ----------
    in_result_df : pandas.DataFrame
        Needs the columns 'actualLabel', 'RealFake' and 'dist'.
    real_nm, fake_nm : list-like
        Identities whose 'real' (resp. 'fake') rows are included.

    Returns
    -------
    The value of ``auc(fpr, tpr)``, with real rows labelled 1 and fake rows
    labelled 0 and the 'dist' column used as the score.
    """
    is_real = np.logical_and(in_result_df['actualLabel'].isin(real_nm),
                             in_result_df['RealFake']=='real')
    is_fake = np.logical_and(in_result_df['actualLabel'].isin(fake_nm),
                             in_result_df['RealFake']=='fake')
    real_scores = in_result_df.loc[is_real, 'dist']
    fake_scores = in_result_df.loc[is_fake, 'dist']

    # real rows come first in both arrays, so labels and scores stay aligned
    y_true = np.concatenate((np.ones((len(real_scores), )),
                             np.zeros((len(fake_scores), ))))
    y_score = np.concatenate((real_scores, fake_scores))

    fpr, tpr, _ = roc_curve(y_true, y_score)
    return auc(fpr, tpr)

def get_dis_accuracy(df2, real_nm, fake_nm):
    """Identity-based accuracy on the real and fake rows of a result frame.

    A real row counts as correct when 'predLabel' equals 'actualLabel'; a fake
    row counts as correct when they differ.

    Parameters
    ----------
    df2 : pandas.DataFrame
        Needs the columns 'actualLabel', 'predLabel' and 'RealFake'.
    real_nm, fake_nm : list-like
        Identities whose 'real' (resp. 'fake') rows are evaluated.

    Returns
    -------
    (real_acc, fake_acc) : tuple of float
        Fractions in [0, 1]. A value is ``nan`` when no row matches the
        corresponding selection (the previous code raised ZeroDivisionError).
    """
    def _fraction(names, split, count_matches):
        # One mask per split, reused for both label columns.
        rows = df2['actualLabel'].isin(names) & (df2['RealFake'] == split)
        actual = df2.loc[rows, 'actualLabel']
        predicted = df2.loc[rows, 'predLabel']
        if len(actual) == 0:
            return float('nan')
        # Compare as pandas Series (same index on both sides), then count on
        # the plain ndarray rather than calling np.argwhere on a Series.
        same = (actual == predicted).values
        hits = np.count_nonzero(same if count_matches else ~same)
        return hits / len(actual)

    real_acc = _fraction(real_nm, 'real', True)
    fake_acc = _fraction(fake_nm, 'fake', False)

    return real_acc, fake_acc


def get_four_measures(cur_df1, cur_df2):
    """Count row-wise agreement patterns between two aligned result frames.

    Rows are compared by position, so both frames must have the same length.
    The returned 2x4 array is filled as follows (all other cells stay 0):

    - [0, 0]: both predictions equal and cur_df2 is correct
    - [0, 1]: both predictions equal and both are incorrect
    - [1, 1]: predictions differ and both are incorrect
    - [1, 2]: predictions differ, cur_df1 correct, cur_df2 incorrect
    - [1, 3]: predictions differ, cur_df1 incorrect, cur_df2 correct

    Parameters
    ----------
    cur_df1, cur_df2 : pandas.DataFrame
        Each needs the columns 'predLabel' and 'actualLabel'.

    Returns
    -------
    numpy.ndarray of shape (2, 4), dtype float.
    """
    pred1 = np.array(cur_df1['predLabel'])
    pred2 = np.array(cur_df2['predLabel'])
    truth1 = np.array(cur_df1['actualLabel'])
    truth2 = np.array(cur_df2['actualLabel'])

    agree = pred1 == pred2
    differ = pred1 != pred2
    right1 = pred1 == truth1
    wrong1 = pred1 != truth1
    right2 = pred2 == truth2
    wrong2 = pred2 != truth2

    out_res = np.zeros((2, 4))
    out_res[0, 0] = np.count_nonzero(agree & right2)
    out_res[0, 1] = np.count_nonzero(agree & wrong2 & wrong1)
    out_res[1, 1] = np.count_nonzero(differ & wrong2 & wrong1)
    out_res[1, 2] = np.count_nonzero(differ & wrong2 & right1)
    out_res[1, 3] = np.count_nonzero(differ & right2 & wrong1)

    return out_res


def get_discrepany_accuracy(df1, df2, real_nm, fake_nm):
    """Accumulate ``get_four_measures`` counts per identity for real and fake rows.

    For each identity, the matching rows of ``df1`` and ``df2`` are truncated
    to the shorter of the two selections before being compared by position.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Each needs the columns 'actualLabel', 'RealFake' and 'predLabel'.
    real_nm, fake_nm : list-like
        Identities evaluated on their 'real' (resp. 'fake') rows.

    Returns
    -------
    dict
        ``{'real': ndarray(2, 4), 'fake': ndarray(2, 4)}`` with the summed
        counts; see ``get_four_measures`` for the cell layout.
    """
    out_result = {'real': np.zeros((2, 4)), 'fake': np.zeros((2, 4))}

    for split, names in (('real', real_nm), ('fake', fake_nm)):
        for k in names:
            rows1 = df1[(df1['actualLabel']==k) & (df1['RealFake']==split)]
            rows2 = df2[(df2['actualLabel']==k) & (df2['RealFake']==split)]
            # keep the same number of rows from both frames
            n = np.min([len(rows1), len(rows2)])
            out_result[split] = out_result[split] + get_four_measures(rows1.iloc[:n, :].copy(),
                                                                      rows2.iloc[:n, :].copy())

    return out_result
    
    
def plot_dist(in_result_df, real_nm, fake_nm, ax, title):
    """Overlay the log-'dist' distributions of real and fake rows on ``ax``.

    Parameters
    ----------
    in_result_df : pandas.DataFrame
        Needs the columns 'actualLabel', 'RealFake' and 'dist'.
    real_nm, fake_nm : list-like
        Identities whose 'real' (resp. 'fake') rows are plotted.
    ax : axes object passed to seaborn; also receives title, x-limits and legend.
    title : str
        Title set on ``ax``.
    """
    # NOTE(review): sns.distplot is deprecated in newer seaborn releases;
    # confirm the installed version before upgrading.
    for split, names in (('real', real_nm), ('fake', fake_nm)):
        chosen = np.logical_and(in_result_df['actualLabel'].isin(names),
                                in_result_df['RealFake']==split)
        sns.distplot(np.log(in_result_df.loc[chosen, 'dist']),
                     bins=np.log(np.linspace(0.1, 1, 30)), label=split, ax=ax)

    ax.set_title(title)
    ax.set_xlim([np.log(0.1), np.log(1)])
    ax.legend()
    

## Discrepancy Results

In [None]:
def print_result(in_res_o):
    """Print the discrepancy counts as percentages.

    Args:
        in_res_o: dict with 'real' and 'fake' entries, each a 2-D count array indexed as
            [predicted row, measure column] (the shape produced by
            ``get_discrepany_accuracy``). The input is not modified.

    Each entry is scaled so that its own elements sum to 100. A summary line is printed
    first, followed by a header and one row per (truth -> prediction) combination.
    """
    # Percentages are taken relative to the total of each ground-truth class separately.
    pct = {truth: in_res_o[truth] * 100 / np.sum(in_res_o[truth])
           for truth in ('real', 'fake')}

    # "True Negative" adds the fake->fake entries of columns 2, 1 and 3 (column 0 excluded).
    true_negative = pct['fake'][1, 2] + pct['fake'][1, 1] + pct['fake'][1, 3]
    print('\t True Positve: {0:3.2f} True Negative: {1:3.2f}\n'.format(pct['real'][0, 0],
                                                                    true_negative))

    print('\t \t VGG_c-Fab_c \t VGG_i-Fab_i \t VGG_c-Fab_i \t VGG_i-Fab_c \n')

    row_fmt = '{0} \t {1:3.2f} \t\t {2:3.2f} \t\t {3:3.2f} \t\t {4:3.2f} \n'
    table_rows = (('Real->Real', 'real', 0),
                  ('Real->Fake', 'real', 1),
                  ('Fake->Real', 'fake', 0),
                  ('Fake->Fake', 'fake', 1))
    for row_label, truth, pred_row in table_rows:
        values = [pct[truth][pred_row, col] for col in range(4)]
        print(row_fmt.format(row_label, *values))

### (VGG + Fabnet Metric)

In [None]:
# Discrepancy tables for the repo-4 results, one table per test subset.
print('Repo Leaders + FF original + Imposters + Google \n')

_leader_ids = ['bo','bs','dt','ew','hc','jb']
_ff_ids = ['FF_{0:03d}'.format(f) for f in range(1000)]
_gg_ids = ['GG_{0:02d}'.format(f+1) for f in range(28)]

for _title, _real_ids, _fake_ids in (
        ('Leaders', _leader_ids, _leader_ids),
        ('FaceForensics', _ff_ids, _ff_ids),
        ('Google', _gg_ids, _gg_ids),
        ('ALL', test_files['real'].keys(), test_files['fake'].keys())):
    print('\t {0} \n '.format(_title))
    print_result(get_discrepany_accuracy(vgg_repo4, fabnet_repo4, _real_ids, _fake_ids))


### No Time (VGG + Fabnet Metric)

In [None]:
# Discrepancy tables for the vgg_results_1 / fabnet_results_1 pair, one per test subset.
_leader_ids = ['bo','bs','dt','ew','hc','jb']
_ff_ids = ['FF_{0:03d}'.format(f) for f in range(1000)]
_gg_ids = ['GG_{0:02d}'.format(f+1) for f in range(28)]

for _title, _real_ids, _fake_ids in (
        ('Leaders', _leader_ids, _leader_ids),
        ('FaceForensics', _ff_ids, _ff_ids),
        ('Google', _gg_ids, _gg_ids),
        ('ALL', test_files['real'].keys(), test_files['fake'].keys())):
    print('\t {0} \n '.format(_title))
    print_result(get_discrepany_accuracy(vgg_results_1, fabnet_results_1, _real_ids, _fake_ids))


### Compression results

In [None]:

# Discrepancy table for the compression results. Only the leaders subset is evaluated.
# The other subsets used to be disabled by wrapping their calls in a bare triple-quoted
# string; as the last expression of the cell that string was echoed as the cell output.
# They are now ordinary comments, so the cell prints only the leaders table.
for _title, _real_ids, _fake_ids in (
        ('Leaders', ['bo','bs','dt','ew','hc','jb'], ['bo','bs','dt','ew','hc','jb']),
        # ('FaceForensics', ['FF_{0:03d}'.format(f) for f in range(1000)],
        #                   ['FF_{0:03d}'.format(f) for f in range(1000)]),
        # ('Google', ['GG_{0:02d}'.format(f+1) for f in range(28)],
        #            ['GG_{0:02d}'.format(f+1) for f in range(28)]),
        # ('ALL', test_files['real'].keys(), test_files['fake'].keys()),
):
    print('\t {0} \n '.format(_title))
    print_result(get_discrepany_accuracy(vgg_results_comp, fabnet_results_comp,
                                         _real_ids, _fake_ids))


### 25 VGG + Fabnet Metric

In [None]:
# Discrepancy tables for the vgg_results_25 / fabnet_results_25 pair, one per test subset.
print('Repo Leaders + FF original + Imposters + Google \n')

_leader_ids = ['bo','bs','dt','ew','hc','jb']
_ff_ids = ['FF_{0:03d}'.format(f) for f in range(1000)]
_gg_ids = ['GG_{0:02d}'.format(f+1) for f in range(28)]

for _title, _real_ids, _fake_ids in (
        ('Leaders', _leader_ids, _leader_ids),
        ('FaceForensics', _ff_ids, _ff_ids),
        ('Google', _gg_ids, _gg_ids),
        ('ALL', test_files['real'].keys(), test_files['fake'].keys())):
    print('\t {0} \n '.format(_title))
    print_result(get_discrepany_accuracy(vgg_results_25, fabnet_results_25, _real_ids, _fake_ids))


### 50 VGG + Fabnet Metric

In [None]:
# Discrepancy tables for the vgg_results_50 / fabnet_results_50 pair, one per test subset.
print('Repo Leaders + FF original + Imposters + Google \n')

_leader_ids = ['bo','bs','dt','ew','hc','jb']
_ff_ids = ['FF_{0:03d}'.format(f) for f in range(1000)]
_gg_ids = ['GG_{0:02d}'.format(f+1) for f in range(28)]

for _title, _real_ids, _fake_ids in (
        ('Leaders', _leader_ids, _leader_ids),
        ('FaceForensics', _ff_ids, _ff_ids),
        ('Google', _gg_ids, _gg_ids),
        ('ALL', test_files['real'].keys(), test_files['fake'].keys())):
    print('\t {0} \n '.format(_title))
    print_result(get_discrepany_accuracy(vgg_results_50, fabnet_results_50, _real_ids, _fake_ids))


### 75 VGG + Fabnet Metric

In [None]:
# Discrepancy tables for the vgg_results_75 / fabnet_results_75 pair, one per test subset.
print('Repo Leaders + FF original + Imposters + Google \n')

_leader_ids = ['bo','bs','dt','ew','hc','jb']
_ff_ids = ['FF_{0:03d}'.format(f) for f in range(1000)]
_gg_ids = ['GG_{0:02d}'.format(f+1) for f in range(28)]

for _title, _real_ids, _fake_ids in (
        ('Leaders', _leader_ids, _leader_ids),
        ('FaceForensics', _ff_ids, _ff_ids),
        ('Google', _gg_ids, _gg_ids),
        ('ALL', test_files['real'].keys(), test_files['fake'].keys())):
    print('\t {0} \n '.format(_title))
    print_result(get_discrepany_accuracy(vgg_results_75, fabnet_results_75, _real_ids, _fake_ids))


## AUC results

In [None]:
# AUC per model and test subset for repos 1-3.
def _print_auc(subset, results, ids):
    """Print one indented '<subset> <auc>' line; real and fake labels are the same set."""
    # A copy is passed as the fake-label list so the two arguments stay distinct objects.
    print('\t \t {0} {1:0.4f}'.format(subset, get_auc(results, ids, list(ids))))


_leader_ids = ['bo','bs','dt','ew','hc','jb']
_ff_ids = ['FF_{0:03d}'.format(f) for f in range(1000)]

print('Repo Only Leaders')
print('\t VGG')
_print_auc('leaders', vgg_repo1, _leader_ids)
print('\t Fabnet Metric')
_print_auc('leaders', fabnet_repo1, _leader_ids)

print('Repo Leaders + FF original')
print('\t VGG')
_print_auc('leaders', vgg_repo2, _leader_ids)
_print_auc('FaceForensics', vgg_repo2, _ff_ids)
print('\t Fabnet Metric')
_print_auc('leaders', fabnet_repo2, _leader_ids)
_print_auc('FaceForensics', fabnet_repo2, _ff_ids)

print('Repo Leaders + FF original + Imposters')
print('\t VGG')
_print_auc('leaders', vgg_repo3, _leader_ids)
_print_auc('FaceForensics', vgg_repo3, _ff_ids)
print('\t Fabnet Metric')
_print_auc('leaders', fabnet_repo3, _leader_ids)
_print_auc('FaceForensics', fabnet_repo3, _ff_ids)






In [None]:
# AUC per model and test subset for repo 4 (three models, three subsets).
print('Repo Leaders + FF original + Imposters + Google')

_auc_subsets = (
    ('leaders', ['bo','bs','dt','ew','hc','jb']),
    ('FaceForensics', ['FF_{0:03d}'.format(f) for f in range(1000)]),
    ('Google', ['GG_{0:02d}'.format(f+1) for f in range(28)]),
)

for _model, _results in (('VGG', vgg_repo4),
                         ('Fabnet Metric', fabnet_repo4),
                         ('Fabnet Pool', fabnetPool_repo4)):
    print('\t {0}'.format(_model))
    for _subset, _ids in _auc_subsets:
        # Real and fake labels are the same set; a copy keeps the two arguments distinct.
        print('\t \t {0} {1:0.4f}'.format(_subset, get_auc(_results, _ids, list(_ids))))

### ACCURACY results

In [None]:
# (real, fake) accuracy from the VGG/Fabnet pair of each repo, per test subset.
def _print_dis_accuracy(subset, vgg_results, fabnet_results, ids):
    """Print one indented '<subset> (real, fake) <accuracy>' line."""
    # Real and fake labels are the same set; a copy keeps the two arguments distinct.
    print('\t \t {0} (real, fake) {1}'.format(
        subset, get_dis_accuracy(vgg_results, fabnet_results, ids, list(ids))))


_leader_ids = ['bo','bs','dt','ew','hc','jb']
_ff_ids = ['FF_{0:03d}'.format(f) for f in range(1000)]
_gg_ids = ['GG_{0:02d}'.format(f+1) for f in range(28)]

print('Repo Only Leaders')
_print_dis_accuracy('leaders', vgg_repo1, fabnet_repo1, _leader_ids)

print('Repo Leaders + FF original')
_print_dis_accuracy('leaders', vgg_repo2, fabnet_repo2, _leader_ids)
_print_dis_accuracy('FaceForensics', vgg_repo2, fabnet_repo2, _ff_ids)

print('Repo Leaders + FF original + Imposter')
_print_dis_accuracy('leaders', vgg_repo3, fabnet_repo3, _leader_ids)
_print_dis_accuracy('FaceForensics', vgg_repo3, fabnet_repo3, _ff_ids)

print('Repo Leaders + FF original + Imposters + Google')
_print_dis_accuracy('leaders', vgg_repo4, fabnet_repo4, _leader_ids)
_print_dis_accuracy('FaceForensics', vgg_repo4, fabnet_repo4, _ff_ids)
_print_dis_accuracy('Google', vgg_repo4, fabnet_repo4, _gg_ids)

In [None]:
# (real, fake) accuracy from the Fabnet repo-4 results alone, per test subset.
# NOTE(review): get_dis_accuracy receives a single result table here, while the cell
# above passes two (VGG and Fabnet) -- confirm which signature is current.
print('Repo Leaders + FF original + Imposters + Google')

for _subset, _ids in (
        ('leaders', ['bo','bs','dt','ew','hc','jb']),
        ('FaceForensics', ['FF_{0:03d}'.format(f) for f in range(1000)]),
        ('Google', ['GG_{0:02d}'.format(f+1) for f in range(28)])):
    # Real and fake labels are the same set; a copy keeps the two arguments distinct.
    print('\t \t {0} (real, fake) {1}'.format(
        _subset, get_dis_accuracy(fabnet_repo4, _ids, list(_ids))))

# AUC

|                |Only Leaders|| Leaders + FaceForensics|| Leaders + FaceForensics + Imposters || 
|----------------|-----|--------|------------|------------|----------------|--------------------|
|                |VGG  | Fabnet |     VGG    |   Fabnet   |     VGG        |     Fabnet         |
|                |     |        |            |            |                |                    |
|Leaders         |88.6 | 93.3   |    88.6    |    92.7    |      87.9      |         28.7       |
|Face Forensics  |NA   | NA     |    99.7    |    98.2    |      99.7      |         98.2       |


# ACCURACY

|                |Only Leaders|| Leaders + FaceForensics|| Leaders + FaceForensics + Imposters || 
|----------------|-------------|--------------|--------------|--------------|--------------|--------------|
|                |True Positive| True Negative| True Positive| True Negative| True Positive|True Negative |
|                |             |              |              |              |              |              |
|Leaders         |99.4         | 26.6         |    98.9      |    39.9      |     97.5     |    99.2      |
|Face Forensics  |NA           | NA           |    99.4      |    97.1      |     99.4     |    97.2      |


# Distributions

In [None]:
# 3x2 grid of log-distance distributions: one column per model, one row per test subset.
fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(10,14))

_dist_subsets = (
    ('Leaders', ['bo','bs','dt','ew','hc','jb']),
    ('FaceForensics', ['FF_{0:03d}'.format(f) for f in range(1000)]),
    ('Google', ['GG_{0:02d}'.format(f+1) for f in range(28)]),
)

for _col, (_model, _results) in enumerate((('VGG', vgg_repo4), ('Fabnet', fabnet_repo4))):
    for _row, (_subset, _ids) in enumerate(_dist_subsets):
        # The same label set selects both the real and the fake rows.
        plot_dist(_results, _ids, _ids, ax[_row, _col], '{0}: {1}'.format(_model, _subset))

plt.subplots_adjust(hspace=0.3)
