In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
from functools import partial
# Configure seaborn's global plotting defaults, then select the "whitegrid"
# style so that figures created afterwards pick it up.
sns.set()
sns.set_style("whitegrid")
import matplotlib
import matplotlib.pyplot as plt

In [None]:
# Load the competition results table; the analysis code that follows in this
# file reads its columns from `df`.
df_path = '../data/competition_results.csv'
df = pd.read_csv(filepath_or_buffer=df_path)

In [None]:
# Correlation between FID and memorization distance, computed only on rows
# labelled 'non-mem', for the public and private column pairs and for both
# Spearman and Pearson. Keys follow the pattern '<split>_<method>'.
non_mem_rows = df[df['labels'].isin(['non-mem'])]

legit_corrs = {
    # The off-diagonal entry of the 2x2 correlation matrix is the FID/distance value.
    f'{split}_{method}': (
        non_mem_rows[[f'FID_{split}', f'distance_{split}']]
        .corr(method=method)
        .loc[f'FID_{split}', f'distance_{split}']
    )
    for method in ('spearman', 'pearson')
    for split in ('public', 'private')
}
print(legit_corrs)

In [None]:
# FID vs. memorization distance for rows labelled 'non-mem': one regression per
# column pair (public, private) drawn on shared axes, each annotated with its
# Pearson correlation taken from `legit_corrs`.
fontsize = 14
fontfamily = 'Times New Roman'
fig, ax = plt.subplots(figsize=(6,6))
# The limits are fixed before plotting: with truncate=False, regplot extends the
# fitted line to the x-axis limits that are in effect when it is called.
ax.set_xlim((0, 140))
ax.set_ylim((0.15, 0.45))

# Filter once instead of once per regplot call.
non_mem_rows = df[df['labels'].isin(['non-mem'])]
# Shared styling for both correlation annotations.
annotation_box = {'facecolor': 'white', 'alpha': 0.5, 'pad': 5}

sns.regplot(x="FID_public", y="distance_public",
            data=non_mem_rows[['FID_public', 'distance_public']],
            scatter_kws={"s": 3}, truncate=False, ax=ax, label='public')
# Pass fontfamily so the annotation matches the legend and axis labels.
ax.text(25, 0.375, f"corr(Pearson) = {legit_corrs['public_pearson']:.4f}",
        fontsize=fontsize, fontfamily=fontfamily,
        bbox=dict(annotation_box))

sns.regplot(x="FID_private", y="distance_private",
            data=non_mem_rows[['FID_private', 'distance_private']],
            scatter_kws={"s": 3}, truncate=False, ax=ax, label='private')
ax.text(60, 0.19, f"corr(Pearson) = {legit_corrs['private_pearson']:.4f}",
        fontsize=fontsize, fontfamily=fontfamily,
        bbox=dict(annotation_box))
ax.legend(prop={'size': fontsize, 'family': fontfamily})
ax.set_xlabel('FID', fontsize=fontsize, fontfamily=fontfamily)
ax.set_ylabel('memorization distance', fontsize=fontsize, fontfamily=fontfamily)

# plt.savefig('fid-mem_dist-corr.pdf', bbox_inches='tight')

In [None]:
# Histogram of `distance_private`: rows whose label is one of the categories in
# `mems` ("memorized") against rows labelled 'non-mem' ("non-memorized").
fontsize = 14
fontfamily = 'Times New Roman'
mems = ['aug', 'sup', 'mgan', 'ae']
axis_label_font = {'fontsize': fontsize, 'fontfamily': fontfamily}

fig, ax = plt.subplots(figsize=(6,6))
ax.set(xlim=(0.15, 0.45), ylim=(0, 100))

# Print each group's size right before drawing it.
memorized_private = df.loc[df['labels'].isin(mems), "distance_private"]
print(memorized_private.shape)
sns.histplot(memorized_private, label="memorized", kde=False,
             ax=ax, color='C0', binwidth=0.00275)

non_memorized_private = df.loc[df['labels'].isin(['non-mem']), "distance_private"]
print(non_memorized_private.shape)
sns.histplot(non_memorized_private, label="non-memorized", kde=False,
             ax=ax, color='C1', binwidth=0.00275)

ax.legend(prop={'size': fontsize, 'family': fontfamily})
ax.set_xlabel('memorization distance', **axis_label_font)
ax.set_ylabel('Count', **axis_label_font)
# plt.savefig('dist-histogram-private.pdf', bbox_inches='tight')

In [None]:
# Histogram of `distance_public`: rows whose label is one of the categories in
# `mems` ("memorized") against rows labelled 'non-mem' ("non-memorized").
# No binwidth is passed here, so seaborn picks the bins itself.
fontsize = 14
fontfamily = 'Times New Roman'
mems = ['aug', 'sup', 'mgan', 'ae']
axis_label_font = {'fontsize': fontsize, 'fontfamily': fontfamily}

fig, ax = plt.subplots(figsize=(6,6))
ax.set(xlim=(0.05, 0.3), ylim=(0, 100))

# Print each group's size right before drawing it.
memorized_public = df.loc[df['labels'].isin(mems), "distance_public"]
print(memorized_public.shape)
sns.histplot(memorized_public, label="memorized", kde=False, ax=ax, color='C0')

non_memorized_public = df.loc[df['labels'].isin(['non-mem']), "distance_public"]
print(non_memorized_public.shape)
sns.histplot(non_memorized_public, label="non-memorized", kde=False, ax=ax, color='C1')

ax.legend(prop={'size': fontsize, 'family': fontfamily})
ax.set_xlabel('memorization distance', **axis_label_font)
ax.set_ylabel('Count', **axis_label_font)
# plt.savefig('dist-histogram-public.pdf', bbox_inches='tight')

In [None]:
# Scatter of FID_public against FID_private for every row that has a label,
# with colour and marker both keyed on the label.
fontsize = 14
fontfamily = 'Times New Roman'
sns.set_style("whitegrid")

label_order = ['non-mem', 'mgan', 'aug', 'sup', 'ae']
label_colors = {'aug': 'C0', 'sup': 'C1', 'mgan': 'C2', 'non-mem': 'C3', 'ae': 'C4'}
labelled_rows = df[df['labels'].notna()]
axis_label_font = {'fontsize': fontsize, 'fontfamily': fontfamily}

fig, ax = plt.subplots(figsize=(6,6))
ax.set(xlim=(0, 120), ylim=(20, 140))
sns.scatterplot(
    data=labelled_rows,
    x="FID_public",
    y="FID_private",
    hue='labels',
    style='labels',
    s=50,
    hue_order=label_order,
    palette=label_colors,
    ax=ax,
)

ax.legend(loc='best', prop={'size': fontsize, 'family': fontfamily})
ax.set_xlabel('FID Inception (public)', **axis_label_font)
ax.set_ylabel('FID NASNet (private)', **axis_label_font)

# plt.savefig('mem-fid.pdf', bbox_inches='tight')

In [None]:
# Scatter of distance_public against distance_private for every row that has a
# label, with colour and marker both keyed on the label.
fontsize = 14
fontfamily = 'Times New Roman'
sns.set_style("whitegrid")

label_order = ['non-mem', 'mgan', 'aug', 'sup', 'ae']
label_colors = {'aug': 'C0', 'sup': 'C1', 'mgan': 'C2', 'non-mem': 'C3', 'ae': 'C4'}
labelled_rows = df[df['labels'].notna()]
axis_label_font = {'fontsize': fontsize, 'fontfamily': fontfamily}

fig, ax = plt.subplots(figsize=(6,6))
ax.set(xlim=(0, 0.3), ylim=(0.15, 0.45))
sns.scatterplot(
    data=labelled_rows,
    x="distance_public",
    y="distance_private",
    hue='labels',
    style='labels',
    s=50,
    hue_order=label_order,
    palette=label_colors,
    ax=ax,
)

ax.legend(loc='best', prop={'size': fontsize, 'family': fontfamily})
ax.set_xlabel('mem-distance Inception (public)', **axis_label_font)
ax.set_ylabel('mem-distance NASNet (private)', **axis_label_font)

# Optional title (disabled):
# plt.title("Manual Labels of different cheating methods and the scores distribution")

# plt.savefig('mem-distance.pdf', bbox_inches='tight')

In [None]:
# Regression plot of FID_private against FID_public over all rows of `df`,
# annotated with the Spearman correlation between the two columns.
fid_corr_matrix = df[['FID_private','FID_public']].corr(method='spearman')
fid_corr_spearman = fid_corr_matrix.loc['FID_private', 'FID_public']

fontsize = 14
fontfamily = 'Times New Roman'
text_font = {'fontsize': fontsize, 'fontfamily': fontfamily}

fig, ax = plt.subplots(figsize=(6,6))
ax.set(xlim=(0, 600), ylim=(0, 600))

sns.regplot(data=df, x="FID_public", y="FID_private",
            scatter_kws={"s": 3}, ax=ax)
ax.text(
    250, 150,
    f'corr(Spearman)= {fid_corr_spearman:.4f}',
    bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 5},
    **text_font,
)

ax.set_xlabel('FID Inception (public)', **text_font)
ax.set_ylabel('FID NASNet (private)', **text_font)
# plt.savefig('fid-public-private.pdf', bbox_inches='tight')

In [None]:
# Mean absolute difference between each row's rank by FID_public and its rank
# by FID_private. Applying argsort twice turns values into 0-based ranks.
# NOTE(review): presumably the FID columns contain no NaN; argsort does not
# rank missing values meaningfully, so confirm against the data.
public_rank = df['FID_public'].argsort().argsort()
private_rank = df['FID_private'].argsort().argsort()
rank_gap = (public_rank - private_rank).abs()
mean_abs_rank_diff = rank_gap.mean()
print(mean_abs_rank_diff, public_rank.shape, private_rank.shape)

# The same statistic restricted to rows labelled 'non-mem'.
legit_rows = df[df['labels'].isin(['non-mem'])]
public_order_legit = legit_rows['FID_public'].argsort()
private_order_legit = legit_rows['FID_private'].argsort()
public_rank_legit = public_order_legit.argsort()
private_rank_legit = private_order_legit.argsort()
rank_gap_legit = (public_rank_legit - private_rank_legit).abs()
mean_abs_rank_diff_legit = rank_gap_legit.mean()
print(mean_abs_rank_diff_legit, public_rank_legit.shape, private_rank_legit.shape)

# Positions (within the 'non-mem' subset) of the i smallest FID values on each
# side, followed by how many positions the two top-i lists share.
i = 5
top_i_indices_public = public_order_legit.iloc[:i].values
top_i_indices_private = private_order_legit.iloc[:i].values
print(top_i_indices_public)
print(top_i_indices_private)
shared_top_i = set(top_i_indices_public) & set(top_i_indices_private)
print(len(shared_top_i))