In [None]:
import pandas as pd
import numpy as np
import os
import fnmatch, re

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score,pairwise_distances

import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats
from scipy.stats import pearsonr

import altair as alt

In [None]:
def compute_error_rate(df):
    """Print and return the accuracy and per-class error rates of a set of trials.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``ground_truth`` column and a ``Judgement.keys`` column whose
        values are the labels ``"real"`` / ``"fake"``.

    Returns
    -------
    tuple
        ``(accuracy, real_err_rate, fake_err_rate)``:
        overall accuracy, the share of real trials judged "fake", and the
        share of fake trials judged "real".

    Side effects
    ------------
    Prints the number of rows, the confusion matrix and both error rates.
    """
    truth = df["ground_truth"]
    judged = df["Judgement.keys"]

    # Rows are the true class, columns the judged class, both in the order
    # real, fake -- so [0, 1] is "real judged fake" and [1, 0] the reverse.
    conf_mat = confusion_matrix(truth, judged, labels=["real", "fake"])

    row, _ = df.shape
    print(row)

    # Divide by the actual number of trials in each class rather than assuming
    # that exactly half of the rows are real and half are fake.
    num_real = (truth == "real").sum()
    num_fake = (truth == "fake").sum()
    real_err_rate = conf_mat[0, 1] / num_real if num_real else float("nan")
    fake_err_rate = conf_mat[1, 0] / num_fake if num_fake else float("nan")

    print(conf_mat)
    print("Error rate for real images:", real_err_rate)
    print("Error rate for fake images:", fake_err_rate)

    accuracy = accuracy_score(truth, judged)

    return accuracy, real_err_rate, fake_err_rate

def drop_demos(data, num_demos=3):
    """Remove the leading demo trials and recode numeric labels as text.

    Parameters
    ----------
    data : pandas.DataFrame
        Trial table with a ``ground_truth`` column coded 0/1 and, optionally,
        a ``Judgement.keys`` column using the same coding.
    num_demos : int, optional
        Rows with index labels ``0 .. num_demos - 1`` are dropped (default 3).

    Returns
    -------
    pandas.DataFrame
        A new frame without the demo rows in which 0 is recoded to ``"fake"``
        and 1 to ``"real"`` in the label columns. The input is not modified.
    """
    data = data.drop(range(0, num_demos))
    data['ground_truth'] = data['ground_truth'].astype(int)

    # Recode only the label columns: a frame-wide replace would also rewrite
    # any 0/1 that happens to occur in other columns such as Trial_num.
    label_map = {0: "fake", 1: "real"}
    for column in ("ground_truth", "Judgement.keys"):
        if column in data.columns:
            data[column] = data[column].replace(label_map)
    return data

def process_individual(file_path, file_name):
    """Score one participant's CSV file and summarise it per repetition.

    Parameters
    ----------
    file_path : str
        Directory containing the participant file.
    file_name : str
        Name of the participant's CSV file; also stored as the participant id.

    Returns
    -------
    tuple
        ``(df_person, person_result)``: a one-row DataFrame with error rates,
        accuracies, counts of "fake" answers and the images misjudged in both
        repetitions, plus a single-column DataFrame (``result``) holding
        "correct"/"wrong" for every trial.

    Side effects
    ------------
    Reads the CSV from disk and prints the per-repetition and overall
    statistics.
    """
    trials_per_rep = 300    # first 300 rows are repetition 1, the rest repetition 2
    images_per_class = 150  # denominator for the printed both-repetition rates

    participant_file_path = os.path.join(file_path, file_name)
    participant_data = drop_demos(pd.read_csv(participant_file_path))

    # Vectorised comparison; unlike a row-wise apply this also works when the
    # frame has no rows.
    is_correct = participant_data["ground_truth"] == participant_data["Judgement.keys"]
    participant_data["result"] = np.where(is_correct, "correct", "wrong")

    data = participant_data[["Trial_num", "image_name", "image_path",
                             "ground_truth", "Judgement.keys", "result"]]
    rep1 = data.iloc[0:trials_per_rep]
    rep2 = data.iloc[trials_per_rep:]

    # How often the participant answered "fake" in each repetition.
    select_fake_rep1 = int((rep1["Judgement.keys"] == "fake").sum())
    select_fake_rep2 = int((rep2["Judgement.keys"] == "fake").sum())

    print("Repetition 1:")
    accuracy_rep1, real_err_rate_rep1, fake_err_rate_rep1 = compute_error_rate(rep1)
    print("Repetition 2:")
    accuracy_rep2, real_err_rate_rep2, fake_err_rate_rep2 = compute_error_rate(rep2)
    print("Overall:")
    accuracy_all, real_err_rate_all, fake_err_rate_all = compute_error_rate(data)

    wrong_rep1 = rep1.loc[rep1["result"] == "wrong", "image_name"]
    wrong_rep2 = rep2.loc[rep2["result"] == "wrong", "image_name"]

    # Sorted so the stored lists are identical from run to run (set iteration
    # order of strings is not stable across interpreter sessions).
    wrong_in_both = sorted(set(wrong_rep1).intersection(wrong_rep2))
    print("Wrong in both:", len(wrong_in_both))

    # Image names starting with 'encoded' are reported as encoded images,
    # every other name as a real image.
    encoded_misjudged = [name for name in wrong_in_both if name.startswith('encoded')]
    real_misjudged = [name for name in wrong_in_both if not name.startswith('encoded')]

    print("Number of encoded image been misjudged in both repetition:", len(encoded_misjudged)/images_per_class)
    print("Number of real image been misjudged in both repetition:", len(real_misjudged)/images_per_class)

    # The two list-valued entries are wrapped in a list so the frame has
    # exactly one row; the scalar entries are broadcast to that row.
    df_person = pd.DataFrame({'participant': file_name,
                              'real_err_rate_rep1': real_err_rate_rep1,
                              'fake_err_rate_rep1': fake_err_rate_rep1,
                              'real_err_rate_rep2': real_err_rate_rep2,
                              'fake_err_rate_rep2': fake_err_rate_rep2,
                              'real_err_rate_all': real_err_rate_all,
                              'fake_err_rate_all': fake_err_rate_all,
                              'accuracy_rep1': accuracy_rep1,
                              'accuracy_rep2': accuracy_rep2,
                              'accuracy_all': accuracy_all,
                              'select_fake_rep1': select_fake_rep1,
                              'select_fake_rep2': select_fake_rep2,
                              'real_misjudged_img_both_rep': [real_misjudged],
                              'encoded_misjudged_img_both_rep': [encoded_misjudged],
                              'num_real_misjudged_both_rep': len(real_misjudged),
                              'num_encoded_misjudged_both_rep': len(encoded_misjudged)})

    person_result = data[["result"]]

    return df_person, person_result


def process_all_people(file_path):
    """Process every participant CSV in a directory and aggregate the results.

    Parameters
    ----------
    file_path : str
        Directory holding one CSV file per participant.

    Returns
    -------
    tuple
        ``(df_stat, aggregate)``: ``df_stat`` has one summary row per
        participant; ``aggregate`` has one row per trial with the trial
        metadata, one ``result`` column per participant, the list of all
        results (``all_res``) and the counts ``num_correct`` / ``num_wrong``.

    Raises
    ------
    FileNotFoundError
        If the directory contains no ``*.csv`` file.
    """
    # Keep only CSV files and sort them: os.listdir order is arbitrary, and the
    # first entry is used as the template for the trial metadata.
    csv_files = sorted(
        name for name in os.listdir(file_path)
        if os.path.isfile(os.path.join(file_path, name)) and fnmatch.fnmatch(name, '*.csv')
    )
    if not csv_files:
        raise FileNotFoundError("No participant CSV files found in {!r}".format(file_path))

    trial_num_path = os.path.join(file_path, csv_files[0])
    print(trial_num_path)

    aggregate = pd.read_csv(trial_num_path, usecols=["Trial_num", "image_name", "image_path", "ground_truth"])
    aggregate = drop_demos(aggregate)
    num_meta_cols = aggregate.shape[1]

    # Collect the per-participant frames and concatenate once at the end;
    # DataFrame.append no longer exists in pandas >= 2.0.
    person_stats = []
    person_results = []
    for person_data in csv_files:
        df_person, person_result = process_individual(file_path, person_data)
        person_stats.append(df_person)
        person_results.append(person_result)

    df_stat = pd.concat(person_stats, ignore_index=True)
    aggregate = pd.concat([aggregate] + person_results, axis=1)

    # Everything after the metadata columns is one "result" column per
    # participant; gather each row's results into a list and count them.
    result_columns = aggregate.iloc[:, num_meta_cols:]
    aggregate["all_res"] = result_columns.values.tolist()
    aggregate["num_correct"] = aggregate['all_res'].apply(lambda x: x.count("correct"))
    aggregate["num_wrong"] = aggregate['all_res'].apply(lambda x: x.count("wrong"))
    return df_stat, aggregate

In [None]:
## Read the data
# Directory with one CSV per participant, relative to the working directory.
data_path = "data/Experiment1"
# all_people_real_fake: one summary row per participant file.
# aggregate_result: one row per trial with every participant's result and
# the num_correct / num_wrong counts.
all_people_real_fake, aggregate_result = process_all_people(data_path)

# Plot Error rate for each observer

In [None]:
# Poster context at half font scale for the seaborn figures drawn from here on.
sns.set_context(context='poster', font_scale=0.5)

labels = ["Error rate for judging \n Real images", "Error rate for judging \n Generated images"]
# One series per category: overall error rate on real and on fake images.
data_for_boxplot = [
    all_people_real_fake[column]
    for column in ('real_err_rate_all', 'fake_err_rate_all')
]

fig1, ax1 = plt.subplots()
# One dot per observer and category, drawn on the axes created above.
ax1 = sns.swarmplot(data=data_for_boxplot, palette=["#bdbdbd", "#2ca02c"], size=8, ax=ax1)
sns.despine()

ax1.set_xticklabels(labels)
ax1.set_ylabel("Observer's Error rate")
ax1.set_title("Distribution of Observers' Error Rate")
plt.show()

In [None]:
## Define color map

# Category order and colours shared by the real/fake plots.
real_fake_hue_order = ["real", "fake"]
real_fake_color_palette = {"real": "#bdbdbd", "fake": "#2ca02c"}

In [None]:
## Add percent wrong column

# Share of recorded results on each trial row that are "wrong" (a 0-1 fraction).
aggregate_result["percent_wrong"] = aggregate_result["num_wrong"].div(
    aggregate_result["num_wrong"].add(aggregate_result["num_correct"])
)
aggregate_result["percent_correct"] = 1 - aggregate_result["percent_wrong"]

# Label the first 300 rows as repetition 1 and the following 300 as repetition 2.
rep = ["Rep 1"] * 300 + ["Rep 2"] * 300
aggregate_result["Repeat"] = rep


In [None]:
## Splits 2 repeats. Match the 2nd repeat to first
rep1 = aggregate_result.iloc[:300]

# Reorder the second repetition by image name so that row i of rep2 refers to
# the same image as row i of rep1.
rep2 = (
    aggregate_result.iloc[300:]
    .set_index('image_name')
    .reindex(index=rep1['image_name'])
    .reset_index()
)

# Show aggregated error for fake and real judgement (Figure 2C)

In [None]:
# Sum only the numeric columns: the frame also holds string and list columns,
# which cannot be divided.
# NOTE(review): 6000 is presumably the total number of judgements per class
# over both repetitions -- confirm against the number of observers.
aggregate_result.groupby("ground_truth").sum(numeric_only=True)/6000

In [None]:
# Sum only the numeric columns: the frame also holds string and list columns,
# which cannot be divided.
# NOTE(review): 3000 is presumably the number of judgements per class within
# one repetition -- confirm against the number of observers.
rep1.groupby("ground_truth").sum(numeric_only=True)/3000

In [None]:
# Sum only the numeric columns: the frame also holds string and list columns,
# which cannot be divided.
# NOTE(review): 3000 is presumably the number of judgements per class within
# one repetition -- confirm against the number of observers.
rep2.groupby("ground_truth").sum(numeric_only=True)/3000

# Plot distribution of error (Figure 2D)

In [None]:
# Hue order and palette used for this figure (fake listed first).
real_fake_hue_order = ["fake", "real"]

real_fake_color_palette = dict(real="#bdbdbd", fake="#2ca02c")


# One panel per repetition; each class is normalised on its own
# (common_norm=False), so bar heights are percentages of images within a class.
plot_error_image = sns.displot(aggregate_result, x="percent_wrong", hue="ground_truth", col="Repeat", stat="percent", alpha=0.7,
                               binwidth=0.1, common_norm=False, fill=True, edgecolor= "white",linewidth=1,
                               hue_order=real_fake_hue_order,palette=real_fake_color_palette, legend = True)
sns.despine(offset=10, left=False, right = True)
plot_error_image.set(xticks=np.arange(0,1,0.1), yticks=np.arange(0,50,10))
# Variant without axis labels and tick labels:
#plot_error_image.set(xlabel=None, ylabel=None, xticklabels=[], yticklabels=[])
# Note: the plotted percent_wrong values are fractions between 0 and 1.
plot_error_image.set(xlabel='Percentage of observers misjudging an image', ylabel='Percentage of images')