In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, balanced_accuracy_score, confusion_matrix
import matplotlib.pyplot as plt

import pickle
import json
import numpy as np
import matplotlib as mpl
import os

In [None]:
# Global figure styling for the notebook: the colour-blind-friendly seaborn style sheet
# shipped with matplotlib, and a serif font family for all text.
mpl.style.use('seaborn-v0_8-colorblind')
plt.rcParams["font.family"] = "serif"

### 3.1 Experiments

In [None]:
# Codecs compared in the 3.1 plot; the position in this list also selects the entry of
# `markers` / `linestyles` used for that codec's curve.
models = ["hyperprior", "mbt2018", "cheng2020-attn", "qres17m", "qarv"]
# Groups used as keys of the per-race prediction / label dictionaries.
races = ['Indian', 'Asian', 'African', 'Caucasian']
# Quality-level directory names under each model's prediction folder.
qualities = ["q_0001", "q_0009", "q_1", "q_2", "q_3"]
# Attribute keys inside each race's dictionary, and the matching subplot titles (same order).
categories = ['skin_type', 'eye_type', 'nose_type', 'lip_type', 'hair_type', 'hair_color']
category_names = ['Skin Type', 'Eye Type', 'Nose Type', 'Lip Type', 'Hair Type', 'Hair Color']
# Stats sub-directory names for qres17m / qarv; indexed with the same position as `qualities`.
qres = ["1", "3", "6", "9", "12"]
qarv = ["lmb_1", "lmb_4", "lmb_8", "lmb_16", "lmb_32" ]
# Per-codec line appearance (parallel to `models`).
markers = ['s', 'o', '^', '*', 'D']
# NOTE(review): the last two entries are both 'dotted', so qres17m and qarv differ only by marker.
linestyles = ['solid', 'dashed', 'dashdot', 'dotted', "dotted"]

In [None]:
## For one model, compute the per-race, per-category accuracies at every rate (quality level)
def generate_results(dataset="celebA", model="hyperprior"):
    """Compute per-race, per-category accuracy of one codec at every quality level.

    Reads the module-level lists ``qualities``, ``races``, ``categories``, ``qarv`` and ``qres``.

    Args:
        dataset: Sub-directory name used to locate the prediction files and, for models other
            than "qarv"/"qres17m", the rate statistics.
        model: Codec name. "qarv" and "qres17m" keep their statistics in a different directory
            layout and JSON schema from the remaining models.

    Returns:
        A tuple ``(acc_by_cat, rates)`` where ``acc_by_cat[cat][race]`` is a list of accuracies
        ordered like ``qualities`` and ``rates`` holds one bpp value per quality level.
    """
    data_root = '/media/global_data/fair_neural_compression_data'
    decoded_root = f'{data_root}/decoded_rfw/decoded_64x64'

    # The label file does not depend on the quality level: read it once, not once per iteration.
    # NOTE: pickle.load can execute arbitrary code; only point this at trusted files.
    with open(f'{data_root}/final_predictions/hyperprior/celebA/clean/sep_labels.pkl', 'rb') as f:
        all_labels = pickle.load(f)

    rates = []
    results = {}
    for i, q in enumerate(qualities):
        with open(f'{data_root}/final_predictions/{model}/{dataset}/{q}/sep_predictions.pkl', 'rb') as f:
            all_predictions = pickle.load(f)

        # NOTE(review): the qarv / qres17m statistics are always read from the "celebA"
        # directory, even when `dataset` is something else -- confirm this is intended.
        if model == "qarv":
            stats_path = f'{decoded_root}/qarv/celebA/{qarv[i]}/stats.json'
        elif model == "qres17m":
            stats_path = f'{decoded_root}/qres17m_lmb_64/celebA/{qres[i]}/stats.json'
        else:
            stats_path = f'{decoded_root}/{model}/{dataset}/{q}/stats.json'
        with open(stats_path, 'r') as json_file:
            data_dict = json.load(json_file)
        if model in ("qarv", "qres17m"):
            bpp = data_dict['results']['bpp']
        else:
            bpp = data_dict['est_bpp']
        rates.append(bpp)

        # Accuracy is symmetric in its two arguments, so the conventional (y_true, y_pred)
        # order gives the same score as the previous (pred, labels) call.
        results[q] = {
            race: {
                cat: accuracy_score(all_labels[race][cat], all_predictions[race][cat])
                for cat in categories
            }
            for race in races
        }

    # Re-index from results[quality][race][cat] to acc_by_cat[cat][race] -> list over qualities.
    acc_by_cat = {}
    for cat in categories:
        acc_by_cat[cat] = {}
        for race in races:
            acc_by_cat[cat][race] = [results[q][race][cat] for q in qualities]
    return acc_by_cat, rates

In [None]:
# Load the clean `sep_labels.pkl` dictionary. Despite the variable name, the file loaded here
# is the labels file; in the visible notebook it is only used to count samples per race.
# NOTE(review): this path uses `predictions/` whereas the functions in this notebook read
# from `final_predictions/` -- confirm which directory is current.
with open(f'/media/global_data/fair_neural_compression_data/predictions/hyperprior/celebA/clean/sep_labels.pkl', 'rb') as labels_file:
    pred_dict = pickle.load(labels_file)

In [None]:
# Number of "eye_type" entries per race, in `races` order; used below as the weights of the
# across-race accuracy average.
counts = [len(pred_dict[race]["eye_type"]) for race in races]
counts

In [None]:
# NOTE(review): `dataset` is assigned but never passed to generate_results() below, so the
# curves come from that function's default dataset -- confirm which one is intended.
dataset = "fairface"
fig, axes = plt.subplots(len(categories)//2, 2, figsize=(6, 9))

# One curve per codec in every category panel: sample-count-weighted mean accuracy over races
# plotted against the codec's bpp values.
for z, model in enumerate(models):
    temp, rates = generate_results(model=model)
    for i, cat in enumerate(categories):
        row, col = i % 3, i // 3  # categories fill the grid column by column
        ax = axes[row][col]
        ax.set_title(category_names[i])
        accs = [
            np.average([temp[cat][race][j] for race in races], weights=counts)
            for j in range(len(qualities))
        ]
        ax.plot(rates, accs, label=model, linestyle=linestyles[z], marker=markers[z])
        axes[2][col].set_xlabel("bpp")
        axes[row][0].set_ylabel("Accuracy")

axes[2][1].legend(bbox_to_anchor=(0.7, -0.2), ncol=3)

fig.subplots_adjust(hspace=0.25, wspace=0.25)

# Fixed 0.20-wide accuracy window for each panel, keyed by (row, column).
y_limits = {
    (0, 0): (0.37, 0.57),
    (0, 1): (0.62, 0.82),
    (1, 0): (0.75, 0.95),
    (1, 1): (0.55, 0.75),
    (2, 0): (0.55, 0.75),
    (2, 1): (0.62, 0.82),
}
for (row, col), (low, high) in y_limits.items():
    axes[row][col].set_ylim(low, high)

plt.show()

    
    
    ## Weighted average across races


### 3.2 Experiments

In [None]:
# Configuration for Section 3.2. `races`, `categories`, `category_names`, `qualities`, `qres`
# and `qarv` carry the same values as in the Section 3.1 configuration cell.
races = ['Indian', 'Asian', 'African', 'Caucasian']
categories = ['skin_type', 'eye_type', 'nose_type', 'lip_type', 'hair_type', 'hair_color']
category_names = ['Skin Type', 'Eye Type', 'Nose Type', 'Lip Type', 'Hair Type', 'Hair Color']
# NOTE(review): the next three names are not referenced anywhere in the visible notebook.
lambda_file_names = ["1"]
lambda_values = [1]
data_rate_values = ["clean"]
# Quality-level directory names, and the qres17m / qarv stats directories at the same positions.
qualities = ["q_0001", "q_0009", "q_1", "q_2", "q_3"]
qres = ["1", "3", "6", "9", "12"]
qarv = ["lmb_1", "lmb_4", "lmb_8", "lmb_16", "lmb_32" ]
# Placeholder; rebound by `temp, rates = generate_results(...)` further down.
rates = []
# One marker per race (parallel to `races`).
race_markers = ['s', 'o', '^', '*']

In [None]:
## For one model, compute the per-race, per-category accuracies at every rate (quality level)
def generate_results(dataset="celebA", model="hyperprior", verbose=True):
    """Per-race, per-category accuracy of one codec at every quality level, with merged skin types.

    Before skin type is scored, selected skin-type classes are folded into a neighbouring
    class for each race (see ``merged_skin_type`` below); the relabelling is applied to both
    predictions and labels. Reads the module-level lists ``qualities``, ``races``,
    ``categories``, ``qarv`` and ``qres``.

    Args:
        dataset: Sub-directory name used to locate the prediction files and, for models other
            than "qarv"/"qres17m", the rate statistics.
        model: Codec name. "qarv" and "qres17m" keep their statistics in a different directory
            layout and JSON schema from the remaining models.
        verbose: When True (the default, matching the previous behaviour) print the largest
            skin-type label and the skin-type confusion matrix for every race and quality.

    Returns:
        A tuple ``(acc_by_cat, rates)`` where ``acc_by_cat[cat][race]`` is a list of accuracies
        ordered like ``qualities`` and ``rates`` holds one bpp value per quality level.
    """
    data_root = '/media/global_data/fair_neural_compression_data'
    decoded_root = f'{data_root}/decoded_rfw/decoded_64x64'

    # race -> (class to remove, class it is merged into)
    merged_skin_type = {
        'African': (5, 4),
        'Asian': (3, 2),
        'Caucasian': (2, 1),
        'Indian': (3, 2),
    }

    def _merge_skin_types(values, race):
        """Relabel `values` in place; relies on boolean-mask assignment (array-like input)."""
        source, target = merged_skin_type[race]
        values[values == source] = target
        if race == "Caucasian":
            # Caucasian additionally folds class 3 into class 1.
            values[values == 3] = 1

    # Labels are independent of the quality level: load and relabel them once instead of on
    # every iteration (the relabelling is idempotent, so the scores are unchanged).
    # NOTE: pickle.load can execute arbitrary code; only point this at trusted files.
    with open(f'{data_root}/final_predictions/hyperprior/celebA/clean/sep_labels.pkl', 'rb') as f:
        all_labels = pickle.load(f)
    if 'skin_type' in categories:
        for race in races:
            _merge_skin_types(all_labels[race]['skin_type'], race)

    rates = []
    results = {}
    for i, q in enumerate(qualities):
        with open(f'{data_root}/final_predictions/{model}/{dataset}/{q}/sep_predictions.pkl', 'rb') as f:
            all_predictions = pickle.load(f)

        # NOTE(review): the qarv / qres17m statistics are always read from the "celebA"
        # directory, even when `dataset` is something else -- confirm this is intended.
        if model == "qarv":
            stats_path = f'{decoded_root}/qarv/celebA/{qarv[i]}/stats.json'
        elif model == "qres17m":
            stats_path = f'{decoded_root}/qres17m_lmb_64/celebA/{qres[i]}/stats.json'
        else:
            stats_path = f'{decoded_root}/{model}/{dataset}/{q}/stats.json'
        with open(stats_path, 'r') as json_file:
            data_dict = json.load(json_file)
        if model in ("qarv", "qres17m"):
            bpp = data_dict['results']['bpp']
        else:
            bpp = data_dict['est_bpp']
        rates.append(bpp)

        results[q] = {}
        for race in races:
            results[q][race] = {}
            for cat in categories:
                pred = all_predictions[race][cat]
                labels = all_labels[race][cat]
                if cat == 'skin_type':
                    _merge_skin_types(pred, race)
                    if verbose:
                        print(max(labels), race, q)
                        print(confusion_matrix(labels, pred, labels=[0, 1, 2, 3, 4, 5]))
                # Accuracy is symmetric, so (y_true, y_pred) order matches the old result.
                results[q][race][cat] = accuracy_score(labels, pred)

    # Re-index from results[quality][race][cat] to acc_by_cat[cat][race] -> list over qualities.
    acc_by_cat = {}
    for cat in categories:
        acc_by_cat[cat] = {}
        for race in races:
            acc_by_cat[cat][race] = [results[q][race][cat] for q in qualities]
    return acc_by_cat, rates

In [None]:
def get_clean_pred(dataset="celebA", model="hyperprior"):
    """Accuracy per race and category for the predictions stored under the "clean" folder.

    Skin-type classes are merged per race (see ``merged_skin_type``) in both predictions and
    labels before scoring. Reads the module-level lists ``races`` and ``categories``.

    Args:
        dataset: Sub-directory name used to locate the prediction file.
        model: Model sub-directory name used to locate the prediction file.

    Returns:
        Dict such that ``results[race][cat]`` is the accuracy for that race and category.
    """
    with open(f'/media/global_data/fair_neural_compression_data/final_predictions/{model}/{dataset}/clean/sep_predictions.pkl', 'rb') as pred_file:
        all_predictions = pickle.load(pred_file)
    with open(f'/media/global_data/fair_neural_compression_data/final_predictions/hyperprior/celebA/clean/sep_labels.pkl', 'rb') as label_file:
        all_labels = pickle.load(label_file)

    # race -> (class to remove, class it is merged into)
    merged_skin_type = {
        'African': (5, 4),
        'Asian': (3, 2),
        'Caucasian': (2, 1),
        'Indian': (3, 2),
    }

    results = {}
    for race in races:
        race_scores = {}
        for cat in categories:
            pred = all_predictions[race][cat]
            labels = all_labels[race][cat]
            if cat == 'skin_type':
                source, target = merged_skin_type[race]
                # Same in-place relabelling for predictions and labels.
                for values in (pred, labels):
                    values[values == source] = target
                    if race == "Caucasian":
                        values[values == 3] = 1
            race_scores[cat] = accuracy_score(pred, labels)
        results[race] = race_scores
    return results

In [None]:
# Codec analysed in the rest of the notebook. The plotting helpers below read this global
# for their titles and output paths.
model = "mbt2018"

# Accuracy curves and bpp values for the "celebA" (default) and "fairface" prediction folders.
temp, rates = generate_results(model=model)
ff_temp, ff_rates = generate_results(dataset="fairface", model=model)

In [None]:
# Accuracies on the "clean" predictions, drawn as dashed horizontal reference lines in the
# per-race plots below.
# NOTE(review): both calls use the default arguments, so `ff_results` holds the same values as
# `results` -- confirm that the clean baseline is meant to be shared between the two datasets.
results = get_clean_pred()
ff_results = get_clean_pred()

In [None]:
# Re-apply the colour-blind-friendly style sheet (already applied at the top of the notebook).
mpl.style.use('seaborn-v0_8-colorblind')
# Disabled alternative font setting, kept for reference:
#plt.rcParams["font.family"] = "lucida-console"

In [None]:
# Inspect the clean-prediction accuracies (nested dict: race -> category -> accuracy).
print(results)

In [None]:
def plot_races_and_fairness(temp, rates, results, trained_on, figure_name, write_image=False):
    """Plot per-race accuracy and the max-min accuracy gap against bpp, one panel per category.

    Reads the module-level names ``categories``, ``category_names``, ``races``,
    ``race_markers`` and ``model`` (the latter for the figure title and output path).

    Args:
        temp: ``temp[cat][race]`` -> list of accuracies, one per entry of ``rates``.
        rates: bpp values used as x coordinates.
        results: ``results[race][cat]`` -> reference accuracy drawn as a dashed horizontal line.
        trained_on: Label inserted into the output file name.
        figure_name: Suffix of the output file name.
        write_image: When True, also save the figure as a PNG under
            ``../../plots/new_training_merge_skintype/<model>/``.
    """
    fig, axes = plt.subplots(len(categories)//2, 2, figsize=(6, 9), sharey=True)
    palette = plt.rcParams['axes.prop_cycle'].by_key()['color']

    for i, cat in enumerate(categories):
        ax = axes[i%3][i//3]  # categories fill the grid column by column
        ax.set_title(category_names[i])
        for j, race in enumerate(races):
            ax.plot(rates, temp[cat][race], marker=race_markers[j], label=race)

        per_race = np.array([temp[cat][race] for race in races])
        gap = per_race.max(0) - per_race.min(0)
        # Use an explicit marker: the previous code reused the loop variable left over from
        # the race loop, which gave this curve the same marker as the last race.
        ax.plot(rates, gap, marker="d", color='black', label="Fairness")
        print(gap)

        # Reference lines share the colour of the matching race curve (same palette index).
        for j, race in enumerate(races):
            ax.axhline(results[race][cat], linestyle="--", color=palette[j])

    axes[2][1].legend(bbox_to_anchor=(0.7, -0.2), ncol = 3)
    plt.suptitle(model)
    if write_image:
        out_dir = os.path.join('../../plots/new_training_merge_skintype', model)
        # savefig does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)
        # bbox_inches='tight' keeps the legend placed below the axes inside the saved image,
        # matching the other figure writers in this notebook.
        fig.savefig(os.path.join(out_dir, f'{model}_{trained_on}_{figure_name}.png'), bbox_inches='tight', dpi=200)
    plt.show()

In [None]:
# Per-race accuracy and gap curves for the default-dataset results; displayed only, not saved.
plot_races_and_fairness(temp, rates, results, 'celeba', 'racial_disparity', write_image=False)

In [None]:
# Same figure for the "fairface" results; this one is also written to disk.
plot_races_and_fairness(ff_temp, ff_rates, ff_results, 'fairface', 'racial_disparity', write_image=True)

In [None]:
def plot_races_and_fairness_single(temp, rates, results):
    """Single-panel version of the per-race plot, restricted to the "eye_type" category.

    Draws one accuracy curve per race, the max-min gap between races (labelled "bias"), and a
    dashed horizontal line per race at ``results[race]["eye_type"]``. Reads the module-level
    names ``races`` and ``race_markers``.

    Args:
        temp: ``temp[cat][race]`` -> list of accuracies, one per entry of ``rates``.
        rates: bpp values used as x coordinates.
        results: ``results[race][cat]`` -> reference accuracy for the dashed lines.
    """
    cat = "eye_type"
    fig, axes = plt.subplots(1, figsize=(3.3, 3.3), sharey=True)
    axes.set_title("Accuracy vs Rate (by Group)")

    per_race = []
    for j, race in enumerate(races):
        race_color = plt.rcParams['axes.prop_cycle'].by_key()['color'][j]
        axes.plot(rates, temp[cat][race], marker=race_markers[j], color=race_color, label=race)
        per_race.append(temp[cat][race])

    stacked = np.array(per_race)
    bias_color = plt.rcParams['axes.prop_cycle'].by_key()['color'][5]
    axes.plot(rates, stacked.max(0) - stacked.min(0), marker="d", linestyle="dashdot", color=bias_color, label="bias")

    # Reference lines reuse the colour of the matching race curve.
    for j, race in enumerate(races):
        axes.axhline(results[race][cat], linestyle="--", color=plt.rcParams['axes.prop_cycle'].by_key()['color'][j])

    axes.legend(bbox_to_anchor=(1.6, 0.7))
    axes.set_xlabel("bpp")
    axes.set_ylabel("Accuracy")
    plt.show()


In [None]:
# Single-panel "eye_type" figure for the default-dataset results.
plot_races_and_fairness_single(temp, rates, results)
# TODO: keep which?? (presumably: this single-panel figure or the six-panel grid above -- confirm)

### 3.3 Experiments

In [None]:
def all_fairness(temp, rates, trained_on, write_image=False):
    """Plot the max-min accuracy gap between races against bpp, one curve per category.

    Reads the module-level names ``categories``, ``races`` and ``model`` (the latter for the
    title and output path).

    Args:
        temp: ``temp[cat][race]`` -> list of accuracies, one per entry of ``rates``.
        rates: bpp values used as x coordinates.
        trained_on: Label shown in the title and inserted into the output file name.
        write_image: When True, also save the figure as a PNG under
            ``../../plots/new_training_merge_skintype/<model>/``.
    """
    fig, axes = plt.subplots(1, figsize=(3.3, 3.3), sharey=True)
    markers = ['s', 'o', '^', '*', "v", "h"]

    axes.set_title(f"Fairness, {model}, {trained_on}")

    for i, cat in enumerate(categories):
        per_race = np.array([temp[cat][race] for race in races])
        # Cycle through the markers so that more than six categories do not raise IndexError.
        axes.plot(rates, per_race.max(0) - per_race.min(0), marker=markers[i % len(markers)], label=cat)
    axes.legend(bbox_to_anchor=(1.05, 0.7))
    axes.set_xlabel("bpp")
    axes.set_ylabel("bias")

    figure_name = 'all_fairness'
    if write_image:
        out_dir = os.path.join('../../plots/new_training_merge_skintype', model)
        # savefig does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)
        fig.savefig(os.path.join(out_dir, f'{model}_{trained_on}_{figure_name}.png'), bbox_inches='tight', dpi=200)

    plt.show()

In [None]:
# Accuracy-gap curves for every category (default-dataset results); figure is saved to disk.
all_fairness(temp, rates, 'celeba', write_image=True)

In [None]:
# Accuracy-gap curves for every category ("fairface" results); figure is saved to disk.
all_fairness(ff_temp, ff_rates, 'fairface', write_image=True)

In [None]:
def plot_fairness_comparison(temp, ff_temp, rates, write_image=False, ff_rates=None):
    """Compare the between-race accuracy gap of two result sets for three categories.

    For "skin_type", "eye_type" and "hair_color", draws the max-min gap of ``temp`` (solid,
    labelled "celebA") and of ``ff_temp`` (dashed, labelled "Fairface") in the same colour.
    Reads the module-level names ``races`` and ``model`` (the latter for the title and
    output path).

    Args:
        temp: ``temp[cat][race]`` -> list of accuracies, one per entry of ``rates``.
        ff_temp: Same structure for the second result set.
        rates: bpp values used as x coordinates for the ``temp`` curves.
        write_image: When True, also save the figure as a PNG under
            ``../../plots/new_training_merge_skintype/<model>/``.
        ff_rates: bpp values used as x coordinates for the ``ff_temp`` curves. Defaults to
            ``rates``, which reproduces the previous behaviour of plotting both result sets
            against the same x values.
    """
    compared_categories = ["skin_type", "eye_type", "hair_color"]
    if ff_rates is None:
        ff_rates = rates

    fig, axes = plt.subplots(1, figsize=(3.3, 3.3), sharey=True)
    markers = ['s', 'o', '^', '*', "v", "D"]
    palette = plt.rcParams['axes.prop_cycle'].by_key()['color']

    axes.set_title(f"FairFace/celebA Comparison, {model}")

    for i, cat in enumerate(compared_categories):
        celeba_acc = np.array([temp[cat][race] for race in races])
        fairface_acc = np.array([ff_temp[cat][race] for race in races])
        axes.plot(rates, celeba_acc.max(0) - celeba_acc.min(0), marker=markers[i], linestyle="-", label=f"{cat} (celebA)", color=palette[i])
        axes.plot(ff_rates, fairface_acc.max(0) - fairface_acc.min(0), marker=markers[i], linestyle="--", label=f"{cat} (Fairface)", color=palette[i])
    axes.legend(bbox_to_anchor=(1.05, 0.7))
    axes.set_xlabel("bpp")
    axes.set_ylabel("bias")

    if write_image:
        out_dir = os.path.join('../../plots/new_training_merge_skintype', model)
        # savefig does not create missing directories.
        os.makedirs(out_dir, exist_ok=True)
        fig.savefig(os.path.join(out_dir, f'{model}_dataset_comp.png'), bbox_inches='tight', dpi=200)

    plt.show()

In [None]:
def compute_fairness(temp, ff_temp):
    """Return the max-min accuracy gap between races for both result sets.

    Reads the module-level lists ``categories`` and ``races``.

    Args:
        temp: ``temp[cat][race]`` -> list of accuracies; stored under the "celebA" key.
        ff_temp: Same structure; stored under the "fairface" key.

    Returns:
        ``{"celebA": {cat: [...]}, "fairface": {cat: [...]}}`` where each list is the
        element-wise (max over races - min over races) as plain Python floats.
    """
    def _race_gap(acc_by_cat, cat):
        stacked = np.array([acc_by_cat[cat][race] for race in races])
        return (stacked.max(0) - stacked.min(0)).tolist()

    return {
        "celebA": {cat: _race_gap(temp, cat) for cat in categories},
        "fairface": {cat: _race_gap(ff_temp, cat) for cat in categories},
    }
    

In [None]:
#model_list = ["hyperprior", "qres17m", "cheng2020-attn", "mbt2018", "qarv"]

#all_fairness = {}

In [None]:
#for model in model_list:
#    temp, rates = generate_results(model=model)
#    ff_temp, ff_rates = generate_results("fairface", model=model)
#    all_fairness[model] = compute_fairness(temp, ff_temp)
#all_fairness

In [None]:
### Dump Fairness value to json

#import json
#with open('sep_fairness.json', 'w') as fp:
#    json.dump(all_fairness, fp)

In [None]:
# Gap comparison between the two result sets; figure is saved to disk.
# NOTE(review): only `rates` is passed, so the fairface curves are drawn against the same x
# values as the celebA curves even though `ff_rates` was computed separately -- confirm.
plot_fairness_comparison(temp, ff_temp, rates, write_image=True)