# Graphing Results

In [None]:
# Notebook for visualising FairPATE results.
# Title put on the figures drawn from this data.
plot_title = 'CMNIST Dataset 100 Models Ensemble'
# CSV file holding the results; a relative path, so it is resolved against the working directory.
DATA_PATH = "colormnist_100_search.csv"


In [None]:
# Show the working directory, i.e. where the relative DATA_PATH is looked up.
# Plain Python is used instead of the bare `pwd` automagic, which only works
# inside IPython with automagic enabled (and only in a single-line cell).
import os
os.getcwd()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import json
import ast

In [None]:
# Load the results table from the CSV at DATA_PATH and preview its first rows.
df = pd.read_csv(filepath_or_buffer=DATA_PATH)
df.head(n=5)

In [None]:
# The CSV header is messy: some column names carry a leading space
# (e.g. ' accuracy', ' achieved budget'), so the cells below index those
# columns with the space included. Listing the names makes that visible.
df.columns

In [None]:
# The ' fairness disparity gaps' column stores each row's dict as a string,
# so parse it back into a real Python dict before using it.
df['fair_dict'] = df[' fairness disparity gaps'].map(ast.literal_eval)

# With real dicts available, keep the LARGEST gap of every row:
# that value is the row's highest fairness violation.
df['highest_fair_violation'] = df['fair_dict'].map(lambda gaps: max(gaps.values()))
df.head()


In [None]:
# 3D overview: one point per row, positioned by fairness violation,
# privacy budget and accuracy.
sns.set(style="darkgrid")

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection="3d")

# Coordinates of the three axes.
x = df['highest_fair_violation']
y = df[' achieved budget']
z = df[' accuracy']

ax.scatter(x, y, z)
ax.set(xlabel="Fairness Violation", ylabel="Privacy Budget", zlabel="Accuracy")

fig.tight_layout()
plt.show()

In [None]:
# Discretise the highest fairness violation into 3 intervals
# (an integer `bins` makes pd.cut use equal-width intervals).
df['fairness'] = pd.cut(x=df['highest_fair_violation'], bins=3)

In [None]:
# Discretise the accuracy into 3 intervals; stored in a separate column so the
# raw ' accuracy' values stay available.
df['binned accuracy'] = pd.cut(x=df[' accuracy'], bins=3)

In [None]:
# Discretise the number of answered queries into 3 intervals.
# Note the names: the raw counts live in ' number answered' (leading space),
# the intervals go into the new column 'number answered' (no leading space).
df['number answered'] = pd.cut(x=df[' number answered'], bins=3)

In [None]:
# Privacy budget vs. fairness gap, one marker per row, coloured by accuracy bucket.
ax = sns.scatterplot(
    x=' achieved budget',
    y='highest_fair_violation',
    hue='binned accuracy',
    data=df,
    palette='mako',
    s=80,
    legend=True,
)
ax.set_title(plot_title)
ax.set_xlabel('achieved budget')
ax.set_ylabel('fairness gap')
# Put the legend outside the axes, vertically centred on the right-hand edge.
ax.legend(title='accuracy', loc='center left', bbox_to_anchor=(1, 0.5), ncol=1)

In [None]:
# Accuracy against privacy budget drawn as lines: colour encodes the bucketed
# number of answered queries, line style encodes the fairness bucket.
# The frame is re-bound to a copy ordered by achieved budget before plotting.
df = df.sort_values(by=" achieved budget")
ax = sns.lineplot(
    x=' achieved budget',
    y=' accuracy',
    hue='number answered',
    style='fairness',
    data=df,
    palette='magma',
    legend=True,
)
ax.set_title(plot_title)
# Put the legend outside the axes, vertically centred on the right-hand edge.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=1)

In [None]:
# Privacy budget vs. number of answered queries, coloured by the (continuous)
# fairness gap.
# A single Normalize instance is shared by the markers (hue_norm) and by the
# colorbar, so the colorbar scale really describes the colours in the plot.
norm = plt.Normalize(df['highest_fair_violation'].min(), df['highest_fair_violation'].max())

ax = sns.scatterplot(data=df,
                x=' achieved budget',
                y=' number answered',
                hue='highest_fair_violation',
                hue_norm=norm,
                palette='magma',
                legend=False,  # the colorbar below takes the place of the legend
                s=80
                )
ax.set(title=plot_title)

# Stand-alone mappable with the same colormap and normalisation as the markers.
sm = plt.cm.ScalarMappable(cmap="magma", norm=norm)
sm.set_array([])
# `sm` is not attached to any Axes, so the target Axes must be passed explicitly;
# recent matplotlib versions refuse to guess it.
ax.figure.colorbar(sm, ax=ax, label="fairness")


In [None]:
# Accuracy vs. privacy budget with the fairness bucket encoded as opacity:
# the larger the fairness gap, the more opaque (darker) the markers.
# pd.cut orders its categories from the lowest to the highest interval, so the
# opacity has to grow with the category code.
alpha = [0.3, 0.5, 0.7]
for i, interval in enumerate(df['fairness'].cat.categories):
    # rows whose fairness gap falls into the i-th interval
    data = df[df['fairness'].cat.codes == i]

    plt.scatter(data[' achieved budget'], data[' accuracy'],
                alpha=alpha[i], color='olive', s=80, label=str(interval))

plt.xlabel("privacy budget")
plt.ylabel("accuracy")
plt.title(plot_title)
# One legend entry per fairness interval, placed outside the axes on the right.
plt.legend(title='fairness gap', loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()

In [None]:
# Bin the raw ' number answered' counts into 3 intervals and store them in
# 'number answered' (no leading space). The result is computed from the raw
# column only, so running this cell again yields the same intervals.
df['number answered'] = pd.cut(x=df[' number answered'], bins=3)

In [None]:
# One scatter panel per answered-queries bucket (stacked as rows); inside each
# panel the marker colour encodes the fairness bucket.
g = sns.FacetGrid(
    data=df,
    row='number answered',
    hue='fairness',
    palette='magma',
).map(sns.scatterplot, ' achieved budget', ' accuracy')
g.add_legend(title='fairness gap', loc='center left', bbox_to_anchor=(0.8, 0.5))