In [None]:
#@title Run this to download data and prepare the environment.
# Download the project dataset to data.csv. Output (including errors) is
# redirected to /dev/null, so a failed download is silent here.
! wget -O data.csv 'https://storage.googleapis.com/inspirit-ai-data-bucket-1/Data/AI%20Scholars/Sessions%206%20-%2010%20(Projects)/Project%20-%20Fairness/data.csv' &> /dev/null
# Download and unpack the metric diagrams that are displayed in the recap section.
! wget -O metric_diagrams.zip 'https://storage.googleapis.com/inspirit-ai-data-bucket-1/Data/AI%20Scholars/Sessions%206%20-%2010%20(Projects)/Project%20-%20Fairness/metric_diagrams.zip' &> /dev/null
! unzip -oq metric_diagrams.zip
# Install the fairness libraries. fairlearn is pinned to 0.4.6; aif360 is not pinned.
! pip install aif360 fairlearn==0.4.6 &> /dev/null

from IPython.display import Image, display, Markdown
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
pio.templates.default = "plotly_white"

# Column names of the skill features in data.csv.
SKILLS = [
    "Java",
    "Python",
    "Recruiting",
    "Web_Development",
    "Databases",
    "Machine_Learning",
    "Materials",
    "AutoCAD",
    "Data_Science",
    "Art",
    "Design",
    "Marketing",
    "Finance",
    "Accounting",
    "Writing",
    "Cloud_Computing",
    "Unix",
    "Windows",
    "Teamwork",
    "Organization",
]

# Column names of the hobby features.
HOBBIES = [
    "Basketball",
    "Tennis",
    "Swimming",
    "Running",
    "Chess",
    "Painting",
    "Hand_Stand",
]

# Protected attributes; passed to aif360 as protected_attribute_names.
PROTECTED = [
    "URM",
    "Female",
    "Disability",
]

# Remaining (non-skill, non-hobby, non-protected) feature columns.
OTHER = [
    "Years_Experience",
    "GPA",
    "Prestigious_University",
]

# Every column: the "Interview" label first, followed by all feature groups.
COLUMNS = ["Interview"] + PROTECTED + OTHER + SKILLS + HOBBIES

# Feature sets used for modelling, with and without the protected attributes.
SKILLS_AND_HOBBIES = SKILLS + HOBBIES
FEATURES = SKILLS_AND_HOBBIES + OTHER + PROTECTED
FEATURES_WITHOUT_PROTECTED = SKILLS_AND_HOBBIES + OTHER

from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric
import sklearn.model_selection

def tuples_to_dict(t):
  """Convert a group tuple into the dict format used for aif360 group specs.

  The tuple is read positionally as (Female, URM, Disability). Note that this
  order differs from the order of the PROTECTED list. Each value is wrapped
  in a one-element list.
  """
  attribute_order = ("Female", "URM", "Disability")
  return {name: [t[position]] for position, name in enumerate(attribute_order)}

def get_aif_dataset(df, preds=None, scores=None):
  """Wrap a DataFrame in an aif360 BinaryLabelDataset.

  Args:
    df: DataFrame containing the "Interview" label column and the PROTECTED
      attribute columns. It is copied, never modified.
    preds: optional predictions; when given they replace the "Interview"
      column of the copy, so the dataset's labels are the predictions.
    scores: optional array-like of predicted scores (ndarray, list or pandas
      Series). Stored on the dataset as a column vector.

  Returns:
    A BinaryLabelDataset with "Interview" as label and PROTECTED as the
    protected attributes.
  """
  preds_df = df.copy()
  if preds is not None:
    preds_df["Interview"] = preds

  dataset = BinaryLabelDataset(
      df=preds_df,
      label_names=["Interview"],
      protected_attribute_names=PROTECTED,
  )

  if scores is not None:
    # np.asarray makes this work for lists and pandas Series too; calling
    # .reshape directly only works when scores is already an ndarray.
    dataset.scores = np.asarray(scores).reshape(-1, 1)
  return dataset

def get_metric(predictions, privileged_groups, unprivileged_groups):
  """Build an aif360 ClassificationMetric comparing predictions to the test labels.

  Reads the module-level `data_test` frame, so that variable must be defined
  before this function is called.

  Args:
    predictions: predicted "Interview" values for the rows of `data_test`.
    privileged_groups: iterable of (Female, URM, Disability) tuples.
    unprivileged_groups: iterable of (Female, URM, Disability) tuples.
  """
  def as_binary_label_dataset(frame):
    # Same label / protected-attribute configuration for both datasets.
    return BinaryLabelDataset(
        df=frame,
        label_names=["Interview"],
        protected_attribute_names=PROTECTED,
    )

  true_dataset = as_binary_label_dataset(data_test)

  # Copy the test frame and overwrite its label column with the predictions.
  predicted_frame = data_test.copy()
  predicted_frame["Interview"] = predictions
  predicted_dataset = as_binary_label_dataset(predicted_frame)

  privileged = [tuples_to_dict(group) for group in privileged_groups]
  unprivileged = [tuples_to_dict(group) for group in unprivileged_groups]
  return ClassificationMetric(
      true_dataset,
      predicted_dataset,
      privileged_groups=privileged,
      unprivileged_groups=unprivileged,
  )




# Recap: Last Time

**Questions:**
* What did we do last time?
* Which models performed well in terms of accuracy?
* What do the terms **statistical parity difference**, **false negative rate difference** and **false positive rate difference** mean?

**Exercise:** Describe the **statistical parity difference**, **false negative rate difference**, and **false positive rate difference** in the following diagrams:



In [None]:
# Show the six fairness-metric diagrams, in order.
for diagram_path in (
    'metric_diagrams/Fairness Metrics.png',
    'metric_diagrams/Fairness Metrics (1).png',
    'metric_diagrams/Fairness Metrics (2).png',
    'metric_diagrams/Fairness Metrics (3).png',
    'metric_diagrams/Fairness Metrics (4).png',
    'metric_diagrams/Fairness Metrics (5).png',
):
  display(Image(diagram_path))


**Questions**:
* How did we try to improve fairness metrics?
* What were some tradeoffs that we needed to make?

# Today

Today we'll look at some more advanced techniques in machine learning fairness.

As always, we start by loading in the data. We also copy the `report_fairness_metrics` function from the last notebook. Remind yourself of what it does.

In [None]:
# Load the dataset downloaded by the setup cell.
data = pd.read_csv("data.csv")
# Hold out 20% of the rows for testing; random_state=1 fixes the split.
data_train, data_test = sklearn.model_selection.train_test_split(data, test_size=0.2, random_state=1)
# FEATURES includes the protected attributes, so the model sees them as inputs.
x_train = data_train[FEATURES]
x_test = data_test[FEATURES]
# "Interview" is the label column.
y_train = data_train["Interview"]
y_test = data_test["Interview"]


def return_fairness_metrics(predictions, privileged_groups, unprivileged_groups):
  """Compute the fairness metrics and accuracy for a set of test predictions.

  Sign conventions used here:
    * FNRD = FNR(privileged) - FNR(unprivileged)
    * FPRD = FPR(unprivileged) - FPR(privileged)

  Returns:
    A tuple (spd, fnrd, fprd, aod, acc): statistical parity difference,
    false negative rate difference, false positive rate difference,
    average odds difference and accuracy.
  """
  metric = get_metric(predictions, privileged_groups, unprivileged_groups)

  statistical_parity = metric.statistical_parity_difference()

  fnr_privileged = metric.false_negative_rate(privileged=True)
  fnr_unprivileged = metric.false_negative_rate(privileged=False)
  fnr_difference = fnr_privileged - fnr_unprivileged

  fpr_unprivileged = metric.false_positive_rate(privileged=False)
  fpr_privileged = metric.false_positive_rate(privileged=True)
  fpr_difference = fpr_unprivileged - fpr_privileged

  average_odds = metric.average_odds_difference()
  accuracy = metric.accuracy()
  return statistical_parity, fnr_difference, fpr_difference, average_odds, accuracy


def report_fairness_metrics(predictions, privileged_groups, unprivileged_groups):
  """Print each fairness metric and the accuracy, rounded to 3 decimal places."""
  spd, fnrd, fprd, aod, acc = return_fairness_metrics(predictions, privileged_groups, unprivileged_groups)

  labelled_values = (
      ("Statistical parity difference: ", spd),
      ("False negative rate difference: ", fnrd),
      ("False positive rate difference: ", fprd),
      ("Average odds difference: ", aod),
      ("Accuracy: ", acc),
  )
  for label, value in labelled_values:
    # Label and value share one output line.
    print(label, end="")
    print(round(value, 3))

Let's also train a Logistic Regression model on the data and remind ourselves how it does in terms of accuracy and fairness.

In [None]:
from sklearn.linear_model import LogisticRegression
# Baseline: logistic regression with default settings, trained on FEATURES
# (which include the protected attributes).
lr = LogisticRegression()
lr.fit(x_train, y_train)
# Group tuples are (Female, URM, Disability): privileged is (0, 0, 0),
# unprivileged is (1, 0, 0).
report_fairness_metrics(lr.predict(x_test), [(0,0,0)], [(1,0,0)])

# Reject Option Classification

Now that we have our model, we'd like to make it more fair. One way to make our predictions more fair is to leverage the probability/uncertainty interpretation of logistic regression. As you know, logistic regression outputs a score for each input which can be interpreted as a probability between 0 and 1.

Run the next cell to see our model's predicted probabilities on the test set.

In [None]:
# Predicted probabilities of the baseline model for every test row.
lr.predict_proba(x_test)

**Questions**:
* How do you interpret these values?
* How do we get from these values to the actual predictions of the model?

Because we are doing binary classification, a probability of 1 means the candidate was offered an interview whereas a probability of 0 means that they weren't. Thus, it's enough to just look at the probability that someone gets an interview. We extract this information with the following:

In [None]:
# Keep only column 1, used throughout this notebook as the probability of getting an interview.
lr.predict_proba(x_test)[:, 1]

## Thresholds and Certainty

Here's an idea. Because our model is biased against the unprivileged group, **when the model is "uncertain", we could favor the unprivileged group**. For example, if the model is uncertain about an applicant in the unprivileged group, give them an interview; however, if the model is uncertain about an applicant in the privileged group, don't give them an interview.

The hope here is that this procedure counteracts the bias in the model.

**Question:** How would you define when the logistic regression is uncertain?

**Exercise**: Look at some of the predictions from before. Find one case where the model was **certain** and one where the model was **uncertain**.

Let's remember some key definitions:
* A **threshold** is a value between 0 and 1 such that when the model's predicted probability exceeds the threshold, the data point is classified as positive. Typically just 0.5.
* An **uncertainty margin** is how close to the threshold the predicted probability is to be considered 'uncertain'.


**Exercise:** Use the definition of threshold and margin to write a function that converts from a probability to one of three predictions: "Interview", "No Interview" and "Uncertain".

In [None]:
def probability_to_prediction(p, threshold, margin):
  """Map a predicted probability to "Interview", "No Interview" or "Uncertain".

  Exercise scaffold: the two conditions below are placeholders that the
  student is expected to replace.

  Args:
    p: predicted probability for one applicant.
    threshold: decision threshold between 0 and 1.
    margin: how close to the threshold a probability must be to count as
      uncertain.
  """
  ### BEGIN CODE HERE ###
  # Replace TODO's with the correct condition
  # NOTE: until replaced, the non-empty string "TODO" is truthy, so the first
  # branch is always taken and every input is labelled "Interview".
  if "TODO":
    return "Interview"
  if "TODO":
    return "No Interview"
  else:
    return "Uncertain"
  ### END CODE HERE ###

**Exercise**: Run the next cell to visualize the classifications. Play around with the threshold and margin to see how they change the predictions of the model.

In [None]:
margin = 0.05
threshold = 0.5


def plot_predictions(margin, threshold):
  """Scatter the test-set probabilities against the actual labels.

  Points are coloured by the decision returned by probability_to_prediction,
  and vertical lines mark the threshold and the edges of the uncertainty
  band (clipped to the range [0, 1]).

  Note the argument order: margin first, then threshold.
  """
  interview_probabilities = lr.predict_proba(x_test)[:, 1]
  points = pd.DataFrame({
      "Actual Label": y_test,
      "Predicted Probability": interview_probabilities,
  })
  points["Prediction"] = [
      probability_to_prediction(probability, threshold, margin)
      for probability in points["Predicted Probability"]
  ]

  fig = px.scatter(points, x="Predicted Probability", y="Actual Label", color="Prediction")

  # Vertical markers: the threshold itself, then the clipped band edges.
  upper_bound = min(1, threshold + margin)
  lower_bound = max(0, threshold - margin)
  vertical_lines = (
      (threshold, "Threshold"),
      (upper_bound, "Uncertainty Upper Bound"),
      (lower_bound, "Uncertainty Lower Bound"),
  )
  for x_position, trace_name in vertical_lines:
    fig.add_scatter(x=[x_position, x_position], y=[0, 1], name=trace_name)

  fig.show()

plot_predictions(margin, threshold)

Using the idea from the previous paragraph, what happens to a candidate that is in the uncertainty range:
* If they are privileged?
* If they are unprivileged?

### How does shifting the threshold affect the different fairness metrics?

**Exercise**: In the next cell we'd like to visualize how changing the threshold of the model affects the fairness metrics. Complete the `plot_threshold` function. Check out the comments in the code for more guidance.

Hint: How do you convert predicted probabilities to predictions (Ignore the margin for now and set interview as 1 and no interview as 0)?

Hint: Use the `return_fairness_metrics` function to get the fairness metrics of a set of predictions. Check the definition of this function in one of the earlier code cells. Use [(0,0,0)] as the privileged group and [(1, 0, 0)] as the unprivileged group.



In [None]:
def plot_threshold(predicted_probabilities):
  """
  Plot the fairness metrics and accuracy as a function of the threshold.

  predicted_probabilities is a list of probability outputs from our model.

  Exercise scaffold: the loop body is left for the student to fill in. Until
  it is, `rows` stays empty and the resulting plot has no data.
  """
  thresholds = np.arange(0, 1, 0.02) # A list of thresholds we'd like to test
  rows = []

  for t in thresholds:
    ### BEGIN CODE HERE ###
    """
      TODO: Fill in the rows variable with a row corresponding to the
      fairness metrics of each threshold. Each row should have
      the threshold, the statistical parity difference, the
      fnrd, fprd, the average odds difference and the accuracy
      in that order.

      By the end of this for loop, rows should look like this:
      [
        (t, spd, fnrd, fprd, aod, acc),
        (t, spd, fnrd, fprd, aod, acc),
        (t, spd, fnrd, fprd, aod, acc),...
      ]
    """

    ### END CODE HERE ###

  # One row per threshold, then reshaped to long format (one row per
  # threshold/metric pair) so each metric is drawn as its own line.
  df = pd.DataFrame(rows, columns=["Threshold", "Statistical Parity Difference", "FNRD", "FPRD", "Average Odds Difference", "Accuracy"])
  df = df.melt(id_vars='Threshold',var_name='Metric', value_name='Value')
  fig = px.line(df, x="Threshold", y="Value", color="Metric")
  # fig.add_scatter(x=predicted_probabilities, y=np.zeros(predicted_probabilities.shape), mode='markers')
  fig.show()

# Plot using the baseline model's interview probabilities on the test set.
plot_threshold(lr.predict_proba(x_test)[:, 1])

**Exercise:** By visual inspection, identify a threshold that achieves better fairness metrics than the standard 0.5 without sacrificing much of the accuracy.

**Question:** What happens when the threshold is close to 0 or close to 1?

The **Reject Option Classification** algorithm is a brute force search over different thresholds and certainty margins.

We have some fairness metric that we want to be within a certain range (between `metric_lb`, and `metric_ub`). Then we simply search many different thresholds and certainty margins while applying our "favor the unprivileged group" strategy.

In [None]:
from aif360.algorithms.postprocessing import RejectOptionClassification

# Configure the search: keep the statistical parity difference between
# -0.05 and 0.05 for the same privileged/unprivileged groups used above.
ROC = RejectOptionClassification(
    unprivileged_groups= [{"Female":1, "URM":0, "Disability":0}],
    privileged_groups= [{"Female":0, "URM":0, "Disability":0}],
    metric_name="Statistical parity difference",
    metric_ub=0.05, metric_lb=-0.05
)

In [None]:
# Test set with its actual "Interview" labels.
dataset_true = get_aif_dataset(data_test)
# Same test set, additionally carrying the baseline model's interview probabilities as scores.
dataset_pred = get_aif_dataset(data_test, scores=lr.predict_proba(x_test)[:, 1])
# Fit the threshold/margin search and produce the adjusted predictions.
dataset_transformed = ROC.fit_predict(dataset_true, dataset_pred)

Run the next cell to see which threshold and margin the algorithm found.

In [None]:
# Show the threshold and margin stored on the fitted ROC object.
print("Optimal classification threshold (with fairness constraints) = ", ROC.classification_threshold)
print("Optimal ROC margin = ",  ROC.ROC_margin)

**Exercise**: Use the `plot_predictions` function on the margin and the threshold value we found to visualize our predictions.

In [None]:
 ### BEGIN CODE HERE ###
 ### END CODE HERE ###

How do our new datapoints do?

In [None]:
# Evaluate the labels produced by Reject Option Classification.
report_fairness_metrics(dataset_transformed.labels, privileged_groups=[(0,0,0)], unprivileged_groups=[(1, 0, 0)])

**Exercise**: Experiment with different fairness metrics and bounds. Check [here](https://aif360.readthedocs.io/en/latest/modules/generated/aif360.algorithms.postprocessing.RejectOptionClassification.html#aif360.algorithms.postprocessing.RejectOptionClassification) for the possible fairness metrics we can optimize over.

# Reweighing

**Reject option classification** (previous section) lets us adapt our existing logistic regression model to satisfy some fairness constraints. Importantly, it doesn't change the underlying model at all. Thus, it is called a **postprocessing** algorithm. One drawback of this approach is that although we are guaranteed to have our fairness metric within a certain bound, we might sacrifice a lot of accuracy.

Since we are developing our algorithm in-house, we actually have more control.
In fact, **since we have access to the data (which is just our previous hiring patterns), we can change the data itself**! Algorithms that do this are known as **preprocessing** algorithms.

There's a common saying in machine learning, "Garbage in, garbage out", referring to the case where if you feed a model "garbage" input, you're going to get a "garbage model". Thus, having good data and good preprocessing techniques is very important.

In the **reweighing** algorithm, we assign a weight to each datapoint in order to debias the data.

In [None]:
from aif360.algorithms.preprocessing.reweighing import Reweighing
# Reweighing is configured with the same privileged/unprivileged groups as before.
RW = Reweighing(
    unprivileged_groups= [{"Female":1, "URM":0, "Disability":0}],
    privileged_groups= [{"Female":0, "URM":0, "Disability":0}],
)

# Training split wrapped as an aif360 dataset (actual labels, no predictions).
dataset_orig_train = get_aif_dataset(data_train)

In [None]:
# Fit the reweighing on the training data, then apply it to that same data.
RW.fit(dataset_orig_train)
dataset_transf_train = RW.transform(dataset_orig_train)

In [None]:
# Inspect the instance weights of the transformed training dataset.
dataset_transf_train.instance_weights

**Question**: How do you think the weights are chosen?

Since we want our model to be unbiased, we want our data to be unbiased. Thus, in the reweighing algorithm, we find weights that force the **weighted discrimination** to be 0.

**Optional:** Mathematical Details

Discrimination is just another name for the statistical parity difference which is:

$$ \text{(Interview rate for the unprivileged group)} - \text{(Interview rate for the privileged group)} $$

We can rewrite this equation as

$$
\left(\frac{\text{Number of interviewed, unprivileged applicants}}{\text{Number of unprivileged applicants}}\right) - \left(\frac{\text{Number of interviewed, privileged applicants}}{\text{Number of privileged applicants}}\right)
$$

and rewrite it again as

$$
\left(\frac{\sum_{x : x \text{ is unprivileged and got an interview}} 1 }{\sum_{x : x \text{ is unprivileged}} 1 } \right) - \left(\frac{\sum_{x : x \text{ is privileged and got an interview}} 1 }{\sum_{x : x \text{ is privileged}} 1 }\right)
$$

Here the sum notation is just adding a 1 for each person that satisfies the condition.

**Exercise:** Convince yourselves that these equations all refer to the same thing.

The weighted discrimination is then the following:
$$
\left(\frac{\sum_{x : x \text{ is unprivileged and got an interview}} W(x) }{\sum_{x : x \text{ is unprivileged}}  W(x) } \right) - \left(\frac{\sum_{x : x \text{ is privileged and got an interview}}  W(x) }{\sum_{x : x \text{ is privileged}}  W(x) }\right)
$$

Where $W(x)$ is the weight for a certain applicant.

Let's get some intuition for this. Let's say there is discrimination in the dataset, and the interview rate for the privileged group is higher than the interview rate for the unprivileged group. Since we want the weighted discrimination to be 0, we need to either lower the interview rate for the privileged group or increase the interview rate for the unprivileged group. Let's imagine for now that all the weights are set to 1.

**Questions:**
* What happens to the equation if we increase the weight for an unprivileged applicant who gets an interview?
* What happens to the equation if we decrease the weight for an unprivileged applicant who doesn't get an interview?

Luckily, solving an equation lets us find these weights easily. Check [this](https://en.wikipedia.org/wiki/Fairness_(machine_learning)#Reweighing) for more details.




Once we have our weights, we can then train our model with an emphasis on datapoints with higher weights.

**Exercise**: Train a new LogisticRegression model on the training data with the weights found by the Reweighing algorithm.

Then evaluate the fairness and accuracy of this model using `report_fairness_metrics`

Hint: The weights from Reweighing are per datapoint, so the `sample_weight` argument of the `fit` method in the documentation [here](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) might be useful!

In [None]:
### BEGIN CODE HERE ###
### END CODE HERE ###

**Question:** How did our model do?

# Optional: Exponentiated Gradient Reduction


So far we talked about **postprocessing** algorithms which take an existing model and reinterpret the outputs. These can guarantee good performance on fairness metrics. However, we sacrifice some accuracy. We also talked about a **preprocessing** algorithm which attempts to promote fairness by changing the training data to be less biased in the first place. This approach worked out pretty well, achieving high accuracy while also attaining good performance on fairness metrics.

There is another type of algorithm called an **inprocessing** algorithm. Instead of working with the outputs of the model (like postprocessing algorithms) or the inputs to the model (like preprocessing algorithms), *inprocessing algorithms work with the training and optimization of the model* which gives an additional layer of flexibility.

**Exponentiated Gradient Reduction** is an algorithm that factors fairness into the optimization of the algorithms. At a high level you can think of fairness being factored into the loss function. Check out the [original paper](https://arxiv.org/pdf/1803.02453.pdf) for some *very* advanced extra reading.

In [None]:
from aif360.algorithms.inprocessing.exponentiated_gradient_reduction import ExponentiatedGradientReduction


# Wrap a fresh logistic regression in the reduction with an "EqualizedOdds" constraint.
estimator = LogisticRegression()
# NOTE(review): drop_prot_attr=False presumably keeps the protected attributes
# as model inputs - confirm against the aif360 documentation.
exp_grad_red = ExponentiatedGradientReduction(estimator=estimator,
                                              constraints="EqualizedOdds",drop_prot_attr=False)
# Train on the original (unweighted) training dataset built in the Reweighing section.
exp_grad_red.fit(dataset_orig_train)


In [None]:
# Predict on the test split, wrapped as an aif360 dataset.
exp_grad_red_pred = exp_grad_red.predict(get_aif_dataset(data_test))

In [None]:
# Evaluate the in-processing model's predicted labels with the same groups as before.
report_fairness_metrics(exp_grad_red_pred.labels, privileged_groups=[(0,0,0)], unprivileged_groups=[(1, 0, 0)])

# Conclusion
Congratulations! You have completed the project! Feel good about what you have done. You have saved your company a lot of time all the while being extremely sensitive and cognizant of fairness and bias - something that is often overlooked.

Make sure to go back to carefully pick the best model. Once you are satisfied with the accuracy and fairness of your model it's time to present your findings and results to your bosses at Rayo Tech (or in reality your peers and parents and other instructors)!



# Extra: Fairness in the Real World - An open ended exploration

Check out [this demo](https://aif360.mybluemix.net/data) from IBM on some of the algorithms and metrics that we learned about applied to real world datasets.

To test yourself and reinforce this knowledge, we have provided you with a new dataset - this time from the real world! Check out the next notebook to explore this dataset.

# Acknowledgements
* Data and notebook by Harry Sha. Email harryshahai@gmail.com for bugs/questions!
* The [AI Fairness 360 Library](https://aif360.readthedocs.io/en/latest/) which implements the different fairness metrics and algorithms.
