# License

Copyright 2019 Hamaad Musharaf Shah

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

# Creating fair machine learning models with Generative Adversarial Networks

## Author: Hamaad Shah
---

Load up R libraries.

---

In [None]:
library(fastDummies)
library(data.table)
library(CASdatasets)
library(data.table)
library(dplyr)
library(keras)
library(hmeasure)
library(ggplot2)

Load the data, do some basic preprocessing, and create the set of features, the sensitive attributes and the target variable.

---

In [None]:
# Column names for the input file, in file order.
adult.col.names <- c("age", "workclass", "fnlwgt", "education",
                     "education_num", "marital_status", "occupation",
                     "relationship", "race", "sex", "capital_gain",
                     "capital_loss", "hours_per_week", "country", "target")

# Read the raw file; "?" entries are read as NA.
data <- fread(input = "/Users/samson/Downloads/adult.data",
              col.names = adult.col.names,
              na.strings = "?")

# Keep only the rows whose race is "White" or "Black".
keep.rows <- data$race %in% c("White", "Black")
data <- data[keep.rows]

sensitive_attribs <- c("race", "sex")

# Recode the sensitive attributes to 0/1 in place:
#   race: 0 is black and 1 is white.
#   sex:  0 is female and 1 is male.
data[, ":="(race = ifelse(race == "White", 1, 0),
            sex = ifelse(sex == "Male", 1, 0))]

# Sensitive attributes as their own table (taken before they are dropped below).
Z <- data[, sensitive_attribs, with = FALSE]

# Binary target: 1 when the label is ">50K", 0 otherwise.
data[, target := ifelse(target == ">50K", 1, 0)]
y <- data[, target]

# Drop the target and the sensitive attributes from the feature table.
# `:=` removes the columns by reference, so `data` loses them as well.
X <- data[, c("target", sensitive_attribs) := NULL]

# Replace every missing entry with the literal level "Unknown".
is.missing.ind <- is.na(X)
X[is.missing.ind] <- "Unknown"

# One-hot encode the categorical columns (first level of each dropped),
# then remove the original categorical columns.
categorical.cols <- c("workclass",
                      "education",
                      "marital_status",
                      "occupation",
                      "relationship",
                      "country")
X <- dummy_cols(.data = X,
                select_columns = categorical.cols,
                remove_first_dummy = TRUE)
X <- X[, (categorical.cols) := NULL]

Split the data into train and test sets.

---

In [None]:
# Randomly assign half of the rows (rounded down) to the training set; the
# remaining rows form the test set. No seed is set in this cell, so the
# split differs from run to run.
n.obs <- nrow(X)
row.ids <- seq_len(n.obs)
tr.ind <- sample(x = row.ids, size = as.integer(0.5 * n.obs), replace = FALSE)
ts.ind <- setdiff(x = row.ids, y = tr.ind)

Standardise the data.

---

In [None]:
# Estimate the centring and scaling statistics from the training rows only,
# so that no information from the test rows leaks into the preprocessing.
# The same training statistics are then applied to both splits.
raw.X.tr <- as.matrix(X[tr.ind])
mu <- colMeans(raw.X.tr)
std <- apply(X = raw.X.tr, MARGIN = 2, FUN = sd)

# A column that is constant within the training rows (for example a rare
# dummy level that only occurs in the test rows) has zero spread. Divide such
# columns by 1 so they are centred instead of becoming NaN/Inf.
std[!is.finite(std) | std < .Machine$double.eps] <- 1

# Column-wise (x - mu) / std for the training rows ...
standardised.X.tr <- sweep(x = X[tr.ind], MARGIN = 2, STATS = mu, FUN = "-")
standardised.X.tr <- sweep(x = standardised.X.tr, MARGIN = 2, STATS = std, FUN = "/")
# ... and for the test rows, using the training statistics.
standardised.X.ts <- sweep(x = X[ts.ind], MARGIN = 2, STATS = mu, FUN = "-")
standardised.X.ts <- sweep(x = standardised.X.ts, MARGIN = 2, STATS = std, FUN = "/")

Create the classifier.

---

In [None]:
# Input: one unit per column of the standardised feature table.
n.features <- ncol(standardised.X.tr)
classifier.input.layer <- layer_input(shape = c(n.features),
                                      name = "classifier.input")

# Three identical hidden stages: dense(32, relu) followed by dropout(0.2).
# Layer names are "classifier.hidden.layer.<k>" / "classifier.dropout.layer.<k>".
classifier.hidden.layers <- classifier.input.layer
for (stage in 1:3) {
  classifier.hidden.layers <- classifier.hidden.layers %>%
    layer_dense(units = 32,
                activation = "relu",
                name = paste0("classifier.hidden.layer.", stage)) %>%
    layer_dropout(rate = 0.2,
                  name = paste0("classifier.dropout.layer.", stage))
}

# Single sigmoid unit for the binary target.
classifier.output.layer <- layer_dense(object = classifier.hidden.layers,
                                       units = 1,
                                       activation = "sigmoid",
                                       name = "classifier.output")

classifier.model <- keras_model(inputs = classifier.input.layer,
                                outputs = classifier.output.layer)

# Stand-alone classifier, compiled with Adam and binary cross-entropy.
compile(object = classifier.model,
        optimizer = "adam",
        loss = "binary_crossentropy")

summary(classifier.model)


Create the adversarial model.

---

In [None]:
# The adversary takes a single input value (it is later fed the classifier
# output) and predicts both sensitive attributes from it.
adversarial.input.layer <- layer_input(shape = c(1), name = "adversarial.input")

# Shared trunk: three dense(32, relu) layers named
# "adversarial.hidden.layer.<k>".
adversarial.hidden.layers <- adversarial.input.layer
for (stage in 1:3) {
  adversarial.hidden.layers <- layer_dense(
    object = adversarial.hidden.layers,
    units = 32,
    activation = "relu",
    name = paste0("adversarial.hidden.layer.", stage)
  )
}

# Two sigmoid heads on top of the shared trunk: race first, then sex.
race.layer <- layer_dense(object = adversarial.hidden.layers,
                          units = 1,
                          activation = "sigmoid",
                          name = "race.layer.output")
sex.layer <- layer_dense(object = adversarial.hidden.layers,
                         units = 1,
                         activation = "sigmoid",
                         name = "sex.layer.output")

adversarial.model <- keras_model(inputs = adversarial.input.layer,
                                 outputs = c(race.layer,
                                             sex.layer))

summary(adversarial.model)

We freeze the classifier weights and unfreeze the adversarial model weights in order to train the adversarial model.

---

In [None]:
# Stack the adversary on top of the classifier output: features go in,
# the (race, sex) predictions of the adversary come out.
adversary.on.classifier <- adversarial.model(object = classifier.output.layer)
unfrozen.adversarial.model <- keras_model(inputs = classifier.input.layer,
                                          outputs = adversary.on.classifier)

# Freeze every classifier layer before compiling ...
freeze_weights(object = classifier.model,
               from = "classifier.input",
               to = "classifier.output")

# ... and make the adversary trainable up to each of its two heads.
for (head.name in c("race.layer.output", "sex.layer.output")) {
  unfreeze_weights(object = adversarial.model,
                   from = "adversarial.input",
                   to = head.name)
}

# One binary cross-entropy loss per adversary output (race, sex).
compile(object = unfrozen.adversarial.model,
        optimizer = "adam",
        loss = c("binary_crossentropy",
                 "binary_crossentropy"))

summary(unfrozen.adversarial.model)

We freeze the adversarial weights and unfreeze the classifier model weights in order to train the classifier model.

---

In [None]:
# Combined model with three outputs, in this order: the classifier
# prediction, then the adversary's race and sex predictions computed from it.
adversary.on.classifier <- adversarial.model(object = classifier.output.layer)
frozen.adversarial.model <- keras_model(inputs = classifier.input.layer,
                                        outputs = c(classifier.output.layer,
                                                    adversary.on.classifier))

# Classifier layers trainable ...
unfreeze_weights(object = classifier.model,
                 from = "classifier.input",
                 to = "classifier.output")

# ... adversary layers frozen up to each of its two heads.
for (head.name in c("race.layer.output", "sex.layer.output")) {
  freeze_weights(object = adversarial.model,
                 from = "adversarial.input",
                 to = head.name)
}

summary(frozen.adversarial.model)

# Loss weights follow the output order (classifier, race, sex). The two
# adversary losses carry negative weights, so minimising the total loss
# rewards the classifier for increasing the adversary's losses.
compile(object = frozen.adversarial.model,
        optimizer = "adam",
        metrics = NULL,
        loss_weights = c(1, -130, -30),
        loss = c("binary_crossentropy",
                 "binary_crossentropy",
                 "binary_crossentropy"))

Pre-train the classifier.

---

In [None]:
# Flag every classifier layer as trainable before fitting.
unfreeze_weights(object = classifier.model,
                 from = "classifier.input",
                 to = "classifier.output")

# Fit the stand-alone classifier on the training split:
# 5 epochs, mini-batches of 500 rows, shuffled each epoch.
pretrain.features <- as.matrix(standardised.X.tr)
pretrain.target <- y[tr.ind]
fit(object = classifier.model,
    x = pretrain.features,
    y = pretrain.target,
    epochs = 5,
    batch_size = 500,
    verbose = 1,
    shuffle = TRUE)

Pre-train the adversarial model.

---

In [None]:
# Classifier layers frozen ...
freeze_weights(object = classifier.model,
               from = "classifier.input",
               to = "classifier.output")

# ... adversary layers trainable up to each of its two heads.
for (head.name in c("race.layer.output", "sex.layer.output")) {
  unfreeze_weights(object = adversarial.model,
                   from = "adversarial.input",
                   to = head.name)
}

# Targets for the two adversary outputs, as one-column matrices
# restricted to the training rows (race first, then sex).
pretrain.race <- as.matrix(Z[tr.ind, "race", with = FALSE])
pretrain.sex <- as.matrix(Z[tr.ind, "sex", with = FALSE])

# Fit the stacked classifier + adversary model on the training split:
# 5 epochs, mini-batches of 500 rows, shuffled each epoch.
fit(object = unfrozen.adversarial.model,
    x = as.matrix(standardised.X.tr),
    y = list(pretrain.race, pretrain.sex),
    epochs = 5,
    batch_size = 500,
    verbose = 1,
    shuffle = TRUE)

Make some basic plots and compute the AUROC and P-rule measures to assess whether the pre-trained classifier is accurate and fair.

---

In [None]:
# P-rule of thresholded predictions for a binary (0/1) group indicator.
#
# predictions: numeric vector of classifier scores.
# group:       vector of the same length holding 0/1 group membership.
# threshold:   a score strictly above this value counts as a positive prediction.
#
# Returns the ratio of the positive-prediction rates of the two groups, or its
# reciprocal, whichever is smaller. If a group has no positive predictions the
# division yields Inf or NaN, exactly as the inline computation did.
p.rule <- function(predictions, group, threshold) {
  rate.1 <- mean(x = predictions[which(group == 1)] > threshold)
  rate.0 <- mean(x = predictions[which(group == 0)] > threshold)
  odds <- rate.1 / rate.0
  min(odds, 1 / odds)
}

# Score the test split with the classifier.
pred.test <- classifier.model %>% predict(as.matrix(standardised.X.ts))

# One row per test observation: the prediction plus the sensitive attributes.
plot.out <- data.table(pred.test, Z[ts.ind])
setnames(plot.out, c("predictions", sensitive_attribs))

threshold <- 0.5

# P-rule per sensitive attribute (race: 1 = white, 0 = black;
# sex: 1 = male, 0 = female).
out.race <- p.rule(predictions = plot.out$predictions,
                   group = plot.out$race,
                   threshold = threshold)
out.sex <- p.rule(predictions = plot.out$predictions,
                  group = plot.out$sex,
                  threshold = threshold)

# AUROC of the test-set predictions, formatted once and reused in both titles
# (it does not depend on the grouping attribute).
# NOTE(review): `percent` is not provided by any library() call visible in
# this file; presumably it comes from the scales package - confirm that it is
# attached in the session before running this cell.
auroc.label <- percent(HMeasure(true.class = y[ts.ind], scores = pred.test)$metrics$AUC)

# Density of the classifier predictions, split by race.
output.plot <- ggplot(data = plot.out) +
  geom_density(aes(x = predictions,
                   fill = factor(race)),
               alpha = 0.5) +
  ggtitle(paste("Predictions for income being higher than $50,000 per year\nGrouped by race\nP-rule:",
                percent(out.race),
                "\nAUROC:",
                auroc.label)) +
  scale_fill_discrete(name = "Race",
                      labels = c("Black", "White")) +
  scale_x_continuous(labels = percent) +
  xlab(label = "Classifier predictions") +
  ylab(label = "Density")
ggsave(filename = "/Users/samson/Projects/fair_ml/pre_train_result_race.png",
       plot = output.plot)

# Density of the classifier predictions, split by gender.
output.plot <- ggplot(data = plot.out) +
  geom_density(aes(x = predictions,
                   fill = factor(sex)),
               alpha = 0.5) +
  ggtitle(paste("Predictions for income being higher than $50,000 per year\nGrouped by gender\nP-rule:",
                percent(out.sex),
                "\nAUROC:",
                auroc.label)) +
  scale_fill_discrete(name = "Gender",
                      labels = c("Female", "Male")) +
  scale_x_continuous(labels = percent) +
  xlab(label = "Classifier predictions") +
  ylab(label = "Density")
ggsave(filename = "/Users/samson/Projects/fair_ml/pre_train_result_gender.png",
       plot = output.plot)

Train the classifier and the adversarial model alternately. 

---

In [None]:
# Loop-invariant training inputs, converted to matrices once instead of on
# every iteration. Targets are one-column matrices, as in the pre-training
# calls to fit().
train.x <- as.matrix(standardised.X.tr)
train.y <- as.matrix(y[tr.ind])
train.race <- as.matrix(Z[tr.ind, "race", with = FALSE])
train.sex <- as.matrix(Z[tr.ind, "sex", with = FALSE])

for (i in 1:100) {
    # --- Adversary step: classifier frozen, adversary trainable. ---
    # NOTE(review): Keras documents that the trainable flags in effect at
    # compile() time are the ones a compiled model keeps; these toggles mirror
    # the state each model was compiled with - confirm whether they are still
    # needed with the installed Keras version.
    freeze_weights(object = classifier.model,
                   from = "classifier.input",
                   to = "classifier.output")
    unfreeze_weights(object = adversarial.model,
                     from = "adversarial.input",
                     to = "race.layer.output")
    unfreeze_weights(object = adversarial.model,
                     from = "adversarial.input",
                     to = "sex.layer.output")

    # Random mini-batch of 100 training rows; drop = FALSE keeps every slice
    # a matrix with one row per sampled observation.
    batch.ind <- sample(x = seq_along(tr.ind), size = 100, replace = FALSE)
    unfrozen.adversarial.model %>% train_on_batch(x = train.x[batch.ind, , drop = FALSE],
                                                  y = list(train.race[batch.ind, , drop = FALSE],
                                                           train.sex[batch.ind, , drop = FALSE]))

    # --- Classifier step: classifier trainable, adversary frozen. ---
    unfreeze_weights(object = classifier.model,
                     from = "classifier.input",
                     to = "classifier.output")
    freeze_weights(object = adversarial.model,
                   from = "adversarial.input",
                   to = "race.layer.output")
    freeze_weights(object = adversarial.model,
                   from = "adversarial.input",
                   to = "sex.layer.output")

    # Fresh mini-batch; targets follow the output order of the combined model
    # (classifier target, race, sex).
    batch.ind <- sample(x = seq_along(tr.ind), size = 100, replace = FALSE)
    frozen.adversarial.model %>% train_on_batch(x = train.x[batch.ind, , drop = FALSE],
                                                y = list(train.y[batch.ind, , drop = FALSE],
                                                         train.race[batch.ind, , drop = FALSE],
                                                         train.sex[batch.ind, , drop = FALSE]))
}

# Evaluate once, after the last classifier update. Previously this ran inside
# the loop before each classifier step, so the values left over for the final
# plots did not include the last update and the full test set was scored on
# every iteration.
pred.test <- classifier.model %>% predict(as.matrix(standardised.X.ts))

plot.out <- data.table(pred.test, Z[ts.ind])
colnames(plot.out) <- c("predictions", sensitive_attribs)

# P-rule for race: ratio of positive-prediction rates between the two groups,
# or its reciprocal, whichever is smaller.
y.z.1 <- ifelse(test = plot.out$predictions[which(plot.out$race == 1)] > threshold, yes = 1.0, no = 0.0)
y.z.0 <- ifelse(test = plot.out$predictions[which(plot.out$race == 0)] > threshold, yes = 1.0, no = 0.0)
odds <- mean(x = y.z.1) / mean(x = y.z.0)
out.race <- min(x = c(odds, 1 / odds))

# P-rule for sex, computed the same way.
y.z.1 <- ifelse(test = plot.out$predictions[which(plot.out$sex == 1)] > threshold, yes = 1.0, no = 0.0)
y.z.0 <- ifelse(test = plot.out$predictions[which(plot.out$sex == 0)] > threshold, yes = 1.0, no = 0.0)
odds <- mean(x = y.z.1) / mean(x = y.z.0)
out.sex <- min(x = c(odds, 1.0 / odds))

Final results. 

---

In [None]:
# AUROC of the test-set predictions, formatted once and reused in both titles
# (it does not depend on the grouping attribute).
# NOTE(review): `percent` is not provided by any library() call visible in
# this file; presumably it comes from the scales package - confirm that it is
# attached in the session before running this cell.
auroc.label <- percent(HMeasure(true.class = y[ts.ind], scores = pred.test)$metrics$AUC)

# Density of the classifier predictions, split by race.
output.plot <- ggplot(data = plot.out) +
  geom_density(aes(x = predictions,
                   fill = factor(race)),
               alpha = 0.5) +
  ggtitle(paste("Predictions for income being higher than $50,000 per year\nGrouped by race\nP-rule:",
                percent(out.race),
                "\nAUROC:",
                auroc.label)) +
  scale_fill_discrete(name = "Race",
                      labels = c("Black", "White")) +
  scale_x_continuous(labels = percent) +
  xlab(label = "Classifier predictions") +
  ylab(label = "Density")
ggsave(filename = "/Users/samson/Projects/fair_ml/post_train_result_race.png",
       plot = output.plot)

# Density of the classifier predictions, split by gender.
output.plot <- ggplot(data = plot.out) +
  geom_density(aes(x = predictions,
                   fill = factor(sex)),
               alpha = 0.5) +
  ggtitle(paste("Predictions for income being higher than $50,000 per year\nGrouped by gender\nP-rule:",
                percent(out.sex),
                "\nAUROC:",
                auroc.label)) +
  scale_fill_discrete(name = "Gender",
                      labels = c("Female", "Male")) +
  scale_x_continuous(labels = percent) +
  xlab(label = "Classifier predictions") +
  ylab(label = "Density")
ggsave(filename = "/Users/samson/Projects/fair_ml/post_train_result_gender.png",
       plot = output.plot)