# SVM and Fairness Modified SVM
This notebook takes the Flagstar 2019 data and outputs the classification results for each approach.

## Read in data

In [1]:
using CSV, DataFrames, Statistics, JuMP, Gurobi, GLMNet, Random, Plots

In [16]:
# Load the training and test sets from CSV files in the working directory.
train = CSV.File("train.csv") |> DataFrame
test = CSV.File("test.csv") |> DataFrame
;

In [17]:
# Feature tables: every column except the label (:denied), the row identifier (:ID)
# and the group indicator (:minority).
trainX = train[:, Not([:denied, :ID, :minority])]
testX = test[:, Not([:denied, :ID, :minority])]

# Labels are taken directly from the :denied column.
trainy = train.denied
testy = test.denied

# Group indicators for the training rows: train_majority is 1.0 exactly where
# train_minority equals 0, and 0.0 everywhere else.
train_minority = train.minority
train_majority = Float64[m == 0 ? 1.0 : 0.0 for m in train_minority]
;

In [26]:
# Alternative feature tables that keep the :minority column as a predictor;
# only the label (:denied) and the row identifier (:ID) are dropped.
trainX_racist = train[:, Not([:denied, :ID])]
testX_racist = test[:, Not([:denied, :ID])]
;

The objective is to minimize the total hinge loss: $\sum \limits_{i = 1}^N \max \{0, 1 - y_i * (\beta_0 + \beta_1 * x_1 + ... + \beta_p * x_p)\}$

## Train SVM
min $\sum \limits_{i = 1}^N t_i$ <br>
st: <br>
$0 \le t_i \quad \forall i \in [1,...,N]$ <br>
$1 - y_i * (\beta_0 + \beta_1 * x_1 + ... + \beta_p * x_p) \le t_i \quad \forall i \in [1,...,N]$

In [19]:
# Baseline classifier: minimise the summed hinge loss as a linear program.
# t[i] is an upper bound on the hinge loss of training row i; the objective has
# no regularisation term on beta.
# NOTE(review): the hinge loss assumes trainy is coded as -1/+1 — TODO confirm the
# encoding of the :denied column.
model = Model(Gurobi.Optimizer)
set_optimizer_attribute(model, "OutputFlag", 0)  # turn off the Gurobi solver log
n, p = size(trainX)  # n training rows, p feature columns
@variable(model, beta[j=1:p])  # one weight per feature
@variable(model, t[i=1:n])     # per-row hinge-loss bound
@variable(model, beta0)        # intercept
@objective(model, Min, sum(t[i] for i=1:n))
@constraint(model, [i=1:n], 0 <= t[i])
@constraint(model, [i=1:n], 1 - (trainy[i]*(beta0 + sum(trainX[i,j]*beta[j] for j=1:p))) <= t[i])
optimize!(model)
# Stop here if the solver did not prove optimality, instead of reading (and later
# exporting) values from an unfinished or failed solve.
termination_status(model) == MOI.OPTIMAL ||
    error("Baseline SVM was not solved to optimality (status: $(termination_status(model)))")
opt_beta = value.(beta)
opt_beta0 = value(beta0)
# The baseline loss is reused later as the loss budget of the third model.
prev_loss = objective_value(model)
print(prev_loss)
;

Academic license - for non-commercial use only
Academic license - for non-commercial use only
24307.403047147865

In [40]:
# Score each row with the baseline model: the intercept plus the feature columns
# weighted by opt_beta, accumulated column by column from left to right.
# The score is stored in a :results column and exported with ID, group and label.
train[!, :results] = mapfoldl(j -> opt_beta[j] .* trainX[!, j], +, 1:p) .+ opt_beta0
to_send = train[!, [:ID, :results, :minority, :denied]]
CSV.write("results/train_results.csv", to_send)

# Same scoring and export for the test set.
test[!, :results] = mapfoldl(j -> opt_beta[j] .* testX[!, j], +, 1:p) .+ opt_beta0
to_send = test[!, [:ID, :results, :minority, :denied]]
CSV.write("results/test_results.csv", to_send)
;

## Add holistic fairness constraint
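The percentage of loss from majority borrowers, scaled by 3/2, has to be less than or equal to the overall percentage of majority borrowers in the data (that is, majority borrowers may account for at most two-thirds of their proportional share of the loss) <br>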
min $\sum \limits_{i = 1}^N t_i$ <br>
st: <br>
$0 \le t_i \quad \forall i \in [1,...,N]$ <br>
$1 - y_i * (\beta_0 + \beta_1 * x_1 + ... + \beta_p * x_p) \le t_i \quad \forall i \in [1,...,N]$ <br>
$N*(3/2) * \sum \limits_{i=1}^N majority_i * t_i \le \text{MajorityCount}*\sum \limits_{i=1}^N t_i$ <br>
Where N is the total number of observations in the training data, MajorityCount is the total number of majority borrowers in the training data, and $majority_i$ is whether the $i^{\text{th}}$ observation in the training data is a majority borrower.

In [39]:
# Hinge-loss linear program with an added group constraint on the loss:
#   1.5 * n * (loss on majority rows) <= num_majority * (total loss),
# i.e. the majority rows' share of the total loss is at most two-thirds of their
# share of the training rows.
# NOTE(review): the hinge loss assumes trainy is coded as -1/+1 — TODO confirm the
# encoding of the :denied column.
model = Model(Gurobi.Optimizer)
set_optimizer_attribute(model, "OutputFlag", 0)  # turn off the Gurobi solver log
n, p = size(trainX)  # n training rows, p feature columns
num_majority = sum(train_majority)  # train_majority is a 0/1 indicator, so this is a count
@variable(model, beta[j=1:p])  # one weight per feature
@variable(model, t[i=1:n])     # per-row hinge-loss bound
@variable(model, beta0)        # intercept
@objective(model, Min, sum(t[i] for i=1:n))
@constraint(model, [i=1:n], 0 <= t[i])
@constraint(model, [i=1:n], 1 - (trainy[i]*(beta0 + sum(trainX[i,j]*beta[j] for j=1:p))) <= t[i])
@constraint(model, (1.5*sum(train_majority[i]*t[i] for i=1:n))*n <= sum(t[i] for i=1:n)*num_majority)
optimize!(model)
# Stop here if the solver did not prove optimality, instead of reading (and later
# exporting) values from an unfinished or failed solve.
termination_status(model) == MOI.OPTIMAL ||
    error("Fairness-constrained SVM was not solved to optimality (status: $(termination_status(model)))")
opt_beta_fair = value.(beta)
opt_beta0_fair = value(beta0)
print(objective_value(model))
;

Academic license - for non-commercial use only
Academic license - for non-commercial use only
29915.441899674577

In [40]:
# Score each row with the fairness-constrained model: the intercept plus the feature
# columns weighted by opt_beta_fair, accumulated column by column from left to right.
# The score overwrites the :results column and is exported with ID, group and label.
train[!, :results] = mapfoldl(j -> opt_beta_fair[j] .* trainX[!, j], +, 1:p) .+ opt_beta0_fair
to_send = train[!, [:ID, :results, :minority, :denied]]
CSV.write("results/train_results2.csv", to_send)

# Same scoring and export for the test set.
test[!, :results] = mapfoldl(j -> opt_beta_fair[j] .* testX[!, j], +, 1:p) .+ opt_beta0_fair
to_send = test[!, [:ID, :results, :minority, :denied]]
CSV.write("results/test_results2.csv", to_send)
;

## Minimize majority loss subject to total loss tolerance
min $\sum \limits_{i = 1}^N majority_i * t_i$ <br>
st: <br>
$0 \le t_i \quad \forall i \in [1,...,N]$ <br>
$1 - y_i * (\beta_0 + \beta_1 * x_1 + ... + \beta_p * x_p) \le t_i \quad \forall i \in [1,...,N]$ <br>
$\sum \limits_{i = 1}^N t_i \le 1.5 * \text{PreviousLoss}$ <br>
Where N is the total number of observations in the training data, PreviousLoss is the loss achieved by the unconstrained SVM, and $majority_i$ is whether the $i^{\text{th}}$ observation in the training data is a majority borrower.

In [49]:
# Hinge-loss linear program that minimises the loss on majority rows only, while the
# total loss over all rows may not exceed 1.5 times prev_loss (the objective value of
# the baseline solve, which must already be defined).
# NOTE(review): the hinge loss assumes trainy is coded as -1/+1 — TODO confirm the
# encoding of the :denied column.
model = Model(Gurobi.Optimizer)
set_optimizer_attribute(model, "OutputFlag", 0)  # turn off the Gurobi solver log
n, p = size(trainX)  # n training rows, p feature columns
@variable(model, beta[j=1:p])  # one weight per feature
@variable(model, t[i=1:n])     # per-row hinge-loss bound
@variable(model, beta0)        # intercept
@objective(model, Min, sum(train_majority[i]*t[i] for i=1:n))
@constraint(model, [i=1:n], 0 <= t[i])
@constraint(model, [i=1:n], 1 - (trainy[i]*(beta0 + sum(trainX[i,j]*beta[j] for j=1:p))) <= t[i])
@constraint(model, sum(t[i] for i=1:n) <= 1.5* prev_loss)
optimize!(model)
# Stop here if the solver did not prove optimality, instead of reading (and later
# exporting) values from an unfinished or failed solve.
termination_status(model) == MOI.OPTIMAL ||
    error("Majority-loss SVM was not solved to optimality (status: $(termination_status(model)))")
opt_beta_fair2 = value.(beta)
opt_beta0_fair2 = value(beta0)
# Print the total of t over all rows (not the objective, which covers majority rows only).
print(sum(value(t[i]) for i=1:n))
;

Academic license - for non-commercial use only
Academic license - for non-commercial use only
36461.10457072188

In [50]:
# Score each row with the majority-loss model: the intercept plus the feature columns
# weighted by opt_beta_fair2, accumulated column by column from left to right.
# The score overwrites the :results column and is exported with ID, group and label.
train[!, :results] = mapfoldl(j -> opt_beta_fair2[j] .* trainX[!, j], +, 1:p) .+ opt_beta0_fair2
to_send = train[!, [:ID, :results, :minority, :denied]]
CSV.write("results/train_results3.csv", to_send)

# Same scoring and export for the test set.
test[!, :results] = mapfoldl(j -> opt_beta_fair2[j] .* testX[!, j], +, 1:p) .+ opt_beta0_fair2
to_send = test[!, [:ID, :results, :minority, :denied]]
CSV.write("results/test_results3.csv", to_send)
;