In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [2]:
## Load and preprocess data
X = np.genfromtxt("data/adult.csv", dtype=int, delimiter=",", skip_header=1)
dataset = 'Adult'
label = [-1]
#sens_atts = [9,12]
sens_atts = [8]
Y = X[:, label].ravel()
Z = X[:, sens_atts].ravel()
# Modify X and perform train-test split
X = np.delete(X, sens_atts + label, 1)

In [3]:
def oracle(x,y,z,lam,tau):
    weights = [(1/len(z[z==group]))*lam[group] for group in np.unique(z)]
    weight_array = np.array([weights[elem] for elem in z]).ravel()
    prc = LinearRegression().fit(x, 1-2*y, sample_weight=weight_array)
    tau = np.dot(tau,lam)
    prc.intercept_ = prc.intercept_ - tau*np.linalg.norm(prc.coef_)
    return prc

In [4]:
#pythonify this. test that it is working with smaller input.
def strat_response(x,z, prc, tau):
    dist = np.array([np.abs(np.dot(prc.coef_,elem) + prc.intercept_)/np.linalg.norm(prc.coef_) for elem in x])
    move = np.array([(dist < tau[elem]) for elem in z]).ravel()
    strat_x = np.array([(elem + tau[z[i]]*(prc.coef_/np.linalg.norm(prc.coef_))) if move[i] else elem for i, elem in enumerate(x)])
    return strat_x

In [5]:
def group_loss(x,y,z,prc,tau):
    x_strat = strat_response(x,z,prc,tau)
    y_pred = (np.dot(x_strat, prc.coef_) + prc.intercept_)<0
    group_loss = np.array([np.mean(y.reshape(-1,1)[z==group]!=y_pred.reshape(-1,1)[z==group]) for group in np.unique(z)])
    return group_loss

In [6]:
def game(x,y,z,tau,T):
    h_list = []
    lam = np.array([len(z[z==group])/len(z) for group in np.unique(z)])
    print(lam)
    for t in range(1,T):
        eta = 1/np.sqrt(t)
        h_list.append(oracle(x,y,z,lam,tau))
        lam *= np.exp(eta*group_loss(x,y,z,h_list[-1],tau))
        lam /= np.sum(lam)
        if t%20 == 0:
            print("Group Losses: ")
            print(group_loss(x,y,z,h_list[-1],tau))
            print("Dual: ")
            print(lam)
    return h_list

In [7]:
game(X,Y,Z, [0.5, 0.2], 100)

[0.33151796 0.66848204]
Group Losses: 
[0.08850049 0.21454824]
Dual: 
[0.15784364 0.84215636]
Group Losses: 
[0.08880929 0.21344564]
Dual: 
[0.10582205 0.89417795]
Group Losses: 
[0.08973567 0.2130781 ]
Dual: 
[0.07692423 0.92307577]
Group Losses: 
[0.08985919 0.21292496]
Dual: 
[0.05846992 0.94153008]


[LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegression(),
 LinearRegres