In [1]:
# import libraries
import matplotlib.pyplot as plt
import torch
import numpy as np
import random
import pandas as pd
import pickle
import copy
import torch.nn as nn
from os import listdir
from os.path import isfile, join
from collections import OrderedDict
from sklearn.metrics import accuracy_score
from eval_metrics_cls import *
import random
import math
import copy, time
import pandas as pd

In [2]:
# Setting hyperparameters.
# Every name in this cell is read as a module-level global by the cells below.
inner_lr = 0.01  # learning rate of the inner loop
dual_ud_lr = 0.01  # step size of the per-task dual-variable (multiplier) update
meta_lr = 0.001  # learning rate of the outer loop
meta_dual_lr = 0.01  # step size of the meta-level dual-variable update

lamb = 1  # initial value of the dual variable passed to MAML
K = 10  # K-shot per task for support data
Kq = K * 2  # Kq-shot per task for query data
c = 0.05  # fairness bound
inner_steps = 1  # number of gradient steps in the inner loop, normally equals to 1 in MAML
pd_updates = 10  # number of alternating primal/dual update rounds per task
tasks_per_meta_batch = 8  # number of tasks sampled from tasks repository
num_iterations = 30  # number of iterations of the outer loop

# other parameters
num_feature = 16  # number of features of the input data for each task
num_neighbors = 3  # neighbourhood size passed to cal_consistency
plot_every = 3  # window (in iterations) over which the smoothed meta_* metrics are averaged
print_every = 25  # a progress line is printed every `print_every` iterations
repeat = 2  # number of independent train-then-test repetitions

# PATHs
# NOTE(review): task_path is an absolute, machine-specific path; it must be
# edited before the notebook can run on another machine.
task_path = '/home/mifeng/Data/'+'bankmarketing/'
train_tasks_path = task_path+r'train'
# The two save paths below are relative, so they resolve against the kernel's
# working directory.
train_model_save_path = r'train_model.pth'
tr_val_meta_losses_faires_save_path = r'tr_val_meta_losses_faires_save.txt'

test_tasks_path = task_path+r'test'
val_tasks_path = task_path+r'val'

In [3]:
# code for evaluation metrics:
def cal_discrimination(input_zy):
    """Absolute difference between the mean outcomes of the two groups.

    Args:
        input_zy: iterable of rows whose first entry is the group indicator
            (0 or 1) and whose second entry is the outcome. Rows with any
            other indicator value are ignored.

    Returns:
        ``abs(mean(outcome | group 0) - mean(outcome | group 1))``. When only
        one group is present, the absolute mean outcome of that group is
        returned. When neither group is present, ``0.0`` is returned instead
        of raising ``ZeroDivisionError``.
    """
    group0 = [row[1] for row in input_zy if row[0] == 0]
    group1 = [row[1] for row in input_zy if row[0] == 1]

    # No row belongs to either group: there is nothing to compare.
    if not group0 and not group1:
        return 0.0

    # A missing group contributes 0, which reproduces the single-group result.
    rate0 = sum(group0) * 1.0 / len(group0) if group0 else 0.0
    rate1 = sum(group1) * 1.0 / len(group1) if group1 else 0.0
    return abs(rate0 - rate1)


# calculate the Euclidean distance between two vectors
def euclidean_distance(row1, row2):
    """Euclidean distance between two equal-length feature vectors.

    Every coordinate is used. The earlier ``range(len(row1) - 1)`` skipped the
    last coordinate, which is only right when the rows still carry a trailing
    label column; ``get_neighbors`` strips the label before calling this
    function, so the last real feature was being ignored.

    Args:
        row1: first vector (any indexable/iterable of numbers).
        row2: second vector, same length as ``row1``.

    Returns:
        The distance as a float; ``0.0`` for two empty vectors.
    """
    squared_sum = 0.0
    for a, b in zip(row1, row2):
        squared_sum += (a - b) ** 2
    return math.sqrt(squared_sum)


# Locate the most similar neighbors
# example: get_neighbors(yX, X[0], 3)
def get_neighbors(yX, target_row, num_neighbors):
    """Labels of the ``num_neighbors`` rows of ``yX`` closest to ``target_row``.

    Each row of ``yX`` holds the label in position 0 and the features after
    it. Distances are computed with ``euclidean_distance`` on the feature
    part only; ties keep their original row order (stable sort).

    Raises:
        IndexError: if ``num_neighbors`` exceeds the number of rows in ``yX``.
    """
    # Pair every label with the distance of its features to the target.
    scored = [(row[0], euclidean_distance(target_row, row[1:])) for row in yX]
    scored.sort(key=lambda pair: pair[1])
    # Index explicitly (rather than slice) so an oversized request still raises.
    return [scored[rank][0] for rank in range(num_neighbors)]


def cal_consistency(yX, num_neighbors):
    """Consistency score: how closely each label agrees with its neighbours'.

    For every row of ``yX`` (label in position 0, features after it) the
    labels of its ``num_neighbors`` nearest rows are fetched with
    ``get_neighbors`` and the absolute label differences are accumulated.

    Returns:
        ``1 - total_difference / (len(yX) * num_neighbors)``.
    """
    total_gap = 0
    for row in yX:
        own_label, features = row[0], row[1:]
        row_gap = 0
        for neighbor_label in get_neighbors(yX, features, num_neighbors):
            row_gap += abs(own_label - neighbor_label)
        total_gap += row_gap
    normaliser = len(yX) * num_neighbors
    return (1 - (total_gap * 1.0) / normaliser)


def cal_dbc(input_zy):
    """Absolute mean of ``(z - mean(z)) * y`` over the rows of ``input_zy``.

    Args:
        input_zy: 2-D NumPy array; column 0 is ``z`` and column 1 is ``y``
            (the array is sliced with ``[:, 0]``).

    Returns:
        ``abs(sum((z_i - z_bar) * y_i) / number_of_rows)``.
    """
    n_rows = len(input_zy)
    mean_z = np.mean(input_zy[:, 0])
    running = 0
    for row in input_zy:
        centred_z = row[0] - mean_z
        running += centred_z * row[1]
    return abs(running / n_rows)

In [4]:
# code for maml model:
class MAMLModel(nn.Module):
    """Three-layer fully connected network used as the MAML base learner.

    Args:
        num_feature: width of the input layer. Defaults to 16, the value that
            used to be hard-coded, so ``MAMLModel()`` builds the same network
            (and loads the same state dicts) as before.
        hidden_size: width of the two hidden layers. Defaults to 40.

    Note:
        ``forward`` returns the raw output of the last linear layer, whereas
        ``parameterised`` applies a sigmoid on top of it. The two are
        therefore related by ``parameterised(x, params) == sigmoid(forward(x))``
        when ``params`` are the module's own parameters.
    """

    def __init__(self, num_feature=16, hidden_size=40):
        super(MAMLModel, self).__init__()
        self.model = nn.Sequential(OrderedDict([
            ('l1', nn.Linear(num_feature, hidden_size)),
            ('relu1', nn.ReLU()),
            ('l2', nn.Linear(hidden_size, hidden_size)),
            ('relu2', nn.ReLU()),
            ('l3', nn.Linear(hidden_size, 1))
        ]))

    def forward(self, x):
        """Run ``x`` through the module's own parameters; returns pre-sigmoid values."""
        return self.model(x)

    def parameterised(self, x, weights):
        """Functional forward pass using ``weights`` instead of the module's parameters.

        Args:
            x: input tensor whose last dimension matches the first layer.
            weights: sequence laid out as
                ``[l1.weight, l1.bias, l2.weight, l2.bias, l3.weight, l3.bias]``,
                i.e. the order of ``list(self.parameters())``.

        Returns:
            Sigmoid of the last linear layer's output.
        """
        x = nn.functional.linear(x, weights[0], weights[1])
        x = nn.functional.relu(x)
        x = nn.functional.linear(x, weights[2], weights[3])
        x = nn.functional.relu(x)
        x = nn.functional.linear(x, weights[4], weights[5])
        return torch.sigmoid(x)


class MAML():
    """
    Fairness-constrained MAML trainer for few-shot binary classification.

    For every sampled task the current meta-weights are adapted on a K-shot
    support sample by alternating gradient steps on the weights (primal) with
    projected ascent steps on a multiplier (dual) that penalises
    ``|mean((z - z_bar) * y_hat)| - c``. The adapted weights are then scored
    on a Kq-shot query sample; the summed query losses drive the meta-update
    of the weights, and the summed query fairness values drive the
    meta-update of the multiplier ``lamb``.
    """

    def __init__(self, model, tasks, num_feature, lamb, inner_lr, dual_ud_lr, meta_lr, meta_dual_lr, K, Kq, inner_steps, pd_updates, tasks_per_meta_batch, c, plot_every, print_every, val_tasks_path):
        """
            model: module exposing ``parameters()`` and ``parameterised(x, weights)``
            tasks: name collection of each task -- list of strings (CSV file names)
            task: df -- pandas data frame with columns ``y`` and ``z``; the last
                  ``num_feature`` columns are used as the features
            lamb: initial value of the dual variable
            inner_lr / dual_ud_lr: per-task primal / dual step sizes
            meta_lr / meta_dual_lr: meta-level primal / dual step sizes
            K / Kq: support / query sample sizes
            inner_steps: gradient steps per primal update
            pd_updates: number of primal-dual rounds per task
            tasks_per_meta_batch: tasks sampled per meta-iteration (and per validation pass)
            c: fairness bound
            plot_every / print_every: smoothing window / print frequency, in iterations
            val_tasks_path: directory holding the validation task CSVs
        """

        # important objects
        self.tasks = tasks
        self.model = model
        self.num_feature = num_feature
        self.weights = list(model.parameters())  # the maml weights (primal-variable) we will be meta-optimising
        self.lamb = lamb  # the dual-variable we will be meta-optimising
        self.criterion = nn.BCELoss()
        self.meta_optimiser = torch.optim.Adam(self.weights, meta_lr)

        # hyperparameters
        self.inner_lr = inner_lr
        self.dual_ud_lr = dual_ud_lr
        self.meta_lr = meta_lr
        self.meta_dual_lr = meta_dual_lr
        self.K = K
        self.Kq = Kq
        self.inner_steps = inner_steps  # with the current design of MAML, >1 is unlikely to work well
        self.pd_updates = pd_updates
        self.tasks_per_meta_batch = tasks_per_meta_batch
        self.c = c

        # metrics
        self.plot_every = plot_every
        self.print_every = print_every

        # averages over each window of `plot_every` iterations
        self.meta_losses = []
        self.meta_faires = []
        self.meta_accus = []

        # one entry per meta-iteration
        self.train_losses_list = []
        self.train_faires_list = []
        self.train_accus_list = []

        self.val_losses_list = []
        self.val_faires_list = []
        self.val_accus_list = []

        # others
        self.val_tasks_path = val_tasks_path

    def mean(self, a):
        """Element-wise average of a sequence of same-shaped tensors, as float."""
        return sum(a).to(dtype=torch.float) / len(a)

    def _initial_dual(self):
        """Return a private, graph-free copy of the meta-level dual variable."""
        if torch.is_tensor(self.lamb):
            # deepcopy is only supported for leaf tensors; detach + clone works for any tensor
            return self.lamb.detach().clone()
        return copy.deepcopy(self.lamb)

    def _sample_batch(self, task, n_shot):
        """Sample ``n_shot`` rows from ``task`` and return ``(X, y, z, z_bar)`` float tensors.

        Each tensor gets an extra dimension inserted at position 1;
        ``z_bar`` is the sample mean of ``z`` repeated for every row.
        """
        rows = task.sample(n_shot)
        X = rows[rows.columns[-self.num_feature:]].copy().values
        y = rows[["y"]].values
        z = rows[["z"]].values
        z_bar = np.mean(z) * np.ones((len(z), 1))

        X = torch.tensor(X, dtype=torch.float).unsqueeze(1)
        y = torch.tensor(y, dtype=torch.float).unsqueeze(1)
        z = torch.tensor(z, dtype=torch.float).unsqueeze(1)
        z_bar = torch.tensor(z_bar, dtype=torch.float).unsqueeze(1)
        return X, y, z, z_bar

    def _adapt_and_evaluate(self, task):
        """Adapt to ``task`` on a support sample, then score on a query sample.

        Returns ``[loss, fair, accuracy]`` where ``loss`` (BCE) and ``fair``
        (``|mean((z - z_bar) * y_hat)|``) are tensors computed on the query
        sample with the adapted weights, and ``accuracy`` is the query
        accuracy of the rounded predictions.
        """
        # reset inner model to current maml weights
        temp_weights = [w.clone() for w in self.weights]
        temp_lambda = self._initial_dual()
        a_array = []  # weights after every primal-dual round, averaged for the dual step

        # sample K-shot support data from the task
        X, y, z, z_bar = self._sample_batch(task, self.K)
        ones = torch.ones_like(y)

        for co_update in range(self.pd_updates):
            # inner_steps: number of steps of gradient, it is greater or equals to one in MAML
            for step in range(self.inner_steps):
                y_hat = self.model.parameterised(X, temp_weights)
                fair = torch.abs(torch.mean((z - z_bar) * y_hat)) - self.c
                loss = (-1.0) * torch.mean(y * torch.log(y_hat) + (ones - y) * torch.log(ones - y_hat)) + temp_lambda * fair
                # no create_graph: the gradient is a constant w.r.t. the meta-weights
                grad = torch.autograd.grad(loss.sum(), temp_weights)
                temp_weights = [w - self.inner_lr * g for w, g in zip(temp_weights, grad)]
            a_array.append(temp_weights)

            tilde_weight = tuple(map(self.mean, zip(*a_array)))

            gk = torch.abs(torch.mean((z - z_bar) * self.model.parameterised(X, tilde_weight))) - self.c
            # NOTE(review): temp_lambda keeps the autograd graph of gk, and gk was
            # computed from an average that includes the current temp_weights, so the
            # next primal gradient also differentiates through the multiplier.
            # Left unchanged to keep the numerics; confirm whether this is intended.
            temp_lambda = temp_lambda + self.dual_ud_lr * gk
            # projection onto the non-negative half-line
            if temp_lambda.item() <= 0:
                temp_lambda = 0

        # sample new Kq-shot query data from the task for meta-update and compute loss
        X, y, z, z_bar = self._sample_batch(task, self.Kq)

        y_hat = self.model.parameterised(X, temp_weights)
        fair = torch.abs(torch.mean((z - z_bar) * y_hat))
        loss = self.criterion(y_hat, y)

        y_hat_np = y_hat.detach().numpy().reshape(len(y_hat), 1)
        y_np = y.detach().numpy().reshape(len(y), 1)
        accuracy = accuracy_score(y_hat_np.round(), y_np)

        return [loss, fair, accuracy]

    def inner_loop(self, task):
        """Training-time task evaluation; returns ``[loss, fair, accuracy]`` on query data."""
        return self._adapt_and_evaluate(task)

    def val_single_task(self, task):
        """Validation-time task evaluation; same procedure as ``inner_loop``."""
        return self._adapt_and_evaluate(task)

    def val_tasks(self, tasks):
        """Average ``[loss, fair, accuracy]`` over ``tasks_per_meta_batch`` randomly chosen validation tasks.

        ``tasks`` is a list of CSV file names located in ``self.val_tasks_path``.
        """
        meta_loss = 0
        meta_fair = 0
        meta_accu = 0
        for i in range(self.tasks_per_meta_batch):
            task_name = random.choice(tasks)
            val_task = pd.read_csv(self.val_tasks_path + '/' + task_name)
            [t_loss, t_fair, t_accu] = self.val_single_task(val_task)
            meta_loss += t_loss
            meta_fair += t_fair
            meta_accu += t_accu
        avg_loss = meta_loss / self.tasks_per_meta_batch
        avg_fair = meta_fair / self.tasks_per_meta_batch
        avg_accu = meta_accu / self.tasks_per_meta_batch
        return [avg_loss.item(), avg_fair.item(), avg_accu]

    def main_loop(self, num_iterations, tasks_path):
        """Run ``num_iterations`` meta-iterations over tasks read from ``tasks_path``.

        Each iteration samples ``tasks_per_meta_batch`` tasks, updates the
        meta-weights with Adam on the summed query loss, updates the dual
        variable by projected ascent on the summed fairness violation, and
        records per-iteration training and validation metrics.

        Returns:
            ``[self.weights, self.lamb]``; ``lamb`` is a graph-free tensor, or
            ``0`` when the projection clipped it.
        """
        # running sums over the current `plot_every` window
        train_loss = 0
        train_fair = 0
        train_accu = 0
        for iteration in range(1, num_iterations + 1):
            # compute meta loss
            meta_loss = 0
            meta_fair = 0
            meta_accu = 0

            start_time = time.time()

            for i in range(self.tasks_per_meta_batch):
                task_name = random.choice(self.tasks)
                task = pd.read_csv(tasks_path + '/' + task_name)
                [t_loss, t_fair, t_accu] = self.inner_loop(task)
                meta_loss += t_loss
                meta_fair += t_fair
                meta_accu += t_accu

            # compute meta gradient of loss with respect to maml weights
            meta_grads = torch.autograd.grad(meta_loss, self.weights)

            # assign meta gradient to weights and take optimisation step
            for w, g in zip(self.weights, meta_grads):
                w.grad = g
            self.meta_optimiser.step()

            # update meta dual variable; detach so the multiplier does not keep
            # this iteration's autograd graph alive (and stays deep-copyable)
            dual_update = (self.lamb + self.meta_dual_lr * (meta_fair - self.tasks_per_meta_batch * self.c)).detach()
            if dual_update.item() > 0:
                self.lamb = dual_update
            else:
                self.lamb = 0

            # log metrics: per-iteration values go to the lists, sums to the window
            iter_loss = meta_loss.item() / self.tasks_per_meta_batch
            iter_fair = meta_fair.item() / self.tasks_per_meta_batch
            iter_accu = meta_accu / self.tasks_per_meta_batch

            train_loss += iter_loss
            train_fair += iter_fair
            train_accu += iter_accu

            self.train_losses_list.append(iter_loss)
            self.train_faires_list.append(iter_fair)
            self.train_accus_list.append(iter_accu)

            tasks_for_val = [f for f in listdir(self.val_tasks_path) if isfile(join(self.val_tasks_path, f))]
            [val_loss, val_fair, val_accu] = self.val_tasks(tasks_for_val)

            self.val_losses_list.append(val_loss)
            self.val_faires_list.append(val_fair)
            self.val_accus_list.append(val_accu)

            if iteration % self.print_every == 0:
                print("{}/{}. tr_loss: {}; tr_accu: {}; tr_fair: {}; val_loss: {}; val_accu: {}; val_fair: {} -----> running time: {} seconds.".format(
                    iteration, num_iterations,
                    np.round(iter_loss, 4), np.round(iter_accu, 4), np.round(iter_fair, 4),
                    np.round(val_loss, 4), np.round(val_accu, 4), np.round(val_fair, 4),
                    np.round((time.time() - start_time), 4)))
            if iteration % self.plot_every == 0:
                self.meta_losses.append(train_loss / self.plot_every)
                train_loss = 0
                self.meta_faires.append(train_fair / self.plot_every)
                train_fair = 0
                self.meta_accus.append(train_accu / self.plot_every)
                train_accu = 0

        return [self.weights, self.lamb]

In [5]:
# code for training and testing framework

In [6]:
def mean(a):
    """Element-wise average of a sequence of same-shaped tensors, cast to float."""
    total = sum(a)
    count = len(a)
    return total.to(dtype=torch.float) / count

In [7]:
def test_single_task(train_model_save_path, task, meta_lamb):
    # load the trained Fair MAML
    net = MAMLModel()
    net.load_state_dict(torch.load(train_model_save_path))

    criterion = nn.BCELoss()

    # load trained parameters
    temp_weights = [w.clone() for w in list(net.parameters())]
    try:
        temp_lambda = copy.deepcopy(meta_lamb)
    except:
        temp_lambda = (meta_lamb).clone()
    a_array = []

    # sample support data for testing
    K_Xy = task.sample(K)
    X = K_Xy[K_Xy.columns[-num_feature:]].copy().values
    y = K_Xy[["y"]].values
    z = K_Xy[["z"]].values
    z_bar = np.mean(z) * np.ones((len(z), 1))

    X = torch.tensor(X, dtype=torch.float).unsqueeze(1)
    y = torch.tensor(y, dtype=torch.float).unsqueeze(1)
    ones = torch.tensor(np.ones((len(y), 1)), dtype=torch.float).unsqueeze(1)
    z = torch.tensor(z, dtype=torch.float).unsqueeze(1)
    z_bar = torch.tensor(z_bar, dtype=torch.float).unsqueeze(1)

    for co_update in range(pd_updates):
        for step in range(inner_steps):
            y_hat = net.parameterised(X, temp_weights)
            fair = torch.abs(torch.mean((z - z_bar) * y_hat)) - c
            loss = (-1.0) * torch.mean(y * torch.log(y_hat) + (ones - y) * torch.log(ones - y_hat)) + temp_lambda * fair
            grad = torch.autograd.grad(loss.sum(), temp_weights)
            temp_weights = [w - inner_lr * g for w, g in zip(temp_weights, grad)]
        a_array.append(temp_weights)
        tilde_weight = (*map(mean, zip(*a_array))),
        gk = torch.abs(torch.mean((z - z_bar) * net.parameterised(X, tilde_weight))) - c
        temp_lambda = temp_lambda + dual_ud_lr * gk
        boolean = temp_lambda.item()
        if boolean > 0:
            temp_lambda = temp_lambda
        else:
            temp_lambda = 0


    # sample query data for testing
    K_Xy = task.sample(Kq)
    X = K_Xy[K_Xy.columns[-num_feature:]].copy().values
    y = K_Xy[["y"]].values
    z = K_Xy[["z"]].values
    X_temp = copy.deepcopy(X)
    z_temp = copy.deepcopy(z)
    z_bar = np.mean(z) * np.ones((len(z), 1))

    X = torch.tensor(X, dtype=torch.float).unsqueeze(1)
    y = torch.tensor(y, dtype=torch.float).unsqueeze(1)
    z = torch.tensor(z, dtype=torch.float).unsqueeze(1)
    z_bar = torch.tensor(z_bar, dtype=torch.float).unsqueeze(1)

    y_hat = net.parameterised(X, temp_weights)
    fair = torch.abs(torch.mean((z - z_bar) * y_hat))

    y_hat = y_hat.detach().numpy().reshape(len(y_hat), 1)
    y = y.detach().numpy().reshape(len(y), 1)

    input_zy = np.column_stack((z_temp, y_hat))
    yX = np.column_stack((y_hat, X_temp))

    accuracy = accuracy_score(y_hat.round(), y)
    discrimination = cal_discrimination(input_zy)
    consistency = cal_consistency(yX, num_neighbors)

    return [accuracy, fair, discrimination, consistency]

In [8]:
def test_tasks(tasks, meta_lamb):
    """Average the test metrics over ``tasks_per_meta_batch`` randomly drawn tasks.

    Args:
        tasks: list of CSV file names located in ``test_tasks_path``.
        meta_lamb: meta-learned dual variable forwarded to ``test_single_task``.

    Returns:
        ``[avg_accuracy, avg_fair, avg_discrimination, avg_consistency]``,
        with ``avg_fair`` converted to a Python number via ``.item()``.
    """
    accu_total = 0
    fair_total = 0
    disc_total = 0
    consis_total = 0
    for _ in range(tasks_per_meta_batch):
        chosen_name = random.choice(tasks)
        task_frame = pd.read_csv(test_tasks_path + '/' + chosen_name)
        accu, fair, disc, consis = test_single_task(train_model_save_path, task_frame, meta_lamb)
        accu_total += accu
        fair_total += fair
        disc_total += disc
        consis_total += consis

    n_tasks = tasks_per_meta_batch
    mean_accu = accu_total / n_tasks
    mean_fair = fair_total / n_tasks
    mean_disc = disc_total / n_tasks
    mean_consis = consis_total / n_tasks
    return [mean_accu, mean_fair.item(), mean_disc, mean_consis]

In [9]:
# Training and testing: each of the `repeat` rounds trains a fresh model from
# scratch and then evaluates it on the test tasks.
# NOTE(review): the model file and the metrics file use fixed paths, so every
# round overwrites what the previous round saved.
for i in range(repeat):
        # -------------------------------------------  Training  ------------------------------------------#
        # load training tasks (file names only; each CSV is read when it is sampled)
        tasks_for_train = [f for f in listdir(train_tasks_path) if isfile(join(train_tasks_path, f))]

        # train maml and keep the learned weights and dual variable
        maml = MAML(MAMLModel(), tasks_for_train, num_feature, lamb, inner_lr, dual_ud_lr, meta_lr, meta_dual_lr, K, Kq, inner_steps, pd_updates, tasks_per_meta_batch, c, plot_every, print_every, val_tasks_path)
        [meta_weights, meta_lamb] = maml.main_loop(num_iterations, train_tasks_path)
        # persist the meta-weights; test_single_task reloads them from this file
        torch.save(maml.model.state_dict(), train_model_save_path)

        # writing losses and faires into a file: four consecutive pickles, in the
        # order train loss, train fairness, val loss, val fairness
        with open(tr_val_meta_losses_faires_save_path, 'wb') as f:
            pickle.dump(maml.train_losses_list, f)
            pickle.dump(maml.train_faires_list, f)
            pickle.dump(maml.val_losses_list, f)
            pickle.dump(maml.val_faires_list, f)
        # --------------------------------------------  Testing  ------------------------------------------#

        # load testing tasks
        tasks_for_test = [f for f in listdir(test_tasks_path) if isfile(join(test_tasks_path, f))]

        # the value printed under the "DBC" label is the `fair` metric returned by test_tasks
        [test_accu, test_fair, test_disc, test_consis] = test_tasks(tasks_for_test, meta_lamb)
        print('---------------------------- %s / %s ------------------------------------------' % (i + 1, repeat))
        print('Accuracy for testing data =', test_accu)
        print('DBC for testing data =', test_fair)
        print('Discrimination for testing data =', test_disc)
        print('Consistency for testing data =', test_consis)
        print('-------------------------------------------------------------------------------')


25/30. tr_loss: 0.4653; tr_accu: 0.9688; tr_fair: 0.003; val_loss: 0.6979; val_accu: 0.5062; val_fair: 0.0027 -----> running time: 7.5426 seconds.
---------------------------- 1 / 2 ------------------------------------------
Accuracy for testing data = 0.50625
DBC for testing data = 0.004715173505246639
Discrimination for testing data = 0.020111261205229783
Consistency for testing data = 0.9796088811010123
-------------------------------------------------------------------------------
25/30. tr_loss: 0.5681; tr_accu: 0.8; tr_fair: 0.0018; val_loss: 0.6969; val_accu: 0.5562; val_fair: 0.003 -----> running time: 7.8612 seconds.
---------------------------- 2 / 2 ------------------------------------------
Accuracy for testing data = 0.58125
DBC for testing data = 0.0030298596248030663
Discrimination for testing data = 0.014812198043384281
Consistency for testing data = 0.9852548544295131
-------------------------------------------------------------------------------
