In [None]:
import numpy as np
import pandas as pd
import math
import os
import torch
import torch.nn as nn
from torch.optim import Adam, RMSprop
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset
from torch_geometric.loader import DataLoader
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import torch.nn.functional as F
from collections import deque
from sklearn import metrics
from functools import wraps
from time import time
import ot
from pathlib import Path
import time

# --- Experiment configuration ------------------------------------------------
n = 1000
x_dim = 3
hiddens = [x_dim + 1, 32, 64, 1]
test_size = 0.2
valid_size = 0.125
batch_size = 100
seq_len = 10
epsilon = 0.5
eps = str(epsilon).replace(".", "_")   # filename-safe tag for `epsilon`
b = 0.2
# Derive the tag from `b` itself (it used to be a hard-coded 0.2) so the GAN
# checkpoint filename cannot drift out of sync when `b` is changed.
bet = str(b).replace(".", "_")
l = 0.1
c_hiddens = [x_dim + 1, 32, 64, 1]
g_hidden_size = 64
g_num_layers = 2
d_hidden_size = 64
d_num_layers = 2
gan_epochs = 500
train_vaca = 0
dataset = 'taiwan'

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# cuBLAS workspace setting (see the PyTorch reproducibility notes).
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
# device = torch.device('cpu')
print(device)

# --- Output locations --------------------------------------------------------
res_path = Path('../res')
log_dir = res_path / 'taiwan'
clf_path = log_dir / "c_model_.pth"
efforts_path = log_dir / "effort_model.pth"
gan_path = log_dir / f"gan_model_taiwan_{gan_epochs}_{g_hidden_size}_eps_{eps}_b_{bet}.pth"
re_clf_path = log_dir / "dp_model-6.pth"
tsne_path = log_dir / "syn-tsne.png"

In [None]:
def tensor(x):
    """Wrap ``x`` in a ``torch.FloatTensor`` and return it."""
    as_float = torch.FloatTensor(x)
    return as_float

def to_tensor(z, x, y=None):
    """Concatenate ``z`` and ``x`` column-wise into one float tensor.

    When ``x`` is already a tensor the two inputs are joined with ``torch.cat``;
    otherwise they are joined with ``np.concatenate`` and converted.

    Returns:
        ``(zx, y)`` when ``y`` is a ``np.ndarray`` (``y`` converted to a float
        tensor), otherwise just ``zx``.
    """
    if not torch.is_tensor(x):
        joined = torch.FloatTensor(np.concatenate([z, x], axis=1))
    else:
        joined = torch.cat([z, x], dim=1)

    if not isinstance(y, np.ndarray):
        return joined
    return joined, torch.FloatTensor(y)

class TrueModel(nn.Module):
    """Sigmoid-terminated MLP over the concatenated ``[z, x]`` input.

    The network is Linear/ReLU pairs with the last activation replaced by a
    Sigmoid. It carries its own BCE loss and Adam optimiser.
    The ``seed`` argument is accepted but not used by the constructor.
    """

    def __init__(self, hiddens, seed=0):
        super().__init__()
        stack = []
        for idx in range(1, len(hiddens)):
            stack += [nn.Linear(hiddens[idx - 1], hiddens[idx]), nn.ReLU(inplace=True)]
        # The trailing ReLU gives way to the output Sigmoid.
        stack[-1] = nn.Sigmoid()
        self.model = nn.Sequential(*stack)

        self.loss_fn = nn.BCELoss()
        self.optim = Adam(self.parameters())

    def forward(self, zx):
        """Return the network output for the already-concatenated input ``zx``."""
        return self.model(zx)

    def predict(self, z, x):
        """Return rounded (hard) predictions as a numpy array."""
        probs = self(to_tensor(z, x))
        return probs.detach().round().cpu().numpy()

    def fit(self, z, x, y, patience=10):
        """Full-batch training until the loss fails to improve ``patience`` times.

        The stall counter is reset whenever the loss is less than or equal to
        the best loss seen so far.
        """
        inputs = to_tensor(z, x)
        targets = tensor(y)

        n_epochs, stalled = 0, 0
        lowest = float('inf')
        while True:
            loss = self.loss_fn(self(inputs), targets)

            self.optim.zero_grad()
            loss.backward()
            self.optim.step()

            n_epochs += 1
            current = loss.item()
            if not (current <= lowest):
                stalled += 1
                if stalled == patience:
                    break
                continue
            lowest, stalled = current, 0
        #print(f"TrueModel Fit Done in {epoch} epochs!")

    def sample(self, s, x, scale=0.8):
        """Draw Bernoulli labels with probability ``scale * model output``."""
        probs = self(to_tensor(s, x))
        draws = torch.bernoulli(probs * scale)
        return draws.detach().cpu().numpy()

In [None]:
def demographic_parity(sensi, pred_y):
    """Print group statistics and return the demographic-parity gap.

    Args:
        sensi: binary sensitive attribute (0/1), any shape with one value per sample.
        pred_y: binary predictions (0/1), one value per sample.

    Returns:
        ``P(y=1 | s=1) - P(y=1 | s=0)``. The result is NaN when one of the two
        groups is absent from the batch.
    """
    # Flatten instead of squeeze so a single-sample batch stays iterable.
    groups = np.asarray(sensi).reshape(-1)
    preds = np.asarray(pred_y).reshape(-1)

    in_s0 = groups == 0
    in_s1 = groups == 1
    s0 = int(np.count_nonzero(in_s0))
    s1 = int(np.count_nonzero(in_s1))
    y0 = int(np.count_nonzero(preds == 0))
    y1 = int(np.count_nonzero(preds == 1))

    # Guard the empty-group case explicitly rather than relying on 0/0.
    y1_s0 = np.count_nonzero(preds[in_s0] == 1) / s0 if s0 else float('nan')
    y1_s1 = np.count_nonzero(preds[in_s1] == 1) / s1 if s1 else float('nan')
    print(f"#(S=0): {s0}, #(S=1): {s1}, #(y0): {y0}, #(y1): {y1}, P(y=1|s=0)={y1_s0:.3f}, P(y=1|s=1)={y1_s1:.3f}")
    return y1_s1 - y1_s0

In [None]:
def count_time(func):
    """Decorator that prints how long each call to ``func`` took.

    The wrapped function's return value is passed through unchanged.
    """
    # Import the clock locally: the notebook's import cell runs
    # ``from time import time`` and then ``import time``, which leaves the
    # global name ``time`` bound to the module, so ``time()`` would raise
    # ``TypeError: 'module' object is not callable``.
    from time import perf_counter as _clock

    @wraps(func)
    def wrapper(*args, **kwargs):
        started = _clock()
        result = func(*args, **kwargs)
        duration = _clock() - started
        print(f"Time: {duration:5.2f}s")
        return result
    return wrapper

class Classifier(nn.Module):
    """MLP binary classifier over ``[s, x]`` with dropout-averaged predictions.

    ``forward`` averages ``num_samples`` passes through the network. The module
    is never switched out of training mode in this class, so dropout stays
    active during those passes.

    NOTE: the ``eval(loader, device)`` method below shadows ``nn.Module.eval()``,
    so calling ``clf.eval()`` with no arguments does NOT switch to inference
    mode. Use ``clf.train(False)`` for that.
    """

    def __init__(self, hiddens, dropout_prob = 0.2):
        super().__init__()

        layers = []
        last = len(hiddens) - 2
        for idx, (in_dim, out_dim) in enumerate(zip(hiddens[:-1], hiddens[1:])):
            layers.append(nn.Linear(in_dim, out_dim))
            # Hidden layers only. The output Linear feeds the Sigmoid directly:
            # a ReLU left in front of the Sigmoid clamps every probability to
            # >= 0.5, so the model could never predict the negative class.
            # Linear layers keep their Sequential indices (0, 3, 6, ...), so
            # existing state_dicts still load, but weights trained with the
            # old head should be retrained.
            if idx < last:
                layers.append(nn.ReLU(inplace=True))
                layers.append(nn.Dropout(p=dropout_prob))
        layers.append(nn.Sigmoid())

        self.model = nn.Sequential(*layers)
        self.loss_fn = nn.BCELoss()
        self.optim = Adam(self.parameters())

        self.dropout_prob = dropout_prob

    def get_params(self):
        """Return all parameters flattened into one 1-D numpy array."""
        params = []
        for param in self.parameters():
            params.append(param.detach().cpu().flatten().numpy())
        return np.hstack(params)

    def forward(self, s_mb, x_mb, num_samples = 10):
        """Return the mean of ``num_samples`` network passes on ``[s_mb, x_mb]``."""
        sx_mb = torch.cat([s_mb, x_mb], dim=1)  # loop-invariant, build once
        passes = [self.model(sx_mb) for _ in range(num_samples)]
        return torch.stack(passes, dim=0).mean(dim=0)

    def predict(self, s_mb, x_mb):
        """Return rounded (hard) predictions as a numpy array."""
        probs = self(s_mb, x_mb)
        pred_y = probs.detach().round().cpu().numpy()
        return pred_y

    def sample(self, s, x, scale=1.0):
        """Draw Bernoulli labels with probability ``scale * predicted probability``."""
        prob = self(s, x)
        y = torch.bernoulli(prob * scale)
        return y.detach().cpu().numpy()

    # @count_time
    def fit(self, loader, valid_loader, save_path, device, max_epochs = 300, patience=20):
        """Train with early stopping on the validation loss.

        Each batch is ``(s, x, y)`` with ``x``/``y`` indexed as ``[:, step]``;
        the per-step BCE losses are summed into one batch loss. Training stops
        after ``max_epochs`` epochs or once the validation loss has failed to
        improve ``patience`` times in a row.
        ``save_path`` is currently unused (the checkpoint write is commented out).
        """
        epoch, counter = 0, 0
        best_loss = float('inf')

        while epoch < max_epochs:
            loss = 0.
            for s_mb, x_mb, y_mb in loader:
                s_mb = s_mb.to(device)
                x_mb = x_mb.to(device)
                y_mb = y_mb.to(device)

                batch_loss = 0.
                for i in range(x_mb.size(1)):
                    pred_y_mb = self(s_mb, x_mb[:, i])
                    batch_loss += self.loss_fn(pred_y_mb, y_mb[:, i])
                loss += batch_loss.item()

                self.optim.zero_grad()
                batch_loss.backward()
                self.optim.step()

            epoch += 1
            valid_loss = self.eval(valid_loader, device)
            if valid_loss <= best_loss:
                # torch.save(self.state_dict(), save_path)
                best_loss = valid_loss
                counter = 0
            else:
                counter += 1
                if counter == patience:
                    break

            if epoch == 1 or epoch % 100 == 0:
                print(f'{epoch:6.0f} | loss: {loss:6.4f}')
        print(f"Classifier Fit Done in {epoch} epochs!, Counter: {counter}")

    def eval(self, loader, device, verbose=False):
        """Return the summed per-step BCE loss over ``loader``.

        With ``verbose=True`` the accuracy and demographic-parity gap are also
        printed for every step of every batch.
        """
        loss = 0.
        # Evaluation only: no autograd graph is needed.
        with torch.no_grad():
            for s_mb, x_mb, y_mb in loader:
                s_mb = s_mb.to(device)
                x_mb = x_mb.to(device)
                y_mb = y_mb.to(device)

                batch_loss = 0.
                for i in range(x_mb.size(1)):
                    pred_y_mb = self(s_mb, x_mb[:, i])
                    batch_loss += self.loss_fn(pred_y_mb, y_mb[:, i]).item()

                    if verbose:
                        pred_y_mb = self.predict(s_mb, x_mb[:, i])
                        true_y_mb = y_mb[:, i].cpu().numpy()
                        s_mb_np = s_mb.cpu().numpy()

                        acc = metrics.accuracy_score(true_y_mb, pred_y_mb) * 100
                        fair = demographic_parity(s_mb_np, pred_y_mb)
                        print(f"Step: {i:6.0f}, ACC: {acc:6.2f}%, FAIR: {fair:6.2f}\n")
                # Add each batch once, as `fit` does. Adding the running
                # batch_loss inside the step loop counted early steps repeatedly.
                loss += batch_loss
        return loss

In [None]:
class NaiveCLF(nn.Module):
    """Logistic-regression classifier on ``x`` only (``s`` is accepted but ignored).

    ``forward`` returns hard 0/1 decisions; ``prob`` returns the probabilities.
    """

    def __init__(self, x_dim):
        super().__init__()
        self.linear = nn.Linear(x_dim, 1)
        self.loss_fn = nn.BCELoss()
        self.optim = Adam(self.parameters())

    def forward(self, s, x):
        """Return rounded (0/1) decisions for ``x``."""
        return torch.round(self.prob(s, x))

    def prob(self, s, x):
        """Return ``sigmoid(linear(x))``."""
        return torch.sigmoid(self.linear(x))

    def fit(self, s, x, y, patience=10, max_epochs=5000):
        """Full-batch training with early stopping.

        Args:
            s: unused, kept for interface symmetry with ``forward``.
            x: features, array-like or tensor of shape ``(n, x_dim)``.
            y: targets of shape ``(n, 1)`` with values in ``[0, 1]``.
            patience: stop after this many consecutive non-improving epochs.
            max_epochs: hard cap on the number of epochs.
        """
        x = torch.as_tensor(x, dtype=torch.float32)
        y = torch.as_tensor(y, dtype=torch.float32)

        epoch, counter = 0, 0
        best_loss = float('inf')
        # The cap is checked on every epoch, not only on non-improving ones.
        while epoch < max_epochs:
            # Optimise the probabilities, not the rounded decisions from
            # forward(): round() has zero gradient everywhere, so training on
            # it never updated the weights.
            pred = self.prob(s, x)
            loss = self.loss_fn(pred, y)

            self.optim.zero_grad()
            loss.backward()
            self.optim.step()

            epoch += 1
            if epoch % 100 == 0:
                print(f"Epoch: {epoch}, Loss: {loss.item()}")

            if loss.item() < best_loss:
                best_loss = loss.item()
                counter = 0
            else:
                counter += 1
                if counter == patience:
                    break

In [None]:
def compute_distance_loss(data, check_divergence = False):
    """Sinkhorn distance between the rows flagged 1 and the rows flagged 0.

    Column 0 of ``data`` is the group flag. The first and last columns are
    dropped before the two groups are compared with ``sinkhorn_distance``.

    Args:
        data: 2-D tensor whose first column holds 0/1 flags.
        check_divergence: forwarded to ``sinkhorn_distance``.
    """
    pos_data = data[data[:, 0] == 1, 1:-1]
    neg_data = data[data[:, 0] == 0, 1:-1]
    # The uniform weights and the POT cost matrix that used to be built here
    # were never used; the distance comes from sinkhorn_distance alone.
    return sinkhorn_distance(pos_data, neg_data, check_divergence)


def sinkhorn_distance(data1, data2, check_divergence, epsilon = 0.1, num_iters = 100, tol=1e-3):
    """Entropy-regularised transport cost between two point clouds.

    Both clouds get uniform weights. The cost is the squared Euclidean
    distance, normalised by its maximum.

    Args:
        data1: float tensor of shape ``(n, d)``.
        data2: float tensor of shape ``(m, d)``.
        check_divergence: when true, stop early (and print a message) once the
            scaling vectors change by less than ``tol`` between iterations.
        epsilon: entropic regularisation strength.
        num_iters: maximum number of Sinkhorn iterations.
        tol: early-stopping tolerance used with ``check_divergence``.

    Returns:
        0-d tensor ``sum(plan * cost)``, on the same device as the inputs.
    """
    n, m = data1.shape[0], data2.shape[0]

    C = torch.cdist(data1, data2, p=2) ** 2
    # Guard the all-zero cost matrix (identical points): 0/0 would give NaN.
    C = C / C.max().clamp_min(torch.finfo(C.dtype).tiny)
    K = torch.exp(-C / epsilon)

    # Work on the inputs' own device/dtype rather than a global `device`.
    a = torch.full((n,), 1.0 / n, dtype=C.dtype, device=C.device)
    b = torch.full((m,), 1.0 / m, dtype=C.dtype, device=C.device)
    u = torch.ones(n, dtype=C.dtype, device=C.device)
    v = torch.ones(m, dtype=C.dtype, device=C.device)

    for i in range(num_iters):
        # u and v are rebound, not mutated, so the old values need no clone.
        u_prev, v_prev = u, v
        u = a / (K @ v + 1e-8)
        v = b / (K.t() @ u + 1e-8)

        if check_divergence:
            u_change = torch.norm(u - u_prev)
            v_change = torch.norm(v - v_prev)
            if u_change < tol and v_change < tol:
                print(f"Converged in {i} iterations")
                break

    # diag(u) @ K @ diag(v) without materialising the two dense diagonals.
    plan = u[:, None] * K * v[None, :]
    distance = torch.sum(plan * C)
    return distance

In [None]:
def gen_gaussian(sample_size, seed=0):
    """Sample a shuffled two-group 2-D Gaussian dataset with binary labels.

    Group ``s=0`` is centred at ``(-1, -1)`` and group ``s=1`` at ``(1, 1)``;
    both share the covariance ``[[5, 1], [1, 5]]``. Labels are Bernoulli draws
    with probability ``sigmoid(X[:, 0] + 2 * X[:, 1] - 1)``.

    Returns:
        ``(s, X, y)`` with ``2 * sample_size`` rows each.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)

    shared_cov = [[5, 1], [1, 5]]
    group0_x = np.random.multivariate_normal(mean=[-1, -1], cov=shared_cov, size=sample_size)
    group1_x = np.random.multivariate_normal(mean=[1, 1], cov=shared_cov, size=sample_size)
    group0_s = np.zeros(sample_size, dtype=float)
    group1_s = np.ones(sample_size, dtype=float)

    # One shared permutation keeps s and X aligned after shuffling.
    order = list(range(int(2*sample_size)))
    np.random.shuffle(order)
    s = np.concatenate((group0_s, group1_s))[order]
    X = np.vstack((group0_x, group1_x))[order]

    logits = X[:,0]+2*X[:,1]-1
    y = np.random.binomial(1, 1/(1+np.exp(-logits)))

    return s, X, y

def sequential_data(s0, x0, y0, seq_len, l=0.5, noise_factor = 1, b = 0.5, seed=0, ground_truth = True):
    """Roll an initial population forward for ``seq_len`` steps.

    A ``NaiveCLF`` (the decision model) and a ``TrueModel`` (the label model)
    are fitted on the initial data. At each later step every feature of every
    individual is updated from its previous value plus Gaussian noise, a shift
    that depends on the classifier decision and the previous label, and a
    group-dependent drift. New labels are then sampled.

    Args:
        s0: numpy array of sensitive attributes, one per individual.
        x0: numpy array of initial features, shape ``(n, d)``.
        y0: numpy array of initial labels, one per individual.
        seq_len: number of time steps, including the initial one.
        l: magnitude of the decision-dependent shift (scaled by ``theta_t``).
        noise_factor: multiplier on the standard-normal noise.
        b: drift added per step, weighted by ``s0[j]``.
        seed: seed applied to both torch and numpy.
        ground_truth: if true, labels come from a fixed logistic rule on the
            first two features; otherwise from ``true_model.sample``.

    Returns:
        ``(x, y, model)``: tensors of shape ``(n, seq_len, d)`` and
        ``(n, seq_len, 1)``, and the fitted ``NaiveCLF``.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)
    l_og = l  # keep the requested magnitude; `l` is overwritten per individual below
    n = len(s0)
    model = NaiveCLF(x0.shape[1])
    x_dim = x0.shape[1]
    hiddens = [x_dim + 1, 32, 64, 1]
    true_model = TrueModel(hiddens)
    true_model.fit(s0.reshape(-1,1), x0, y0.reshape(-1,1))
    x0 = torch.from_numpy(x0).to(dtype=torch.float32)
    x0.requires_grad = True
    y0 = torch.from_numpy(y0).to(dtype=torch.float32)
    model.fit(s0, x0, y0.view(n, 1))
    # Last parameter registered with the optimiser; for NaiveCLF that is the
    # bias of its single Linear layer.
    theta_t = abs(model.optim.param_groups[0]['params'][-1].item())

    
    x = torch.empty(n, seq_len, x0.shape[1])
    y= torch.empty(n, seq_len, 1)
    x[:, 0, :] = x0
    y[:, 0, :] = y0.view(n, 1)

    prevy = y0
    
    for i in range(1, seq_len):
        yhat = model(s0, x[:,i-1,:])
        for j in range(n):
            
            # Shift sign: accepted (yhat=1) with previous label 0 -> -l_og,
            # accepted with previous label 1 -> +l_og, otherwise no shift.
            if yhat[j] == 1 and prevy[j] == 0:
                l = -l_og
            elif yhat[j] == 1 and prevy[j] == 1:
                l = l_og
            else:
                l = 0
            # One np.random.randn() draw per (individual, feature), in j-then-k
            # order; the generated data depends on this draw order.
            for k in range(x0.shape[1]):
                x[j, i, k] = np.random.randn()*noise_factor + x[j, i-1, k] + l*theta_t + s0[j]*b + (1-s0[j])*0.1
        if ground_truth:
            y[:, i, :] = torch.bernoulli((1 /(1+  torch.exp(-(x[:,i,0]+3*x[:,i,1]-1))))).view(n, 1)
        else:
            y[:, i, :] = tensor(true_model.sample(tensor(s0.reshape(-1,1)), x[:, i, :]))
        prevy = y[:, i, :]
        

    # x = np.array(x, dtype=np.float32).reshape((n, seq_len, 2))
    #y = np.array(y, dtype=np.int32).reshape(n, seq_len, 1)
    return x, y, model

# Toy cohort: n/2 individuals per group, rolled forward for seq_len steps.
s0, x0, y0 = gen_gaussian(sample_size=int(n/2), seed=52)
x, y, unfair_clf = sequential_data(
    s0, x0, y0, seq_len,
    l=epsilon,
    noise_factor=0.1,
    b=b,
    seed=52,
    ground_truth=False,
)

In [None]:
def sample_taiwan(file_path, n, seq_len, seed=0):
    """Load the credit-default spreadsheet and draw a seeded sub-sample.

    Rows are kept only when PAY_AMT1..3 all lie strictly between 10 and 10000.
    The sample is ``n`` rows with ``SEX == 1`` plus ``n`` rows with
    ``SEX == 2``; of the latter, ``int(n * 0.6)`` have the default flag set.
    Features are centred and scaled by ``3 / (max - min)``, and the label is
    the inverted default flag.

    Returns:
        ``(s0, x0, y0, unfair_clf)``, where ``unfair_clf`` is the classifier
        returned by ``sequential_data`` on the sampled data.
    """
    df = pd.read_excel(file_path, header=1)
    torch.manual_seed(seed)
    np.random.seed(seed)
    for pay_col in ('PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3'):
        df = df[(df[pay_col] < 10000) & (df[pay_col] > 10)]

    target = 'default payment next month'
    n_flagged = int(n*0.6)
    label0 = df[(df['SEX'] == 1)].sample(n=n, replace=False, random_state=seed)
    label1 = df[(df[target] == 1 ) & (df['SEX'] == 2)].sample(n=n_flagged, replace=False, random_state=seed)
    label2 = df[(df[target] == 0 ) & (df['SEX'] == 2)].sample(n=n-n_flagged, replace=False, random_state=seed)

    def rescale(col):
        return 3 * (col - col.mean()) / (col.max() - col.min())

    df = pd.concat([label0, label1, label2], axis=0)
    X = df.iloc[:, 18:20].apply(rescale)
    Z = df.iloc[:, 1:2].apply(rescale)
    S = df['SEX'] - 1
    Y = df[target].replace({0: 1, 1: 0})
    data = pd.concat([S, Z, X, Y], axis=1)
    data = data.rename(columns={target: 'y'})

    s0 = S.to_numpy()
    x0 = pd.concat([Z, X], axis=1).to_numpy()
    y0 = Y.to_numpy()
    fitted = sequential_data(s0, x0, y0, seq_len, l=0.1, noise_factor=0.01, b = 0.01, seed=seed, ground_truth=False)
    unfair_clf = fitted[2]

    return s0, x0, y0, unfair_clf

In [None]:
data_path = Path('../data')
file_path = '/home/fagumuci/Long-Term-EI/Long-Term-Equal-Improvability/SimLoan/data/default of credit card clients.xls'
df = pd.read_excel(file_path, header=1)
# Keep rows whose three payment amounts all lie strictly between 10 and 10000.
for _pay_col in ('PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3'):
    df = df[(df[_pay_col] < 10000) & (df[_pay_col] > 10)]

# Balanced sample: n/4 rows for each (default flag, SEX) combination.
_per_group = int(n/4)
_flag = df['default payment next month']
_sex = df['SEX']
label0 = df[(_flag == 1) & (_sex == 1)].sample(n=_per_group, replace=False, random_state=2021)
label1 = df[(_flag == 0) & (_sex == 1)].sample(n=_per_group, replace=False, random_state=2021)
label2 = df[(_flag == 1) & (_sex == 2)].sample(n=_per_group, replace=False, random_state=2021)
label3 = df[(_flag == 0) & (_sex == 2)].sample(n=_per_group, replace=False, random_state=2021)

df = pd.concat([label0, label1, label2, label3], axis=0)
_rescale = lambda col: 3 * (col - col.mean()) / (col.max() - col.min())
X = df.iloc[:, 18:20].apply(_rescale)
Z = df.iloc[:, 1:2].apply(_rescale)
S = df['SEX'] - 1
Y = df['default payment next month'].replace({0: 1, 1: 0})
data = pd.concat([S, Z, X, Y], axis=1).rename(columns={'default payment next month': 'y'})


In [None]:
data_path = Path('../data')
file_path = '/home/fagumuci/Long-Term-EI/Long-Term-Equal-Improvability/SimLoan/data/default of credit card clients.xls'
# Draw the initial Taiwan cohort, then roll it forward for seq_len steps.
s0, x0, y0, unfair_clf = sample_taiwan(file_path, int(n/2), seq_len, seed=52)
x, y, unfair_clf = sequential_data(
    s0, x0, y0, seq_len,
    l=epsilon,
    noise_factor=0.01,
    b=b,
    seed=52,
    ground_truth=False,
)

In [None]:
# Move the generated sequences to numpy and split into train / valid / test.
x, y = x.detach().numpy(), y.detach().numpy().astype(int)
s0 = np.array(s0).reshape(len(s0),1)

_outer_split = train_test_split(s0, x, y, test_size=test_size, random_state=10)
s_train, s_test, x_train, x_test, y_train, y_test = _outer_split
_inner_split = train_test_split(s_train, x_train, y_train, test_size=valid_size, random_state=10)
s_train, s_valid, x_train, x_valid, y_train, y_valid = _inner_split
print(x_train.shape)


In [None]:
class Generator(nn.Module):
    """GRU generator that rolls a feature sequence forward from ``x0``.

    At each step the GRU input is the concatenation of the one-hot sensitive
    attribute (2 values), the classifier output for the current features
    (1 value) and a noise vector (``in_size`` values), hence ``in_size + 3``.
    """

    def __init__(self, in_size, hidden_size, num_layers):
        super().__init__()

        self.num_layers = num_layers
        self.h0_linear = nn.Linear(in_size, hidden_size)
        self.rnn = nn.GRU(in_size + 3, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, in_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x0, noise, s0, clf):
        """Generate ``noise.size(1)`` further steps after ``x0``.

        Args:
            x0: initial features, shape ``(batch, in_size)``.
            noise: per-step noise, shape ``(batch, steps, in_size)``.
            s0: sensitive attribute with values 0/1, shape ``(batch, 1)``.
            clf: callable ``clf(s, x)`` returning one value per row, shape ``(batch, 1)``.

        Returns:
            ``(xs, ys, ys_hard)``: features ``(batch, steps + 1, in_size)``,
            classifier outputs ``(batch, steps + 1, 1)``, and the rounded,
            detached classifier outputs.
        """
        # Follow the input's device instead of the notebook-global `device`,
        # so the module also works when the two differ.
        dev = x0.device
        ss = torch.clone(s0).to(dev)
        s_onehot = torch.zeros(ss.size(0), 2, device=dev).scatter_(1, ss.long(), 1.0)

        h0 = self.h0_linear(x0)
        h0 = h0.unsqueeze(0).repeat(self.num_layers, 1, 1)
        yt = clf(ss, x0)

        xs, ys = [x0], [yt]
        for i in range(noise.size(1)):
            y_noise = torch.cat([s_onehot, yt, noise[:, i]], dim=-1).unsqueeze(1)
            output, h0 = self.rnn(y_noise, h0)
            # xt = self.sigmoid(self.linear(output).squeeze())
            # Drop only the length-1 time axis; a bare squeeze() would also
            # collapse the batch axis when the batch holds a single row.
            xt = self.linear(output).squeeze(1)
            yt = clf(ss, xt)

            xs.append(xt)
            ys.append(yt)

        xs = torch.stack(xs, dim=1)
        ys = torch.stack(ys, dim=1)
        return xs, ys, ys.round().detach()
    
class Discriminator(nn.Module):
    """GRU discriminator that scores every time step of a sequence in (0, 1)."""

    def __init__(self, in_size, hidden_size, num_layers):
        super().__init__()

        # Sub-modules are created in the order rnn -> linear -> sigmoid.
        self.rnn = nn.GRU(
            in_size,
            hidden_size,
            num_layers,
            batch_first=True,
        )
        self.linear = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map ``(batch, steps, in_size)`` to per-step scores ``(batch, steps, 1)``."""
        per_step_hidden, _ = self.rnn(x)
        return self.sigmoid(self.linear(per_step_hidden))

class DistributionDiscriminator(nn.Module):
    """MLP with LeakyReLU(0.2) between Linear layers and no output activation."""

    def __init__(self, hiddens):
        super().__init__()

        blocks = []
        for idx in range(len(hiddens) - 1):
            blocks += [nn.Linear(hiddens[idx], hiddens[idx + 1]), nn.LeakyReLU(0.2)]
        # Remove the activation that follows the last Linear layer.
        del blocks[-1]
        self.model = nn.Sequential(*blocks)

    def forward(self, x):
        """Return the raw (unbounded) network output for ``x``."""
        return self.model(x)

In [None]:
# Build the three networks (creation order kept: generator, discriminator,
# classifier) and move each to the selected device.
generator = Generator(x_dim, g_hidden_size, g_num_layers).to(device)
discriminator = Discriminator(x_dim, d_hidden_size, d_num_layers).to(device)
clf = Classifier(c_hiddens, dropout_prob=0.2)
clf.to(device)

In [None]:
from torch.optim import Adam, RMSprop
from torch.utils.data import DataLoader, TensorDataset


def train_discriminator(clf, G, D, optim, loss_fn, xs, zs, ss):
    """Run one discriminator update and return its loss.

    Generated sequences (detached from the generator) are scored against an
    all-zeros target and the real sequences ``xs`` against an all-ones target.
    """
    generated = G(xs[:, 0], zs, ss, clf)[0]

    score_fake = D(generated.detach())
    score_real = D(xs)
    loss = (
        loss_fn(score_fake, torch.zeros_like(score_fake))
        + loss_fn(score_real, torch.ones_like(score_real))
    )

    optim.zero_grad()
    loss.backward()
    optim.step()

    return loss


def get_moment_loss(x_pred, x_true):
    """Mean absolute gap in batch mean plus mean absolute gap in batch std.

    Both moments are taken over ``dim=0``. The standard deviation is the square
    root of the biased variance plus ``1e-6``.
    """
    def _stable_std(values):
        return torch.sqrt(values.var(dim=0, unbiased=False) + 1e-6)

    mean_gap = torch.mean(torch.abs(x_pred.mean(dim=0) - x_true.mean(dim=0)))
    std_gap = torch.mean(torch.abs(_stable_std(x_pred) - _stable_std(x_true)))
    return mean_gap + std_gap


def train_generator(clf, G, D, optim, loss_fn, xs, zs, ss, gamma=100):
    """Run one generator update.

    The objective is the adversarial loss (discriminator scores on generated
    data against an all-ones target) plus ``gamma`` times the moment loss
    between generated and real sequences.

    Returns:
        ``(total_loss, moment_loss)``.
    """
    generated = G(xs[:, 0], zs, ss, clf)[0]
    scores = D(generated)

    adversarial = loss_fn(scores, torch.ones_like(scores))
    moment = get_moment_loss(generated, xs)
    loss = adversarial + gamma * moment

    optim.zero_grad()
    loss.backward()
    optim.step()

    return loss, moment

#@count_time
def train_gan(loader, clf, G, D, n_epochs, device):
    """Adversarially train ``G`` against ``D`` and plot the moment-loss history.

    Each batch gets two generator updates followed by one discriminator
    update. Noise is uniform on ``[0, 1)`` (``torch.rand``), one vector per
    generated step.

    Args:
        loader: yields ``(s, x, y)`` batches with ``x`` of shape ``(batch, seq, dim)``.
        clf: classifier passed through to the generator.
        G, D: generator and discriminator modules.
        n_epochs: number of passes over ``loader``.
        device: device the batches and the noise are moved to.
    """
    g_optim = Adam(G.parameters())
    d_optim = Adam(D.parameters())
    loss_fn = nn.BCELoss()
    # Plain list: appending to a numpy array copies the whole history each time.
    hist_mmt = []

    for epoch in range(n_epochs):

        for i, (s_mb, x_mb, _y_mb) in enumerate(loader, start=1):
            batch, seq, dim = x_mb.size()
            x_mb = x_mb.to(device)
            z_mb = torch.rand(batch, seq-1, dim).to(device)
            s_mb = s_mb.to(device)

            for _ in range(2):
                g_loss, mmt_loss = train_generator(clf, G, D, g_optim, loss_fn, x_mb, z_mb, s_mb)
                hist_mmt.append(float(mmt_loss))

            for _ in range(1):
                d_loss = train_discriminator(clf, G, D, d_optim, loss_fn, x_mb, z_mb, s_mb)

            step = epoch * len(loader) + i
            if step % 1000 == 0:
                print(f'Epoch: {epoch: 6.0f} | step: {step:6.0f} | d_loss: {d_loss:6.4f} | g_loss: {g_loss: 6.4f} | mmt_loss: {mmt_loss:6.4f}')

    hist_mmt = np.asarray(hist_mmt)
    plt.plot(np.arange(0, len(hist_mmt), 1), hist_mmt)
    # plt.plot(np.arange(0, len(hist_mmt) + 1, 1), np.arange(0, len(hist_mmt) + 1, 1))

def generate_dataset_from_gan(loader, clf, G, device, extra_seq=0):
    """Generate a synthetic dataset by running ``G`` from every batch's first step.

    Args:
        loader: yields ``(s, x, y)`` batches with ``x`` of shape ``(batch, seq, dim)``.
        clf: classifier passed through to the generator.
        G: generator called as ``G(x0, noise, s, clf)``.
        device: device the features and the noise are moved to.
        extra_seq: number of steps to generate beyond the real sequence length.

    Returns:
        ``(gen_loader, gen_s, gen_x, gen_y)``: an unshuffled loader over the
        generated data (batch size = first input batch size) and the three
        numpy arrays it was built from.
    """
    gen_s, gen_x, gen_y = [], [], []

    batch_size = None
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for s_mb, x_mb, y_mb in loader:
            batch, seq_len, x_dim = x_mb.shape
            if batch_size is None:
                batch_size = batch

            x_mb = x_mb.to(device)
            # Uniform noise, matching what train_gan feeds the generator during
            # training. Sampling Gaussian noise here would query the generator
            # outside the noise distribution it was fitted on.
            z_mb = torch.rand(batch, seq_len + extra_seq - 1, x_dim).to(device)

            gen_x_mb, _, gen_y_mb = G(x_mb[:, 0], z_mb, s_mb, clf)

            gen_s.append(s_mb)
            gen_x.append(gen_x_mb)
            gen_y.append(gen_y_mb)

    gen_s = torch.cat(gen_s, dim=0).detach().cpu().numpy()
    gen_x = torch.cat(gen_x, dim=0).detach().cpu().numpy()
    gen_y = torch.cat(gen_y, dim=0).detach().cpu().numpy()

    # torch.tensor copies, so the loader's tensors do not alias the returned arrays.
    gen_data = TensorDataset(
        torch.tensor(gen_s, dtype=torch.float32),
        torch.tensor(gen_x, dtype=torch.float32),
        torch.tensor(gen_y, dtype=torch.float32),
    )
    gen_loader = DataLoader(gen_data, batch_size=batch_size, shuffle=False)

    return gen_loader, gen_s, gen_x, gen_y

In [None]:
# Wrap each split as device-resident tensors. The train loader uses
# mini-batches; validation and test are each served as one full batch.
train_data = TensorDataset(*(tensor(part).to(device) for part in (s_train, x_train, y_train)))
valid_data = TensorDataset(*(tensor(part).to(device) for part in (s_valid, x_valid, y_valid)))
test_data = TensorDataset(*(tensor(part).to(device) for part in (s_test, x_test, y_test)))

train_loader = DataLoader(train_data, shuffle=False, batch_size=batch_size)
valid_loader = DataLoader(valid_data, shuffle=False, batch_size=len(valid_data))
test_loader = DataLoader(test_data, shuffle=False, batch_size=len(test_data))

In [None]:
# clf.cuda()
# Reuse a saved classifier when one exists; otherwise train and save it.
if clf_path.exists():
    clf.load_state_dict(torch.load(clf_path, map_location=device))
else:
    # Create the output directory before training so the save below cannot
    # fail on a missing folder after the fit has already run.
    clf_path.parent.mkdir(parents=True, exist_ok=True)
    clf.fit(train_loader, valid_loader, clf_path, device)
    torch.save(clf.state_dict(), clf_path)

In [None]:
# Summed BCE loss of the classifier on the held-out test split.
clf.eval(loader=test_loader, device=device)

In [None]:
# Reuse a saved generator when one exists; otherwise train the GAN and save it.
if gan_path.exists():
    generator.load_state_dict(torch.load(gan_path, map_location=device))
else:
    # Create the output directory before training so the save below cannot
    # fail on a missing folder after the GAN epochs have already run.
    gan_path.parent.mkdir(parents=True, exist_ok=True)
    train_gan(train_loader, unfair_clf.to(device), generator, discriminator, gan_epochs, device)
    torch.save(generator.state_dict(), gan_path)

In [None]:
# Generate synthetic counterparts of the train / valid / test splits.
unfair_clf = unfair_clf.to(device)
(gen_train_loader, gen_train_s,
 gen_train_x, gen_train_y) = generate_dataset_from_gan(train_loader, unfair_clf, generator, device)
(gen_valid_loader, gen_valid_s,
 gen_valid_x, gen_valid_y) = generate_dataset_from_gan(valid_loader, unfair_clf, generator, device)
(gen_test_loader, gen_test_s,
 gen_test_x, gen_test_y) = generate_dataset_from_gan(test_loader, unfair_clf, generator, device)

In [None]:
# Re-assemble the full dataset in train -> test -> valid order.
s, x, y = (
    np.vstack(list(parts))
    for parts in (
        (s_train, s_test, s_valid),
        (x_train, x_test, x_valid),
        (y_train, y_test, y_valid),
    )
)

In [None]:
# Same cuBLAS workspace setting as in the configuration cell.
os.environ.update({'CUBLAS_WORKSPACE_CONFIG': ':4096:8'})

In [None]:
# Re-assemble the full dataset in train -> test -> valid order.
s = np.vstack([s_train, s_test, s_valid])
x = np.vstack([x_train, x_test, x_valid])
y = np.vstack([y_train, y_test, y_valid])

# One [s, x_t, y_t] block per time step, stacked step after step. The empty
# float seed row-block keeps the result's dtype and its shape for seq_len == 0.
_step_blocks = [np.hstack([s, x[:, t, :], y[:, t, :]]) for t in range(seq_len)]
gan_output = np.vstack([np.empty((0, x_train.shape[2] + 2)), *_step_blocks])

df = pd.DataFrame(gan_output).set_axis(['SEX', 'LIMIT_BAL', 'PAY_AMT1', 'PAY_AMT2', 'Y'], axis=1)
df.to_csv('taiwan.csv')

In [None]:
# One row per consecutive time-step pair (t, t+1) of every individual:
#   [s, x_t..., y_t, x_{t+1}..., y_{t+1}]
# The sensitive attribute is read from `s`, which was re-stacked in the same
# train -> test -> valid order as `x` and `y`. `s0` is still in the original
# pre-split order, so using it put the wrong group on most rows.
n_features = x.shape[2]
pair_rows = [
    np.concatenate(([s[i, 0]], x[i, j], [y[i, j, 0]], x[i, j + 1], [y[i, j + 1, 0]]))
    for i in range(x.shape[0])
    for j in range(x.shape[1] - 1)
]
# Built in one go: growing the array with vstack per row is quadratic.
pairs = np.asarray(pair_rows, dtype=float).reshape(-1, 2 * n_features + 3)

df = pd.DataFrame(pairs)
df = df.set_axis(['SEX', 'LIMIT_BAL_1', 'PAY_AMT1_1', 'PAY_AMT2_1', 'Y_1', 'LIMIT_BAL_2', 'PAY_AMT1_2', 'PAY_AMT2_2', 'Y_2'], axis=1)
df.to_csv('taiwan.csv')
len(pairs)

In [None]:
import os
from datasets.toy import ToySCM
from datasets.taiwan import TaiwanSCM
from models.vaca import VACA
import utils.args_parser  as argtools
from data_modules.het_scm import HeterogeneousSCMDataModule

# Set up the VACA model and its data module for the 'taiwan' dataset.
# NOTE(review): TaiwanSCM, HeterogeneousSCMDataModule, VACA and argtools are
# project modules not visible here; the comments below describe only what this
# cell passes to them.
scm = TaiwanSCM(None, 'train', len(pairs))
model_file = os.path.join('_params', 'model_vaca.yaml')
trainer_file = os.path.join('_params', 'trainer.yaml')

# Model config first, then trainer settings merged on top of it.
cfg = argtools.parse_args(model_file)
cfg.update(argtools.parse_args(trainer_file))

cfg['dataset'] = {
    'name': 'taiwan',
    'params1': {},
    'params2': {}
}

# Keyword arguments for the data module; sized by the number of step pairs.
cfg['dataset']['params1']['batch_size'] = batch_size
cfg['dataset']['params1']['num_samples_tr'] = len(pairs)
cfg['dataset']['params1']['num_workers'] = 0
cfg['dataset']['params1']['equations_type'] = 'non-linear'
cfg['dataset']['params1']['normalize'] = 'lik'
cfg['dataset']['params1']['lambda_'] = 0.05
cfg['dataset']['params1']['data_dir'] = '../Data'
cfg['dataset']['params1']['device'] = device
cfg['dataset']['params1']['dataset_name'] = 'taiwan'

dataset_params = cfg['dataset']['params1']
data_module = HeterogeneousSCMDataModule(**dataset_params)

# Call order kept as written; presumably each split must be created before
# prepare_data() and the loaders are requested -- TODO confirm.
data_module.train_dataset._create_data()
data_module.valid_dataset._create_data()
data_module.test_dataset._create_data()
data_module.prepare_data()
data_module.train_dataloader()
data_module.test_dataloader()
data_module.val_dataloader()


# Copy graph/likelihood metadata from the SCM into the model parameters.
cfg['model']['params']['is_heterogeneous'] = scm.is_heterogeneous
cfg['model']['params']['likelihood_x'] = scm.likelihood_list

cfg['model']['params']['num_nodes'] = scm.num_nodes
cfg['model']['params']['edge_dim'] = scm.edge_dimension

model_params = cfg['model']['params']

model_vaca = VACA(**model_params)
model_vaca.set_random_train_sampler(data_module.get_random_train_sampler())

model_vaca.set_optim_params(optim_params=cfg['optimizer'],
                            sched_params=cfg['scheduler'])

In [None]:
# Display the data module's training split as the cell output.
data_module.train_dataset

In [None]:
from models._evaluator import MyEvaluator

# Build the evaluator from the training split's intervention list and the
# data module's scaler, then attach it to the model.
evaluator = MyEvaluator(model=model_vaca,
                        intervention_list=data_module.train_dataset.get_intervention_list(),
                        scaler=data_module.scaler
                        )
model_vaca.set_my_evaluator(evaluator=evaluator)

assert evaluator is not None
is_training = train_vaca

# cfg['trainer'] is later unpacked into pl.Trainer(**cfg['trainer']); drop the
# keys this notebook does not pass on (presumably because the installed
# Lightning version rejects them -- TODO confirm). pop(..., None) keeps the
# cell re-runnable, where `del` raised KeyError on the second run.
for _dropped_key in (
    'progress_bar_refresh_rate',
    'flush_logs_every_n_steps',
    'terminate_on_nan',
    'auto_select_gpus',
    'weights_summary',
    'gpus',
    'track_grad_norm',
):
    cfg['trainer'].pop(_dropped_key, None)
cfg['trainer']['enable_model_summary'] = False


In [None]:
from pytorch_lightning.loggers.tensorboard import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import pytorch_lightning as pl

# Resolve the experiment directory: derived from the config when no YAML file
# is given, otherwise the directory that contains the YAML file.
yaml_file = ''
if yaml_file == '':
    save_dir = argtools.mkdir(os.path.join(cfg['root_dir'],
                                           argtools.get_experiment_folder(cfg),
                                           str(cfg['seed'])))
else:
    # os.path.join() does not accept a list; take the parent directory directly.
    save_dir = os.path.dirname(yaml_file)
print(f'Save dir: {save_dir}')
# trainer = pl.Trainer(**cfg['model'])
logger = TensorBoardLogger(save_dir=save_dir, name='logs', default_hp_metric=False)
out = logger.log_hyperparams(argtools.flatten_cfg(cfg))

save_dir_ckpt = argtools.mkdir(os.path.join(save_dir, 'ckpt'))
ckpt_file = argtools.newest(save_dir_ckpt)
callbacks = []
if is_training == 1:

    checkpoint = ModelCheckpoint(monitor=model_vaca.monitor(),
                                 mode=model_vaca.monitor_mode(),
                                 save_top_k=1,
                                 save_last=True,
                                 filename='checkpoint-{epoch:02d}',
                                 dirpath=save_dir_ckpt)

    callbacks = [checkpoint]

    if cfg['early_stopping']:
        early_stopping = EarlyStopping(model_vaca.monitor(), mode=model_vaca.monitor_mode(), min_delta=0.0,
                                       patience=50)
        callbacks.append(early_stopping)

    if ckpt_file is not None:
        print(f'Loading model training: {ckpt_file}')
    # The trainer is built the same way whether or not a checkpoint exists.
    trainer = pl.Trainer(logger=logger, callbacks=callbacks, **cfg['trainer'], devices='auto', accelerator='cpu')

    trainer.fit(model=model_vaca, datamodule=data_module)
    trainer.validate(ckpt_path=ckpt_file, dataloaders=data_module.val_dataloader())
    trainer.test(ckpt_path=ckpt_file, dataloaders=data_module.test_dataloader())
    argtools.save_yaml(cfg, file_path=os.path.join(save_dir, 'hparams_full.yaml'))

else:
    # Fail with a clear message instead of handing None to the checkpoint loader.
    if ckpt_file is None:
        raise FileNotFoundError(
            f'No VACA checkpoint found in {save_dir_ckpt}; set train_vaca = 1 to train one.'
        )
    model_vaca = VACA.load_from_checkpoint(ckpt_file, **model_params)

In [None]:

# Point the evaluator at the current `model_vaca` (the previous cell may have
# replaced it with a checkpoint-loaded instance) and attach the evaluator to
# that model.
evaluator.set_model(model_vaca)
model_vaca.set_my_evaluator(evaluator=evaluator)


In [None]:
# Count the trainable parameters, then put the model in inference mode and
# freeze it.
model_parameters = (p for p in model_vaca.parameters() if p.requires_grad)
params = int(sum(np.prod(p.size()) for p in model_parameters))
model_vaca.eval()
model_vaca.freeze()

In [None]:
def p_to_csv(s, x, y, dataset):
    """Write consecutive-step pairs to CSV and return them as an array.

    Every individual ``i`` and every step ``j`` with a successor contributes
    one row ``[s_i, x[i, j], y[i, j], x[i, j+1], y[i, j+1]]``.

    Args:
        s: sensitive attribute, one value per individual (tensor or array).
        x: features of shape ``(n, seq_len, d)`` (tensor or array).
        y: labels of shape ``(n, seq_len, 1)`` (tensor or array).
        dataset: ``'toy'`` (d = 2, written to ``steps.csv``) or ``'taiwan'``
            (d = 3, written to ``taiwan.csv``).

    Returns:
        Float array of shape ``(n * (seq_len - 1), 2 * d + 3)``.

    Raises:
        ValueError: for an unknown ``dataset`` or a feature width that does
            not match the dataset's column layout.
    """
    layouts = {
        'toy': (['s', 'x1', 'z1', 'y1', 'x2', 'z2', 'y2'], 'steps.csv'),
        'taiwan': (['SEX', 'LIMIT_BAL_1', 'PAY_AMT1_1', 'PAY_AMT2_1', 'Y_1',
                    'LIMIT_BAL_2', 'PAY_AMT1_2', 'PAY_AMT2_2', 'Y_2'], 'taiwan.csv'),
    }
    if dataset not in layouts:
        # This used to fall through and silently return None.
        raise ValueError(f"Unknown dataset {dataset!r}; expected one of {sorted(layouts)}")
    columns, csv_name = layouts[dataset]

    if isinstance(s, torch.Tensor):
        s = s.cpu().detach().numpy()
    if isinstance(x, torch.Tensor):
        x = x.cpu().detach().numpy()
    if isinstance(y, torch.Tensor):
        y = y.cpu().detach().numpy()
    s = np.asarray(s, dtype=float).reshape(-1)
    x = np.asarray(x)
    y = np.asarray(y)

    width = 2 * x.shape[2] + 3
    if width != len(columns):
        raise ValueError(
            f"dataset {dataset!r} expects {len(columns)} columns but x yields {width}"
        )

    # Every (i, j) transition gets its own row. The stacking step used to sit
    # outside the step loop, so only each individual's last transition was kept.
    rows = [
        np.concatenate(([s[i]], x[i, j], [y[i, j][0]], x[i, j + 1], [y[i, j + 1][0]]))
        for i in range(x.shape[0])
        for j in range(x.shape[1] - 1)
    ]
    pairs = np.asarray(rows, dtype=float).reshape(-1, width)

    df = pd.DataFrame(pairs)
    df = df.set_axis(columns, axis=1)
    df.to_csv(csv_name)
    return pairs

In [None]:
def vaca_dataset(batch_size, pairs, device, dataset):
    """Build a ``HeterogeneousSCMDataModule`` for ``dataset`` and materialise its splits.

    Args:
        batch_size: batch size forwarded to the data module.
        pairs: only its length is used (as ``num_samples_tr``).
        device: device forwarded to the data module.
        dataset: dataset name, used both as ``name`` and ``dataset_name``.

    Returns:
        The data module after ``_create_data()`` has been called on its four
        datasets, ``prepare_data()`` has run, and each dataloader factory has
        been invoked once (their return values are discarded).
    """
    cfg = argtools.parse_args(os.path.join('_params', 'model_vaca.yaml'))

    dataset_params = {
        'batch_size': batch_size,
        'num_samples_tr': len(pairs),
        'num_workers': 0,
        'equations_type': 'non-linear',
        'normalize': 'lik',
        'lambda_': 0.05,
        'data_dir': '../Data',
        'device': device,
        'dataset_name': dataset,
    }
    cfg['dataset'] = {'name': dataset, 'params1': dataset_params, 'params2': {}}

    data_module = HeterogeneousSCMDataModule(**dataset_params)

    # Same order as before: train, valid, test, total.
    for split_attr in ('train_dataset', 'valid_dataset', 'test_dataset', 'total_dataset'):
        getattr(data_module, split_attr)._create_data()
    data_module.prepare_data()
    # Same order as before: train, test, val, total.
    for loader_factory in ('train_dataloader', 'test_dataloader', 'val_dataloader', 'total_dataloader'):
        getattr(data_module, loader_factory)()
    return data_module

In [None]:

def intervention(nodes, values):
    """Return ``{nodes[i]: values[i].item()}`` for every index of ``nodes``.

    ``values`` must be indexable and hold objects exposing ``.item()``
    (e.g. a 1-D tensor) with at least ``len(nodes)`` entries.
    """
    return {nodes[idx]: values[idx].item() for idx in range(len(nodes))}

def logistic(x):
    """Return ``log(1 + exp(x))`` (softplus) element-wise for tensor ``x``.

    Despite the name, this is the softplus function, not the sigmoid. It is
    evaluated with ``F.softplus`` because the literal ``log(1 + exp(x))``
    overflows to ``inf`` once ``exp(x)`` exceeds the float range.
    """
    if not torch.is_floating_point(x):
        # torch.exp promoted integer tensors to float; softplus needs that done explicitly.
        x = x.to(torch.get_default_dtype())
    return F.softplus(x)


In [None]:
class Generator2(nn.Module):
    """GRU generator that rolls a feature vector forward in time.

    At every step the GRU receives ``[one_hot(s), y_t, noise_t]`` and its output
    is projected back to feature space; ``clf`` is then queried on the new
    features to obtain the next ``y``.
    """

    def __init__(self, in_size, hidden_size, num_layers):
        """
        Args:
            in_size: number of features per time step (also the noise width).
            hidden_size: GRU hidden size.
            num_layers: number of stacked GRU layers.
        """
        super().__init__()

        self.num_layers = num_layers
        self.h0_linear = nn.Linear(in_size, hidden_size)
        # GRU input width = 2 (one-hot s) + 1 (y_t) + in_size (noise).
        self.rnn = nn.GRU(in_size + 3, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, in_size)
        # Only referenced by the disabled sigmoid output variant; kept as an attribute.
        self.sigmoid = nn.Sigmoid()

    def forward(self, x0, noise, s0, clf):
        """Generate ``noise.size(1)`` steps after ``x0``.

        Args:
            x0: initial features, shape ``(N, in_size)``.
            noise: shape ``(N, T, in_size)``; one slice is consumed per step.
            s0: sensitive attribute with values in {0, 1}, shape ``(N, 1)``.
            clf: callable ``clf(s, x)`` returning the outcome for features ``x``;
                its output is concatenated on the last dim, so it is expected
                to be ``(N, 1)``.

        Returns:
            ``(xs, ys, ys.round())`` where ``xs`` is ``(N, T + 1, in_size)`` and
            ``ys`` stacks the ``clf`` outputs along dim 1; index 0 along dim 1
            is the initial state.
        """
        ss = torch.clone(s0).to(x0.device)
        # One-hot encode s on x0's device (the index tensor must live there too).
        s_onehot = torch.zeros(ss.size(0), 2, device=x0.device).scatter_(1, ss.long(), torch.ones_like(ss))

        h0 = self.h0_linear(x0)
        h0 = h0.unsqueeze(0).repeat(self.num_layers, 1, 1)
        yt = clf(ss, x0)

        xs, ys = [x0], [yt]
        for i in range(noise.size(1)):
            y_noise = torch.cat([s_onehot, yt, noise[:, i]], dim=-1).unsqueeze(1)
            output, h0 = self.rnn(y_noise, h0)
            # Drop only the length-1 time axis. A bare squeeze() would also remove
            # the batch axis when N == 1 (or the feature axis when in_size == 1).
            xt = self.linear(output).squeeze(1)
            yt = clf(ss, xt)

            xs.append(xt)
            ys.append(yt)

        xs = torch.stack(xs, dim=1)
        ys = torch.stack(ys, dim=1)
        return xs, ys, ys.round()
    

In [None]:
# Instantiate the generator on `device` and restore its weights from `gan_path`.
generator = Generator2(x_dim, g_hidden_size, g_num_layers).to(device)
gan_state = torch.load(gan_path, map_location=device)
generator.load_state_dict(gan_state)

In [None]:
def intervention_step(loader, vaca, intervention_nodes, e, nodes):
    """Apply one round of additive interventions through the VACA encoder/decoder.

    For every batch of ``loader`` the k-th row of ``e`` is added to ``batch.x``,
    the factual and intervened graphs are encoded, the latent blocks of the
    intervened nodes are copied into the factual latent, and the result is
    decoded. Gaussian noise scaled by the decoder's per-output stddev is then
    added, and a slice of the decoded features is collected.

    Args:
        loader: data loader whose batches expose ``x``, ``edge_index``,
            ``edge_attr``, ``edge_index_i``, ``edge_attr_i`` and ``node_ids``;
            ``loader.dataset.nodes_list`` maps node names to positions.
        vaca: object exposing ``model.encoder``, ``model.decoder`` and ``z_dim``.
        intervention_nodes: names of the nodes whose latents are replaced.
        e: indexable by batch position; ``e[k]`` must be viewable as ``batch.x``.
        nodes: full node list; only its length is used (for the output slice).

    Returns:
        Tensor stacking, along dim 0, one decoded slice per batch.
    """
    k = 0
    x_gan_input_list = []

    for batch in loader:

            # Intervened input = factual input + additive effort for this batch.
            batch.x_i = batch.x + e[k].to(batch.x.device).view(batch.x.shape)


            # Encode the factual graph and the intervened graph (mean of the posterior).
            z_f, _ = vaca.model.encoder(batch.x, batch.edge_index, edge_attr=batch.edge_attr, return_mean=True, node_ids=batch.node_ids)
            z_cf_I, _ = vaca.model.encoder(batch.x_i, batch.edge_index_i, edge_attr=batch.edge_attr_i,return_mean=True, node_ids=batch.node_ids)

            # Flatten to a single row; presumably assumes one sample per batch
            # (callers set batch_size = 1) - TODO confirm.
            z_factual = z_f.reshape(1, -1)
            z_cf_I = z_cf_I.reshape(1, -1)

            # NOTE: z_dec aliases z_factual, so the assignments below modify it in place.
            z_dec = z_factual
            z_dim = vaca.z_dim

            # Overwrite the latent block of each intervened node with its intervened value.
            for node_name in intervention_nodes:
                node_idx = loader.dataset.nodes_list.index(node_name)
                z_dec[:, z_dim * node_idx:z_dim * (node_idx + 1)] = z_cf_I[:, z_dim * node_idx: z_dim * (node_idx + 1)]

            z_dec = z_dec.reshape(-1, z_dim)

            x_CF, px_z = vaca.model.decoder(z_dec, batch.edge_index_i, edge_attr=batch.edge_attr_i, return_type='mean',  node_ids=batch.node_ids)
            # Per-output noise scale: the stddev for Normal likelihoods, 0 otherwise.
            adj = []
            for i in range(max(x_CF.shape)):
                if type(px_z.distributions[i]) == torch.distributions.normal.Normal:
                    adj.append(px_z.distributions[i].stddev)
                else:
                    adj.append(torch.tensor(0))

            # NOTE(review): torch.tensor(adj) builds a new tensor from the list, so no
            # gradient flows through the stddevs; confirm device handling on CUDA.
            x_CF+=(torch.tensor(adj)*torch.randn_like(x_CF))

            # x_CF = x_CF.view(len(nodes), -1)

            # Keep columns [-len(nodes)/2, -1) of the decoded output; presumably the
            # second-step features without the trailing outcome column - TODO confirm.
            x_gan_input_list.append(x_CF[:,-int((len(nodes))/2):-1].clone())

            k+=1
            # Redundant guard: the for-loop ends after len(loader) batches anyway.
            if k == len(loader):
                break

    x_gan_input = torch.stack(x_gan_input_list, dim=0)


    return x_gan_input

In [None]:
def intervene(datamodule, loader, vaca, generator, clf, efforts, seq_len, dataset, device):
    """Roll out ``seq_len - 1`` intervention steps and collect predictions.

    Each step scatters that step's efforts onto the intervened node columns,
    pushes them through ``intervention_step`` (VACA), feeds the result to
    ``generator`` for one generated step, writes the resulting pair with
    ``p_to_csv`` and rebuilds the data module/loader via ``vaca_dataset`` for
    the next step.

    Args:
        datamodule: data module; ``train_dataset.nodes_list`` and
            ``train_dataset.nodes_to_intervene`` are read. Rebound locally each step.
        loader: loader over the current pairs; ``loader.dataset.X`` is read.
            Rebound locally each step.
        vaca: VACA model passed to ``intervention_step``.
        generator: called as ``generator(x, noise, s, clf)``.
        clf: classifier module, moved to ``device`` and handed to ``generator``.
        efforts: viewable as ``(len(loader), seq_len - 1, len(intervention_nodes))``.
        seq_len: number of time steps; ``seq_len - 1`` interventions are applied.
        dataset: dataset name forwarded to ``p_to_csv`` and ``vaca_dataset``.
        device: torch device for intermediate tensors.

    Returns:
        ``(y_pred, x_post_int, data)``: ``y_pred`` concatenates the generator's
        third output at time index 0 for every step along dim 1; ``x_post_int``
        is ``(len(loader), seq_len, F)``; ``data`` is ``[s, x, y]`` of the last
        step.

    NOTE(review): ``data`` is unbound if ``seq_len == 1``, and the last time
    index of ``x_post_int`` is never written (it comes from ``torch.empty``).
    """
    nodes = datamodule.train_dataset.nodes_list
    intervention_nodes = datamodule.train_dataset.nodes_to_intervene
    # _inter = intervention(intervention_nodes, efforts[0])
    # Column 0 of X is used as the sensitive attribute.
    s = tensor(loader.dataset.X[:,0]).view(len(loader.dataset), 1).to(device)
    # Placeholder; overwritten by intervention_step inside the loop.
    x_gan_input = torch.empty(len(loader), int((len(nodes)-2)))
    # Starts with zero columns; one block of predictions is appended per step.
    y_pred = torch.empty(len(loader), 0).to(device)
    y_pred.requires_grad_(True)
    datamodule.batch_size = 1
    x_post_int = torch.empty(len(loader), seq_len, int(len(nodes)/2-1)).to(device)
    x_post_int[:, 0, :] = tensor(loader.dataset.X[:, 1:int(len(nodes)/2)]).clone().to(device)
    # Positions of the intervened nodes inside `nodes`.
    inter_index = []
    for n in range(len(nodes)):
        if nodes[n] in intervention_nodes:
            inter_index.append(n)

    # t[row, j] = column index of the j-th intervened node (same for every row);
    # used as the scatter index below.
    t = torch.empty(len(loader), 0)
    for ns in inter_index:
        t = torch.cat([t, torch.ones((len(loader),1))*ns], dim=1)

    t = t.type(torch.int64)

    t = t.to(device)
    efforts = efforts.to(device)

    for i in range(0, seq_len-1):

        # Dense (rows, len(nodes)) effort matrix: zeros except at the intervened columns.
        e = torch.zeros((len(loader), len(nodes))).to(device).scatter(1, t, efforts.view(len(loader), seq_len-1, len(intervention_nodes))[:, i, :]).to(device)

        datamodule.batch_size = 1

        x_gan_input = intervention_step(loader, vaca, intervention_nodes, e, nodes)

        # A single noise step, so the generator returns two time indices (0 and 1).
        z_mb = torch.randn(len(loader), 1, int((len(nodes)/2-1)))

        gen_x_mb, _, gen_y_mb = generator(x_gan_input.view(len(loader), int((len(nodes)/2-1))).to(device), z_mb.to(device), s.reshape(len(loader),1).to(device), clf.to(device))
        # gen_y_mb is the generator's third return value.
        y_pred = torch.cat([y_pred, gen_y_mb[:,0].to(device)], 1)

        x_next_steps = gen_x_mb[:, -2:, :]
        y_next_steps = gen_y_mb[:, -2:, :]
        # This overwrites index 0 on the first iteration; see the reset after the loop.
        x_post_int[:, i, :] = gen_x_mb[:, 0, :].clone()

        data = torch.cat([s.to(device), x_post_int[:, i, :].to(device), gen_y_mb[:,0].to(device)], 1)

        # Writes the new pairs to CSV; vaca_dataset below presumably reads that file - TODO confirm.
        p_to_csv(s, x_next_steps, y_next_steps, dataset)

        datamodule = vaca_dataset(1, y_next_steps, device, dataset)
        loader = datamodule.total_dataloader()
        # NOTE(review): node names are hard-coded regardless of `dataset`, and the
        # call is issued twice; confirm whether the second call is needed.
        loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
        loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)

    # NOTE(review): `loader` now refers to the loader built in the last iteration,
    # so index 0 is filled from that dataset, not from the loader passed in.
    x_post_int[:, 0, :] = tensor(loader.dataset.X[:, 1:int(len(nodes)/2)]).clone().to(device)

    return y_pred, x_post_int, data
        


In [None]:
class effortNN(nn.Module):
    """Feed-forward effort model: Linear -> LayerNorm -> ReLU (x2) -> Linear.

    Maps a row of node values to ``len(nodes_to_intervene) * (seq_len - 1)``
    effort values, i.e. all efforts for the whole horizon in one shot.
    """

    def __init__(self, datamodule, loader, seq_len, device):
        """
        Args:
            datamodule: provides ``num_nodes`` (input width) and
                ``train_dataset.nodes_to_intervene`` (efforts per step).
            loader: unused; kept for signature compatibility.
            seq_len: number of time steps; ``seq_len - 1`` effort steps are emitted.
            device: device on which all layers are created.
        """
        super(effortNN, self).__init__()
        self.fc1 = nn.Linear(int(datamodule.num_nodes), 128, bias=True, device=device)
        # LayerNorms are created on `device` as well so the whole stack is co-located.
        self.ln1 = nn.LayerNorm(128, device=device)
        self.fc2 = nn.Linear(128, 64, bias=True, device=device)
        self.ln2 = nn.LayerNorm(64, device=device)
        self.fc3 = nn.Linear(64, int(len(datamodule.train_dataset.nodes_to_intervene) * int(seq_len-1)), device=device)

        # Same submodule order as before, so state_dict keys are unchanged.
        self.model = nn.Sequential(
            self.fc1,
            self.ln1,
            nn.ReLU(),
            self.fc2,
            self.ln2,
            nn.ReLU(),
            self.fc3,
        )

    def forward(self, x):
        """Return efforts for ``x`` (last dim = ``num_nodes``).

        Runs the full stack. Previously only fc1 -> fc2 -> fc3 were applied,
        which skipped the LayerNorm/ReLU layers and made the model purely linear.
        """
        efforts = self.model(x)

        return efforts
    

def budget_penalty(efforts, total_budget):
    """Sum over rows of ``relu(sum(|efforts[i]|) - total_budget)``.

    Args:
        efforts: tensor whose first dim indexes samples; all remaining dims of a
            sample are summed in absolute value.
        total_budget: allowed L1 effort per sample.

    Returns:
        Scalar tensor; zero when no sample exceeds its budget (including when
        ``efforts`` has no rows).
    """
    per_sample = efforts.abs()
    if per_sample.dim() > 1:
        # Collapse everything but the sample axis into one L1 norm per sample.
        per_sample = per_sample.flatten(start_dim=1).sum(dim=1)
    return torch.relu(per_sample - total_budget).sum()

def total_loss(efforts, total_budget, improvement_loss):
    """Return ``improvement_loss`` plus the budget penalty weighted by 100000."""
    over_budget = budget_penalty(efforts, total_budget)
    return 100000*over_budget + improvement_loss

In [None]:
def compute_st_loss(s, y_pred):
    """Mean, over the columns of ``y_pred``, of the gap in positive rates between groups.

    For each column the share of entries ``>= 0.5`` is computed separately for
    rows with ``s == 1`` and rows with ``s == 0``; the absolute differences are
    averaged over columns. An empty group gives a 0/0 rate (NaN).
    """
    group = s.squeeze()
    in_neg, in_pos = group == 0, group == 1
    n_cols = y_pred.shape[1]

    gap_sum = 0.0
    for col in range(n_cols):
        pos_scores = y_pred[in_pos, col]
        neg_scores = y_pred[in_neg, col]
        pos_rate = torch.sum(pos_scores >= 0.5) / len(pos_scores)
        neg_rate = torch.sum(neg_scores >= 0.5) / len(neg_scores)
        gap_sum += torch.abs(pos_rate - neg_rate)

    return gap_sum/n_cols

def compute_post_long_cond_probs(s, Xs, Ys, clf):
    """Per-group ratios of class probabilities from two logistic regressions.

    For every consecutive pair ``(Xs[i], Xs[i+1])`` and for each group
    (``s == 1`` stored under ``pos``, ``s == 0`` under ``neg``) two
    ``LogisticRegression`` models are fit on ``Ys[i]``: one on
    ``[s, Xs[i], Xs[i+1]]`` and one on ``[s, Xs[i]]``. The stored values are the
    element-wise ratios of their predicted class probabilities.

    ``clf`` is accepted but not used.

    Returns:
        dict with keys ``pos(y{i+1}=0)``, ``pos(y{i+1}=1)``, ``neg(y{i+1}=0)``
        and ``neg(y{i+1}=1)`` for each ``i``.
    """
    def _class_ratios(mask, i):
        # Fit on both steps vs. only the current step, for one group.
        labels = Ys[i][mask]
        both_steps = np.c_[s[mask], Xs[i][mask], Xs[i+1][mask]]
        current_only = np.c_[s[mask], Xs[i][mask]]
        model_up = LogisticRegression(random_state=2021).fit(both_steps, labels)
        model_dn = LogisticRegression(random_state=2021).fit(current_only, labels)
        p_up = model_up.predict_proba(both_steps)
        p_dn = model_dn.predict_proba(current_only)
        return p_up[:, 0] / p_dn[:, 0], p_up[:, 1] / p_dn[:, 1]

    probs = {}
    for i in range(len(Xs)-1):
        ratio0, ratio1 = _class_ratios(s == 1, i)
        probs[f'pos(y{i+1}=0)'] = ratio0
        probs[f'pos(y{i+1}=1)'] = ratio1

        ratio0, ratio1 = _class_ratios(s == 0, i)
        probs[f'neg(y{i+1}=0)'] = ratio0
        probs[f'neg(y{i+1}=1)'] = ratio1
    return probs

def compute_lt_loss(s, y_pred, y0):
    """Gap in final-step positive rates between groups, among rows with ``y0 == 0``.

    Uses only the last column of ``y_pred``: the share of entries ``>= 0.5`` is
    computed for rows with ``s == 1`` and for rows with ``s == 0`` (both
    restricted to ``y0 == 0``) and the absolute difference is returned.
    """
    group = s.squeeze()
    started_at_zero = y0.squeeze() == 0

    final_pos = y_pred[(group == 1) & started_at_zero, -1]
    final_neg = y_pred[(group == 0) & started_at_zero, -1]

    rate_neg = torch.sum(final_neg >= 0.5) / len(final_neg)
    rate_pos = torch.sum(final_pos >= 0.5) / len(final_pos)

    return torch.abs(rate_pos - rate_neg)

In [None]:
# Write the first two time steps as pairs to 'taiwan.csv', then build the data
# module and keep its full table as `original_pairs`.
# `s`, `x` and `y` come from an earlier cell. vaca_dataset only uses len(s);
# presumably it reads the CSV written on the previous line - TODO confirm.
p_to_csv(s, x[:, 0:2, :], y[:, 0:2, :], 'taiwan')
data_module = vaca_dataset(batch_size, s, device, dataset='taiwan')
original_pairs = data_module.total_dataloader().dataset.X

In [None]:
class TemporalAttention(nn.Module):
    """Additive attention over the time axis of a sequence of hidden states."""

    def __init__(self, hidden_size):
        super().__init__()
        # One scalar score per timestep.
        self.attention_weights = nn.Linear(hidden_size, 1)

    def forward(self, lstm_out):
        """Pool ``lstm_out`` over time with softmax-normalised scores.

        Args:
            lstm_out: tensor of shape (batch_size, seq_len, hidden_size).

        Returns:
            context: (batch_size, hidden_size), the score-weighted sum over time.
            scores: (batch_size, seq_len), attention weights summing to 1 per row.
        """
        raw_scores = self.attention_weights(lstm_out).squeeze(-1)
        weights = torch.softmax(raw_scores, dim=1)
        weighted_states = lstm_out * weights.unsqueeze(-1)
        return weighted_states.sum(dim=1), weights

In [None]:
class effortLSTM(nn.Module):
    """Autoregressive effort model: LSTM plus temporal attention.

    At each of the ``seq_len - 1`` steps the LSTM sees the (constant) input row
    concatenated with the previous step's efforts; attention over all hidden
    states produced so far yields the next non-negative effort vector.
    """

    def __init__(self, datamodule, loader, seq_len, device):
        """
        Args:
            datamodule: provides ``num_nodes`` and
                ``train_dataset.nodes_to_intervene``.
            loader: unused; kept for signature compatibility.
            seq_len: number of time steps; ``seq_len - 1`` efforts are produced.
            device: device the module is moved to.
        """
        super(effortLSTM, self).__init__()

        self.seq_len = seq_len
        self.num_interventions = len(datamodule.train_dataset.nodes_to_intervene)
        self.hidden_size = 128

        self.lstm = nn.LSTM(int((datamodule.num_nodes) + self.num_interventions), self.hidden_size, num_layers=2, batch_first=True)
        self.attention = TemporalAttention(self.hidden_size)
        # Learned "previous efforts" fed to the very first step.
        self.init_interventions = nn.Parameter(torch.zeros(1, self.num_interventions))

        self.fc = nn.Linear(self.hidden_size, self.num_interventions)

        self.to(device)

    def forward(self, x):
        """Return efforts of shape ``(batch, seq_len - 1, num_interventions)``.

        Args:
            x: tensor of shape ``(batch, 1, num_nodes)``.
        """
        batch_size = x.size(0)

        # Zero initial state, sized from the LSTM itself rather than a literal 2.
        h_0 = torch.zeros(self.lstm.num_layers, batch_size, self.hidden_size).to(x.device)
        c_0 = torch.zeros(self.lstm.num_layers, batch_size, self.hidden_size).to(x.device)

        efforts_seq = []
        efforts = self.init_interventions.repeat(batch_size, 1)

        all_hidden_states = []
        x_squeezed = x.squeeze(1)

        # Use the horizon this instance was built with, not the notebook-global
        # `seq_len`, which may have been rebound since construction.
        for _ in range(self.seq_len - 1):
            x_combined = torch.cat([x_squeezed, efforts], dim=-1)
            lstm_out, (h_0, c_0) = self.lstm(x_combined.unsqueeze(1), (h_0, c_0))
            all_hidden_states.append(lstm_out.squeeze(1))

            # Attend over every hidden state produced so far.
            hidden_tensor = torch.stack(all_hidden_states, dim=1)
            context, attention_scores = self.attention(hidden_tensor)

            # ReLU keeps efforts non-negative.
            efforts = torch.nn.functional.relu(self.fc(context))
            efforts_seq.append(efforts)

        efforts_seq = torch.stack(efforts_seq, dim=1)

        return efforts_seq
    
class effortLSTM_clf(nn.Module):
    """Variant of the LSTM effort model whose input uses a frozen classifier's output.

    The last column of the input row is replaced by ``clf``'s prediction before
    it is fed, together with the previous efforts, to the LSTM.
    """

    def __init__(self, datamodule, seq_len, clf, device):
        """
        Args:
            datamodule: provides ``num_nodes`` and
                ``train_dataset.nodes_to_intervene``.
            seq_len: number of time steps; ``seq_len - 1`` efforts are produced.
            clf: classifier called as ``clf(s, features)``; its parameters are
                frozen here (``requires_grad = False``).
            device: device the module is moved to.
        """
        super(effortLSTM_clf, self).__init__()

        self.seq_len = seq_len
        self.num_interventions = len(datamodule.train_dataset.nodes_to_intervene)
        self.hidden_size = 128
        self.clf = clf

        for param in self.clf.parameters():
            param.requires_grad = False

        self.lstm = nn.LSTM(int((datamodule.num_nodes) + self.num_interventions), self.hidden_size, num_layers=2, batch_first=True)
        self.attention = TemporalAttention(self.hidden_size)

        self.fc = nn.Linear(self.hidden_size, self.num_interventions)
        # Learned "previous efforts" fed to the very first step.
        self.init_interventions = nn.Parameter(torch.zeros(1, self.num_interventions))

        self.to(device)

    def forward(self, x):
        """Return efforts of shape ``(batch, seq_len - 1, num_interventions)``.

        Args:
            x: tensor of shape ``(batch, 1, num_nodes)``; column 0 is passed to
                ``clf`` as ``s`` and the three columns before the last one as
                its features.
        """
        batch_size = x.size(0)
        s0 = x[:, 0, 0].view(batch_size, 1)

        # The classifier input never changes inside the loop, so it is evaluated
        # once. The slice -4:-1 hard-codes three feature columns.
        clf_outputs = self.clf(s0, x[:, 0, -4:-1])
        x_with_y = torch.cat([x[:, 0, :-1], clf_outputs.view(batch_size, 1)], dim=1)

        h_0 = torch.zeros(self.lstm.num_layers, batch_size, self.hidden_size).to(x.device)
        c_0 = torch.zeros(self.lstm.num_layers, batch_size, self.hidden_size).to(x.device)

        efforts = self.init_interventions.repeat(batch_size, 1)

        all_hidden_states = []
        efforts_seq = []

        # Use the horizon this instance was built with, not the notebook-global `seq_len`.
        for _ in range(self.seq_len - 1):
            step_input = torch.cat([x_with_y, efforts], dim=1).unsqueeze(1)
            lstm_out, (h_0, c_0) = self.lstm(step_input, (h_0, c_0))

            all_hidden_states.append(lstm_out.squeeze(1))
            # Attend over every hidden state produced so far.
            hidden_tensor = torch.stack(all_hidden_states, dim=1)
            context, attention_scores = self.attention(hidden_tensor)

            # ReLU keeps efforts non-negative.
            efforts = torch.nn.functional.relu(self.fc(context))
            efforts_seq.append(efforts)

        efforts_seq = torch.stack(efforts_seq, dim=1)

        return efforts_seq

In [None]:
def save_checkpoint(epoch, model, optimizer, best_loss, filepath):
    """Serialise the training state to ``filepath`` with ``torch.save``.

    The stored dict holds the epoch number, the model and optimizer state
    dicts, and the best loss seen so far.
    """
    state = {'epoch': epoch, 'best_loss': best_loss}
    state['model_state_dict'] = model.state_dict()
    state['optimizer_state_dict'] = optimizer.state_dict()
    torch.save(state, filepath)
    print(f"Checkpoint saved at epoch {epoch}")

def load_checkpoint(filepath, model, optimizer, map_location=None):
    """Restore model and optimizer state from a checkpoint written by ``save_checkpoint``.

    Args:
        filepath: path of the checkpoint file.
        model: module whose ``load_state_dict`` receives ``'model_state_dict'``.
        optimizer: optimizer whose ``load_state_dict`` receives
            ``'optimizer_state_dict'``.
        map_location: forwarded to ``torch.load``. When omitted, tensors are
            mapped to the device of the model's first parameter, so a checkpoint
            saved on GPU can be restored on a CPU-only host (consistent with the
            other ``torch.load`` calls in this notebook).

    Returns:
        ``(epoch, best_loss)`` as stored in the checkpoint.
    """
    if map_location is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            map_location = first_param.device
    checkpoint = torch.load(filepath, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    best_loss = checkpoint['best_loss']
    epoch = checkpoint['epoch']
    print(f"Checkpoint loaded - Resuming from epoch {epoch}, Best Loss: {best_loss}")
    return epoch, best_loss

In [None]:
import yaml

def load_config(config_path="weights.yaml"):
    """Parse the YAML file at ``config_path`` with ``yaml.safe_load`` and return the result."""
    with open(config_path, "r") as stream:
        parsed = yaml.safe_load(stream)
    return parsed

def get_loss_weights(model, dataset, budget, config):
    """Look up ``config["weights"][model][dataset][budget]``.

    Raises:
        ValueError: if any of the nested keys is missing.
    """
    try:
        per_model = config["weights"][model]
        per_dataset = per_model[dataset]
        weights = per_dataset[budget]
    except KeyError:
        raise ValueError(f"Invalid combination: model={model}, dataset={dataset}, budget={budget}")
    return weights

# Load the loss-weight configuration from the default path ("weights.yaml").
config = load_config()


## Training

In [None]:
# Dump `t0` (defined in an earlier cell; five columns) to 'taiwan.csv', build the
# data module and keep its full table as `original_pairs`.
# NOTE(review): vaca_dataset only uses len(t0); presumably the data module reads
# the CSV written here - confirm.
df = pd.DataFrame(t0)
df = df.set_axis(['SEX', 'LIMIT_BAL', 'PAY_AMT1', 'PAY_AMT2', 'Y'], axis=1)
df.to_csv('taiwan.csv')
data_module = vaca_dataset(1, t0, device, 'taiwan')
original_pairs = data_module.total_dataloader().dataset.X

In [None]:
import time
# ---------------------------------------------------------------------------
# Train one effortLSTM per budget level.
#
# 'taiwan.csv' is rewritten from `t0` before every vaca_dataset() call because
# intervene() overwrites that file through p_to_csv() at every rollout step.
# ---------------------------------------------------------------------------

# Column layout of the flattened (step 1, step 2) pairs table.
pair_columns = ['SEX', 'LIMIT_BAL_1', 'PAY_AMT1_1', 'PAY_AMT2_1', 'Y_1', 'LIMIT_BAL_2', 'PAY_AMT1_2', 'PAY_AMT2_2', 'Y_2']

t0 = original_pairs
df = pd.DataFrame(t0)
df = df.set_axis(pair_columns, axis=1)
df.to_csv('taiwan.csv')
data_module = vaca_dataset(1, t0, device, 'taiwan')
loader = data_module.train_dataloader()
data_module.batch_size = 1
loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
loader = data_module.train_dataloader()
# These objects are re-created for every budget below; they are still built here
# so the random-number stream (and hence initialisation) matches earlier runs.
loss_fn = torch.nn.MSELoss()
net = effortLSTM(data_module, loader, seq_len, device)
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)

# Sensitive attribute (column 0) and initial outcome (column x_dim + 1) of the training rows.
s = tensor(loader.dataset.X[:,0]).to(device)
y0 = tensor(loader.dataset.X[:,x_dim+1]).to(device)

b_list = [1, 2, 3, 4]
config = load_config()
# `_b` (not `b`) so the notebook-level setting `b` is not overwritten.
for _b in b_list:
    budget = _b*seq_len
    df = pd.DataFrame(t0)
    df = df.set_axis(pair_columns, axis=1)
    df.to_csv('taiwan.csv')
    # Normalise the configured weights so they sum to 1.
    loss_weights = get_loss_weights('LSTM', 'taiwan', budget, config)
    loss_weights = [x/sum(loss_weights) for x in loss_weights]
    imp_w, lt_w, st_w = loss_weights
    data_module = vaca_dataset(1, t0, device, 'taiwan')
    loader = data_module.train_dataloader()
    data_module.batch_size = 1
    loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
    loss_fn = torch.nn.MSELoss()
    net = effortLSTM(data_module, loader, seq_len, device)
    optimizer = torch.optim.Adam(net.parameters(), lr=0.02)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)

    efforts_path = log_dir / (f"LSTM_effort_model" + dataset + "_budget_" + str(budget) + "_eps_" + str(eps) + "_b_" + str(bet) + ".pth")
    checkpoint_path = log_dir / (f"LSTM_effort_model" + dataset + "_budget_" + str(budget) + "_eps_" + str(eps) + "_b_" + str(bet) + "_checkpoint.pth")
    best_loss = float('inf')
    patience = 0
    last_epoch=0
    # Epoch of the last saved model. Reset per budget so the messages below never
    # hit an undefined name or report the previous budget's value.
    e = None

    hist_loss = []
    for epoch in range(last_epoch, 100):
        epoch_start = time.time()
        net.train()
        efforts = net(tensor(loader.dataset.X).view(len(loader.dataset), 1, int(x_dim*2)+3).to(device))
        optimizer.zero_grad()
        # NOTE(review): from the second epoch on `data_module` is the one rebuilt in
        # the validation block below, while `loader` is still the training loader.
        y_hat, x_hat, data = intervene(data_module, loader, model_vaca, generator, clf, efforts.view(len(loader), seq_len-1, 2), seq_len, dataset ,device)
        improvement_loss = loss_fn(y_hat, torch.ones_like(y_hat))
        lt_fairness = compute_distance_loss(data)
        st_fairness = compute_st_loss(s, y_hat)
        # Budget penalty (target 95% of the budget) + weighted objectives + a hinge
        # on the final-step short-term gap above 0.05.
        t_loss = 10000*budget_penalty(efforts, budget*0.95) + imp_w*improvement_loss + lt_w*lt_fairness + st_w*st_fairness + 20 * torch.relu(compute_st_loss(s, y_hat[:, -1].unsqueeze(1)) - 0.05)

        t_loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=1.0)
        optimizer.step()

        net.eval()
        with torch.no_grad():
            val_loss = 0.0
            df = pd.DataFrame(t0)
            df = df.set_axis(pair_columns, axis=1)
            df.to_csv('taiwan.csv')
            data_module = vaca_dataset(1, t0, device, 'taiwan')
            val_loader = data_module.val_dataloader()
            data_module.batch_size = 1
            val_s = tensor(val_loader.dataset.X[:,0]).view(len(val_loader.dataset), 1).to(device)
            val_loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
            val_efforts = net(tensor(val_loader.dataset.X).view(len(val_loader.dataset), 1, int(x_dim*2)+3).to(device))
            val_y, val_x, val_data = intervene(data_module, val_loader, model_vaca, generator, clf, val_efforts, seq_len, 'taiwan', device)
            val_loss = 10000*budget_penalty(val_efforts, budget*0.95) + imp_w*loss_fn(val_y, torch.ones_like(val_y)) + lt_w*compute_distance_loss(val_data) + st_w*compute_st_loss(val_s, val_y) + 20 * torch.relu(compute_st_loss(val_s, val_y[:, -1].unsqueeze(1)) - 0.05)

        scheduler.step(val_loss)

        df = pd.DataFrame(t0)
        df = df.set_axis(pair_columns, axis=1)
        df.to_csv('taiwan.csv')
        epoch_end = time.time()
        hist_loss.append(t_loss.item())
        # Epoch 0 is never saved; a model is kept only on validation improvement.
        if (val_loss < best_loss and epoch > 0):
            best_loss = val_loss
            patience = 0
            torch.save(net.state_dict(), efforts_path)
            save_checkpoint(epoch, net, optimizer, best_loss, checkpoint_path)
            e = epoch
        else:
            patience += 1
            # Stop only once the training efforts (almost) satisfy the budget.
            if (patience > 10 and 10000*budget_penalty(efforts, budget*.95) < 0.01):
                print('Early stopping')
                print(f' Last save: {e}')
                break

        if epoch % 10 == 0:
            print(f'Epoch: {epoch: 4.0f} | Total Loss: {t_loss: 8.4f} | Budget Loss: {10000*budget_penalty(efforts, budget*.95) : 3.2f} | Long Fairness Loss: {lt_fairness: 3.4f} | Improvement Loss: {improvement_loss: 3.4f} | Time: {epoch_end - epoch_start: 3.1f} | Patience: {patience}')

    print('Last saved epoch: ', e)


In [None]:
import time
# ---------------------------------------------------------------------------
# Train one effortNN per budget level.
#
# 'taiwan.csv' is rewritten from `t0` before every vaca_dataset() call because
# intervene() overwrites that file through p_to_csv() at every rollout step.
# ---------------------------------------------------------------------------

# Column layout of the flattened (step 1, step 2) pairs table.
pair_columns = ['SEX', 'LIMIT_BAL_1', 'PAY_AMT1_1', 'PAY_AMT2_1', 'Y_1', 'LIMIT_BAL_2', 'PAY_AMT1_2', 'PAY_AMT2_2', 'Y_2']

t0 = original_pairs
df = pd.DataFrame(t0)
df = df.set_axis(pair_columns, axis=1)
df.to_csv('taiwan.csv')
data_module = vaca_dataset(1, t0, device, 'taiwan')
loader = data_module.train_dataloader()
data_module.batch_size = 1
loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
loader = data_module.train_dataloader()
# These objects are re-created for every budget below; they are still built here
# so the random-number stream (and hence initialisation) matches earlier runs.
loss_fn = torch.nn.MSELoss()
net = effortNN(data_module, loader, seq_len, device)
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)

# Sensitive attribute (column 0) and initial outcome (column x_dim + 1) of the training rows.
s = tensor(loader.dataset.X[:,0]).to(device)
y0 = tensor(loader.dataset.X[:,x_dim+1]).to(device)

b_list = [1, 2, 3, 4]
config = load_config()
# `_b` (not `b`) so the notebook-level setting `b` is not overwritten.
for _b in b_list:
    budget = _b*seq_len
    df = pd.DataFrame(t0)
    df = df.set_axis(pair_columns, axis=1)
    df.to_csv('taiwan.csv')
    # NOTE(review): the NN model reads the 'LSTM' weight entry; confirm whether the
    # config is meant to provide a separate entry for this model.
    loss_weights = get_loss_weights('LSTM', 'taiwan', budget, config)
    # Normalise the configured weights so they sum to 1.
    loss_weights = [x/sum(loss_weights) for x in loss_weights]
    imp_w, lt_w, st_w = loss_weights
    data_module = vaca_dataset(1, t0, device, 'taiwan')
    loader = data_module.train_dataloader()
    data_module.batch_size = 1
    loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
    loss_fn = torch.nn.MSELoss()
    net = effortNN(data_module, loader, seq_len, device)
    optimizer = torch.optim.Adam(net.parameters(), lr=0.02)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)

    efforts_path = log_dir / (f"NN_effort_model" + dataset + "_budget_" + str(budget) + "_eps_" + str(eps) + "_b_" + str(bet) + ".pth")
    checkpoint_path = log_dir / (f"NN_effort_model" + dataset + "_budget_" + str(budget) + "_eps_" + str(eps) + "_b_" + str(bet) + "_checkpoint.pth")
    best_loss = float('inf')
    patience = 0
    last_epoch=0
    # Epoch of the last saved model. Reset per budget so the messages below never
    # hit an undefined name or report the previous budget's value.
    e = None

    hist_loss = []
    for epoch in range(last_epoch, 100):
        epoch_start = time.time()
        net.train()
        efforts = net(tensor(loader.dataset.X).view(len(loader.dataset), 1, int(x_dim*2)+3).to(device))
        optimizer.zero_grad()
        # NOTE(review): from the second epoch on `data_module` is the one rebuilt in
        # the validation block below, while `loader` is still the training loader.
        y_hat, x_hat, data = intervene(data_module, loader, model_vaca, generator, clf, efforts.view(len(loader), seq_len-1, 2), seq_len, dataset ,device)
        improvement_loss = loss_fn(y_hat, torch.ones_like(y_hat))
        lt_fairness = compute_distance_loss(data)
        st_fairness = compute_st_loss(s, y_hat)
        # Budget penalty (target 95% of the budget) + weighted objectives + a hinge
        # on the final-step short-term gap above 0.05.
        t_loss = 10000*budget_penalty(efforts, budget*0.95) + imp_w*improvement_loss + lt_w*lt_fairness + st_w*st_fairness + 20 * torch.relu(compute_st_loss(s, y_hat[:, -1].unsqueeze(1)) - 0.05)

        t_loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=1.0)
        optimizer.step()

        net.eval()
        with torch.no_grad():
            val_loss = 0.0
            df = pd.DataFrame(t0)
            df = df.set_axis(pair_columns, axis=1)
            df.to_csv('taiwan.csv')
            data_module = vaca_dataset(1, t0, device, 'taiwan')
            val_loader = data_module.val_dataloader()
            data_module.batch_size = 1
            val_s = tensor(val_loader.dataset.X[:,0]).view(len(val_loader.dataset), 1).to(device)
            val_loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
            val_efforts = net(tensor(val_loader.dataset.X).view(len(val_loader.dataset), 1, int(x_dim*2)+3).to(device))
            val_y, val_x, val_data = intervene(data_module, val_loader, model_vaca, generator, clf, val_efforts, seq_len, 'taiwan', device)
            val_loss = 10000*budget_penalty(val_efforts, budget*0.95) + imp_w*loss_fn(val_y, torch.ones_like(val_y)) + lt_w*compute_distance_loss(val_data) + st_w*compute_st_loss(val_s, val_y) + 20 * torch.relu(compute_st_loss(val_s, val_y[:, -1].unsqueeze(1)) - 0.05)

        scheduler.step(val_loss)

        df = pd.DataFrame(t0)
        df = df.set_axis(pair_columns, axis=1)
        df.to_csv('taiwan.csv')
        epoch_end = time.time()
        hist_loss.append(t_loss.item())
        # Epoch 0 is never saved; a model is kept only on validation improvement.
        if (val_loss < best_loss and epoch > 0):
            best_loss = val_loss
            patience = 0
            torch.save(net.state_dict(), efforts_path)
            save_checkpoint(epoch, net, optimizer, best_loss, checkpoint_path)
            e = epoch
        else:
            patience += 1
            # Stop only once the training efforts (almost) satisfy the budget.
            if (patience > 10 and 10000*budget_penalty(efforts, budget*.95) < 0.01):
                print('Early stopping')
                print(f' Last save: {e}')
                break

        if epoch % 10 == 0:
            print(f'Epoch: {epoch: 4.0f} | Total Loss: {t_loss: 8.4f} | Budget Loss: {10000*budget_penalty(efforts, budget*.95) : 3.2f} | Long Fairness Loss: {lt_fairness: 3.4f} | Improvement Loss: {improvement_loss: 3.4f} | Time: {epoch_end - epoch_start: 3.1f} | Patience: {patience}')

    print('Last saved epoch: ', e)


## Testing

In [None]:
# ---------------------------------------------------------------------------
# Evaluate the trained effortLSTM models: for each budget and seed, compare the
# un-intervened generator rollout with the rollout produced by intervene().
# ---------------------------------------------------------------------------
seeds = list(range(2031, 2061))
n = 200
dataset = 'taiwan'

# Result tables, one row per seed and one column per time step. They start as NaN
# (not np.empty) because column 0 of the two model tables is never written below,
# and uninitialised memory would otherwise be saved to disk.
og_fairness = np.full((len(seeds), seq_len), np.nan)
model_fairness_lstm = np.full((len(seeds), seq_len), np.nan)
short_term_og = np.full((len(seeds), seq_len), np.nan)
short_term_fairness = np.full((len(seeds), seq_len), np.nan)

loader = data_module.total_dataloader()
b_list = [1, 2, 3, 4]
for _b in b_list:
    budget = _b*seq_len
    net = effortLSTM(data_module, loader, seq_len, device)
    efforts_lstm = torch.empty(len(seeds), n, seq_len-1, len(loader.dataset.nodes_to_intervene))
    efforts_path = log_dir / (f"LSTM_effort_model" + dataset + "_budget_" + str(budget) + "_eps_" + str(eps) + "_b_" + str(bet) + ".pth")
    net.load_state_dict(torch.load(efforts_path, map_location=device))
    # Row index comes from enumerate instead of the hard-coded `seed - 2031`.
    for seed_idx, seed in enumerate(seeds):
        t1 = time.time()
        torch.manual_seed(seed)
        np.random.seed(seed)
        s0, x0, y0, unfair_clf = sample_taiwan(file_path, int(n/2), seq_len, seed=seed)
        gan_noise = torch.randn(len(s0), seq_len-1, x_dim).to(device)
        x0 = torch.tensor(x0, dtype=torch.float32).to(device)
        s0 = torch.tensor(s0, dtype=torch.float32).view(len(s0),1).to(device)
        x_gan, _, y_gan = generator(x0, gan_noise, s0, unfair_clf.to(device))
        # Write the first two generated steps as the pairs table used by vaca_dataset below.
        p_to_csv(s0.reshape(n,1), x_gan[:, 0:2, :], y_gan[:, 0:2, :], 'taiwan')

        # Baseline metrics: generator rollout without any intervention.
        for i in range(seq_len):
            gan_output = torch.cat([s0, x_gan[:, i, :], y_gan[:, i, :]], dim=1)
            short_loss = compute_st_loss(s0, y_gan[:, i, :])
            distance = compute_distance_loss(gan_output.to(device))
            short_term_og[seed_idx, i] = short_loss.item()
            og_fairness[seed_idx, i] = distance.item()

        data_module = vaca_dataset(1, y0, device, 'taiwan')
        loader = data_module.total_dataloader()
        loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
        efforts = net(tensor(loader.dataset.X).view(len(loader.dataset), 1, int(x_dim*2)+3).to(device))
        efforts_lstm[seed_idx] = efforts.view(len(loader), seq_len-1, 2).detach().cpu()
        y_hat, x_hat, data = intervene(data_module, loader, model_vaca, generator, clf, efforts, seq_len, dataset, device)

        # Metrics after intervention; step i of the rollout is stored in column i + 1.
        for i in range(seq_len-1):
            data_t = torch.cat([data[:, 0].view(len(s0),1).to(device), x_hat[:,i,:].to(device), y_hat[:,i].view(len(loader), 1).to(device)], dim=1)
            short_loss = compute_st_loss(data[:, 0].view(n,1).to(device), y_hat[:, i].view(n,1).to(device))
            distance = compute_distance_loss(data_t)
            model_fairness_lstm[seed_idx, i+1] = distance.item()
            short_term_fairness[seed_idx, i+1] = short_loss.item()

        t2 = time.time()
        print(f'Seed: {seed} | Time: {t2 - t1: 4.2f}')
    np.save('efforts_budget_lstm_' + dataset + '_' + str(budget) + '.npy', efforts_lstm)
    np.save('lt_fairness_budget_' + dataset + '_'+str(budget)+'.npy', model_fairness_lstm)
    np.save('st_fairness_budget_' + dataset + '_'+str(budget)+'.npy', short_term_fairness)
    

In [None]:
# ---------------------------------------------------------------------------
# Evaluate the trained effortNN models: for each budget and seed, compare the
# un-intervened generator rollout with the rollout produced by intervene().
# ---------------------------------------------------------------------------
seeds = list(range(2031, 2061))
n = 200
dataset = 'taiwan'

# Result tables, one row per seed and one column per time step. They start as NaN
# (not np.empty) because column 0 of the two model tables is never written below,
# and uninitialised memory would otherwise be saved to disk.
og_fairness = np.full((len(seeds), seq_len), np.nan)
model_fairness_lstm = np.full((len(seeds), seq_len), np.nan)
short_term_og = np.full((len(seeds), seq_len), np.nan)
short_term_fairness = np.full((len(seeds), seq_len), np.nan)

loader = data_module.total_dataloader()
net = effortNN(data_module, loader, seq_len, device)
# `_b` (not `b`) so the notebook-level setting `b` is not overwritten.
for _b in [1, 2, 3, 4]:
    budget = _b*seq_len
    efforts_lstm = torch.empty(len(seeds), n, seq_len-1, len(loader.dataset.nodes_to_intervene))
    efforts_path = log_dir / (f"NN_effort_model" + dataset + "_budget_" + str(budget) + "_eps_" + str(eps) + "_b_" + str(bet) + ".pth")
    net.load_state_dict(torch.load(efforts_path, map_location=device))
    # Row index comes from enumerate instead of the hard-coded `seed - 2031`.
    for seed_idx, seed in enumerate(seeds):
        t1 = time.time()
        torch.manual_seed(seed)
        np.random.seed(seed)
        s0, x0, y0, unfair_clf = sample_taiwan(file_path, int(n/2), seq_len, seed=seed)
        gan_noise = torch.randn(len(s0), seq_len-1, x_dim).to(device)
        x0 = torch.tensor(x0, dtype=torch.float32).to(device)
        s0 = torch.tensor(s0, dtype=torch.float32).view(len(s0),1).to(device)
        x_gan, _, y_gan = generator(x0, gan_noise, s0, unfair_clf.to(device))
        # Write the first two generated steps as the pairs table used by vaca_dataset below.
        p_to_csv(s0.reshape(n,1), x_gan[:, 0:2, :], y_gan[:, 0:2, :], 'taiwan')

        # Baseline metrics: generator rollout without any intervention.
        for i in range(seq_len):
            gan_output = torch.cat([s0, x_gan[:, i, :], y_gan[:, i, :]], dim=1)
            short_loss = compute_st_loss(s0, y_gan[:, i, :])
            distance = compute_distance_loss(gan_output.to(device))
            short_term_og[seed_idx, i] = short_loss.item()
            og_fairness[seed_idx, i] = distance.item()

        data_module = vaca_dataset(1, y0, device, 'taiwan')
        loader = data_module.total_dataloader()
        loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
        efforts = net(tensor(loader.dataset.X).view(len(loader.dataset), 1, int(x_dim*2)+3).to(device))
        efforts_lstm[seed_idx] = efforts.view(len(loader), seq_len-1, 2).detach().cpu()
        y_hat, x_hat, data = intervene(data_module, loader, model_vaca, generator, clf, efforts, seq_len, dataset, device)

        # Metrics after intervention; step i of the rollout is stored in column i + 1.
        for i in range(seq_len-1):
            data_t = torch.cat([data[:, 0].view(len(s0),1).to(device), x_hat[:,i,:].to(device), y_hat[:,i].view(len(loader), 1).to(device)], dim=1)
            short_loss = compute_st_loss(data[:, 0].view(n,1).to(device), y_hat[:, i].view(n,1).to(device))
            distance = compute_distance_loss(data_t)
            model_fairness_lstm[seed_idx, i+1] = distance.item()
            short_term_fairness[seed_idx, i+1] = short_loss.item()

        t2 = time.time()
        print(f'Seed: {seed} | Time: {t2 - t1: 4.2f}')

    np.save('efforts_budget_nn_' + dataset + '_' + str(budget) + '.npy', efforts_lstm)
    np.save('nn_lt_fairness_budget_' + dataset + '_'+str(budget)+'.npy', model_fairness_lstm)
    np.save('nn_st_fairness_budget_' + dataset + '_'+str(budget)+'.npy', short_term_fairness)
    

## Baseline WIP

In [None]:
# Baseline setup: sample one cohort, build the train/test loaders and the
# first model/optimizer pair that the EI training cell consumes.
# Names not defined in this cell (sample_taiwan, sequential_data, logReg,
# SimpleDataset, file_path, ...) come from this star import or earlier cells.
from baseline import *

s0, x0, y0, unfair_clf = sample_taiwan(file_path, int(n/2), seq_len, seed=100)
x, y, unfair_clf = sequential_data(
    s0, x0, y0, seq_len,
    l=epsilon, noise_factor=0.1, seed=100, ground_truth=False,
)
model = logReg(x_dim + 1)
model = model.to(device)
optim = torch.optim.Adam(params=model.parameters(), lr=0.01)

# Rows are [s | x | y]; the first (1 - test_size) fraction is the training split.
_full_data = torch.cat([tensor(s0.reshape(n, 1)), tensor(x0), tensor(y0).reshape(n,1)], dim=1)
_split_at = int(n*(1-test_size))
train_data = _full_data[0:_split_at, :]
test_data = _full_data[_split_at:, :]

# The last column is the label, everything before it is the feature matrix.
train_dataset, test_dataset = (
    SimpleDataset(_split[:, :-1], _split[:, -1])
    for _split in (train_data, test_data)
)
train_loader = DataLoader(train_dataset, batch_size=100, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)


In [None]:
# Train the EI baseline on the model/optimizer created in the setup cell.
# NOTE(review): [1, 2], 50 and 0.50 are passed positionally; presumably the
# improvable feature indices, the epoch count and a fairness weight —
# confirm against baseline.trainer_fb_fair.
ei_model = trainer_fb_fair(
    model, train_loader, [1, 2], optim, device, 50, 0.50,
)

In [None]:
# Train the BE baseline from a freshly initialised model and optimizer.
# NOTE(review): meaning of the positional [1, 2], 50, 0.5 arguments is not
# visible here — confirm against baseline.trainer_bounded_effort.
model = logReg(x_dim + 1)
model = model.to(device)
optim = torch.optim.Adam(params=model.parameters(), lr=0.01)
be_model = trainer_bounded_effort(
    model, train_loader, [1, 2], optim, device, 50, 0.5,
)

In [None]:
# Train the ILER baseline from a freshly initialised model and optimizer.
# NOTE(review): meaning of the positional [1, 2], 50, 0.9 arguments is not
# visible here — confirm against baseline.trainer_iler.
model = logReg(x_dim + 1)
model = model.to(device)
optim = torch.optim.Adam(params=model.parameters(), lr=0.01)
iler_model = trainer_iler(
    model, train_loader, [1, 2], optim, device, 50, 0.9,
)

In [None]:
# Train the ER baseline from a freshly initialised model and optimizer.
# NOTE(review): meaning of the positional [1, 2], 50, 0.9 arguments is not
# visible here — confirm against baseline.trainer_er.
model = logReg(x_dim + 1)
model = model.to(device)
optim = torch.optim.Adam(params=model.parameters(), lr=0.01)
er_model = trainer_er(
    model, train_loader, [1, 2], optim, device, 50, 0.9,
)

In [None]:
# Train the DP baseline from a freshly initialised model and optimizer.
# Unlike the other trainers, this one takes no feature-index list.
# NOTE(review): meaning of the positional 50, 0.50 arguments is not visible
# here — confirm against baseline.trainer_dp_fair.
model = logReg(x_dim + 1)
model = model.to(device)
optim = torch.optim.Adam(params=model.parameters(), lr=0.01)
dp_model = trainer_dp_fair(
    model, train_loader, optim, device, 50, 0.50,
)

In [None]:
# EI baseline rollout: for every per-step budget in b_list and every seed,
# roll the population forward seq_len steps with ei_model and record
# per-step metrics. Result rows are indexed by (seed - 2031).
seeds = list(range(2031, 2061))
# Overrides the notebook-level n for this cell and every cell run after it.
n = 200
EI_fairness = np.empty((len(seeds), seq_len))
# Grows across all budgets and seeds (one entry per step); never reset inside the loops.
efforts_baseline = []
# Scratch buffer of [s | x | y] rows per step; slot 0 along the time axis is
# never written below, so it keeps np.empty's uninitialised contents.
data = np.empty((n, seq_len, x_dim + 2))
loss_fn = torch.nn.MSELoss()
EI_accuracy = np.empty((len(seeds), seq_len))
EI_short_term = np.empty((len(seeds), seq_len))
b_list = [1, 2, 3, 4]
for _b in b_list:
    # `budget` is only used to name the output files at the end of this iteration.
    budget = _b* seq_len

    for seed in seeds:
        s0, x0, y0, unfair_clf = sample_taiwan(file_path, int(n/2), seq_len, seed=seed)
        # Rows are [s | x | y] for the initial population.
        test_data = torch.cat([tensor(s0.reshape(n, 1)), tensor(x0), tensor(y0).reshape(n,1)], dim=1)
        # This dataset/loader pair is replaced below before it is ever read.
        test_dataset = SimpleDataset(test_data[:, :-1].detach().cpu().numpy(), test_data[:, -1].detach().cpu().numpy())
        test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)
        torch.manual_seed(seed)
        z_mb = torch.randn(len(s0), 1, x_dim).to(device)
        x0 = torch.tensor(x0, dtype=torch.float32).to(device)
        s0 = torch.tensor(s0, dtype=torch.float32).view(len(s0),1).to(device)
        # Step-0 metrics are computed on the sampled data before any effort is applied.
        distance = compute_distance_loss(test_data.to(device))
        EI_fairness[seed-2031, 0] = distance.item()    
        _y = ei_model.predict(s0, x0)
        gt_y = clf.predict(s0, x0)
        gt_y = tensor(gt_y).to(device)
        # "Accuracy" here is the MSE between ei_model's and clf's predictions.
        EI_accuracy[seed-2031, 0] = loss_fn(_y, gt_y.to(device)).item()
        EI_short_term[seed-2031, 0] = compute_st_loss(s0, gt_y).item()
            
        x_gan, _, y_gan = generator(x0, z_mb, s0, ei_model.to(device))
        # NOTE(review): p_to_csv presumably writes the first two generated steps to
        # a file that vaca_dataset reads back — confirm; the call order matters if so.
        p_to_csv(s0.reshape(n,1), x_gan[:, 0:2, :], y_gan[:, 0:2, :], 'taiwan')
        datamodule = vaca_dataset(1, y0, device, 'taiwan')
        loader = datamodule.total_dataloader()
        loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
        # The last generated step becomes the population the effort search runs on.
        test_dataset = SimpleDataset(torch.cat([s0, x_gan[:, -1, :]], dim=1).detach().cpu().numpy(), y_gan[:, -1, :].detach().cpu().numpy())
        test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)
        new_budget = tensor([_b])
        for i in range(1, seq_len):
            _, e, _data = test_fb_fair(ei_model, test_loader, [1,2], device, delta_effort=new_budget.item())
            # Efforts are written only to rows whose current label is < 1.
            effort_indices = ((test_loader.dataset.labels < 1).squeeze()).nonzero()
            efforts = torch.zeros([len(test_loader.dataset), int((x_dim)*2+3)]).to(device)
            efforts[effort_indices[0], x_dim+2:x_dim+4] = e[:, [1, 2]].to(device)
            x_gan_input = intervention_step(loader, model_vaca, datamodule.test_dataset.nodes_to_intervene, efforts, datamodule.test_dataset.nodes_list)
            x_gan_input = x_gan_input.to(device)
            _s = torch.tensor(test_loader.dataset.features[:, 0]).to(device)
            _y = ei_model.predict(_s.unsqueeze(1), x_gan_input.squeeze())
            gt_y = clf.predict(_s.unsqueeze(1), x_gan_input.squeeze())
            gt_y = tensor(gt_y).to(device)
            
            distance = compute_distance_loss(torch.cat([_s.unsqueeze(1), x_gan_input.squeeze(), gt_y], dim=1).to(device))
            EI_fairness[seed-2031, i] = distance.item()
            # Next step's budget = base budget plus whatever was left unspent
            # (base minus the mean L1 norm of the efforts just returned).
            new_budget = _b+(_b-torch.mean(torch.sum(torch.abs(e), dim=1)))
            data[:, i, 1:-1] = x_gan_input.squeeze().detach().cpu().numpy()
            data[:, i, 0] = _s.detach().cpu().numpy()
            data[:, i, -1] = gt_y.squeeze().detach().cpu().numpy()
            acc = loss_fn(_y, gt_y.to(device))
            EI_accuracy[seed-2031, i] = acc.item()
            EI_short_term[seed-2031, i] = compute_st_loss(_s, gt_y).item()
            # Regenerate from the post-intervention features and rebuild the
            # CSV/VACA loader and the effort-search loader for the next step.
            z_mb = torch.randn(len(_s), 1, x_dim).to(device)
            x_gan, _, y_gan = generator(x_gan_input.squeeze().to(device), z_mb, _s.unsqueeze(1), ei_model.to(device))
            p_to_csv(_s.reshape(n,1), x_gan[:, 0:2, :], y_gan[:, 0:2, :], 'taiwan')
            datamodule = vaca_dataset(1, y0, device, 'taiwan')
            loader = datamodule.total_dataloader()
            loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
            test_dataset = SimpleDataset(torch.cat([s0, x_gan[:, -1, :]], dim=1).detach().cpu().numpy(), y_gan[:, -1, :].detach().cpu().numpy())
            test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)
            efforts_baseline.append(e[:, [1, 2]])
    # Saved once per budget; the arrays are reused (overwritten) for the next budget.
    # EI_accuracy is computed but not written to disk.
    np.save('ei_lt_fairness_budget_' + dataset + '_budget_' + str(budget) + '.npy', EI_fairness)
    np.save('ei_st_fairness_budget_' + dataset + '_budget_' + str(budget) + '.npy', EI_short_term)


In [None]:
# ER baseline rollout: for every per-step budget in b_list and every seed,
# roll the population forward seq_len steps with er_model and record
# per-step metrics. Result rows are indexed by (seed - 2031).
seeds = list(range(2031, 2061))
# Overrides the notebook-level n for this cell and every cell run after it.
n = 200
ER_fairness = np.empty((len(seeds), seq_len))
efforts_baseline = []
# Scratch buffer of [s | x | y] rows per step; time slot 0 is never written.
data = np.empty((n, seq_len, x_dim + 2))
loss_fn = torch.nn.MSELoss()
ER_accuracy = np.empty((len(seeds), seq_len))
ER_short_term = np.empty((len(seeds), seq_len))
b_list = [1, 2, 3, 4]
for _b in b_list:

    # `budget` is only used to name the output files for this iteration.
    budget = _b* seq_len

    for seed in seeds:
        s0, x0, y0, unfair_clf = sample_taiwan(file_path, int(n/2), seq_len, seed=seed)
        # Rows are [s | x | y] for the initial population.
        test_data = torch.cat([tensor(s0.reshape(n, 1)), tensor(x0), tensor(y0).reshape(n,1)], dim=1)
        test_dataset = SimpleDataset(test_data[:, :-1].detach().cpu().numpy(), test_data[:, -1].detach().cpu().numpy())
        test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)
        torch.manual_seed(seed)
        z_mb = torch.randn(len(s0), 1, x_dim).to(device)
        x0 = torch.tensor(x0, dtype=torch.float32).to(device)
        s0 = torch.tensor(s0, dtype=torch.float32).view(len(s0),1).to(device)
        # Step-0 metrics are computed on the sampled data before any effort is applied.
        distance = compute_distance_loss(test_data.to(device))
        ER_fairness[seed-2031, 0] = distance.item()
        # Fixed: step 0 previously scored and rolled out with ei_model (copy-paste
        # from the EI cell) while every later step in this cell uses er_model.
        _y = er_model.predict(s0, x0)
        gt_y = clf.predict(s0, x0)
        gt_y = tensor(gt_y).to(device)
        # "Accuracy" here is the MSE between er_model's and clf's predictions.
        ER_accuracy[seed-2031, 0] = loss_fn(_y, gt_y.to(device)).item()
        ER_short_term[seed-2031, 0] = compute_st_loss(s0, gt_y).item()

        x_gan, _, y_gan = generator(x0, z_mb, s0, er_model.to(device))
        # NOTE(review): p_to_csv presumably writes the first two generated steps to
        # a file that vaca_dataset reads back — confirm; the call order matters if so.
        p_to_csv(s0.reshape(n,1), x_gan[:, 0:2, :], y_gan[:, 0:2, :], 'taiwan')
        datamodule = vaca_dataset(1, y0, device, 'taiwan')
        loader = datamodule.total_dataloader()
        loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
        # The last generated step becomes the population the effort search runs on.
        test_dataset = SimpleDataset(torch.cat([s0, x_gan[:, -1, :]], dim=1).detach().cpu().numpy(), y_gan[:, -1, :].detach().cpu().numpy())
        test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)
        new_budget = tensor([_b])
        for i in range(1, seq_len):
            _, e, _data, _ = test_er(er_model, test_loader, [1,2], device, delta_effort=new_budget.item())
            # Efforts are written only to rows whose current label is < 1.
            effort_indices = ((test_loader.dataset.labels < 1).squeeze()).nonzero()
            efforts = torch.zeros([len(test_loader.dataset), int((x_dim)*2+3)]).to(device)
            efforts[effort_indices[0], x_dim+2:x_dim+4] = e[:, [1, 2]].to(device)
            x_gan_input = intervention_step(loader, model_vaca, datamodule.test_dataset.nodes_to_intervene, efforts, datamodule.test_dataset.nodes_list)
            x_gan_input = x_gan_input.to(device)
            _s = torch.tensor(test_loader.dataset.features[:, 0]).to(device)
            _y = er_model.predict(_s.unsqueeze(1), x_gan_input.squeeze())
            gt_y = clf.predict(_s.unsqueeze(1), x_gan_input.squeeze())
            gt_y = tensor(gt_y).to(device)

            distance = compute_distance_loss(torch.cat([_s.unsqueeze(1), x_gan_input.squeeze(), gt_y], dim=1).to(device))
            ER_fairness[seed-2031, i] = distance.item()
            # Next step's budget = base budget plus whatever was left unspent
            # (base minus the mean L1 norm of the efforts just returned).
            new_budget = _b+(_b-torch.mean(torch.sum(torch.abs(e), dim=1)))
            data[:, i, 1:-1] = x_gan_input.squeeze().detach().cpu().numpy()
            data[:, i, 0] = _s.detach().cpu().numpy()
            data[:, i, -1] = gt_y.squeeze().detach().cpu().numpy()
            acc = loss_fn(_y, gt_y.to(device))
            ER_accuracy[seed-2031, i] = acc.item()
            ER_short_term[seed-2031, i] = compute_st_loss(_s, gt_y).item()
            # Regenerate from the post-intervention features and rebuild the
            # CSV/VACA loader and the effort-search loader for the next step.
            z_mb = torch.randn(len(_s), 1, x_dim).to(device)
            x_gan, _, y_gan = generator(x_gan_input.squeeze().to(device), z_mb, _s.unsqueeze(1), er_model.to(device))
            p_to_csv(_s.reshape(n,1), x_gan[:, 0:2, :], y_gan[:, 0:2, :], 'taiwan')
            datamodule = vaca_dataset(1, y0, device, 'taiwan')
            loader = datamodule.total_dataloader()
            loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
            test_dataset = SimpleDataset(torch.cat([s0, x_gan[:, -1, :]], dim=1).detach().cpu().numpy(), y_gan[:, -1, :].detach().cpu().numpy())
            test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)

    # Saved once per budget; the arrays are reused (overwritten) for the next budget.
    # ER_accuracy is computed but not written to disk.
    np.save('er_lt_fairness_budget_' + dataset + '_budget_' + str(budget) + '.npy', ER_fairness)
    np.save('er_st_fairness_budget_' + dataset + '_budget_' + str(budget) + '.npy', ER_short_term)


In [None]:
# ILER baseline rollout: for every per-step budget in b_list and every seed,
# roll the population forward seq_len steps with iler_model and record
# per-step metrics. Result rows are indexed by (seed - 2031).
seeds = list(range(2031, 2061))
# Overrides the notebook-level n for this cell and every cell run after it.
n = 200
ILER_fairness = np.empty((len(seeds), seq_len))
efforts_baseline = []
# Scratch buffer of [s | x | y] rows per step; time slot 0 is never written.
data = np.empty((n, seq_len, x_dim + 2))
loss_fn = torch.nn.MSELoss()
ILER_accuracy = np.empty((len(seeds), seq_len))
ILER_short_term = np.empty((len(seeds), seq_len))
b_list = [1, 2, 3, 4]
# Fixed: the loop variable used to be `b` while the body read `_b`, so every
# iteration reused the stale `_b` left by the previous cell (same budget, same
# output file four times) and the notebook-level config value `b` was clobbered.
for _b in b_list:
    # `budget` is only used to name the output files for this iteration.
    budget = _b* seq_len

    for seed in seeds:
        s0, x0, y0, unfair_clf = sample_taiwan(file_path, int(n/2), seq_len, seed=seed)
        # Rows are [s | x | y] for the initial population.
        test_data = torch.cat([tensor(s0.reshape(n, 1)), tensor(x0), tensor(y0).reshape(n,1)], dim=1)
        test_dataset = SimpleDataset(test_data[:, :-1].detach().cpu().numpy(), test_data[:, -1].detach().cpu().numpy())
        test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)
        torch.manual_seed(seed)
        z_mb = torch.randn(len(s0), 1, x_dim).to(device)
        x0 = torch.tensor(x0, dtype=torch.float32).to(device)
        s0 = torch.tensor(s0, dtype=torch.float32).view(len(s0),1).to(device)
        # Step-0 metrics are computed on the sampled data before any effort is applied.
        distance = compute_distance_loss(test_data.to(device))
        ILER_fairness[seed-2031, 0] = distance.item()
        _y = iler_model.predict(s0, x0)
        gt_y = clf.predict(s0, x0)
        gt_y = tensor(gt_y).to(device)
        # "Accuracy" here is the MSE between iler_model's and clf's predictions.
        ILER_accuracy[seed-2031, 0] = loss_fn(_y, gt_y.to(device)).item()
        ILER_short_term[seed-2031, 0] = compute_st_loss(s0, gt_y).item()

        x_gan, _, y_gan = generator(x0, z_mb, s0, iler_model.to(device))
        # NOTE(review): p_to_csv presumably writes the first two generated steps to
        # a file that vaca_dataset reads back — confirm; the call order matters if so.
        p_to_csv(s0.reshape(n,1), x_gan[:, 0:2, :], y_gan[:, 0:2, :], 'taiwan')
        datamodule = vaca_dataset(1, y0, device, 'taiwan')
        loader = datamodule.total_dataloader()
        loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
        # The last generated step becomes the population the effort search runs on.
        test_dataset = SimpleDataset(torch.cat([s0, x_gan[:, -1, :]], dim=1).detach().cpu().numpy(), y_gan[:, -1, :].detach().cpu().numpy())
        test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)
        new_budget = tensor([_b])
        for i in range(1, seq_len):
            _, e, _data, _ = test_iler(iler_model, test_loader, [1,2], device, delta_effort=new_budget.item())
            # Efforts are written only to rows whose current label is < 1.
            effort_indices = ((test_loader.dataset.labels < 1).squeeze()).nonzero()
            efforts = torch.zeros([len(test_loader.dataset), int((x_dim)*2+3)]).to(device)
            efforts[effort_indices[0], x_dim+2:x_dim+4] = e[:, [1, 2]].to(device)
            x_gan_input = intervention_step(loader, model_vaca, datamodule.test_dataset.nodes_to_intervene, efforts, datamodule.test_dataset.nodes_list)
            x_gan_input = x_gan_input.to(device)
            _s = torch.tensor(test_loader.dataset.features[:, 0]).to(device)
            _y = iler_model.predict(_s.unsqueeze(1), x_gan_input.squeeze())
            gt_y = clf.predict(_s.unsqueeze(1), x_gan_input.squeeze())
            gt_y = tensor(gt_y).to(device)

            distance = compute_distance_loss(torch.cat([_s.unsqueeze(1), x_gan_input.squeeze(), gt_y], dim=1).to(device))
            ILER_fairness[seed-2031, i] = distance.item()
            # Next step's budget = base budget plus whatever was left unspent
            # (base minus the mean L1 norm of the efforts just returned).
            new_budget = _b+(_b-torch.mean(torch.sum(torch.abs(e), dim=1)))
            data[:, i, 1:-1] = x_gan_input.squeeze().detach().cpu().numpy()
            data[:, i, 0] = _s.detach().cpu().numpy()
            data[:, i, -1] = gt_y.squeeze().detach().cpu().numpy()
            acc = loss_fn(_y, gt_y.to(device))
            ILER_accuracy[seed-2031, i] = acc.item()
            ILER_short_term[seed-2031, i] = compute_st_loss(_s, gt_y).item()
            # Regenerate from the post-intervention features and rebuild the
            # CSV/VACA loader and the effort-search loader for the next step.
            z_mb = torch.randn(len(_s), 1, x_dim).to(device)
            x_gan, _, y_gan = generator(x_gan_input.squeeze().to(device), z_mb, _s.unsqueeze(1), iler_model.to(device))
            p_to_csv(_s.reshape(n,1), x_gan[:, 0:2, :], y_gan[:, 0:2, :], 'taiwan')
            datamodule = vaca_dataset(1, y0, device, 'taiwan')
            loader = datamodule.total_dataloader()
            loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
            test_dataset = SimpleDataset(torch.cat([s0, x_gan[:, -1, :]], dim=1).detach().cpu().numpy(), y_gan[:, -1, :].detach().cpu().numpy())
            test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)
    # Saved once per budget; the arrays are reused (overwritten) for the next budget.
    # ILER_accuracy is computed but not written to disk.
    np.save('iler_lt_fairness_budget_' + dataset + '_budget_' + str(budget) + '.npy', ILER_fairness)
    np.save('iler_st_fairness_budget_' + dataset + '_budget_' + str(budget) + '.npy', ILER_short_term)

In [None]:
# BE baseline rollout: for every per-step budget in b_list and every seed,
# roll the population forward seq_len steps with be_model and record
# per-step metrics. Result rows are indexed by (seed - 2031).
seeds = list(range(2031, 2061))
# Overrides the notebook-level n for this cell and every cell run after it.
n = 200
BE_fairness = np.empty((len(seeds), seq_len))
# Grows across all budgets and seeds (one entry per step); never reset inside the loops.
be_efforts_baseline = []
# Scratch buffer of [s | x | y] rows per step; time slot 0 is never written.
data = np.empty((n, seq_len, x_dim + 2))
loss_fn = torch.nn.MSELoss()
BE_accuracy = np.empty((len(seeds), seq_len))
BE_short_term = np.empty((len(seeds), seq_len))
b_list = [1, 2, 3, 4]
for _b in b_list:

    # `budget` is only used to name the output files for this iteration.
    budget = _b* seq_len

    for seed in seeds:
        s0, x0, y0, unfair_clf = sample_taiwan(file_path, int(n/2), seq_len, seed=seed)
        # Rows are [s | x | y] for the initial population.
        test_data = torch.cat([tensor(s0.reshape(n, 1)), tensor(x0), tensor(y0).reshape(n,1)], dim=1)
        test_dataset = SimpleDataset(test_data[:, :-1].detach().cpu().numpy(), test_data[:, -1].detach().cpu().numpy())
        test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)
        torch.manual_seed(seed)
        z_mb = torch.randn(len(s0), 1, x_dim).to(device)
        x0 = torch.tensor(x0, dtype=torch.float32).to(device)
        s0 = torch.tensor(s0, dtype=torch.float32).view(len(s0),1).to(device)
        # Step-0 metrics are computed on the sampled data before any effort is applied.
        distance = compute_distance_loss(test_data.to(device))
        BE_fairness[seed-2031, 0] = distance.item()
        # Fixed: step 0 previously scored and rolled out with ei_model (copy-paste
        # from the EI cell) while every later step in this cell uses be_model.
        _y = be_model.predict(s0, x0)
        gt_y = clf.predict(s0, x0)
        gt_y = tensor(gt_y).to(device)
        # "Accuracy" here is the MSE between be_model's and clf's predictions.
        BE_accuracy[seed-2031, 0] = loss_fn(_y, gt_y.to(device)).item()
        BE_short_term[seed-2031, 0] = compute_st_loss(s0, gt_y).item()

        x_gan, _, y_gan = generator(x0, z_mb, s0, be_model.to(device))
        # NOTE(review): p_to_csv presumably writes the first two generated steps to
        # a file that vaca_dataset reads back — confirm; the call order matters if so.
        p_to_csv(s0.reshape(n,1), x_gan[:, 0:2, :], y_gan[:, 0:2, :], 'taiwan')
        datamodule = vaca_dataset(1, y0, device, 'taiwan')
        loader = datamodule.total_dataloader()
        loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
        # The last generated step becomes the population the effort search runs on.
        test_dataset = SimpleDataset(torch.cat([s0, x_gan[:, -1, :]], dim=1).detach().cpu().numpy(), y_gan[:, -1, :].detach().cpu().numpy())
        test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)
        # Fixed: was seeded from the global `b` rather than the loop's budget `_b`.
        new_budget = tensor([_b])
        for i in range(1, seq_len):
            _, e, _data = test_bounded_effort(be_model, test_loader, [1,2], device=device, delta_effort=new_budget.item())
            # Efforts are written only to rows whose current label is < 1.
            effort_indices = ((test_loader.dataset.labels < 1).squeeze()).nonzero()
            efforts = torch.zeros([len(test_loader.dataset), int((x_dim)*2+3)]).to(device)
            efforts[effort_indices[0], x_dim+2:x_dim+4] = e[:, [1, 2]].to(device)
            x_gan_input = intervention_step(loader, model_vaca, datamodule.test_dataset.nodes_to_intervene, efforts, datamodule.test_dataset.nodes_list)
            x_gan_input = x_gan_input.to(device)
            _s = torch.tensor(test_loader.dataset.features[:, 0]).to(device)
            _y = be_model.predict(_s.unsqueeze(1), x_gan_input.squeeze())
            gt_y = clf.predict(_s.unsqueeze(1), x_gan_input.squeeze())
            gt_y = tensor(gt_y).to(device)

            # NOTE(review): this cell scores fairness on be_model's own predictions
            # (_y), whereas the EI/ER/ILER cells score on clf's predictions (gt_y).
            # Left unchanged — confirm which is intended before comparing curves.
            distance = compute_distance_loss(torch.cat([_s.unsqueeze(1), x_gan_input.squeeze(), _y], dim=1).to(device))
            BE_fairness[seed-2031, i] = distance.item()
            # Fixed: carry-over now matches the sibling cells — base budget plus the
            # unspent remainder (base minus mean L1 effort). It previously used the
            # global `b` and *added* the spent effort, so spending more grew the budget.
            new_budget = _b+(_b-torch.mean(torch.sum(torch.abs(e), dim=1)))
            data[:, i, 1:-1] = x_gan_input.squeeze().detach().cpu().numpy()
            data[:, i, 0] = _s.detach().cpu().numpy()
            data[:, i, -1] = gt_y.squeeze().detach().cpu().numpy()
            acc = loss_fn(_y, gt_y.to(device))
            BE_accuracy[seed-2031, i] = acc.item()
            BE_short_term[seed-2031, i] = compute_st_loss(_s, _y).item()
            # Regenerate from the post-intervention features and rebuild the
            # CSV/VACA loader and the effort-search loader for the next step.
            z_mb = torch.randn(len(_s), 1, x_dim).to(device)
            x_gan, _, y_gan = generator(x_gan_input.squeeze().to(device), z_mb, _s.unsqueeze(1), be_model.to(device))
            p_to_csv(s0.reshape(n,1), x_gan[:, 0:2, :], y_gan[:, 0:2, :], 'taiwan')
            datamodule = vaca_dataset(1, y0, device, 'taiwan')
            loader = datamodule.total_dataloader()
            loader.dataset.set_intervention({'LIMIT_BAL_2': 0, 'PAY_AMT1_2': 0}, is_noise=True)
            test_dataset = SimpleDataset(torch.cat([s0, x_gan[:, -1, :]], dim=1).detach().cpu().numpy(), y_gan[:, -1, :].detach().cpu().numpy())
            test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)
            be_efforts_baseline.append(e[:, [1, 2]])
    # Saved once per budget; the arrays are reused (overwritten) for the next budget.
    # BE_accuracy is computed but not written to disk.
    np.save('be_lt_fairness_budget_' + dataset + '_budget_' + str(budget) + '.npy', BE_fairness)
    np.save('be_st_fairness_budget_' + dataset + '_budget_' + str(budget) + '.npy', BE_short_term)

In [None]:
# DP baseline rollout: no effort budget is involved; for every seed the
# population is pushed through dp_model and the generator seq_len times.
# Results stay in memory (DP_fairness / DP_short_term) for the plotting cells;
# nothing is written to disk here. Result rows are indexed by (seed - 2031).
seeds = list(range(2031, 2061))
# Overrides the notebook-level n for this cell and every cell run after it.
n = 200
DP_fairness = np.empty((len(seeds), seq_len))
# Scratch buffer of [s | x | y] rows per step, overwritten for every seed.
data = np.empty((n, seq_len, x_dim + 2))
DP_short_term = np.empty((len(seeds), seq_len))

for seed in seeds:
    s0, x0, y0, unfair_clf = sample_taiwan(file_path, int(n/2), seq_len, seed=seed)
    # Rows are [s | x | y] for the initial population.
    test_data = torch.cat([tensor(s0.reshape(n, 1)), tensor(x0), tensor(y0).reshape(n,1)], dim=1)
    test_dataset = SimpleDataset(test_data[:, :-1].detach().cpu().numpy(), test_data[:, -1].detach().cpu().numpy())
    test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)
    torch.manual_seed(seed)
    for i in range(seq_len):
        _, _data = test_dp_fair(dp_model, test_loader, device)
        # _data columns are split as [s (1) | x (x_dim) | y (1)].
        _s, _x, _y = torch.split(_data, [1, x_dim, 1], dim=1)
        # Move once so the classifier, generator and torch.cat below all see
        # tensors on the same device (a no-op when they are already there).
        _s = _s.to(device)
        _x = _x.to(device)
        # NOTE(review): the other baseline cells call clf.predict(...); this one
        # calls clf(...) directly. Left unchanged — confirm it is intentional.
        gt_y = clf(_s, _x)
        DP_short_term[seed-2031, i] = compute_st_loss(_s, gt_y).item()
        z_mb = torch.randn(len(_s), 1, x_dim).to(device)
        gan_output = generator(_x, z_mb, _s, dp_model.to(device))
        gan_x = gan_output[0][:, -1, :]
        # Fixed: labels are the third generator output (every other call site
        # unpacks `x_gan, _, y_gan`); index 1 is the value those cells discard.
        gan_y = gan_output[2][:, -1, :]
        # The last generated step becomes the next iteration's population.
        test_dataset = SimpleDataset(torch.cat([_s, gan_x], dim=1).detach().cpu().numpy(), gan_y.detach().cpu().numpy())
        test_loader = DataLoader(test_dataset, batch_size=n, shuffle=False)
        distance = compute_distance_loss(torch.cat([_s, gan_x, gan_y], dim=1).to(device))
        DP_fairness[seed-2031, i] = distance.item()
        data[:, i, :] = torch.cat([_s, gan_x, gan_y], dim=1).detach().cpu().numpy()

In [None]:
# Long-term fairness comparison: one figure per budget, each curve being the
# seed-averaged metric per time step. Every curve's first point is pinned to
# the unmodified ("Base") value so all methods start from the same origin.
# DP_fairness is read from kernel memory (DP rollout cell), not from disk.
b_list = [1, 2, 3, 4]
og_fairness = np.load('og_fairness.npy')
og_fairness_tl = og_fairness.mean(axis=0)
timeline = range(seq_len)
dataset = 'taiwan'


for _b in b_list:
    budget = _b * seq_len
    _baseline_suffix = f'{dataset}_budget_{budget}.npy'

    # Loaded but not plotted in this cell.
    model_fairness_lstm_clf = np.load(f'lt_fairness_clf_budget_{budget}.npy')
    model_fairness_lstm = np.load(f'lt_fairness_budget_{budget}.npy')
    EI_fairness = np.load('ei_lt_fairness_budget_' + _baseline_suffix)
    BE_fairness = np.load('be_lt_fairness_budget_' + _baseline_suffix)
    ER_fairness = np.load('er_lt_fairness_budget_' + _baseline_suffix)
    ILER_fairness = np.load('iler_lt_fairness_budget_' + _baseline_suffix)

    # Average over seeds (axis 0) to get one value per time step.
    model_fairness_tl_lstm = model_fairness_lstm.mean(axis=0)
    EI_fairness_tl = EI_fairness.mean(axis=0)
    BE_fairness_tl = BE_fairness.mean(axis=0)
    DP_fairness_tl = DP_fairness.mean(axis=0)
    ER_fairness_tl = ER_fairness.mean(axis=0)
    ILER_fairness_tl = ILER_fairness.mean(axis=0)

    # Pin step 0 of every method to the base value.
    for _curve in (model_fairness_tl_lstm, EI_fairness_tl, DP_fairness_tl,
                   ER_fairness_tl, ILER_fairness_tl, BE_fairness_tl):
        _curve[0] = og_fairness_tl[0]

    fig, ax = plt.subplots()

    # (values, colour, legend label, marker) in drawing/legend order.
    _curves = [
        (og_fairness_tl, 'blue', 'Base', "*"),
        (model_fairness_tl_lstm, 'orange', 'SCARF', "o"),
        (EI_fairness_tl, 'red', 'EI', "s"),
        (DP_fairness_tl, 'purple', 'DP', "v"),
        (BE_fairness_tl, 'green', 'BE', "D"),
        (ER_fairness_tl, 'pink', 'ER', "P"),
        (ILER_fairness_tl, 'gray', 'ILER', "H"),
    ]
    for _values, _color, _label, _marker in _curves:
        ax.plot(timeline, _values, color=_color, label=_label, marker=_marker)

    ax.tick_params(axis='both', labelsize=16)
    ax.legend(fontsize=14, loc='upper left')
    plt.savefig(f"Taiwan Results - 30 seeds averaged budget = {budget}, epsilon = {eps}, beta = {bet}.png")
    plt.show()
    

In [None]:
# Short-term fairness comparison: one figure per budget, each curve being the
# seed-averaged metric per time step. Every curve's first point is pinned to
# the unmodified ("Base") value so all methods start from the same origin.
# DP_short_term is read from kernel memory (DP rollout cell), not from disk.
og_short_term = np.load('st_fairness_og.npy')
og_short_term_tl = np.mean(og_short_term, axis=0)
# Defined here so the cell no longer depends on `timeline` leaking from the
# previous plotting cell.
timeline = range(seq_len)
b_list = [1, 2, 3, 4]
for _b in b_list:
    budget = _b * seq_len
    short_term_fairness = np.load('st_fairness_budget_' + str(budget) + '.npy')
    short_term_lstm_tl = np.mean(short_term_fairness, axis=0)
    short_term_lstm_tl[0] = og_short_term_tl[0]
    ILER_short_term = np.load('iler_st_fairness_budget_' + dataset + '_budget_' + str(budget) + '.npy')
    ER_short_term = np.load('er_st_fairness_budget_' + dataset + '_budget_' + str(budget) + '.npy')
    EI_short_term = np.load('ei_st_fairness_budget_' + dataset + '_budget_' + str(budget) + '.npy')
    BE_short_term = np.load('be_st_fairness_budget_' + dataset + '_budget_' + str(budget) + '.npy')

    # Average over seeds (axis 0) to get one value per time step.
    EI_short_tl = np.mean(EI_short_term, axis=0)
    DP_short_tl = np.mean(DP_short_term, axis=0)
    BE_short_term_tl = np.mean(BE_short_term, axis=0)
    ER_short_term_tl = np.mean(ER_short_term, axis=0)
    ILER_short_term_tl = np.mean(ILER_short_term, axis=0)

    # Pin step 0 of every baseline to the base value.
    EI_short_tl[0] = og_short_term_tl[0]
    DP_short_tl[0] = og_short_term_tl[0]
    BE_short_term_tl[0] = og_short_term_tl[0]
    ER_short_term_tl[0] = og_short_term_tl[0]
    ILER_short_term_tl[0] = og_short_term_tl[0]

    fig, ax = plt.subplots()

    ax.plot(timeline, og_short_term_tl, color='blue', label='Base', marker = "*")
    ax.plot(timeline, short_term_lstm_tl, color='orange', label='SCARF', marker = "o")
    ax.plot(timeline, EI_short_tl, color='red', label='EI', marker = "s")
    ax.plot(timeline, DP_short_tl, color='purple', label='DP', marker = "v")
    ax.plot(timeline, BE_short_term_tl, color='green', label='BE', marker = "D")
    ax.plot(timeline, ER_short_term_tl, color='pink', label='ER', marker = "P")
    ax.plot(timeline, ILER_short_term_tl, color='gray', label='ILER', marker = "H")

    ax.tick_params(axis='both', labelsize=16)
    ax.legend(fontsize=14, loc = 'upper left')
    plt.savefig("Taiwan Short Term Results - 30 seeds averaged budget = " + str(budget) + ", epsilon = "+eps+", beta = "+bet+".png")
    plt.show()
    # Release the figure so the loop does not accumulate open figures.
    plt.close(fig)

In [None]:
# Budget sensitivity (long term): overlay the seed-averaged SCARF curve for
# every budget on a single figure.
b_list = [1, 2, 3, 4]
color_list = ['orange', 'green', 'blue', 'purple']
# NOTE(review): these are the '*_toy.npy' files, while the other Taiwan plots
# load 'og_fairness.npy' / 'st_fairness_og.npy' — confirm the right inputs.
og_short_term = np.load('st_fairness_og_toy.npy')
og_fairness = np.load('og_fairness_toy.npy')

# Loop-invariant values, computed once instead of on every iteration.
og_fairness_tl = np.mean(og_fairness, axis=0)
og_short_term_tl = np.mean(og_short_term, axis=0)  # not plotted in this cell
timeline = range(seq_len)

fig, ax = plt.subplots()

for idx, _b in enumerate(b_list):
    lstm_fairness = np.load('lt_fairness_budget_' + str(_b*seq_len) + '.npy')
    # Average over seeds, then pin step 0 to the base value.
    lstm_fairness_tl = np.mean(lstm_fairness, axis=0)
    lstm_fairness_tl[0] = og_fairness_tl[0]

    ax.plot(timeline, lstm_fairness_tl, color=color_list[idx], label=f'LT - Budget {_b*seq_len}', marker = "s")

ax.tick_params(axis='both', labelsize=16)
ax.legend(fontsize=14, loc = 'upper left')
plt.savefig('Taiwan Budget Sensitivity Long Term.png')
plt.show()


In [None]:
# Budget sensitivity (short term): overlay the seed-averaged SCARF short-term
# curve for every budget on a single figure.
b_list = [1, 2, 3, 4]
color_list = ['orange', 'green', 'blue', 'purple']
# NOTE(review): this is the '*_toy.npy' file, while the other Taiwan plots
# load 'st_fairness_og.npy' — confirm the right input.
og_short_term = np.load('st_fairness_og_toy.npy')
# Fixed: the base curve is now derived from the file loaded in this cell; it
# used to rely on `og_short_term_tl` left behind by an earlier cell. The unused
# load of 'og_fairness_toy.npy' was dropped.
og_short_term_tl = np.mean(og_short_term, axis=0)
timeline = range(seq_len)

fig, ax = plt.subplots()

for idx, _b in enumerate(b_list):
    lstm_short_term = np.load('st_fairness_budget_' + str(_b*seq_len) + '.npy')
    # Average over seeds, then pin step 0 to the base value.
    lstm_short_term_tl = np.mean(lstm_short_term, axis=0)
    lstm_short_term_tl[0] = og_short_term_tl[0]

    ax.plot(timeline, lstm_short_term_tl, color=color_list[idx], label=f'ST - Budget {_b*seq_len}', marker = "s")

ax.tick_params(axis='both', labelsize=16)
ax.legend(fontsize=14, loc = 'upper left')
plt.savefig('Taiwan Budget Sensitivity Short Term.png')
plt.show()


In [None]:
# Ablation plot: for each budget, compare the MLP ("nn") effort model with
# SCARF ("lstm") on both the long-term (solid) and short-term (dashed)
# seed-averaged curves. Step 0 of every curve is pinned to the base value.
b_list = [1, 2, 3, 4]
og_fairness = np.load('og_fairness.npy')
og_short_term = np.load('st_fairness_og.npy')
og_fairness_tl = og_fairness.mean(axis=0)
og_short_term_tl = og_short_term.mean(axis=0)
timeline = range(seq_len)
for _b in b_list:
    budget = _b * seq_len

    # SCARF results.
    lstm_fairness = np.load(f'lt_fairness_budget_{budget}.npy')
    lstm_st_fairness = np.load(f'st_fairness_budget_{budget}.npy')
    lstm_fairness_tl = lstm_fairness.mean(axis=0)
    lstm_st_fairness_tl = lstm_st_fairness.mean(axis=0)

    # MLP results.
    nn_fairness = np.load(f'nn_lt_fairness_budget_{dataset}_{budget}.npy')
    nn_st_fairness = np.load(f'nn_st_fairness_budget_{dataset}_{budget}.npy')
    nn_fairness_tl = nn_fairness.mean(axis=0)
    nn_st_fairness_tl = nn_st_fairness.mean(axis=0)

    # Pin step 0: long-term curves to the base long-term value, short-term
    # curves to the base short-term value.
    for _curve in (lstm_fairness_tl, nn_fairness_tl):
        _curve[0] = og_fairness_tl[0]
    for _curve in (lstm_st_fairness_tl, nn_st_fairness_tl):
        _curve[0] = og_short_term_tl[0]

    fig, ax = plt.subplots()
    # (values, extra line kwargs) in drawing/legend order.
    _curves = [
        (nn_fairness_tl, dict(color='green', label='MLP', marker="s")),
        (nn_st_fairness_tl, dict(color='green', linestyle='dashed', label='MLP - Short Term', marker="s")),
        (lstm_fairness_tl, dict(color='orange', label='SCARF', marker="o")),
        (lstm_st_fairness_tl, dict(color='orange', linestyle='dashed', label='SCARF - Short Term', marker="o")),
    ]
    for _values, _line_kwargs in _curves:
        ax.plot(timeline, _values, **_line_kwargs)
    ax.tick_params(axis='both', labelsize=16)
    ax.legend(fontsize=14, loc = 'upper left')
    plt.savefig(f'Taiwan Long Term vs Short Term Fairness Ablation, Budget = {budget}.png')
    plt.show()
