In [34]:
import numpy as np
from folktables import ACSDataSource, ACSMobility
from matplotlib import pyplot as plt
import torch
from scipy import stats
from scipy.sparse.linalg import lobpcg
from scipy.linalg import eigh
import pandas as pd
from inFairness.distances import MahalanobisDistances, SquaredEuclideanDistance
from inFairness.fairalgo import SenSeI
from inFairness.auditor import SenSeIAuditor
from tqdm.auto import tqdm
from utils import *

In [27]:
r = 3
p = 30
n = 500

In [28]:
data_source = ACSDataSource(survey_year='2018', horizon='1-Year', survey='person')
acs_data = data_source.get_data(states=["TX"], download=True)
acs_data_adult = acs_data[acs_data["AGEP"] >= 18]


In [29]:
acs_data_cleaned = acs_data_adult.select_dtypes(include=["float64", "int64"])
acs_data_cleaned = acs_data_cleaned.loc[:, ~(acs_data_cleaned.isna().any())]
acs_data_cleaned = acs_data_cleaned.loc[:, (acs_data_cleaned.var(axis=0) > 0)]
acs_data_cleaned = acs_data_cleaned.sample(frac=1, axis=0)
print(np.shape(np.array(acs_data_cleaned)))


(206826, 219)


In [30]:
print("Generating synthetic data...")
X = clean_data(2 * n, p, acs_data_cleaned)
X_train = X[:n]
X_test = X[n:]

Generating synthetic data...


In [31]:
M, S, y, Astar, Kstar = generate_synthetic_data(n, r, p, X_train)

print(np.shape(X_train), np.shape(X_test))

(500, 30) (500, 30)


In [32]:
A0 = initialization(n, r, p, S, X_train, y)

[3.07213180e+04 5.61449033e-03 6.10494209e-03]
not reaching the requested tolerance 4.470348358154297e-07.
Use iteration 0 instead with accuracy 
0.0017873632962911183.

  eigenvalues, eigenvectors = lobpcg(A=XtHX, B=Sigma, X=np.random.normal(size=(p,r)))
[3.57580156e-03 4.89425776e-18 1.02395899e-03]
not reaching the requested tolerance 4.470348358154297e-07.
  eigenvalues, eigenvectors = lobpcg(A=XtHX, B=Sigma, X=np.random.normal(size=(p,r)))
  Ahat = U @ np.diag(np.sqrt(Lambda))


In [33]:
A0

array([[            nan, -1.48303545e-10,  3.61248764e-03],
       [            nan, -2.85387088e-10,  2.47951083e-03],
       [            nan,  2.63581656e-10,  1.05579390e-03],
       [            nan, -1.15843615e-10,  4.07588749e-03],
       [            nan, -1.52071261e-11,  2.26314746e-03],
       [            nan,  5.98847143e-11, -4.07034933e-03],
       [            nan,  1.37648174e-10, -9.10850880e-04],
       [            nan, -1.47389785e-10,  1.04465259e-03],
       [            nan, -1.45112056e-10, -1.64447236e-03],
       [            nan, -1.28227351e-10,  5.49811219e-04],
       [            nan, -2.77525372e-11, -7.30609711e-05],
       [            nan, -8.57878773e-11, -3.81936667e-03],
       [            nan,  5.19610940e-11,  6.90087804e-04],
       [            nan, -4.22453913e-11,  1.88136067e-03],
       [            nan,  1.14853923e-11, -1.52105242e-03],
       [            nan,  2.19395982e-11, -3.11549938e-03],
       [            nan, -7.13685194e-12

In [35]:
transition_matrices = [np.zeros((n - 1, n - 1)) for _ in range(n)]
for t in range(len(S)):
    def gap(l, i):
        if l < i:
            return l
        else:
            return l - 1
    i, j, k = S[t]
    transition_matrices[i][gap(j, i), gap(k, i)] = 0.99 if y[t] == 1 else 0.01
    transition_matrices[i][gap(k, i), gap(j, i)] = 0.99 if y[t] == -1 else 0.01

for i in range(n):
    d = np.max(np.sum(transition_matrices[i], axis=1))
    transition_matrices[i] = transition_matrices[i] / d

    self_loops = np.diag(1 - np.sum(transition_matrices[i], axis=1))

    transition_matrices[i] += self_loops

dists_from_i = []
for i in range(n):
    eigenvalues, eigenvectors = np.linalg.eigh(transition_matrices[i])
    leading_index = np.where(np.isclose(eigenvalues, 1))[0]
    leading_eigenvector = np.abs(eigenvectors[:, leading_index].real)
    dists_wo_i = np.log(leading_eigenvector)
    if not np.all(np.isfinite(dists_wo_i)):
        print(i)
    dists = np.zeros(n)
    np.put(dists, list(range(i)) + list(range(i+1, n)), dists_wo_i)
    dists_from_i.append(dists)
D = np.stack(dists_from_i)


In [36]:
J = np.identity(n) - (np.ones((n, 1)) @ np.ones((1, n))) / n
H = - J @ D @ J / 2
Xprime = J @ X_train

XtHX = Xprime.T @ H @ Xprime / (n**2)
Sigma = Xprime.T @ Xprime / n

In [37]:
eigenvalues, eigenvectors = eigh(a=XtHX, b=Sigma)

In [38]:
U = eigenvectors[:, np.argsort(eigenvalues)[-r:]]


In [39]:
U

array([[-0.07182495, -0.33177168,  0.18045973],
       [ 0.03667948,  0.04839002, -0.15150435],
       [ 0.01977101,  0.01559895, -0.03604452],
       [ 0.02108022, -0.0759725 ,  0.06825837],
       [ 1.46242186, -0.75165903, -1.14678456],
       [-0.18970503,  0.07577766, -0.34092317],
       [-0.11764776,  0.11308508, -0.49613449],
       [ 0.65319258, -0.50000915, -0.80838978],
       [-1.84068402,  1.04977482,  0.567658  ],
       [-0.5429946 ,  0.07045635, -0.2620751 ],
       [-0.55522734,  0.13345332, -0.57303416],
       [-0.18883854, -0.11849849, -0.36146077],
       [ 0.40535383,  0.39588348,  0.3328791 ],
       [ 0.18858526, -0.02809735,  0.19664328],
       [ 0.30995832,  0.33386107,  0.33151063],
       [ 0.07865835,  0.0598699 ,  0.09377199],
       [ 0.05396068,  0.00206116, -0.01872646],
       [ 0.05029955,  0.01772147, -0.1607985 ],
       [-0.27801994, -0.13740375, -0.25044412],
       [-0.03648266,  0.18355382,  0.07275123],
       [-0.02848243,  0.41063991, -0.044

In [40]:
Lambda = np.sort(eigenvalues)[-r:]

In [41]:
Lambda

array([0.00547309, 0.0098498 , 0.01286321])

In [42]:
Ahat = U @ np.diag(np.sqrt(Lambda))

In [43]:
Ahat

array([[-0.00531363, -0.03292707,  0.02046704],
       [ 0.00271356,  0.00480252, -0.01718303],
       [ 0.00146267,  0.00154814, -0.00408803],
       [ 0.00155952, -0.00753998,  0.0077416 ],
       [ 0.10819046, -0.07459928, -0.13006381],
       [-0.01403444,  0.00752064, -0.03866617],
       [-0.00870362,  0.01122326, -0.05626963],
       [ 0.04832341, -0.049624  , -0.0916844 ],
       [-0.13617443,  0.10418613,  0.06438155],
       [-0.04017092,  0.00699252, -0.02972353],
       [-0.04107591,  0.01324473, -0.06499129],
       [-0.01397034, -0.01176052, -0.04099546],
       [ 0.02998821,  0.03928992,  0.03775384],
       [ 0.0139516 , -0.00278855,  0.02230251],
       [ 0.02293082,  0.03313443,  0.03759864],
       [ 0.00581917,  0.00594186,  0.01063525],
       [ 0.00399203,  0.00020456, -0.00212388],
       [ 0.00372118,  0.00175879, -0.01823714],
       [-0.02056801, -0.0136368 , -0.02840439],
       [-0.002699  ,  0.01821701,  0.00825116],
       [-0.00210714,  0.04075444, -0.005