In [2]:
import numpy as np
import pandas as pd
from aif360.sklearn.datasets import fetch_lawschool_gpa

In [3]:
# Fetch the train and test splits of the law-school GPA dataset with the same
# options: numeric columns only, and "gender" dropped from the feature columns.
# In the frame displayed below, race and gender appear as index levels.
(X_train, y_train), (X_test, y_test) = (
    fetch_lawschool_gpa(subset, numeric_only=True, dropcols="gender")
    for subset in ("train", "test")
)
X_train

Unnamed: 0_level_0,Unnamed: 1_level_0,race,lsat,ugpa
race,gender,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,1,0.0,38.0,3.3
1.0,0,1.0,34.0,4.0
1.0,0,1.0,34.0,3.9
1.0,0,1.0,45.0,3.3
1.0,1,1.0,39.0,2.5
1.0,...,...,...,...
1.0,1,1.0,31.0,2.7
1.0,1,1.0,37.0,2.4
1.0,0,1.0,29.0,3.0
1.0,1,1.0,39.0,3.2


In [4]:
# Keep only the group column ("race") by removing the two feature columns.
# This must run before the cell that strips "race" out of X_train.
T_train = X_train.drop(columns=["lsat", "ugpa"])

In [5]:
# Remove the group column so X_train holds only the features (lsat, ugpa).
# Note: this rebinds X_train, so re-running the cell raises KeyError because
# "race" is no longer a column.
X_train = X_train.drop(columns=["race"])

In [6]:
# Assemble one training frame column-wise: features, target, then group.
D_train = pd.concat([X_train, y_train, T_train], axis="columns")

In [7]:
# Display the combined training frame (features, target and group columns).
D_train

Unnamed: 0_level_0,Unnamed: 1_level_0,lsat,ugpa,zfygpa,race
race,gender,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,1,38.0,3.3,-0.57,0.0
1.0,0,34.0,4.0,1.01,1.0
1.0,0,34.0,3.9,-1.28,1.0
1.0,0,45.0,3.3,1.57,1.0
1.0,1,39.0,2.5,-0.62,1.0
1.0,...,...,...,...,...
1.0,1,31.0,2.7,-1.03,1.0
1.0,1,37.0,2.4,-0.12,1.0
1.0,0,29.0,3.0,-0.28,1.0
1.0,1,39.0,3.2,-0.02,1.0


In [8]:
# Keep only the group column ("race") by removing the two feature columns.
# This must run before the cell that strips "race" out of X_test.
T_test = X_test.drop(columns=["lsat", "ugpa"])

In [9]:
# Remove the group column so X_test holds only the features (lsat, ugpa).
# Note: this rebinds X_test, so re-running the cell raises KeyError because
# "race" is no longer a column.
X_test = X_test.drop(columns=["race"])

In [10]:
# Assemble one test frame column-wise: features, target, then group.
D_test = pd.concat([X_test, y_test, T_test], axis="columns")

In [11]:
def compute_discrimination_statistic(df, model, feature_cols, target_col='Y', group_col='T'):
    """Return d(θ): the difference in mean signed prediction error between groups.

    Rows of ``df`` are split by ``group_col`` into the group whose value equals 0
    and the group whose value equals 1. For each group the mean of
    ``prediction - true value`` is computed, and the result is
    ``mean_error(group 0) - mean_error(group 1)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding the feature, target and group columns.
    model : object
        Any object exposing ``predict(features)``; it is called once per group.
    feature_cols : list
        Column labels passed to ``model.predict``.
    target_col : str, default 'Y'
        Column with the true target values.
    group_col : str, default 'T'
        Column with the group indicator; only the values 0 and 1 are used.

    Returns
    -------
    float
        The discrimination statistic d(θ).
    """

    def _mean_signed_error(rows):
        # Average of (prediction - truth) over the given subset of rows.
        residuals = model.predict(rows[feature_cols]) - rows[target_col]
        return np.mean(residuals)

    error_group_0 = _mean_signed_error(df[df[group_col] == 0])
    error_group_1 = _mean_signed_error(df[df[group_col] == 1])

    return error_group_0 - error_group_1  # d(θ)

In [12]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Step 1: add the generic alias columns expected by
# compute_discrimination_statistic: 'T' (group) and 'Y' (target).
for frame in (D_train, D_test):
    frame['T'] = frame['race']
    frame['Y'] = frame['zfygpa']

# Step 2: fit an ordinary least-squares baseline on the two features.
feature_cols = ['lsat', 'ugpa']
model = LinearRegression().fit(D_train[feature_cols], D_train['Y'])

# Step 3: evaluate the baseline on the held-out split: group gap in mean
# signed error first, then overall mean squared error.
d_theta = compute_discrimination_statistic(D_test, model, feature_cols)
print("Discrimination statistic d(θ):", d_theta)

y_pred = model.predict(D_test[feature_cols])
mse = mean_squared_error(D_test['Y'], y_pred)
print("Mean Squared Error (MSE):", mse)


Discrimination statistic d(θ): 0.5701362005060027
Mean Squared Error (MSE): 0.855195802077886


In [13]:
# Pack every training row as [[lsat, ugpa], zfygpa, race] inside an object
# array, the layout passed to QNDLR further down.
# A single to_numpy() call replaces the per-row DataFrame.iterrows() loop:
# it avoids building one Series per row and, like iterrows, yields the values
# upcast to a common NumPy dtype.
D_train_formatted = np.array(
    [
        [[lsat, ugpa], zfygpa, race]
        for lsat, ugpa, zfygpa, race in D_train[["lsat", "ugpa", "zfygpa", "race"]].to_numpy()
    ],
    dtype=object,
)

In [14]:
# Pack every test row as [[lsat, ugpa], zfygpa, race] inside an object array,
# the layout consumed by compute_discrimination_statistic_QNDLR.
# A single to_numpy() call replaces the per-row DataFrame.iterrows() loop:
# it avoids building one Series per row and, like iterrows, yields the values
# upcast to a common NumPy dtype.
D_test_formatted = np.array(
    [
        [[lsat, ugpa], zfygpa, race]
        for lsat, ugpa, zfygpa, race in D_test[["lsat", "ugpa", "zfygpa", "race"]].to_numpy()
    ],
    dtype=object,
)

In [None]:
# Run the QNDLR search repeatedly until it returns something other than a
# string; any str result is treated as "no solution found".
import importlib

import QNDLR_MSE

max_attempts = 40  # Prevent infinite loops
attempt = 0
theta_opt = "No Solution Found"


while isinstance(theta_opt, str) and attempt < max_attempts:
    print(f"🔁 Attempt {attempt + 1}")
    # NOTE(review): the module is reloaded before every attempt, presumably to
    # pick up edits to QNDLR_MSE.py or to reset module-level state; confirm.
    importlib.reload(QNDLR_MSE)
    # Re-import so the name QNDLR is bound to the freshly reloaded function.
    from QNDLR_MSE import QNDLR

    theta_opt = QNDLR(D_train_formatted, 0.05, 0.1, 1.5, 0)
    print(theta_opt)
    attempt += 1

if isinstance(theta_opt, str):
    # Report the configured limit rather than a hard-coded number so the
    # message stays correct when max_attempts is changed.
    print(f"❌ QNDLR failed after {max_attempts} attempts.")
else:
    print("✅ Solution found!")
    print(theta_opt)


In [16]:
# Hard-coded coefficients used for the first QNDLR evaluation below, one per
# feature column of X_test.
# NOTE(review): presumably copied from the output of a QNDLR search run; the
# `theta_opt` variable produced by the search cell is not used here. Confirm
# the provenance of these values.
theta_opt_qndlr = [0.1206594, 0.32250269]

In [None]:
def compute_discrimination_statistic_QNDLR(D_formatted, theta_opt):
    """Return d(θ) for a linear predictor given data in ``[X, Y, T]`` row format.

    Parameters
    ----------
    D_formatted : sequence
        Each row holds, by position, the features (index 0), the true target
        (index 1) and the group indicator (index 2).
    theta_opt : array-like
        Coefficients of the linear predictor; flattened to 1-D before use.

    Returns
    -------
    float
        Mean signed error (prediction - truth) over the rows with group 0,
        minus the same quantity over the rows with group 1.
    """

    def _field(position):
        # Collect one positional field from every row into an array.
        return np.array([record[position] for record in D_formatted])

    features = _field(0)
    targets = _field(1)
    groups = _field(2)

    weights = np.array(theta_opt).flatten()

    if features.ndim == 1:
        # Scalar feature per row: scale by the single coefficient, or by the
        # first coefficient when more than one was supplied.
        if weights.size == 1:
            predictions = features * weights
        else:
            predictions = features * weights[0]
    else:
        # One feature vector per row (n_samples x n_features).
        predictions = features.dot(weights)

    def _group_bias(mask):
        # Mean of (prediction - truth) restricted to the masked rows.
        return np.mean(predictions[mask] - targets[mask])

    return _group_bias(groups == 0) - _group_bias(groups == 1)

In [18]:
# Discrimination statistic d(θ) of the first QNDLR coefficients on the test split.
compute_discrimination_statistic_QNDLR(D_test_formatted, theta_opt_qndlr)

np.float64(-0.029020382288726054)

In [22]:
# Test-set predictions of the first QNDLR solution: a plain dot product of the
# feature columns with the coefficients (no intercept term is added here).
y_pred_qndlr = X_test.dot(theta_opt_qndlr)

# Overall mean squared error against the true test targets.
mse = mean_squared_error(y_true=D_test['Y'], y_pred=y_pred_qndlr)
print("Mean Squared Error (MSE):", mse)

Mean Squared Error (MSE): 30.315084767798357


In [None]:
# Second QNDLR search. Runs the solver repeatedly until it returns something
# other than a string; any str result is treated as "no solution found".
#
# Fix: the loop condition and the assignment previously used `theta_opt`
# (left over from the first search cell) instead of `theta_opt_qndlr2`, so
# this cell never updated `theta_opt_qndlr2` and always reported failure.
#
# NOTE(review): the arguments passed to QNDLR are identical to the first
# search cell; confirm whether different settings were intended here.
import importlib

import QNDLR_MSE

max_attempts = 40  # Prevent infinite loops
attempt = 0
theta_opt_qndlr2 = "No Solution Found"


while isinstance(theta_opt_qndlr2, str) and attempt < max_attempts:
    print(f"🔁 Attempt {attempt + 1}")
    # NOTE(review): the module is reloaded before every attempt, presumably to
    # pick up edits to QNDLR_MSE.py or to reset module-level state; confirm.
    importlib.reload(QNDLR_MSE)
    # Re-import so the name QNDLR is bound to the freshly reloaded function.
    from QNDLR_MSE import QNDLR

    theta_opt_qndlr2 = QNDLR(D_train_formatted, 0.05, 0.1, 1.5, 0)
    print(theta_opt_qndlr2)
    attempt += 1

if isinstance(theta_opt_qndlr2, str):
    # Report the configured limit rather than a hard-coded number so the
    # message stays correct when max_attempts is changed.
    print(f"❌ QNDLR failed after {max_attempts} attempts.")
else:
    print("✅ Solution found!")
    print(theta_opt_qndlr2)

In [20]:
# Hard-coded coefficients used for the second QNDLR evaluation below, one per
# feature column of X_test.
# NOTE(review): presumably copied from the output of a QNDLR search run rather
# than taken from the search cell's variable; confirm the provenance.
theta_opt_qndlr2= [0.12249029, 0.05404037]

In [21]:
# Discrimination statistic d(θ) of the second QNDLR coefficients on the test split.
compute_discrimination_statistic_QNDLR(D_test_formatted, theta_opt_qndlr2)

np.float64(0.05917037759891386)

In [23]:
# Test-set predictions of the second QNDLR solution: a plain dot product of the
# feature columns with the coefficients (no intercept term is added here).
y_pred_qndlr2 = X_test.dot(theta_opt_qndlr2)

# Overall mean squared error against the true test targets.
mse = mean_squared_error(y_true=D_test['Y'], y_pred=y_pred_qndlr2)
print("Mean Squared Error (MSE):", mse)

Mean Squared Error (MSE): 22.30173903657535
