#### Examples for machine learning algorithms by dsldPy

The goal is for users to train models with a simple, intuitive interface and also understand effects on fairness-utility tradeoffs based on hyperparamater selection. Examples are shown on training/testing sets with cross validation approaches.

1) regression examples using dsldPyFairML and dsldPyQeFairML
2) classification examples using dsldPyFairML and dsldPyQeFairML
3) k-fold cross validation to choose best hyperparameters for fairness utility tradeoff

In [1]:
# load necessary libraries
import pandas as pd
import numpy as np
import sys, os

# r-conversions
import rpy2.robjects as ro
from rpy2.robjects.packages import importr

# test accuracy
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, accuracy_score

# load dsld package
sys.path.append(os.path.abspath(".."))  

R = ro.r
R('if ("dsld" %in% loadedNamespaces()) detach("package:dsld", unload=TRUE)')
R('if (!requireNamespace("devtools", quietly=TRUE)) install.packages("devtools")')
R('devtools::load_all("~/Desktop/dsld", quiet=TRUE)')

parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(os.path.join(parent_dir, "dsld"))
from Utils import preprocess_data, read_data

from dsldPyFairML import (
    dsldPyFrrm, dsldPyFgrrm, dsldPyNclm, dsldPyZlm, dsldPyZlrm, dsldPyFairML_Summary, dsldPyFairML_Predict
)

from dsldPyQeFairML import (
    dsldPyQeFairKNN, dsldPyQeFairRF, dsldPyQeFairRidgeLin, dsldPyQeFairRidgeLog, dsldPyQeFairML_Predict
)

dsld = importr("dsld")


Error importing in API mode: ImportError("dlopen(/Users/adityamittal/miniconda3/lib/python3.12/site-packages/_rinterface_cffi_api.abi3.so, 0x0002): Library not loaded: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.dylib\n  Referenced from: <1F2D8792-55A5-3398-8569-DDFF21A19C12> /Users/adityamittal/miniconda3/lib/python3.12/site-packages/_rinterface_cffi_api.abi3.so\n  Reason: tried: '/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.dylib' (no such file), '/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.dylib' (no such file)")
Trying to import in ABI mode.
R callback write-console: 




*********************



  Navigating dsld:

      Type vignette("Quick_Start",package="dsld") for a quick overview!

      Type vignette("Function_List",package="dsld") for a categorized functi

In [2]:
### regression example --- frrm(), nclm(), zlm(), qeFairKNN(), qeFairRF(), qeFairRidgeLin()

### read and preprocess data

### data preprocessing

### most dsldPy functions require a R data.frame object as input (NOT pandas dataframe)
### the preprocessing is done in the Utils.py file by the function preprocess_data
### user needs to manually provide the categorical and numerical features (list)
### the function preprocess_data returns a R data.frame object -> this is the required input for the dsldPy functions

# test and train split
df = read_data('~/Desktop/dsld/data/svcensus.RData')
test_df, train_df = train_test_split(df, test_size=0.3, random_state=42)
test_y = test_df['wageinc']
test_df = test_df.drop(columns=['wageinc'])

# preprocess data
cat_features_train = ['educ', 'occ', 'gender']
num_features_train = ['age', 'wageinc', 'wkswrkd']
svcensus_train = preprocess_data(train_df, cat_features_train, num_features_train)

cat_features_test = ['educ', 'occ', 'gender']
num_features_test = ['age', 'wkswrkd']
svcensus_test = preprocess_data(test_df, cat_features_test, num_features_test)


In [3]:
### using dsldPyFairML() function

### model training --- frrm() 
### unfairness = 0.05 // can also try different values for unfairness
a = dsldPyFrrm(svcensus_train, 'wageinc', 'gender', 0.05, definition = "sp-komiyama", lamda = 0, save = False)

# print train accuracy and correlations
print(f"train predictions: {a['train_predictions']}")
print(f"train accuracy: {a['train_accuracy']}")
print(f"train correlations: {a['train_correlations']}")

train predictions: [72300.94288981456, 60485.94790632362, 54039.50165443187, 73667.1176759253, 59045.680012413075, 31708.098759764976, 71607.35801304475, 46107.96443680182, 62855.75397272239, 72061.30303198629, 64704.79478547178, 96190.21934081278, 17908.581579332145, 79154.58635278385, 56485.67399866811, 56219.88321887697, 54585.8383104097, 89106.64482562144, 61176.04967409033, 65012.89387650562, 87849.13280903021, 59823.803115928386, 79839.41676978338, 82321.35540882153, 10073.279433766415, 74798.07592201183, 58106.01434662664, 74076.85807077396, 78351.5906253203, 53910.47395972161, 62303.52448287018, 71828.59496957944, 90512.21342205809, 64165.1455331317, 49708.41396020308, 96238.27131041388, 74066.705896357, 54363.21607514651, 57488.93264228231, 65970.10077309531, 30737.770166469134, 68516.94429057246, 64853.559382182044, 48294.473561048144, 64747.33258815921, 73977.08973908244, 62667.360946864435, 66406.15733428243, 59318.10615645327, 66425.84819882415, 74225.17089229995, 41678.52

In [4]:
### predict() on test data
a_preds = dsldPyFairML_Predict(a, svcensus_test)

# print test predictions and correlations
print(f"test predictions: {a_preds['test_predictions']}")
print(f"test correlations: {a_preds['test_correlations']}")

# manuallycompute test accuracy (MAPE)
test_accuracy = mean_absolute_error(test_y, a_preds['test_predictions'])
print(f"test accuracy: {test_accuracy}")

### the same can be done for other models --- nclm(), zlm() with dsldPyFairML_Predict() method

test predictions: [72389.22355133602, 57325.39078246606, 83349.46125982198, 59201.27247109954, 60598.253638357, 54766.850876145516, 75601.38002550432, 88631.28085817864, 34559.19644262316, 44937.09810249205, 92081.64737611973, 89055.249638072, 55843.85920631269, 71468.47957307754, 60714.26353600682, 8429.221373354732, 74193.0429887416, 93424.3655819499, 58334.117748717894, 62402.468082719766, 72650.70368657628, 54215.07584594913, 17507.103200759964, 6845.855829280297, 71623.84837314399, -8374.744013274963, 32244.24643525775, 69106.91333493136, 69113.99482017534, 57036.66902293963, 57437.045433786414, 6924.257259297598, 78873.41766238856, 95637.46547801007, 58486.897489882, 2626.6364437853, 33231.59601384817, 66818.64956649664, 77892.90331943055, 66446.7917993974, 12107.667972410685, 53711.13950484477, 88931.53730374806, 76213.99161622612, 60792.43780779416, 64687.79609014483, 78158.76652436792, 66818.58075202646, 75626.42281719822, 8010.725165059117, 37898.13483793622, 72774.9707741039

In [5]:
### using dsldPyQeFairML() functions 

### model training --- dsldQeFairRF() 
### deweightPars = {'educ': 0.2, 'occ': 0.05} // try different values for proxies
deweightPars = {'educ': 0.2, 'occ': 0.05}

a = dsldPyQeFairRF(svcensus_train, 'wageinc', 'gender', deweightPars)

# print train accuracy and correlations
print(f"train predictions: {a['train_predictions']}")
print(f"train accuracy: {a['train_accuracy']}")
print(f"train correlations: {a['train_correlations']}")

train predictions: [66457.96914366643, 62320.071062368006, 61456.01378721786, 86625.98074974549, 63352.86941985882, 435.2821568627451, 74120.9270309505, 38197.429127546086, 60674.413238685935, 79631.16558429519, 56409.75464224643, 90975.24939542107, 19.398323529411766, 80260.8300573677, 62750.19883396432, 44097.84776670489, 68333.87779196342, 61875.41913835669, 66226.21649773742, 74013.53003738211, 91634.16768984302, 62863.88933572264, 74529.2138838121, 94065.78845540267, 87.67365873015873, 76880.79954024732, 54251.52299521148, 76336.0617714747, 60982.85974855022, 37693.92812330083, 92966.03920623634, 77836.90212823696, 97291.47344692239, 85047.17896977365, 19038.242804062524, 89438.17928081664, 82822.01730453219, 46989.04128016709, 68953.83534188791, 69397.89789994067, 18288.981724629717, 59063.78246032323, 101129.41183565227, 70420.85664853327, 68119.84200542745, 80508.64210759393, 69517.61583292739, 71887.24262153677, 56975.79382504796, 71175.70027344562, 76764.17901436886, 45661.09

In [6]:
### predict on test data
a_preds = dsldPyQeFairML_Predict(a, svcensus_test)

# print test predictions and correlations
print(f"test predictions: {a_preds['test_predictions']}")
print(f"test correlations: {a_preds['test_correlations']}")

# manually compute test accuracy (MAPE)
test_accuracy = mean_absolute_error(test_y, a_preds['test_predictions'])
print(f"test accuracy: {test_accuracy}")

### the same can be done for other models --- qeFairKNN(), qeFairRidgeLin() with dsldPyQeFairML_Predict() method

test predictions: [91183.56269113295, 65345.57002242364, 84774.44074799334, 77529.15049467768, 59670.88070280589, 48786.80962899294, 70397.50541989657, 101198.15230969964, 31841.538400880832, 45397.83132461879, 98778.9259436393, 89489.96811539787, 60752.920712256906, 64934.94329866313, 62079.57915658019, 23675.512429706392, 87506.50933128422, 68091.39134163858, 68944.95153290725, 64982.84136769395, 75777.92058797769, 70430.63249542018, 18319.220822422172, 1.2691296296296295, 74751.65951892917, 158.16643634279004, 30026.517688264616, 60922.39548528203, 60810.96970638186, 55697.32953621574, 59917.638710950734, 303.62784962406016, 58526.500942357896, 92168.93167185271, 61021.07222498008, 21362.9266316844, 32718.817260729047, 37407.97275635241, 68565.81401445855, 81283.83038974181, 808.6761488597014, 62788.30268759872, 95344.69179250234, 82952.62596841756, 66526.86661617903, 68395.44497685939, 77753.03268303003, 67675.80900800352, 85287.68733775777, 26086.519250468027, 31623.3469628848, 59

In [7]:
### classification examples --- fgrrm(), zlrm(), qeFairKNN(), qeFairRF(), qeFairRidgeLog()

### read and preprocess data

# test and train split
df = read_data('~/Desktop/dsld/data/compas1.RData')
test_df, train_df = train_test_split(df, test_size=0.3, random_state=42)
test_y = test_df['two_year_recid']
test_y = test_df['two_year_recid'].map({'Yes': 1, 'No': 0})            # convert to binary
test_df = test_df.drop(columns=['two_year_recid'])

# preprocess data
cat_features = ['sex', 'race', 'two_year_recid']
num_features = ["age", "juv_fel_count","decile_score","juv_misd_count","juv_other_count","priors_count","c_jail_in","c_jail_out","c_offense_date","screening_date","in_custody","out_custody"]
compas1_train = preprocess_data(train_df, cat_features_train, num_features_train)

cat_features = ['sex', 'race']
num_features = ["age", "juv_fel_count","decile_score","juv_misd_count","juv_other_count","priors_count","c_jail_in","c_jail_out","c_offense_date","screening_date","in_custody","out_custody"]
compas1_test = preprocess_data(test_df, cat_features_test, num_features_test)


In [8]:
### using dsldPyFairML() functions 

### model training --- fgrrm() 
### unfairness = 0.1 // try different values for unfairness
a = dsldPyFgrrm(compas1_train, 'two_year_recid', 'race', 0.1, definition = "sp-komiyama", family = "binomial", lamda = 0, save = False, yesYVal = "Yes")

# print train accuracy and correlations
print(f"train predictions: {a['train_predictions']}")             # returns prob = Yes
print(f"train accuracy (misclassification rate): {a['train_accuracy']}")
print(f"train correlations: {a['train_correlations']}")

train predictions: [0.6999988843595085, 0.7976134653951307, 0.8508285462523596, 0.4654643164347272, 0.4789612301910033, 0.25235891106523595, 0.3030127975816798, 0.2148297595633877, 0.9475174775547311, 0.40949938910478734, 0.45503935582797467, 0.08134039933351905, 0.22180403948605118, 0.750292134244151, 0.7996876838469726, 0.1993485369416968, 0.6257095658418004, 0.08412486609539203, 0.6849170671754015, 0.5073860633277777, 0.4169432691886657, 0.16487739793791134, 0.2699635038138084, 0.1098771274842079, 0.3223094747779274, 0.39017183631902036, 0.4734630488191124, 0.2529989214644707, 0.1626829048950782, 0.7932712294193632, 0.41423841413438706, 0.3158053844487543, 0.12170817071161427, 0.25205511052944407, 0.5872106821863736, 0.31330520386168254, 0.4155187700549714, 0.15074960993867606, 0.5138660135525559, 0.7762150103053489, 0.10257467995033202, 0.31132768458897975, 0.5334624196964795, 0.64770070926039, 0.49077201305648366, 0.5660051065876193, 0.6839213465270876, 0.7363365144115365, 0.07617

In [9]:
### predict() on test set
a_preds = dsldPyFairML_Predict(a, compas1_test)

# print test predictions and correlations
print(f"test predictions: {a_preds['test_predictions']}") # returns prob = Yes
print(f"test correlations: {a_preds['test_correlations']}")

# manually compute test accuracy (MAPE)
y_pred = [int(round(x)) for x in a_preds['test_predictions']]
test_accuracy = accuracy_score(test_y, y_pred)
misclass_rate = 1 - test_accuracy

# print train accuracy and correlations
print(f"test accuracy (misclassification rate): {misclass_rate}")

### the same can be done for other models --- zlrm() with dsldPyFairML_Predict() method

test predictions: [0.8816675673867042, 0.9271685936325658, 0.22173540355802504, 0.379970891225766, 0.2600193122583822, 0.7353566555367674, 0.5671591650769578, 0.6359607931545178, 0.27751302361478897, 0.7417992417323518, 0.39914806677327785, 0.7221148105464487, 0.3945160315602463, 0.24100569185035337, 0.5619804214713906, 0.5692574834566783, 0.34381200070113216, 0.8312701854283351, 0.29368945446559636, 0.7295570311438669, 0.974543942887882, 0.522347591418805, 0.6018998273644357, 0.1167618058432458, 0.356942641122594, 0.729283229035867, 0.7303367416173048, 0.1616571518102298, 0.24622168789717216, 0.444562314392117, 0.19066592902369872, 0.24904974333677363, 0.7247331502373162, 0.9419403629337713, 0.28454507048695754, 0.6961251869184233, 0.6221306999452251, 0.5486978972011496, 0.10490050747314368, 0.8010718307549787, 0.6832795162951995, 0.40419072355307256, 0.3816133397407489, 0.28173690482726493, 0.13126630387647958, 0.4201307695414147, 0.30239573704187317, 0.7095992161212672, 0.9846833484

In [10]:
### using dsldPyQeFairML() functions 

### model training --- dsldQeFairKNN() 
### deweightPars = {'decile_score': 0.2, 'priors_count': 0.5} // try different values for deweightPars
deweightPars = {'decile_score': 0.2, 'priors_count': 0.5}

a = dsldPyQeFairKNN(compas1_train, 'two_year_recid', 'race', deweightPars, k = 10, scaleX = True, yesYVal = "Yes")

# print train accuracy and correlations
# in the case of classification, the train_predictions returns both predClasses and prob = Yes
print(f"train predictions: {a['train_predictions']}")     
print(f"train accuracy: {a['train_accuracy']}")
print(f"train correlations: {a['train_correlations']}")

train predictions: [<rpy2.robjects.vectors.StrVector object at 0x17630b210> [16]
R classes: ('character',)
['No', 'Yes', 'Yes', 'Yes', ..., 'No', 'No', 'No', 'No'], <rpy2.robjects.vectors.FloatVector object at 0x17638da90> [14]
R classes: ('numeric',)
[0.400000, 1.000000, 1.000000, 0.600000, ..., 0.500000, 0.500000, 0.400000, 0.000000]]
train accuracy: 0.21798520204894706
train correlations: [('race==African-American', 0.14998156736483376), ('race==Caucasian', -0.09759215386570062), ('race==Other', -0.05914343456976784), ('race==Asian', -0.04729168300247519), ('race==Hispanic', -0.04209541612083901), ('race==Native American', 0.02215592574316403)]


In [12]:
### predict() on test set
a_preds = dsldPyQeFairML_Predict(a, compas1_test)

# print test predictions and correlations
print(f"test predictions: {a_preds['test_predictions']}")
print(f"test correlations: {a_preds['test_correlations']}")

# compute test accuracy
y_pred = [int(round(x)) for x in list(a_preds['test_predictions'][1])]
test_accuracy = accuracy_score(test_y, y_pred)
misclass_rate = 1 - test_accuracy

# print train accuracy and correlations
print(f"test accuracy (misclassification rate): {misclass_rate}")

### the same can be done for other models --- dsldQeFairRF(), dsldQeFairRidgeLog() with dsldPyQeFairML_Predict() method

test predictions: [<rpy2.robjects.vectors.StrVector object at 0x17638e750> [16]
R classes: ('character',)
['Yes', 'No', 'No', 'No', ..., 'Yes', 'Yes', 'No', 'No'], <rpy2.robjects.vectors.FloatVector object at 0x17368bd50> [14]
R classes: ('numeric',)
[1.000000, 0.500000, 0.500000, 0.200000, ..., 1.000000, 0.700000, 0.200000, 0.200000]]
test correlations: [('race==African-American', 0.1470458370617937), ('race==Caucasian', -0.09053050783972662), ('race==Hispanic', -0.06186680346724754), ('race==Other', -0.05209334589315818), ('race==Asian', -0.019529004080715857), ('race==Native American', -0.008734373971507456)]
test accuracy (misclassification rate): 0.27403611517813564


In [12]:
### k-fold cross validation to find best model based on fairness and accuracy