First Task: Income Poverty / Male Female 

In [None]:
# Human-readable labels for the categorical ACS PUMS feature codes used by the
# income tasks, keyed first by column name and then by the (float) code value.
#
# Multi-line labels rely on implicit concatenation of adjacent string
# literals, so every fragment except the last must end with a space;
# otherwise the words at the seam are fused (e.g. "company orbusiness").
ACSIncome_categories = {
    # Class of worker.
    "COW": {
        1.0: (
            "Employee of a private for-profit company or "
            "business, or of an individual, for wages, "
            "salary, or commissions"
        ),
        2.0: (
            "Employee of a private not-for-profit, tax-exempt, "
            "or charitable organization"
        ),
        3.0: "Local government employee (city, county, etc.)",
        4.0: "State government employee",
        5.0: "Federal government employee",
        6.0: (
            "Self-employed in own not incorporated business, "
            "professional practice, or farm"
        ),
        7.0: (
            "Self-employed in own incorporated business, "
            "professional practice or farm"
        ),
        8.0: "Working without pay in family business or farm",
        9.0: "Unemployed and last worked 5 years ago or earlier or never worked",
    },
    # Educational attainment.
    "SCHL": {
        1.0: "No schooling completed",
        2.0: "Nursery school, preschool",
        3.0: "Kindergarten",
        4.0: "Grade 1",
        5.0: "Grade 2",
        6.0: "Grade 3",
        7.0: "Grade 4",
        8.0: "Grade 5",
        9.0: "Grade 6",
        10.0: "Grade 7",
        11.0: "Grade 8",
        12.0: "Grade 9",
        13.0: "Grade 10",
        14.0: "Grade 11",
        15.0: "12th grade - no diploma",
        16.0: "Regular high school diploma",
        17.0: "GED or alternative credential",
        18.0: "Some college, but less than 1 year",
        19.0: "1 or more years of college credit, no degree",
        20.0: "Associate's degree",
        21.0: "Bachelor's degree",
        22.0: "Master's degree",
        23.0: "Professional degree beyond a bachelor's degree",
        24.0: "Doctorate degree",
    },
    # Marital status.
    "MAR": {
        1.0: "Married",
        2.0: "Widowed",
        3.0: "Divorced",
        4.0: "Separated",
        5.0: "Never married or under 15 years old",
    },
    "SEX": {1.0: "Male", 2.0: "Female"},
    # Race code; the fairness cells below compare code 1 against code 2.
    "RAC1P": {
        1.0: "White alone",
        2.0: "Black or African American alone",
        3.0: "American Indian alone",
        4.0: "Alaska Native alone",
        5.0: (
            "American Indian and Alaska Native tribes specified; "
            "or American Indian or Alaska Native, "
            "not specified and no other"
        ),
        6.0: "Asian alone",
        7.0: "Native Hawaiian and Other Pacific Islander alone",
        8.0: "Some Other Race alone",
        9.0: "Two or More Races",
    },
}


In [None]:
# """Data source and problem definitions for American Community Survey (ACS) Public Use Microdata Sample (PUMS)."""
import numpy as np
import pandas as pd

import folktables

# Binary task on the POVPIP column: the label is True when POVPIP < 250.
# Rows are grouped by the RAC1P race code.
ACSIncomePovertyRatio = folktables.BasicProblem(
    features=[
        'AGEP',
        'SCHL',
        'MAR',
        'SEX',
        'DIS',
        'ESP',
        'MIG',
        'CIT',
        'MIL',
        'ANC',
        'NATIVITY',
        'RELP',
        'DEAR',
        'DEYE',
        'DREM',
        'RAC1P',
        'GCL',
        'ESR',
        'OCCP',
        'WKHP',
    ],
    target='POVPIP',
    target_transform=lambda x: x < 250,
    group='RAC1P',
    # Identity preprocess: no rows are filtered out for this task.
    preprocess=lambda x: x,
    # Replace missing feature values with -1. The fill value has to be passed
    # by keyword: the second positional parameter of np.nan_to_num is `copy`,
    # so the previous `np.nan_to_num(x, -1)` filled NaNs with the default 0.0.
    postprocess=lambda x: np.nan_to_num(x, nan=-1),
)



In [None]:
# Binary income task: the label is True when PINCP > 50000, rows are grouped
# by the RAC1P race code, and folktables.adult_filter is applied first.
#
# NOTE(review): later cells run `from folktables import ..., ACSIncome, ...`,
# which rebinds the name `ACSIncome` to the object exported by the library.
# Drop `ACSIncome` from those imports if this local definition is the one
# that should be used.
ACSIncome = folktables.BasicProblem(
    features=[
        'AGEP',
        'COW',
        'SCHL',
        'MAR',
        'OCCP',
        'POBP',
        'RELP',
        'WKHP',
        'SEX',
        'RAC1P',
    ],
    target='PINCP',
    target_transform=lambda x: x > 50000,
    group='RAC1P',
    preprocess=folktables.adult_filter,
    # Replace missing feature values with -1. The fill value has to be passed
    # by keyword: the second positional parameter of np.nan_to_num is `copy`,
    # so the previous `np.nan_to_num(x, -1)` filled NaNs with the default 0.0.
    postprocess=lambda x: np.nan_to_num(x, nan=-1),
)

In [None]:
import numpy as np
import pandas as pd

import folktables
# Variant of the income task used for the male/female comparison: AGEP is not
# among the features, the label is True when PINCP > 25000, and rows are
# grouped by SEX (1 = Male, 2 = Female per ACSIncome_categories).
ACSIncomeNew = folktables.BasicProblem(
    features=[
        'COW',
        'SCHL',
        'MAR',
        'OCCP',
        'POBP',
        'RELP',
        'WKHP',
        'SEX',
        'RAC1P',
    ],
    target='PINCP',
    target_transform=lambda x: x > 25000,
    group='SEX',
    preprocess=folktables.adult_filter,
    # Replace missing feature values with -1. The fill value has to be passed
    # by keyword: the second positional parameter of np.nan_to_num is `copy`,
    # so the previous `np.nan_to_num(x, -1)` filled NaNs with the default 0.0.
    postprocess=lambda x: np.nan_to_num(x, nan=-1),
)

**Group: 'SEX' (male/female), prediction task: ACSIncomeNew, state: CA**

In [None]:
from folktables import ACSDataSource, ACSEmployment, ACSIncome, generate_categories
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, MaxAbsScaler
from sklearn.inspection import permutation_importance
from sklearn.pipeline import make_pipeline


from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.metrics import confusion_matrix, accuracy_score

from sklearn.impute import KNNImputer

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

from sklearn.model_selection import train_test_split, KFold, ShuffleSplit

from scipy.stats import sem
from tqdm import tqdm


# import fairlearn
from fairlearn.metrics import equalized_odds_difference
from fairlearn.metrics import equalized_odds_ratio
from fairlearn.metrics import true_positive_rate, false_positive_rate



# Person-level records from the 2018 1-Year ACS survey. The data source is
# configured once and then asked for the California ("CA") sample, with
# download=True passed so the files can be fetched if needed.
data_source = ACSDataSource(
    survey='person',
    horizon='1-Year',
    survey_year='2018',
)
acs_ca = data_source.get_data(download=True, states=["CA"])

In [None]:
# Build the feature, label and group arrays for California.
#
# Notes / TODO:
# - Be able to explain what ca_label, ca_features and ca_group contain, and
#   which values appear in ca_group.
# - Plot histograms of the individual features to understand the data.
# - Work out which features are useful for the group label and the prediction.
# - The group-wise rates came out as NaN because the denominator was 0: the
#   group labels were not 1 and 2. That happened because this cell used the
#   RAC1P-grouped `ACSIncome` task; the male/female analysis needs
#   `ACSIncomeNew`, whose group column is SEX.

ca_features, ca_label, ca_group = ACSIncomeNew.df_to_numpy(acs_ca)

# X_train, X_test, y_train, y_test, group_train, group_test = train_test_split(
#     ca_features, ca_label, ca_group, test_size=0.2, random_state=0)

# model = make_pipeline(StandardScaler(), LogisticRegression())
# model.fit(X_train, y_train)

# yhat = model.predict(X_test)



# # print(len(y_test), len(yhat))
# print(ca_features)
# print(ca_label)
# print(ca_group)

# Show the group code of every row (last expression -> rich display).
pd.DataFrame(ca_group)


    
#     #library imports
# ca_eq_odd_diff = equalized_odds_difference(y_test, yhat, sensitive_features=group_test)
# ca_eq_odd_ratio = equalized_odds_ratio(y_test, yhat, sensitive_features=group_test)


# # print("Number of Positive samples for the 'male' group:", np.sum((y_test == 1) & (group_test == 1)))

# # print("Number of Positive samples for the 'female' group:", np.sum((y_test == 1) & (group_test == 2)))


# # print("Number of Negative samples for the 'male' group:", np.sum((y_test == 0) & (group_test == 1)))

# # print("Number of Negative samples for the 'female' group:", np.sum((y_test == 0) & (group_test == 2)))

# m_TP = np.sum((y_test == 1) & (yhat == 1) & (group_test == 1))
# m_TN = np.sum((y_test == 0) & (yhat == 0) & (group_test == 1))
# m_FP = np.sum((y_test == 0) & (yhat == 1) & (group_test == 1))
# m_FN = np.sum((y_test == 1) & (yhat == 0) & (group_test == 1))

# f_TP = np.sum((y_test == 1) & (yhat == 1)  & (group_test == 2))
# f_TN = np.sum((y_test == 0) & (yhat == 0) & (group_test == 2))
# f_FP = np.sum((y_test == 0) & (yhat == 1) & (group_test == 2))
# f_FN = np.sum((y_test == 1) & (yhat == 0)  & (group_test == 2))

# # TPR = TP / (TP + FN)
# male_tnr = m_TN / (m_TN + m_FP)
# female_TNR = f_TN / (f_TN + f_FP)


# male_FPR = m_FP / (m_FP + m_TN)
# female_FPR = f_FP / (f_FP + f_TN)
# # FNR = FN / (FN + TP)

# # white_fpr = np.mean(yhat[(y_test == 1) & (group_test == 0)])
# # male_fpr = np.mean(yhat[(y_test == 1) & (group_test == 0)])
# # female_fpr = np.mean(yhat[(y_test == 1) & (group_test == 2)])

# male_tpr = np.mean(yhat[(y_test == 1) & (group_test == 1)])
# female_tpr = np.mean(yhat[(y_test == 1) & (group_test == 2)])

# male_tnr = 1 - male_fpr
# # white_tnr = np.mean(yhat[(y_test == 0) & (group_test == 0)])
# # white_tnr = 1- white_fpr
# female_tnr = np.mean(yhat[(y_test == 0) & (group_test == 1)])



# # fnr = 1- tpr ? (getting nan otherwise)

# # TODO: Figure out why empty slice for W partition but not B
# # white_fnr = np.mean(yhat[(y_test == 1) & (group_test == 0)])
# male_fnr = 1 - male_tpr  
# female_fnr = np.mean(yhat[(y_test == 0) & (group_test == 2)])




    

# # Equality of opportunity violation: 0.0397
# ca_equality_violation = male_tpr - female_tpr



# true_parity = np.abs(true_positive_rate(y_test[(group_test == 1)], yhat[(group_test == 1)]) - true_positive_rate(y_test[(group_test == 2)], yhat[(group_test == 2)]))
# parity = (true_positive_rate(y_test[(group_test == 1)], yhat[(group_test == 1)]) + true_positive_rate(y_test[(group_test == 2)], yhat[(group_test == 2)])) / 2

# male_accuracy = accuracy_score(y_test[group_test == 1], yhat[group_test == 1])
# female_accuracy = accuracy_score(y_test[group_test == 2], yhat[group_test == 2])

# print("Number of samples for the 'male' group:", np.sum((group_test == 1)))


# print("Number of samples for the 'female' group:", np.sum((group_test == 2)))
# print("CA Data")
# print("Predicting Income By Gender (M/F)")

# data = {
#     'Metric': ['Equality Violation', 'Eq Odds Difference', 'Eq Odds Ratio',
#                'True Parity', 'Parity',
#                '(M-TPR)', '(F-TPR)',
#                '(M-TNR)', '(F-TNR)',
#                '(M-FPR)', '(F-FPR)',
#                '(M-FNR)', '(F-FNR)',
#                'Accuracy (Male)', 'Accuracy (Female)'],
#     'Value': [ca_equality_violation, ca_eq_odd_diff, ca_eq_odd_ratio,
#                true_parity, parity,
#                male_tpr, female_tpr, male_tnr, female_tnr, male_fpr, female_fpr, male_fnr, female_fnr,
#                male_accuracy, female_accuracy]
# }

# table_df = pd.DataFrame(data)

# # Display the table
# print(table_df)

In [None]:
# df_to_numpy returns NumPy arrays, which have no .head(); wrap the array in a
# DataFrame to preview the first rows.
pd.DataFrame(ca_features).head()

In [None]:
# The label array is named `ca_label` (singular) and is a NumPy array, so wrap
# it in a Series to preview the first values.
pd.Series(ca_label, name="label").head()

**Group: 'RAC1P' (Black/White), prediction task: ACSIncome, state: TX**

In [None]:
from folktables import ACSDataSource, ACSEmployment, ACSIncome, generate_categories
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, MaxAbsScaler
from sklearn.inspection import permutation_importance
from sklearn.pipeline import make_pipeline

from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.metrics import confusion_matrix, accuracy_score

from sklearn.impute import KNNImputer

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

from sklearn.model_selection import train_test_split, KFold, ShuffleSplit

from scipy.stats import sem
from tqdm import tqdm


# import fairlearn
from fairlearn.metrics import equalized_odds_difference
from fairlearn.metrics import equalized_odds_ratio
from fairlearn.metrics import true_positive_rate, false_positive_rate



# Texas / ACSIncome: fit a logistic-regression baseline and compare its error
# rates between the White (RAC1P == 1) and Black (RAC1P == 2) groups.
data_source = ACSDataSource(survey_year='2018', horizon='1-Year', survey='person')

acs_tx = data_source.get_data(states=["TX"], download=True)
tx_features, tx_label, tx_group = ACSIncome.df_to_numpy(acs_tx)

X_train, X_test, y_train, y_test, group_train, group_test = train_test_split(
    tx_features, tx_label, tx_group, test_size=0.2, random_state=0)

model = make_pipeline(StandardScaler(), LogisticRegression())
model.fit(X_train, y_train)

yhat = model.predict(X_test)

# NOTE(review): these two fairlearn metrics are computed over every RAC1P
# group present in the test split, not only the White and Black groups.
tx_eq_odd_diff = equalized_odds_difference(y_test, yhat, sensitive_features=group_test)
tx_eq_odd_ratio = equalized_odds_ratio(y_test, yhat, sensitive_features=group_test)

# Row masks reused by all the group-wise rates below. RAC1P has no code 0, so
# the earlier `group_test == 0` filter selected nothing and produced NaN.
is_white = group_test == 1
is_black = group_test == 2
actual_pos = y_test == 1
actual_neg = y_test == 0

print("Number of samples for the 'white' group:", np.sum(is_white))
print("Number of samples for the 'black' group:", np.sum(is_black))

# The mean of the predictions over a slice is the share predicted positive.
# TPR conditions on actual positives; FPR conditions on actual negatives.
white_tpr = np.mean(yhat[actual_pos & is_white])
black_tpr = np.mean(yhat[actual_pos & is_black])

white_fpr = np.mean(yhat[actual_neg & is_white])
black_fpr = np.mean(yhat[actual_neg & is_black])

# The remaining rates are complements: TNR = 1 - FPR and FNR = 1 - TPR.
white_tnr = 1 - white_fpr
black_tnr = 1 - black_fpr

white_fnr = 1 - white_tpr
black_fnr = 1 - black_tpr

white_accuracy = accuracy_score(y_test[is_white], yhat[is_white])
black_accuracy = accuracy_score(y_test[is_black], yhat[is_black])

# Equality of opportunity violation: signed TPR gap (White minus Black).
tx_equality_violation = white_tpr - black_tpr

# fairlearn's TPR per group, computed once and reused for both summaries.
fl_white_tpr = true_positive_rate(y_test[is_white], yhat[is_white])
fl_black_tpr = true_positive_rate(y_test[is_black], yhat[is_black])
true_parity = np.abs(fl_white_tpr - fl_black_tpr)
parity = (fl_white_tpr + fl_black_tpr) / 2


print("Texas Data")
print("Predicting Income By Race (B/W)")
data = {
    'Metric': ['Equality Violation', 'Eq Odds Difference', 'Eq Odds Ratio',
               'True Parity', 'Parity',
               '(W-TPR)', '(B-TPR)',
               '(W-TNR)', '(B-TNR)',
               '(W-FPR)', '(B-FPR)',
               '(W-FNR)', '(B-FNR)',
               'Accuracy (White)', 'Accuracy (Black)'],
    'Value': [tx_equality_violation, tx_eq_odd_diff, tx_eq_odd_ratio,
               true_parity, parity,
               white_tpr, black_tpr, white_tnr, black_tnr, white_fpr, black_fpr, white_fnr, black_fnr,
               white_accuracy, black_accuracy]
}
table_df = pd.DataFrame(data)

# Display the table
print(table_df)


**Group: 'RAC1P' (Black/White), prediction task: ACSIncomePovertyRatio, state: TX**

In [None]:
from folktables import ACSDataSource, ACSEmployment, ACSIncome, generate_categories
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, MaxAbsScaler
from sklearn.inspection import permutation_importance
from sklearn.pipeline import make_pipeline

from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.metrics import confusion_matrix, accuracy_score

from sklearn.impute import KNNImputer

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

from sklearn.model_selection import train_test_split, KFold, ShuffleSplit
from sklearn import metrics

from scipy.stats import sem
from tqdm import tqdm


# import fairlearn
from fairlearn.metrics import equalized_odds_difference
from fairlearn.metrics import equalized_odds_ratio
from fairlearn.metrics import true_positive_rate, false_positive_rate



# Texas / ACSIncomePovertyRatio: fit a logistic-regression baseline and compare
# its error rates between the White (RAC1P == 1) and Black (RAC1P == 2) groups.
data_source = ACSDataSource(survey_year='2018', horizon='1-Year', survey='person')

acs_tx = data_source.get_data(states=["TX"], download=True)
tx_features, tx_label, tx_group = ACSIncomePovertyRatio.df_to_numpy(acs_tx)

X_train, X_test, y_train, y_test, group_train, group_test = train_test_split(
    tx_features, tx_label, tx_group, test_size=0.2, random_state=0)

model = make_pipeline(StandardScaler(), LogisticRegression())
model.fit(X_train, y_train)

yhat = model.predict(X_test)

# NOTE(review): these two fairlearn metrics are computed over every RAC1P
# group present in the test split, not only the White and Black groups.
tx_eq_odd_diff = equalized_odds_difference(y_test, yhat, sensitive_features=group_test)
tx_eq_odd_ratio = equalized_odds_ratio(y_test, yhat, sensitive_features=group_test)

# Row masks reused by all the group-wise rates below.
is_white = group_test == 1
is_black = group_test == 2
actual_pos = y_test == 1
actual_neg = y_test == 0

print("Number of samples for the 'white' group:", np.sum(is_white))
print("Number of samples for the 'black' group:", np.sum(is_black))

# The mean of the predictions over a slice is the share predicted positive.
# TPR conditions on actual positives; FPR conditions on actual negatives.
white_tpr = np.mean(yhat[actual_pos & is_white])
black_tpr = np.mean(yhat[actual_pos & is_black])

# white_fpr was previously never assigned in this cell, so `1 - white_fpr`
# silently reused whatever value an earlier cell had left in the kernel.
white_fpr = np.mean(yhat[actual_neg & is_white])
black_fpr = np.mean(yhat[actual_neg & is_black])

# The remaining rates are complements: TNR = 1 - FPR and FNR = 1 - TPR.
white_tnr = 1 - white_fpr
black_tnr = 1 - black_fpr

white_fnr = 1 - white_tpr
black_fnr = 1 - black_tpr

white_accuracy = accuracy_score(y_test[is_white], yhat[is_white])
black_accuracy = accuracy_score(y_test[is_black], yhat[is_black])

# Equality of opportunity violation: signed TPR gap (White minus Black).
tx_equality_violation = white_tpr - black_tpr

# fairlearn's TPR per group, computed once and reused for both summaries.
fl_white_tpr = true_positive_rate(y_test[is_white], yhat[is_white])
fl_black_tpr = true_positive_rate(y_test[is_black], yhat[is_black])
true_parity = np.abs(fl_white_tpr - fl_black_tpr)
parity = (fl_white_tpr + fl_black_tpr) / 2


print("Texas Data")
# The label of this task is POVPIP < 250, not an income threshold.
print("Predicting Income-to-Poverty Ratio By Race (B/W)")
data = {
    'Metric': ['Equality Violation', 'Eq Odds Difference', 'Eq Odds Ratio',
               'True Parity', 'Parity',
               '(W-TPR)', '(B-TPR)',
               '(W-TNR)', '(B-TNR)',
               '(W-FPR)', '(B-FPR)',
               '(W-FNR)', '(B-FNR)',
               'Accuracy (White)', 'Accuracy (Black)'],
    'Value': [tx_equality_violation, tx_eq_odd_diff, tx_eq_odd_ratio,
               true_parity, parity,
               white_tpr, black_tpr, white_tnr, black_tnr, white_fpr, black_fpr, white_fnr, black_fnr,
               white_accuracy, black_accuracy]
}
table_df = pd.DataFrame(data)

# Display the table
print(table_df)


**Group: 'RAC1P' (Black/White), prediction task: ACSIncomePovertyRatio, state: CA**

In [None]:
from folktables import ACSDataSource, ACSEmployment, ACSIncome, generate_categories
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, MaxAbsScaler
from sklearn.inspection import permutation_importance
from sklearn.pipeline import make_pipeline

from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.metrics import confusion_matrix, accuracy_score

from sklearn.impute import KNNImputer

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

from sklearn.model_selection import train_test_split, KFold, ShuffleSplit

from scipy.stats import sem
from tqdm import tqdm


# import fairlearn
from fairlearn.metrics import equalized_odds_difference
from fairlearn.metrics import equalized_odds_ratio
from fairlearn.metrics import true_positive_rate, false_positive_rate



# California / ACSIncomePovertyRatio: fit a logistic-regression baseline and
# compare its error rates between the White (RAC1P == 1) and Black
# (RAC1P == 2) groups.
#
# This cell previously loaded the Texas sample again even though the section
# is about California. The results are stored under `ca_pov_*` names so the
# `tx_*` values produced by the Texas cell are left untouched.
data_source = ACSDataSource(survey_year='2018', horizon='1-Year', survey='person')

acs_ca = data_source.get_data(states=["CA"], download=True)
ca_pov_features, ca_pov_label, ca_pov_group = ACSIncomePovertyRatio.df_to_numpy(acs_ca)

X_train, X_test, y_train, y_test, group_train, group_test = train_test_split(
    ca_pov_features, ca_pov_label, ca_pov_group, test_size=0.2, random_state=0)

model = make_pipeline(StandardScaler(), LogisticRegression())
model.fit(X_train, y_train)

yhat = model.predict(X_test)

# NOTE(review): these two fairlearn metrics are computed over every RAC1P
# group present in the test split, not only the White and Black groups.
ca_pov_eq_odd_diff = equalized_odds_difference(y_test, yhat, sensitive_features=group_test)
ca_pov_eq_odd_ratio = equalized_odds_ratio(y_test, yhat, sensitive_features=group_test)

# Row masks reused by all the group-wise rates below. RAC1P has no code 0, so
# the earlier `group_test == 0` filter selected nothing and produced NaN.
is_white = group_test == 1
is_black = group_test == 2
actual_pos = y_test == 1
actual_neg = y_test == 0

print("Number of samples for the 'white' group:", np.sum(is_white))
print("Number of samples for the 'black' group:", np.sum(is_black))

# The mean of the predictions over a slice is the share predicted positive.
# TPR conditions on actual positives; FPR conditions on actual negatives.
white_tpr = np.mean(yhat[actual_pos & is_white])
black_tpr = np.mean(yhat[actual_pos & is_black])

white_fpr = np.mean(yhat[actual_neg & is_white])
black_fpr = np.mean(yhat[actual_neg & is_black])

# The remaining rates are complements: TNR = 1 - FPR and FNR = 1 - TPR.
white_tnr = 1 - white_fpr
black_tnr = 1 - black_fpr

white_fnr = 1 - white_tpr
black_fnr = 1 - black_tpr

white_accuracy = accuracy_score(y_test[is_white], yhat[is_white])
black_accuracy = accuracy_score(y_test[is_black], yhat[is_black])

# Equality of opportunity violation: signed TPR gap (White minus Black).
ca_pov_equality_violation = white_tpr - black_tpr

# fairlearn's TPR per group, computed once and reused for both summaries.
fl_white_tpr = true_positive_rate(y_test[is_white], yhat[is_white])
fl_black_tpr = true_positive_rate(y_test[is_black], yhat[is_black])
true_parity = np.abs(fl_white_tpr - fl_black_tpr)
parity = (fl_white_tpr + fl_black_tpr) / 2


print("California Data")
# The label of this task is POVPIP < 250, not an income threshold.
print("Predicting Income-to-Poverty Ratio By Race (B/W)")
data = {
    'Metric': ['Equality Violation', 'Eq Odds Difference', 'Eq Odds Ratio',
               'True Parity', 'Parity',
               '(W-TPR)', '(B-TPR)',
               '(W-TNR)', '(B-TNR)',
               '(W-FPR)', '(B-FPR)',
               '(W-FNR)', '(B-FNR)',
               'Accuracy (White)', 'Accuracy (Black)'],
    'Value': [ca_pov_equality_violation, ca_pov_eq_odd_diff, ca_pov_eq_odd_ratio,
               true_parity, parity,
               white_tpr, black_tpr, white_tnr, black_tnr, white_fpr, black_fpr, white_fnr, black_fnr,
               white_accuracy, black_accuracy]
}
table_df = pd.DataFrame(data)

# Display the table
print(table_df)
