In [None]:
import os

import numpy as np
import pandas as pd

In [None]:
# Sub-folder of results/ that holds the model outputs to analyse; the analysis
# files produced by this notebook are saved under the same folder.
folder = "no_protected_model"

In [None]:
# Load the saved predictions, test features and test targets for the chosen folder.
preds, testX, testY = (
    np.load(array_path)
    for array_path in (
        f'results/{folder}/pred.npy',
        f'results/{folder}/testX.npy',
        f'results/{folder}/testY.npy',
    )
)
print(f"Shapes: {preds.shape}, {testX.shape}, {testY.shape}")

In [None]:
# Read the partially preprocessed test split, which keeps the column count manageable.
# Full dataset alternative: pd.read_csv('dataset/test_preprocessed.csv')
test_df = pd.read_csv('dataset/test_split_partially_preprocessed.csv')

# The partially preprocessed file was saved without truncation, so cap incomes at 100000 here.
above_cap = test_df['INCWAGE_CPIU_2010'] > 100000
test_df.loc[above_cap, 'INCWAGE_CPIU_2010'] = 100000

test_df.shape

In [None]:
# Peek at the first ten entries of the loaded target array.
testY[:10]

In [None]:
# Same preview as a pandas Series (first ten rows), the form used for the comparison with test_df.
pd.Series(testY).iloc[:10]

In [None]:
# Sanity check that test_df and testY describe the same rows in the same order:
# the (truncated) income column must equal the saved targets element by element.
# The row count is checked first because comparing Series of different lengths
# raises an opaque pandas ValueError instead of a readable assertion failure.
assert len(test_df) == len(testY), (
    f"Row count mismatch: test_df has {len(test_df)} rows but testY has {len(testY)}"
)
assert (test_df.INCWAGE_CPIU_2010 == pd.Series(testY)).all(), (
    "test_df.INCWAGE_CPIU_2010 does not match testY row by row"
)

In [None]:
# Check if SERIAL or PERNUM are in testX
# for i in range(testX.shape[1]):
#     seriesConverted = pd.Series(testX[:,i])
#     if (test_df.SERIAL == seriesConverted).all():
#         print(f"Column {i} is SERIAL")
#     if (test_df.PERNUM == seriesConverted).all():
#         print(f"Column {i} is PERNUM")

In [None]:
# Attach the model predictions as a new column and show the shape before and after.
shape_before = test_df.shape
print("Old shape: ", shape_before)

test_df['Income_Pred'] = preds

shape_after = test_df.shape
print("New shape: ", shape_after)

In [None]:
# Summary statistics of the raw predictions, before any rounding is applied.
test_df.Income_Pred.describe()

In [None]:
# Round predictions to whole units (zero decimals), overwriting the column.
test_df['Income_Pred'] = test_df.Income_Pred.round()

In [None]:
# Summary statistics of the predictions after rounding.
test_df.Income_Pred.describe()

In [None]:
# Summary statistics of the actual (truncated) incomes, for comparison with the predictions.
test_df['INCWAGE_CPIU_2010'].describe()

## Look for biases

### Create underprediction/overprediction data

In [None]:
# How many rounded predictions match the actual income exactly.
exact_hits = test_df['Income_Pred'].round(0) == test_df['INCWAGE_CPIU_2010']
exact_hits.values.sum()

In [None]:
# Signed error: prediction minus actual income, so positive values are
# overpredictions and negative values are underpredictions.
test_df['Pred_Error'] = test_df.Income_Pred.sub(test_df.INCWAGE_CPIU_2010)
test_df.Pred_Error.describe()

In [None]:
# Magnitude of the signed error, ignoring direction.
test_df['Pred_AbsError'] = np.abs(test_df['Pred_Error'])
test_df.Pred_AbsError.describe()

In [None]:
# Print the overall describe() summary of each quantity of interest.
# "Relative error" refers to the signed Pred_Error column (prediction minus actual).
report_sections = (
    ("Income summary", 'INCWAGE_CPIU_2010'),
    ("\nIncome prediction summary", 'Income_Pred'),
    ("\nAbsolute error summary", 'Pred_AbsError'),
    ("\nRelative error summary", 'Pred_Error'),
)
for heading, column_name in report_sections:
    print(heading)
    print(test_df[column_name].describe())

### Test differences in accuracy and under/overprediction rates

In [None]:
# List the columns available in test_df (used to pick the protected attributes).
test_df.columns

In [None]:
# Boolean columns treated as protected attributes in the bias analysis.
protected_cols = [
    # Sex
    'isFemale',
    # Race / ethnicity
    'isAmericanIndian',
    'isAsian',
    'isBlack',
    'isPacificIslander',
    'isWhite',
    'isOtherRace',
    'isHispanic',
    # Place of birth
    'bornInUS',
    # Marital status
    'isMarried',
    'wasMarried',
    'neverMarried',
    # Marriage characteristics
    'sameSexMarriage',
    'mixedRaceMarriage',
]

In [None]:
# Bias scan over the protected attributes. For every column in protected_cols the
# rows are split into a True group and a False group; describe() summaries of both
# groups are printed, and any large gap between the group means is appended to
# results/{folder}/analysis/summary1.txt.
totalRows = len(test_df)  # Total number of rows in the dataset
summaryEntries = 0  # Number of entries in the summary

# (label, column, threshold): when the True-group and False-group means of `column`
# differ by more than `threshold`, an entry is written to the summary file.
# "relative error" is the signed error held in the Pred_Error column.
summaryChecks = [
    ('mean prediction', 'Income_Pred', 1000),
    ('mean absolute error', 'Pred_AbsError', 1000),
    ('mean relative error', 'Pred_Error', 750),
]

# Validate every column before the output file is opened, so that a bad column
# cannot leave a half-written summary behind.
for col in protected_cols:
    if test_df[col].dtype != 'bool':
        raise TypeError(f"Column {col} is not boolean")
    assert not test_df[col].isna().values.any(), f"Column {col} has missing values"

# open(..., 'w') does not create missing parent directories, so make sure the
# analysis folder exists first.
analysisDir = f'results/{folder}/analysis'
os.makedirs(analysisDir, exist_ok=True)

with open(f'{analysisDir}/summary1.txt', 'w') as f:
    for col in protected_cols:
        isMember = test_df[col]

        numTrue = isMember.values.sum()  # Number of entries for which the column is true
        numFalse = totalRows - numTrue  # Number of entries for which the column is false

        dfTrue = test_df[isMember]
        dfFalse = test_df[~isMember]

        # Detailed per-group report; the False group is followed by a wider gap.
        for state, count, group, spacer in (
            ('true', numTrue, dfTrue, "\n"),
            ('false', numFalse, dfFalse, "\n\n"),
        ):
            print(f'Of the people for whom {col} is {state} ({count} of {totalRows} entries, or {count / totalRows * 100}%), actual salaries are:')
            print(group.INCWAGE_CPIU_2010.describe())
            print("Predictions are")
            print(group.Income_Pred.describe())
            print("Absolute error is:")
            print(group.Pred_AbsError.describe())
            print("Relative error is:")
            print(group.Pred_Error.describe())
            print(spacer)

        # Look for interesting cases to add to summary
        for label, metricCol, threshold in summaryChecks:
            meanTrue = dfTrue[metricCol].mean()
            meanFalse = dfFalse[metricCol].mean()
            # An empty group gives a NaN mean; the comparison is then False and
            # no entry is written.
            if abs(meanTrue - meanFalse) > threshold:
                title = label.capitalize()
                f.write(f"{title} for {col} is significantly different than {label} for not {col}\n")
                f.write(f"{title} for {col}: {meanTrue}")
                f.write(f"\t{title} for not {col}: {meanFalse}")
                f.write("\n\n")
                summaryEntries += 1

In [None]:
# Number of entries written to summary1.txt for the protected columns.
summaryEntries

Full analysis: this covers every boolean column in the dataset, not just the ones that are considered protected (non-boolean columns are skipped). Because the detailed text file is so large, I'd recommend either using only the summary or using this in conjunction with the SHAP analysis.

In [None]:
# Bias scan over every boolean column of test_df (non-boolean columns are skipped).
# For each column the rows are split into a True group and a False group;
# describe() summaries of both groups are printed, and any large gap between the
# group means is appended to results/{folder}/analysis/summary2.txt.
totalRows = len(test_df)  # Total number of rows in the dataset
summaryEntries = 0  # Number of entries in the summary

# (label, column, threshold): when the True-group and False-group means of `column`
# differ by more than `threshold`, an entry is written to the summary file.
# "relative error" is the signed error held in the Pred_Error column.
summaryChecks = [
    ('mean prediction', 'Income_Pred', 3000),
    ('mean absolute error', 'Pred_AbsError', 2000),
    ('mean relative error', 'Pred_Error', 1500),
]

# Pick out and validate the boolean columns before the output file is opened,
# so that a failed check cannot leave a half-written summary behind.
boolCols = [col for col in test_df.columns if test_df[col].dtype == 'bool']
for col in boolCols:
    assert not test_df[col].isna().values.any(), f"Column {col} has missing values"

# open(..., 'w') does not create missing parent directories, so make sure the
# analysis folder exists first.
analysisDir = f'results/{folder}/analysis'
os.makedirs(analysisDir, exist_ok=True)

with open(f'{analysisDir}/summary2.txt', 'w') as f:
    for col in boolCols:
        isMember = test_df[col]

        numTrue = isMember.values.sum()  # Number of entries for which the column is true
        numFalse = totalRows - numTrue  # Number of entries for which the column is false

        dfTrue = test_df[isMember]
        dfFalse = test_df[~isMember]

        # Detailed per-group report; the False group is followed by a wider gap.
        for state, count, group, spacer in (
            ('true', numTrue, dfTrue, "\n"),
            ('false', numFalse, dfFalse, "\n\n"),
        ):
            print(f'Of the people for whom {col} is {state} ({count} of {totalRows} entries, or {count / totalRows * 100}%), actual salaries are:')
            print(group.INCWAGE_CPIU_2010.describe())
            print("Predictions are")
            print(group.Income_Pred.describe())
            print("Absolute error is:")
            print(group.Pred_AbsError.describe())
            print("Relative error is:")
            print(group.Pred_Error.describe())
            print(spacer)

        # Look for interesting cases to add to summary
        for label, metricCol, threshold in summaryChecks:
            meanTrue = dfTrue[metricCol].mean()
            meanFalse = dfFalse[metricCol].mean()
            # An empty group gives a NaN mean; the comparison is then False and
            # no entry is written.
            if abs(meanTrue - meanFalse) > threshold:
                title = label.capitalize()
                f.write(f"{title} for {col} is significantly different than {label} for not {col}\n")
                f.write(f"{title} for {col}: {meanTrue}")
                f.write(f"\t{title} for not {col}: {meanFalse}")
                f.write("\n\n")
                summaryEntries += 1

In [None]:
# Number of entries written to summary2.txt across all boolean columns.
summaryEntries