# Bias in AI Coursework

In [363]:
# Importing relevant libraries 
import pandas as pd
import numpy as np
import scipy.stats as stats
import random
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score


### Question 1

In [364]:
# Load the recruitment spreadsheet into a DataFrame
data = pd.read_excel(io="recruitment.xls")

# Preview the first five rows to check the columns were read as expected
data.head(n=5)


Unnamed: 0,ApplicantCode,Gender,BAMEyn,ShortlistedNY,Interviewed,FemaleONpanel,OfferNY,AcceptNY,JoinYN
0,1,1,2,1,1,1.0,1.0,1.0,1.0
1,2,1,2,1,1,1.0,1.0,1.0,1.0
2,3,1,2,1,1,1.0,1.0,1.0,1.0
3,4,1,2,1,1,2.0,1.0,1.0,1.0
4,5,1,2,1,1,2.0,0.0,,


### Question 2

In [365]:
# Spread statistics for the two coded group columns.
# std() and var() are called with the pandas defaults.
_gender_codes = data["Gender"]
_bame_codes = data["BAMEyn"]

mean_gender = _gender_codes.mean()
standard_deviation_gender = _gender_codes.std()
variance_gender = _gender_codes.var()
minimum_gender = _gender_codes.min()
maximum_gender = _gender_codes.max()

mean_BAMEyn = _bame_codes.mean()
standard_deviation_BAMEyn = _bame_codes.std()
variance_BAMEyn = _bame_codes.var()
minimum_BAMEyn = _bame_codes.min()
maximum_BAMEyn = _bame_codes.max()

# Boolean row masks for each group (code 1 / code 2 in the respective column)
_male_rows = _gender_codes == 1
_female_rows = _gender_codes == 2
_BAME_rows = _bame_codes == 1
_nonBAME_rows = _bame_codes == 2

# Recruitment stage columns, in pipeline order; a value of 1 marks a positive outcome
_STAGE_COLUMNS = ("ShortlistedNY", "Interviewed", "OfferNY", "AcceptNY", "JoinYN")


def _funnel_counts(group_rows):
    """Return the group size followed by the count of rows equal to 1 in each stage column.

    `group_rows` is a boolean Series aligned with `data`. Counts are returned as
    plain Python ints, in the order: total, shortlisted, interviewed, offered,
    accepted, joined.
    """
    counts = [int(group_rows.sum())]
    counts += [int((group_rows & (data[column] == 1)).sum()) for column in _STAGE_COLUMNS]
    return counts


# Applicants per group at every stage of the recruitment pipeline
(total_num_male,
 shortlisted_num_of_males,
 interviewed_num_of_males,
 offer_num_of_males,
 accepted_num_of_males,
 joined_num_of_males) = _funnel_counts(_male_rows)

(total_num_female,
 shortlisted_num_of_females,
 interviewed_num_of_females,
 offer_num_of_females,
 accepted_num_of_females,
 joined_num_of_females) = _funnel_counts(_female_rows)

(total_num_BAME,
 shortlisted_num_of_BAME,
 interviewed_num_of_BAME,
 offer_num_of_BAME,
 accepted_num_of_BAME,
 joined_num_of_BAME) = _funnel_counts(_BAME_rows)

(total_num_nonBAME,
 shortlisted_num_of_nonBAME,
 interviewed_num_of_nonBAME,
 offer_num_of_nonBAME,
 accepted_num_of_nonBAME,
 joined_num_of_nonBAME) = _funnel_counts(_nonBAME_rows)


In [366]:
# Report the spread statistics and the per-stage counts for every group


def _show_spread(column, lowest, highest, mean, std_dev, variance):
    """Print minimum, maximum, range, mean, standard deviation and variance of one column."""
    print(f"{column} Minimum Value:", lowest)
    print(f"{column} Maximum Value:", highest)
    print(f"{column} Range:", highest - lowest)
    print(f"{column} Mean:", mean)
    print(f"{column} Standard Deviation:", std_dev)
    print(f"{column} Variance:", variance)


def _show_funnel(group, total, shortlisted, interviewed, offered, accepted, joined):
    """Print the group size and the number of its members reaching each recruitment stage."""
    print(f"Total Number of {group}:", total)
    stages = (
        ("Shortlisted", shortlisted),
        ("Interviewed", interviewed),
        ("Offered", offered),
        ("Accepted", accepted),
        ("Joined", joined),
    )
    for stage, count in stages:
        print(f"{stage} {group}:", count)


_show_spread("Gender", minimum_gender, maximum_gender, mean_gender,
             standard_deviation_gender, variance_gender)
print("\n")
_show_funnel("Males", total_num_male, shortlisted_num_of_males, interviewed_num_of_males,
             offer_num_of_males, accepted_num_of_males, joined_num_of_males)
print("\n")
_show_funnel("Females", total_num_female, shortlisted_num_of_females, interviewed_num_of_females,
             offer_num_of_females, accepted_num_of_females, joined_num_of_females)
print("\n")
_show_spread("BAMEyn", minimum_BAMEyn, maximum_BAMEyn, mean_BAMEyn,
             standard_deviation_BAMEyn, variance_BAMEyn)
print("\n")
_show_funnel("BAME", total_num_BAME, shortlisted_num_of_BAME, interviewed_num_of_BAME,
             offer_num_of_BAME, accepted_num_of_BAME, joined_num_of_BAME)
print("\n")
_show_funnel("Non-BAME", total_num_nonBAME, shortlisted_num_of_nonBAME, interviewed_num_of_nonBAME,
             offer_num_of_nonBAME, accepted_num_of_nonBAME, joined_num_of_nonBAME)


Gender Minimum Value: 1
Gender Maximum Value: 2
Gender Range: 1
Gender Mean: 1.7214285714285715
Gender Standard Deviation: 0.4490987732574203
Gender Variance: 0.2016897081413198


Total Number of Males: 78
Shortlisted Males: 38
Interviewed Males: 27
Offered Males: 18
Accepted Males: 12
Joined Males: 12


Total Number of Females: 202
Shortlisted Females: 50
Interviewed Females: 28
Offered Females: 10
Accepted Females: 6
Joined Females: 6


BAMEyn Minimum Value: 1
BAMEyn Maximum Value: 2
BAMEyn Range: 1
BAMEyn Mean: 1.5678571428571428
BAMEyn Standard Deviation: 0.49626098133697955
BAMEyn Variance: 0.24627496159754195


Total Number of BAME: 121
Shortlisted BAME: 19
Interviewed BAME: 13
Offered BAME: 8
Accepted BAME: 4
Joined BAME: 4


Total Number of Non-BAME: 159
Shortlisted Non-BAME: 69
Interviewed Non-BAME: 42
Offered Non-BAME: 20
Accepted Non-BAME: 14
Joined Non-BAME: 14


### Question 3

In [367]:
# Conditional probabilities of being invited to an interview or offered a job,
# given the applicant's group. Each line shows the fraction as count/total
# followed by its decimal value. The "invited to an interview" probability is
# computed from the shortlisted counts.


def _show_conditional(event, group, favourable, total):
    """Print P(event|group) as 'favourable/total' followed by the decimal quotient."""
    print(f"P({event}|{group}): {favourable}/{total}", int(favourable) / int(total))


# Conditioned on gender
_show_conditional("Invited to an interview", "Male", shortlisted_num_of_males, total_num_male)
_show_conditional("Offered a Job", "Male", offer_num_of_males, total_num_male)
_show_conditional("Invited to an interview", "Female", shortlisted_num_of_females, total_num_female)
_show_conditional("Offered a Job", "Female", offer_num_of_females, total_num_female)
print("\n")

# Conditioned on whether the applicant is BAME
_show_conditional("Invited to an interview", "BAME", shortlisted_num_of_BAME, total_num_BAME)
_show_conditional("Offered a Job", "BAME", offer_num_of_BAME, total_num_BAME)
_show_conditional("Invited to an interview", "Non-BAME", shortlisted_num_of_nonBAME, total_num_nonBAME)
_show_conditional("Offered a Job", "Non-BAME", offer_num_of_nonBAME, total_num_nonBAME)


P(Invited to an interview|Male): 38/78 0.48717948717948717
P(Offered a Job|Male): 18/78 0.23076923076923078
P(Invited to an interview|Female): 50/202 0.24752475247524752
P(Offered a Job|Female): 10/202 0.04950495049504951


P(Invited to an interview|BAME): 19/121 0.15702479338842976
P(Offered a Job|BAME): 8/121 0.06611570247933884
P(Invited to an interview|Non-BAME): 69/159 0.4339622641509434
P(Offered a Job|Non-BAME): 20/159 0.12578616352201258


### Question 5

In [368]:
# 2x2 contingency tables for the chi-squared tests.
# Each row is one group; the columns are [shortlisted, not shortlisted].
_not_shortlisted_BAME = total_num_BAME - shortlisted_num_of_BAME
_not_shortlisted_nonBAME = total_num_nonBAME - shortlisted_num_of_nonBAME
chi_shortlisted_BAMEyn = [
    [shortlisted_num_of_BAME, _not_shortlisted_BAME],
    [shortlisted_num_of_nonBAME, _not_shortlisted_nonBAME],
]

_not_shortlisted_males = total_num_male - shortlisted_num_of_males
_not_shortlisted_females = total_num_female - shortlisted_num_of_females
chi_shortlisted_gender = [
    [shortlisted_num_of_males, _not_shortlisted_males],
    [shortlisted_num_of_females, _not_shortlisted_females],
]

# Significance level that the p-values are compared against
sigval = 0.05


In [369]:
# TEST 1: chi-squared test of independence between BAME status and shortlisting
# Define h0
print("h0: There is no relationship between an applicant that is BAME / not BAME and whether they were shortlisted or not")

# chi2_contingency returns the test statistic, the p-value, the degrees of
# freedom and the table of expected frequencies for the BAMEyn table.
# With one degree of freedom (a 2x2 table) SciPy applies Yates' continuity
# correction by default.
chi_BAMEyn, pvalue_BAMEyn, dof_BAMEyn, expected_BAMEyn = stats.chi2_contingency(chi_shortlisted_BAMEyn)
print("Shortlisted and BAMEyn p value: " + str(pvalue_BAMEyn))

# Evaluate the result against the significance level.
# A p-value above the threshold only means H0 could not be rejected; it does
# not demonstrate that H0 is true or that the variables are independent.
if pvalue_BAMEyn <= sigval:
    print("Shortlisted and BAMEyn DEPEND on one another (reject H0)")
else:
    print("No significant evidence that Shortlisted and BAMEyn depend on one another (fail to reject H0)")
print("\n")
    

h0: There is no relationship between an applicant that is BAME / not BAME and whether they were shortlisted or not
Shortlisted and BAMEyn p value: 1.4720402657609345e-06
Shortlisted and BAMEyn DEPEND on one another (reject H0)




In [370]:
# TEST 2: chi-squared test of independence between gender and shortlisting
# Define h0
print("h0: There is no relationship between an applicant’s gender and whether they were shortlisted or not")

# chi2_contingency returns the test statistic, the p-value, the degrees of
# freedom and the table of expected frequencies for the gender table.
# With one degree of freedom (a 2x2 table) SciPy applies Yates' continuity
# correction by default.
chi_gender, pvalue_gender, dof_gender, expected_gender = stats.chi2_contingency(chi_shortlisted_gender)
# Label the p-value with the variable actually tested here (gender, not BAMEyn)
print("Shortlisted and Gender p value: " + str(pvalue_gender))

# Evaluate the result against the significance level.
# A p-value above the threshold only means H0 could not be rejected; it does
# not demonstrate that H0 is true or that the variables are independent.
if pvalue_gender <= sigval:
    print("Shortlisted and the applicants gender DEPEND on one another (reject H0)")
else:
    print("No significant evidence that Shortlisted and the applicants gender depend on one another (fail to reject H0)")


There is no relationship between an applicant’s gender and whether they were shortlisted or not
Shortlisted and BAMEyn p value: 0.00019227016669192619
Shortlisted and the applicants gender DEPEND on one another (reject H0)


In [371]:
# Disabled scratch checks: row counts (via .shape[0]) of applicants with
# OfferNY == 1 overall and split by Gender code, plus shortlisted rows with
# Gender == 1. The second OfferNY/Gender pair repeats the first pair.
# Nothing in this cell executes.
# print()
# print(data.loc[(data["OfferNY"] == 1)].shape[0])
# print(data.loc[(data["OfferNY"] == 1) & (data["Gender"] == 1)].shape[0])
# print(data.loc[(data["OfferNY"] == 1) & (data["Gender"] == 2)].shape[0])
# print(data.loc[(data["OfferNY"] == 1) & (data["Gender"] == 1)].shape[0])
# print(data.loc[(data["OfferNY"] == 1) & (data["Gender"] == 2)].shape[0])
# print(data.loc[(data["ShortlistedNY"] == 1) & (data["Gender"] == 1)].shape[0])

# data["ShortlistedNY"].sum()