In [None]:
# 🧪 Bias Demonstration: Resume Screening
# This notebook simulates a basic bias scenario often seen in AI-driven hiring systems.
# Goal: Show how biased data can influence classification decisions.

# Imports
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import seaborn as sns
import matplotlib.pyplot as plt

# 1. Create a small biased dataset
# Each tuple is one toy resume: (years_experience, gender, hired).
# Gender is coded 0 = male, 1 = female; the historical hiring outcomes
# are deliberately skewed toward males (3 of 4 males hired vs 1 of 4 females).
resume_rows = [
    # male applicants
    (1, 0, 0),
    (3, 0, 1),
    (5, 0, 1),
    (7, 0, 1),
    # female applicants
    (2, 1, 0),
    (4, 1, 0),
    (6, 1, 0),
    (8, 1, 1),
]
data = pd.DataFrame(
    resume_rows,
    columns=['years_experience', 'gender', 'hired'],
)
data

# 2. Train a logistic regression model
# The classifier is fit on the biased history above so that its predictions
# can be inspected for the same unfairness.
feature_columns = ['years_experience', 'gender']
X = data[feature_columns]
y = data['hired']

# A fixed random_state keeps the train/test shuffle reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training can be chained.
model = LogisticRegression().fit(X_train, y_train)

# Score the held-out rows; zero_division=0 reports 0 for any metric that is
# undefined on such a tiny test split.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, zero_division=0)
print(report)

# 3. Visualize prediction bias
# Box plot of the model's predicted hiring probability, one box per gender.
# Probabilities are computed for every row of the dataset (train and test alike).
hired_class_column = 1  # predict_proba column for class label 1 (hired)
data['predicted_proba'] = model.predict_proba(X)[:, hired_class_column]

# Work with the Axes returned by seaborn rather than the implicit pyplot state.
ax = sns.boxplot(data=data, x='gender', y='predicted_proba')
ax.set_xticks([0, 1])
ax.set_xticklabels(['Male', 'Female'])
ax.set_title('Predicted Hiring Probability by Gender')
plt.show()

# 🔍 Observation
# Even with similar experience levels, the model trained on biased data assigns lower hiring probabilities 
# to female candidates.
#
# Note: This is a minimal toy example for demonstration purposes.
# Real-world datasets are larger and may reveal more pronounced bias effects.
