<a href="https://colab.research.google.com/github/aniruddh47/AIES_LAB/blob/main/ex2_aies.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install fairlearn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from fairlearn.metrics import MetricFrame, selection_rate, true_positive_rate, false_positive_rate, false_negative_rate

# === Step 1: Load the dataset from local path ===

# Replace this path with the correct one on your machine
file_path = '/content/drug_consumption.data'  # e.g., 'path/to/your/file/drug_consumption.data'

# Column names from UCI documentation
columns = [
    "ID", "Age", "Gender", "Education", "Country", "Ethnicity", "Nscore", "Escore", "Oscore", "Ascore", "Cscore",
    "Impulsive", "SS", "Alcohol", "Amphet", "Amyl", "Benzos", "Caff", "Cannabis", "Choc", "Coke", "Crack", "Ecstasy",
    "Heroin", "Ketamine", "Legalh", "LSD", "Meth", "Mushrooms", "Nicotine", "Semer", "VSA"
]

df = pd.read_csv(file_path, header=None, names=columns)

# === Step 2: Preprocessing ===

# Convert categorical drug usage to binary: user (1) vs non-user (0)
def binary_drug_use(col):
    return df[col].apply(lambda x: 1 if x in ['CL1', 'CL2', 'CL3', 'CL4', 'CL5', 'CL6'] else 0)

df['CocaineUser'] = binary_drug_use('Coke')  # target variable
df['Gender'] = df['Gender'].map({-0.48246: 'Female', 0.48246: 'Male'})  # decode gender

# Drop unneeded columns
df = df.drop(['ID', 'Coke'], axis=1)

# Encode gender for sensitive attribute analysis
le = LabelEncoder()
df['Gender_encoded'] = le.fit_transform(df['Gender'])  # 0: Female, 1: Male

# === Step 3: Define features, label, sensitive attribute ===

features = ['Age', 'Education', 'Nscore', 'Escore', 'Oscore', 'Ascore', 'Cscore', 'Impulsive', 'SS']
X = df[features]
y = df['CocaineUser']
sensitive_attr = df['Gender']

# === Step 4: Train/test split ===

X_train, X_test, y_train, y_test, s_train, s_test = train_test_split(
    X, y, sensitive_attr, test_size=0.3, random_state=42, stratify=y
)

# === Step 5: Model training ===

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# === Step 6: Fairness metrics ===

metric_frame = MetricFrame(
    metrics={
        'TPR': true_positive_rate,
        'FPR': false_positive_rate,
        'FNR': false_negative_rate,
        'Selection Rate': selection_rate
    },
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=s_test
)

print("Fairness Metrics by Gender Group:\n", metric_frame.by_group)

# Optional: fairness gap
tpr_gap = abs(metric_frame.by_group['TPR']['Male'] - metric_frame.by_group['TPR']['Female'])
print(f"\nTPR Gap (Male vs Female): {tpr_gap:.3f}")


Collecting fairlearn
  Downloading fairlearn-0.12.0-py3-none-any.whl.metadata (7.0 kB)
Downloading fairlearn-0.12.0-py3-none-any.whl (240 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fairlearn
Successfully installed fairlearn-0.12.0
Fairness Metrics by Gender Group:
              TPR       FPR       FNR  Selection Rate
Gender                                              
Female  0.648276  0.446043  0.351724        0.549296
Male    0.467890  0.213873  0.532110        0.312057

TPR Gap (Male vs Female): 0.180
