In [1]:
import pandas as pd
from sklearn.metrics import confusion_matrix

def compute_fairness_metrics1(df, thresholds, group_column):
    """
    Computes fairness metrics for different groups using group-specific thresholds.
    
    Parameters:
    - df (pd.DataFrame): Data containing 'True_Label' and 'Predicted_Score'.
    - thresholds (dict): Dictionary mapping group values (e.g., 0 for DAVIS, 1 for Pharos) to their classification threshold.
    - group_column (str): Column name specifying the group membership.
    
    Returns:
    - dict: Fairness metrics including Demographic Parity, Equalized Odds, Equal Opportunity, Disparate Impact, FNR, and FOR.
    """
    
    # Apply group-specific threshold to determine predicted labels
    df['Predicted_Label'] = df.apply(lambda row: int(row['Predicted_Score'] >= thresholds[row[group_column]]), axis=1)

    # Separate groups
    group_1 = df[df[group_column] == 1]  # Pharos (understudied proteins)
    group_0 = df[df[group_column] == 0]  # DAVIS (well-studied proteins)

    # Compute Demographic Parity
    dp_group_1 = group_1['Predicted_Label'].mean()
    dp_group_0 = group_0['Predicted_Label'].mean()
    demographic_parity = abs(dp_group_1 - dp_group_0)

    # Compute Confusion Matrices
    cm_1 = confusion_matrix(group_1['True_Label'], group_1['Predicted_Label'])
    cm_0 = confusion_matrix(group_0['True_Label'], group_0['Predicted_Label'])

    # Extract True Positive Rate (TPR) and False Positive Rate (FPR)
    tpr_1 = cm_1[1, 1] / (cm_1[1, 0] + cm_1[1, 1]) if (cm_1[1, 0] + cm_1[1, 1]) > 0 else 0
    tpr_0 = cm_0[1, 1] / (cm_0[1, 0] + cm_0[1, 1]) if (cm_0[1, 0] + cm_0[1, 1]) > 0 else 0

    fpr_1 = cm_1[0, 1] / (cm_1[0, 0] + cm_1[0, 1]) if (cm_1[0, 0] + cm_1[0, 1]) > 0 else 0
    fpr_0 = cm_0[0, 1] / (cm_0[0, 0] + cm_0[0, 1]) if (cm_0[0, 0] + cm_0[0, 1]) > 0 else 0

    equalized_odds = abs(tpr_1 - tpr_0) + abs(fpr_1 - fpr_0)
    equal_opportunity = abs(tpr_1 - tpr_0)

    # Compute Disparate Impact
    dp_impact = dp_group_1 / dp_group_0 if dp_group_0 > 0 else float('inf')

    # Compute False Negative Rate (FNR) and False Omission Rate (FOR)
    fnr_1 = cm_1[1, 0] / (cm_1[1, 0] + cm_1[1, 1]) if (cm_1[1, 0] + cm_1[1, 1]) > 0 else 0
    fnr_0 = cm_0[1, 0] / (cm_0[1, 0] + cm_0[1, 1]) if (cm_0[1, 0] + cm_0[1, 1]) > 0 else 0

    for_1 = cm_1[0, 0] / (cm_1[0, 0] + cm_1[0, 1]) if (cm_1[0, 0] + cm_1[0, 1]) > 0 else 0
    for_0 = cm_0[0, 0] / (cm_0[0, 0] + cm_0[0, 1]) if (cm_0[0, 0] + cm_0[0, 1]) > 0 else 0

    return {
        'Demographic Parity': demographic_parity,
        'Equalized Odds': equalized_odds,
        'Equal Opportunity': equal_opportunity,
        'Disparate Impact': dp_impact,
        'False Negative Rate (Pharos)': fnr_1,
        'False Negative Rate (DAVIS)': fnr_0,
        'False Omission Rate (Pharos)': for_1,
        'False Omission Rate (DAVIS)': for_0
    }


In [2]:
# Load datasets
df_davis = pd.read_csv("test_predictions_1_19.csv")
df_pharos = pd.read_csv("test_predictions_pharos (2).csv")

# Add a group column
df_davis['Group'] = 0  # DAVIS (well-studied proteins)
df_pharos['Group'] = 1  # PHAROS (understudied proteins)

# Merge both datasets
df = pd.concat([df_davis, df_pharos], ignore_index=True)

# Define group-specific thresholds
thresholds = {0: 0.297257661819458, 1: 0.019704099744558334}

# Compute fairness metrics
fairness_results = compute_fairness_metrics1(df, thresholds, 'Group')

# Print results
for metric, value in fairness_results.items():
    print(f"{metric}: {value:.4f}")


Demographic Parity: 0.4832
Equalized Odds: 0.4052
Equal Opportunity: 0.0259
Disparate Impact: 2.8537
False Negative Rate (Pharos): 0.1186
False Negative Rate (DAVIS): 0.0927
False Omission Rate (Pharos): 0.3943
False Omission Rate (DAVIS): 0.7736


In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load datasets
df_davis = pd.read_csv("test_predictions_1_19.csv")
df_pharos = pd.read_csv("test_predictions_pharos (2).csv")

# Add source column
df_davis['Group'] = 'DAVIS'
df_pharos['Group'] = 'PHAROS'

# Merge datasets
df = pd.concat([df_davis, df_pharos], ignore_index=True)

# Define classification thresholds
thresholds = {'DAVIS': 0.297257661819458, 'PHAROS': 0.019704099744558334}

# Apply thresholds to classify predictions
df['Predicted_Label'] = df.apply(lambda row: 1 if row['Predicted_Score'] >= thresholds[row['Group']] else 0, axis=1)

# Compute fairness metrics
def compute_fairness_metrics2(df):
    metrics = {}
    for group in df['Group'].unique():
        subset = df[df['Group'] == group]
        y_true, y_pred = subset['True_Label'], subset['Predicted_Label']
        
        metrics[group] = {
            'Accuracy': accuracy_score(y_true, y_pred),
            'Precision': precision_score(y_true, y_pred, zero_division=0),
            'Recall': recall_score(y_true, y_pred, zero_division=0),
            'F1-Score': f1_score(y_true, y_pred, zero_division=0),
            'Positive Rate': y_pred.mean(),  # Proportion classified as positive
        }
    
    # Compute fairness disparities
    davis_metrics, pharos_metrics = metrics['DAVIS'], metrics['PHAROS']
    fairness_disparities = {
        'Accuracy Gap': davis_metrics['Accuracy'] - pharos_metrics['Accuracy'],
        'Precision Gap': davis_metrics['Precision'] - pharos_metrics['Precision'],
        'Recall Gap': davis_metrics['Recall'] - pharos_metrics['Recall'],
        'F1-Score Gap': davis_metrics['F1-Score'] - pharos_metrics['F1-Score'],
        'Positive Rate Gap': davis_metrics['Positive Rate'] - pharos_metrics['Positive Rate'],
    }
    
    return metrics, fairness_disparities

metrics, disparities = compute_fairness_metrics2(df)

# Print results
print("Group-wise Metrics:")
for group, metric_vals in metrics.items():
    print(f"\n{group}:")
    for metric, value in metric_vals.items():
        print(f"  {metric}: {value:.4f}")

print("\nFairness Disparities (DAVIS - PHAROS):")
for metric, value in disparities.items():
    print(f"  {metric}: {value:.4f}")


Group-wise Metrics:

DAVIS:
  Accuracy: 0.7803
  Precision: 0.1752
  Recall: 0.9073
  F1-Score: 0.2937
  Positive Rate: 0.2607

PHAROS:
  Accuracy: 0.6383
  Precision: 0.5937
  Recall: 0.8814
  F1-Score: 0.7095
  Positive Rate: 0.7439

Fairness Disparities (DAVIS - PHAROS):
  Accuracy Gap: 0.1420
  Precision Gap: -0.4185
  Recall Gap: 0.0259
  F1-Score Gap: -0.4158
  Positive Rate Gap: -0.4832


In [8]:
# Load datasets
df_davis = pd.read_csv("test_predictions_b3.csv")
df_pharos = pd.read_csv("test_predictions_pharos_allp (2).csv")

# Add a group column
df_davis['Group'] = 0  # DAVIS (well-studied proteins)
df_pharos['Group'] = 1  # PHAROS (understudied proteins)

# Merge both datasets
df = pd.concat([df_davis, df_pharos], ignore_index=True)

# Define group-specific thresholds
thresholds = {0: 0.007697489112615585, 1: 0.025588534772396088}

# Compute fairness metrics
fairness_results = compute_fairness_metrics1(df, thresholds, 'Group')

# Print results
for metric, value in fairness_results.items():
    print(f"{metric}: {value:.4f}")

# Add source column
df_davis['Group'] = 'DAVIS'
df_pharos['Group'] = 'PHAROS'

# Merge datasets
df = pd.concat([df_davis, df_pharos], ignore_index=True)

# Define classification thresholds
thresholds = {'DAVIS': 0.007697489112615585
, 'PHAROS': 0.025588534772396088}

# Apply thresholds to classify predictions
df['Predicted_Label'] = df.apply(lambda row: 1 if row['Predicted_Score'] >= thresholds[row['Group']] else 0, axis=1)

metrics, disparities = compute_fairness_metrics2(df)

# Print results
print("Group-wise Metrics:")
for group, metric_vals in metrics.items():
    print(f"\n{group}:")
    for metric, value in metric_vals.items():
        print(f"  {metric}: {value:.4f}")

print("\nFairness Disparities (DAVIS - PHAROS):")
for metric, value in disparities.items():
    print(f"  {metric}: {value:.4f}")

Demographic Parity: 0.2464
Equalized Odds: 0.1034
Equal Opportunity: 0.0931
Disparate Impact: 1.5333
False Negative Rate (Pharos): 0.2346
False Negative Rate (DAVIS): 0.1415
False Omission Rate (Pharos): 0.6000
False Omission Rate (DAVIS): 0.5897
Group-wise Metrics:

DAVIS:
  Accuracy: 0.6207
  Precision: 0.2141
  Recall: 0.8585
  F1-Score: 0.3427
  Positive Rate: 0.4620

PHAROS:
  Accuracy: 0.7396
  Precision: 0.9118
  Recall: 0.7654
  F1-Score: 0.8322
  Positive Rate: 0.7083

Fairness Disparities (DAVIS - PHAROS):
  Accuracy Gap: -0.1189
  Precision Gap: -0.6976
  Recall Gap: 0.0931
  F1-Score Gap: -0.4895
  Positive Rate Gap: -0.2464


In [9]:
# Load datasets
df_davis = pd.read_csv("test_predictions_b3.csv")
df_pharos = pd.read_csv("test_predictions_pharos (2).csv")

# Add a group column
df_davis['Group'] = 0  # DAVIS (well-studied proteins)
df_pharos['Group'] = 1  # PHAROS (understudied proteins)

# Merge both datasets
df = pd.concat([df_davis, df_pharos], ignore_index=True)

# Define group-specific thresholds
thresholds = {0: 0.007697489112615585, 1: 0.019704099744558334}

# Compute fairness metrics
fairness_results = compute_fairness_metrics1(df, thresholds, 'Group')

# Print results
for metric, value in fairness_results.items():
    print(f"{metric}: {value:.4f}")

# Add source column
df_davis['Group'] = 'DAVIS'
df_pharos['Group'] = 'PHAROS'

# Merge datasets
df = pd.concat([df_davis, df_pharos], ignore_index=True)

# Define classification thresholds
thresholds = {'DAVIS': 0.007697489112615585
, 'PHAROS': 0.019704099744558334}

# Apply thresholds to classify predictions
df['Predicted_Label'] = df.apply(lambda row: 1 if row['Predicted_Score'] >= thresholds[row['Group']] else 0, axis=1)

metrics, disparities = compute_fairness_metrics2(df)

# Print results
print("Group-wise Metrics:")
for group, metric_vals in metrics.items():
    print(f"\n{group}:")
    for metric, value in metric_vals.items():
        print(f"  {metric}: {value:.4f}")

print("\nFairness Disparities (DAVIS - PHAROS):")
for metric, value in disparities.items():
    print(f"  {metric}: {value:.4f}")

Demographic Parity: 0.2819
Equalized Odds: 0.2183
Equal Opportunity: 0.0229
Disparate Impact: 1.6102
False Negative Rate (Pharos): 0.1186
False Negative Rate (DAVIS): 0.1415
False Omission Rate (Pharos): 0.3943
False Omission Rate (DAVIS): 0.5897
Group-wise Metrics:

DAVIS:
  Accuracy: 0.6207
  Precision: 0.2141
  Recall: 0.8585
  F1-Score: 0.3427
  Positive Rate: 0.4620

PHAROS:
  Accuracy: 0.6383
  Precision: 0.5937
  Recall: 0.8814
  F1-Score: 0.7095
  Positive Rate: 0.7439

Fairness Disparities (DAVIS - PHAROS):
  Accuracy Gap: -0.0177
  Precision Gap: -0.3795
  Recall Gap: -0.0229
  F1-Score Gap: -0.3667
  Positive Rate Gap: -0.2819
