In [3]:
from NN import SimpleNN
from LR_pt import LogisticRegression
import torch
import os as os
os.chdir("/Users/karl/Desktop/Fairness/algorithmic-fairness-exam")
from data_loader import data_loader, preprocess
from models_clean import one_hot_cols, features
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from fairlearn.metrics import equalized_odds_difference

In [4]:
one_hot = True

num_samples = 1_000_000
df = data_loader(one_hot_cols, num=num_samples)

# filter columns to only include columns in the features list below
if 'race_ethnicity' in features:
       # remove  'race_ethnicity' from features
       features.remove('race_ethnicity')
else: 
       print("'race_ethnicity' not in features")
x_train, x_val, x_test, y_train,y_val , y_test, train_groups, val_groups,test_groups = preprocess(df, features, one_hot_cols)
print(f'All rows in train_groups sum to 1: {np.allclose(np.sum(train_groups, axis=1), 1)}')

# convert y_train, y_val, y_test to numpy arrays
y_train = y_train.to_numpy()
y_val = y_val.to_numpy()
y_test = y_test.to_numpy()

Loading data...
processed_data.csv exists. Loading data from file.
'race_ethnicity' not in features
x_train: 70.00%
x_val: 15.00%
x_test: 15.00%
Num features BEFORE filtering features 54
Num features AFTER filtering features 14
x_train shape:  (395460, 14)
y_train shape:  (84742, 14)
All rows in train_groups sum to 1: True


In [None]:
model_path = "models/NN_pca:False_E:20_lr:0.001_bs:512.pt"
num_classes = 2

# Set device
device = torch.device('mps' if torch.cuda.is_available() else 'cpu')
# Initialize the model
model = SimpleNN(x_train.shape[1], num_classes).to(device)

# Load the model
model.load_state_dict(torch.load(model_path))

# Evaluate the model
model.eval()
with torch.no_grad():
    # Get the model predictions
    outputs = model(x_test_tensor)
    
    # Convert the predictions to probabilities using softmax
    probabilities, predicted = torch.max(outputs.data, 1)

    # Compute accuracy
    accuracy = accuracy_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy())
    
    # Compute F1 score
    f1 = f1_score(y_test_tensor.cpu().numpy(), predicted.cpu().numpy(), average='weighted')
    print(classification_report(y_test_tensor.cpu().numpy(), predicted.cpu().numpy()))


I have this directory fullpath = "/Users/karl/Desktop/Fairness/algorithmic-fairness-exam/models" where the statedict of 5 pytorch models are saved, nothing else. The models that start with "NN" are of the architechture of the imported class SimpleNN, models with the "LRmodel" have the architechture of LogisticRegression. I want a function that load these models into seperate model variable (with suitable names), and returns them. 

In [11]:
def load_models(input_size, num_classes, models_directory):
    models = {}

    for filename in os.listdir(models_directory):
        if filename.endswith('.pt'):  # if the file is a PyTorch model
            full_path = os.path.join(models_directory, filename)
            state_dict = torch.load(full_path)

            if filename.startswith('NN'):
                model_name = "NN"
                if "pca:True" in filename:
                    model = SimpleNN(9, num_classes)  
                    model_name += "_FairPCA"
                else:
                    model = SimpleNN(input_size, num_classes)  
                    model_name += "_FairLoss"
            elif filename.startswith('LRmodel'):
                model = LogisticRegression(input_size)
                model_name = "LR"
                if "F:NO l2" in filename:
                    model_name += "_L2"
                elif "F:True" in filename:
                    model_name += "_FairLoss"

            model.load_state_dict(state_dict)
            models[model_name] = model

    return models


In [12]:
models = load_models(x_train.shape[1], 2, '/Users/karl/Desktop/Fairness/algorithmic-fairness-exam/models')
print(models['LR'])  



LogisticRegression(
  (linear): Linear(in_features=14, out_features=1, bias=True)
)


In [24]:
def evaluate_models(models, x_test, y_test, test_groups):
    # list of metric names
    metric_names = ['weighted_f1', 'f1_score_0', 'f1_score_1', 'mean_equalized_odds_difference']

    # dictionary to hold metric values for each model
    metric_values = {name: [] for name in metric_names}

    for model_name, model in models.items():
        # get model predictions
        test_preds = model(x_test).detach().numpy() < 0.5
        print(test_preds.shape)
        print(test_preds[:10])
        
        # calculate weighted f1 score
        weighted_f1 = f1_score(y_test, test_preds, average='weighted')
        metric_values['weighted_f1'].append(weighted_f1)

        # calculate f1 score for class 0
        f1_score_0 = f1_score(y_test, test_preds, pos_label=0)
        metric_values['f1_score_0'].append(f1_score_0)

        # calculate f1 score for class 1
        f1_score_1 = f1_score(y_test, test_preds, pos_label=1)
        metric_values['f1_score_1'].append(f1_score_1)

        # calculate mean equalized odds difference
        one_hot_cols = list(range(6))
        mean_eod = np.mean([
            equalized_odds_difference(y_test, test_preds, sensitive_features=test_groups[:, one_hot_cols.index(col)])
            for col in one_hot_cols
        ])
        metric_values['mean_equalized_odds_difference'].append(mean_eod)

    # create bar chart
    x = np.arange(len(metric_names))  # the label locations
    width = 0.2  # the width of the bars

    fig, ax = plt.subplots()
    for i, model_name in enumerate(models.keys()):
        ax.bar(x - width/2 + i*width, metric_values[model_name], width, label=model_name)

    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax.set_ylabel('Scores')
    ax.set_title('Scores by model and metric')
    ax.set_xticks(x)
    ax.set_xticklabels(metric_names)
    ax.legend()

    plt.show()

In [25]:
evaluate_models(models, x_test, y_test, test_groups)
# print types of parameters
print("x_test: ", type(x_test))
print("y_test: ", type(y_test))

(84742, 1)
[[False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [False]
 [ True]
 [False]
 [False]]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (84742x14 and 9x64)