**DATACHALLENGE BDGIA DEBIASING MODEL**
---

In [3]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#from imblearn.over_sampling import SMOTE

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report
from sklearn.preprocessing import normalize
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

from evaluator import *

In [29]:
# FUNCTIONS

# to show performance

def evaluate(Y_pred,Y,S,will_print=1):
    '''returns model accuracy, final score, macro fscore ans TPR gap
    input : 2 np arrays of same dimension
    output : array of 4 values
    '''
    accuracy= accuracy_score(Y, Y_pred)  # Y_test are your original test labels
    print(f"Accuracy on transformed test data: {accuracy}")
    eval_scores, confusion_matrices_eval = gap_eval_scores(Y_pred, Y, S, metrics=['TPR'])
    final_score = (eval_scores['macro_fscore']+ (1-eval_scores['TPR_GAP']))/2

    if will_print==1:
        #print results
        print('final score',final_score)
        print('macro_fscore',eval_scores['macro_fscore'])
        print('1-eval_scores[\'TPR_GAP\']',1-eval_scores['TPR_GAP'])
    
    return accuracy, final_score, eval_scores['macro_fscore'],1-eval_scores['TPR_GAP'] , eval_scores , confusion_matrices_eval

# to predict X_test and save to file

def save_Y_pred_tofile(X, model,name):
    
    # save probabilities for each Xi (dim=28)
    y_pred_probs = model(X)
    probs=pd.DataFrame(y_pred_probs.detach().numpy(), columns= list(range(0,28)))
    file_name_probs = "y_pred_probs/y_pred_probs_"+str(name)+".csv"
    probs.to_csv(file_name_probs, header = None, index = None)

    # save predicted labels for each Xi (dim=1)
    y_pred = torch.argmax(y_pred_probs, dim=1)
    results=pd.DataFrame(y_pred.numpy(), columns= ['score'])
    file_name = "y_pred/Data_Challenge_"+str(name)+".csv"
    results.to_csv(file_name, header = None, index = None)

    return y_pred, y_pred_probs


##############################################################
#  DEFINE CUSTOM LOSS FUNCTION AND EVALUATION FUNCTIONS
#   
#   soft_f1_loss
#   macro_soft_f1_loss
#   calculate_exact_macro_f1
#   calculate_class_tpr_gap
#   average_tpr_gap_per_class
#   
##############################################################


def soft_f1_loss(y_true, y_pred):
    """
    Differentiable approximation of the F1 score as a loss function.
    """
    y_pred_probs = torch.softmax(y_pred, dim=1)
    tp = torch.sum(y_true * y_pred_probs, dim=0)
    pp = torch.sum(y_pred_probs, dim=0)
    ap = torch.sum(y_true, dim=0)
    precision = tp / (pp + 1e-6)
    recall = tp / (ap + 1e-6)
    soft_f1 = 2 * (precision * recall) / (precision + recall + 1e-6)
    loss = 1 - soft_f1.mean()  # Mean to aggregate over all classes
    return loss

def soft_macro_f1_loss(y_true, y_pred):
    """
    Differentiable approximation of the macro F1 score as a loss function.
    Calculates the F1 score for each class independently and then takes the average.
    Inputs :
        y_true must be one hot encoded
    """
    y_pred_one_hot = torch.nn.functional.one_hot(y_pred, num_classes=Y_train.nunique()) if len(y_pred.shape) == 1 else y_pred
    y_pred_probs = torch.softmax(y_pred_one_hot, dim=1)
    
    tp = torch.sum(y_true * y_pred_probs, dim=0)
    pp = torch.sum(y_pred_probs, dim=0)
    ap = torch.sum(y_true, dim=0)
    
    precision = tp / (pp + 1e-6)
    recall = tp / (ap + 1e-6)
    
    f1_per_class = 2 * (precision * recall) / (precision + recall + 1e-6)
    macro_f1 = torch.mean(f1_per_class)  # Average F1 score across all classes
    
    loss = 1 - macro_f1  # Minimizing loss is maximizing macro F1 score
    return loss


def get_macro_f1(y_true, y_pred):
    """
    Calculate the exact macro F1 score for evaluation.
    Input : any format as tensors will be converted to Tensors of true label if dim >1 . Can be :
        - Tensor of probabilities(y_pred_probs) dimension (n,28)
        - Tensor of labels, one hote encoded (y_pred_one_hot) dimension (n,28)
        - Tensor of labels (y_pred_tensor) dimension (n,1)
    Ouput : scalar
    """
    #convert Tensors to 1 dimension (labels ranging from 0 to 27) if necessary
    y_pred_labels = torch.argmax(y_pred, dim=1) if y_pred.ndim > 1 else y_pred
    y_true_labels = torch.argmax(y_true, dim=1) if y_true.ndim > 1 else y_true

    " predict macro f1"
    f1 = f1_score(y_true_labels.cpu().numpy(), y_pred_labels.cpu().numpy(), average='macro')
    return f1

def get_tpr_gap(y_true, y_pred, protected_attribute, class_idx):
    """
    Calculate the TPR gap for a specific class across protected groups.
    
    Args:
    - y_true: Tensor of true labels, one-hot encoded.
    - y_pred_probs: Tensor of predicted probabilities (after softmax).
    - protected_attribute: Tensor indicating group membership for each instance.
    - class_idx: Index of the class for which to calculate the TPR gap.
    
    Returns:
    - TPR gap for the specified class.
    """
    #convert Tensors to 1 dimension (labels ranging from 0 to 27) if necessary
    y_pred_labels = torch.argmax(y_pred, dim=1) if y_pred.ndim > 1 else y_pred
    y_true_labels = torch.argmax(y_true, dim=1) if y_true.ndim > 1 else y_true
    
    # Calculate overall TPR for the current class
    overall_mask = y_true_labels == class_idx
    overall_tpr = torch.sum((y_pred_labels == class_idx) & overall_mask).float() / (torch.sum(overall_mask).float() + 1e-6)
    
    # Initialize list to store TPR for each protected group
    group_tprs = []
    
    # Calculate TPR for each protected group
    for group_val in protected_attribute.unique():
        group_mask = (protected_attribute == group_val) & overall_mask
        group_tpr = torch.sum((y_pred_labels == class_idx) & group_mask).float() / (torch.sum(group_mask).float() + 1e-6)
        group_tprs.append(group_tpr)
    
    # Calculate TPR gap for the current class
    tpr_gaps = torch.abs(torch.tensor(group_tprs) - overall_tpr)
    
    return torch.mean(tpr_gaps)  # Return the mean TPR gap for this class

def get_macro_tpr_gap(y_true, y_pred, protected_attribute):
    """
    Calculate the average TPR gap per class by calling tpr_gap for each class.
    
    Args:
    - y_true: Tensor of true labels, one-hot encoded.
    - y_pred: Tensor of predicted logits (before softmax).
    - protected_attribute: Tensor indicating group membership for each instance.
    
    Returns:
    - Average TPR gap across all classes.
    """
    #convert Tensors to 1 dimension (labels ranging from 0 to 27) if necessary
    y_pred_labels = torch.argmax(y_pred, dim=1) if y_pred.ndim > 1 else y_pred
    y_true_labels = torch.argmax(y_true, dim=1) if y_true.ndim > 1 else y_true
    
    # Initialize list to store TPR gaps for all classes
    class_tpr_gaps = []
    
    # Iterate over each class
    num_classes = len(y_true_labels.unique())
    for class_idx in range(num_classes):
        class_tpr_gap = get_tpr_gap(y_true_labels, y_pred_labels, protected_attribute, class_idx)
        class_tpr_gaps.append(class_tpr_gap)
    
    # Calculate the average TPR gap across all classes
    avg_tpr_gap = torch.mean(torch.stack(class_tpr_gaps))
    
    return avg_tpr_gap


def soft_final_score_loss(y_true, y_pred, protected_attribute):
    """
    Combine soft macro F1 score and TPR gap to create a final evaluation metric.
    """
    soft_macro_f1 = soft_macro_f1_loss(y_true, y_pred)  # Calculate soft macro F1 score
    macro_tpr_gap = get_macro_tpr_gap(y_true, y_pred, protected_attribute)  # Calculate TPR gap
    
    soft_final_score = ( soft_macro_f1 + (1 - macro_tpr_gap) ) / 2
    return soft_final_score

def get_final_score(y_true, y_pred, protected_attribute):
    """
    Combine soft macro F1 score and TPR gap to create a final evaluation metric.
    """
    #convert Tensors to 1 dimension (labels ranging from 0 to 27) if necessary
    y_pred_labels = torch.argmax(y_pred, dim=1) if y_pred.ndim > 1 else y_pred
    y_true_labels = torch.argmax(y_true, dim=1) if y_true.ndim > 1 else y_true

    macro_f1 = get_macro_f1(y_true_labels, y_pred_labels)  # Calculate macro F1 score
    macro_tpr_gap = get_macro_tpr_gap(y_true_labels, y_pred_labels, protected_attribute)  # Calculate macro TPR gap
    
    final_score = (macro_f1 + (1 - macro_tpr_gap)) / 2
    return final_score




In [8]:
##############################################################
# LOAD DATA, 
#############################################################

# Load pickle file and convert to numpy array
with open('data-challenge-student.pickle', 'rb') as handle:
    # dat = pickle.load(handle)
    dat = pd.read_pickle(handle)
 
#Check keys()
print(dat.keys())
X = dat['X_train']
Y = dat['Y']
S = dat['S_train']

#create a label to distiguish 56 labels Y x 2 (man or woman)
# 0 to 27 = non sensitive group | 28 + [0 , 27] = 28 to 55 = sensitive group
Y56 = Y+28*S

X_test_true = dat['X_test']
S_test_true = dat['S_test']

# check size
print(X.shape,Y.shape,S.shape,X_test_true.shape,S_test_true.shape)


dict_keys(['X_train', 'X_test', 'Y', 'S_train', 'S_test'])
(27749, 768) (27749,) (27749,) (11893, 768) (11893,)


In [28]:
##############################################################
# train_test_split (np.arrays)
##############################################################

# Diviser les données en ensembles d'entraînement et de test
X_train, X_test, Y56_train, Y56_test = train_test_split(X, Y56, test_size=0.2, random_state=42)
Y_train = Y56_train % 28  # reste (original Y)   ex 33% 28 = classe 5 
S_train = Y56_train//28   # facteur (original S) ex 33//28 = 1 (attribut protégé)
Y_test = Y56_test % 28  # reste (original Y)   ex 33% 28 = classe 5 
S_test = Y56_test//28   # facteur (original S) ex 33//28 = 1 (attribut protégé)

# impression des dimensions
print('train:',X_train.shape,Y_train.shape,S_train.shape)
print('test:',X_test.shape,Y_test.shape, S_test.shape)

##############################################################
# 1. Transform DataFrames into Tensors
##############################################################

X_tensor = torch.tensor(X.values, dtype=torch.float32)
Y_tensor = torch.tensor(Y.values, dtype=torch.long)
S_tensor = torch.tensor(S.values, dtype=torch.long)

X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train.values, dtype=torch.long)
S_train_tensor = torch.tensor(S_train.values, dtype=torch.long)

X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test.values, dtype=torch.long)
S_test_tensor = torch.tensor(S_test.values, dtype=torch.long)

Y_train_one_hot = torch.nn.functional.one_hot(Y_train_tensor, num_classes=Y_train.nunique())
Y_test_one_hot = torch.nn.functional.one_hot(Y_test_tensor, num_classes=Y_train.nunique())

X_test_true_tensor = torch.tensor(X_test_true.values, dtype=torch.float32)

# impression des dimensions
print('train_tensor:',X_train_tensor.shape,Y_train_tensor.shape,S_train_tensor.shape, type(X_train_tensor))
print('test_tensor:',X_test_tensor.shape,Y_test_tensor.shape, S_test_tensor.shape, type(X_test_tensor))
print('Y_train_one_hot:',Y_train_one_hot.shape, type(Y_train_one_hot))
print('X_test_true_tensor:',X_test_true_tensor.shape, type(X_test_true_tensor))

train: (22199, 768) (22199,) (22199,)
test: (5550, 768) (5550,) (5550,)
train_tensor: torch.Size([22199, 768]) torch.Size([22199]) torch.Size([22199]) <class 'torch.Tensor'>
test_tensor: torch.Size([5550, 768]) torch.Size([5550]) torch.Size([5550]) <class 'torch.Tensor'>
test_tensor: 2 1 torch.Size([5550]) <class 'torch.Tensor'>
Y_train_one_hot: torch.Size([22199, 28]) <class 'torch.Tensor'>
X_test_true_tensor: torch.Size([11893, 768]) <class 'torch.Tensor'>


**NN with customized loss function (final score)**
---

In [15]:
# 1. Define the model and optimizer
# ---------------------------------

model = nn.Sequential(
    nn.Linear(768, 28),  # Assuming 768 input features and 28 classes
    nn.ReLU(),  # Adding a ReLU activation function
    nn.Linear(28, 28),  # Additional layer for complexity
    #nn.LogSoftmax(dim=1)  # LogSoftmax for multi-class classification
)

optimizer = optim.SGD(model.parameters(), lr=0.1)

# 2. Train the model with the custom loss function final_eval
# -----------------------------------------------------------

num_epochs = 1000  # Adjust as necessary

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    
    outputs_train = model(X_train_tensor)
    
    loss = soft_final_score_loss(Y_train_one_hot.float(), outputs_train, S_train_tensor)
    #loss = soft_macro_f1_loss(Y_train_one_hot.float(), outputs_train)*10 + get_macro_tpr_gap(Y_train_one_hot.float(), outputs_train, S_train_tensor )
    loss.backward()
    optimizer.step()
    
    if (epoch + 1) % 100 == 0:
        model.eval()
        with torch.no_grad():
            # Calculate metrics for training data
            outputs_train = model(X_train_tensor) # probabilities

            # Evaluate predictions on training data
            final_score_train = get_final_score(Y_train_tensor, outputs_train, S_train_tensor)
            macro_f1_train = get_macro_f1(Y_train_tensor, outputs_train)
            inv_macro_tpr_gap_train = 1 - get_macro_tpr_gap(Y_train_tensor, outputs_train, S_train_tensor)
            
            # Calculate metrics for test data
            outputs_test = model(X_test_tensor)

            # Evaluate predictions on training data
            final_score_test = get_final_score(Y_test_tensor, outputs_test, S_test_tensor)
            macro_f1_test = get_macro_f1(Y_test_tensor, outputs_test)
            inv_macro_tpr_gap_test = 1 - get_macro_tpr_gap(Y_test_tensor, outputs_test, S_test_tensor)
            
            print(f'Epoch {epoch + 1}, Loss: {loss.item()}, Final Score Train: {final_score_train.item()}, Final Score Test: {final_score_test.item()}, macro F1 Train: {macro_f1_train}, macro F1 Test: {macro_f1_test}, 1-TPR Gap Train: {inv_macro_tpr_gap_train}, 1-TPR Gap Test: {inv_macro_tpr_gap_test}')
            # print(f'Epoch {epoch + 1}, Loss: {loss.item()},  macro F1 Train: {macro_f1_train}, macro F1 Test: {macro_f1_test}')# Final Score Train: {final_score_train.item()}, Final Score Test: {final_score_test.item()}, macro F1 Train: {macro_f1_train}, macro F1 Test: {macro_f1_test}, 1-TPR Gap Train: {inv_macro_tpr_gap_train}, 1-TPR Gap Test: {inv_macro_tpr_gap_test}')


# 4. Make Predictions and Evaluate with final_score
# -------------------------------------------------
            
with torch.no_grad():
    model.eval()
    Y_pred_probs = model(X_test_tensor) # dim = 28
    Y_pred_tensor = torch.argmax(Y_pred_probs, dim=1)  # dim = 1 (Get the class with the highest probability)
    Y_pred_one_hot = torch.nn.functional.one_hot(Y_pred_tensor, num_classes=28)  # dim = 28
 
    macro_f1 = get_macro_f1(Y_test_tensor, Y_pred_tensor)
    inv_macro_tpr_gap = 1 - get_macro_tpr_gap(Y_test_tensor, Y_pred_probs, S_test_tensor)
    final_score = get_final_score(Y_test_tensor, Y_pred_probs, S_test_tensor)
    print(f'Final Evaluation Score: {final_score.item()} Macro F1: {macro_f1.item()} 1-TPR_gap: { inv_macro_tpr_gap.item() }')

Epoch 100, Loss: 0.9841914176940918, Final Score Train: 0.501908004283905, Final Score Test: 0.4984510540962219, macro F1 Train: 0.008571775500201642, macro F1 Test: 0.007334987901510969, 1-TPR Gap Train: 0.9952442049980164, 1-TPR Gap Test: 0.9895671010017395
Epoch 200, Loss: 0.9833416938781738, Final Score Train: 0.5067097544670105, Final Score Test: 0.5049751996994019, macro F1 Train: 0.019669357091836863, macro F1 Test: 0.01782821189219381, 1-TPR Gap Train: 0.9937501549720764, 1-TPR Gap Test: 0.992122232913971
Epoch 300, Loss: 0.9840088486671448, Final Score Train: 0.5078456401824951, Final Score Test: 0.5077477693557739, macro F1 Train: 0.019754407542982855, macro F1 Test: 0.019433599351551208, 1-TPR Gap Train: 0.995936930179596, 1-TPR Gap Test: 0.9960619807243347
Epoch 400, Loss: 0.985431432723999, Final Score Train: 0.5088541507720947, Final Score Test: 0.5081185102462769, macro F1 Train: 0.018309218934500774, macro F1 Test: 0.016256363347120585, 1-TPR Gap Train: 0.99939906597137

In [16]:
name = 'test'
save_Y_pred_tofile(X_test_true_tensor, model,name)

OSError: Cannot save file into a non-existent directory: 'y_pred_probs'

In [17]:
# Convert Y_pred to a DataFrame
Y_pred_df = pd.DataFrame(Y_pred_tensor.numpy(), columns=['Predicted'])

# Evaluate Y_pred compared to Y_test (assuming Y_test is a numpy array or a pandas Series)
print(classification_report(Y_test, Y_pred_df['Predicted']))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        81
           1       0.00      0.00      0.00       127
           2       0.00      0.00      0.00       458
           3       0.00      0.00      0.00        36
           4       0.00      0.00      0.00        48
           5       0.00      0.00      0.00        72
           6       0.00      0.00      0.00       178
           7       0.00      0.00      0.00        54
           8       0.00      0.00      0.00        18
           9       0.00      0.00      0.00        91
          10       0.00      0.00      0.00        22
          11       0.00      0.00      0.00       286
          12       0.00      0.00      0.00       110
          13       0.00      0.00      0.00       258
          14       0.00      0.00      0.00       112
          15       0.00      0.00      0.00        19
          16       0.00      0.00      0.00        33
          17       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [38]:
Y_pred_probs = model(X_test_true_tensor)
Y_pred_tensor = torch.argmax(Y_pred_probs, dim=1)

results=pd.DataFrame(Y_pred_tensor, columns= ['score'])
name = 'NN_with_custom_loss'
file_name = "Data_Challenge_MDI_341_"+str(name)+".csv"
results.to_csv(file_name, header = None, index = None)


**ARRET ANTICIPE DU NN (sans mini-batch)**
---

In [25]:
# 1. Define the model and optimizer
# ---------------------------------

model = nn.Sequential(
    nn.Linear(768, 28),  # Assuming 768 input features and 28 classes
    nn.ReLU(),  # Adding a ReLU activation function
    nn.Linear(28, 28),  # Additional layer for complexity
    #nn.LogSoftmax(dim=1)  # LogSoftmax for multi-class classification
)

optimizer = optim.Adam(model.parameters(), lr=0.01)


# 2. Paramètres pour l'arrêt précoce
# -------------------------------
patience = 10  # Nombre d'époques à attendre après la dernière amélioration de la perte de validation
best_loss = None
epochs_without_improvement = 0

# 3. Train the model with the custom loss function final_eval
# -----------------------------------------------------------
num_epochs = 1000  # Adjust as necessary

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    
    outputs_train = model(X_train_tensor)
    
    loss = soft_final_score_loss(Y_train_one_hot.float(), outputs_train, S_train_tensor)
    # loss = soft_macro_f1_loss(Y_train_one_hot.float(), outputs_train)
    # loss = get_macro_tpr_gap(Y_train_one_hot.float(), outputs_train, S_train_tensor )

    loss.backward()
    optimizer.step()
    
    if (epoch + 1) % 100 == 0:
        model.eval()
        with torch.no_grad():
            # Calculate metrics for training data
            outputs_train = model(X_train_tensor) # probabilities

            # Evaluate predictions on training data
            final_score_train = get_final_score(Y_train_tensor, outputs_train, S_train_tensor)
            macro_f1_train = get_macro_f1(Y_train_tensor, outputs_train)
            inv_macro_tpr_gap_train = 1 - get_macro_tpr_gap(Y_train_tensor, outputs_train, S_train_tensor)

            # Calculate metrics for test data
            outputs_test = model(X_test_tensor)

            # Evaluate predictions on training data
            final_score_test = get_final_score(Y_test_tensor, outputs_test, S_test_tensor)
            macro_f1_test = get_macro_f1(Y_test_tensor, outputs_test)
            inv_macro_tpr_gap_test = 1 - get_macro_tpr_gap(Y_test_tensor, outputs_test, S_test_tensor)

            # Vérifier si la perte de validation s'est améliorée (arret précoce)
            if best_loss is None or final_score_test < best_loss:
                best_loss = final_score_test
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1
                if epochs_without_improvement >= patience:
                    print(f'Arrêt précoce après {epoch+1} époques')
                    break  # Arrêter l'entraînement

            print(f'Epoch {epoch + 1}, Loss: {loss.item()}, Final Score Train: {final_score_train.item()}, Final Score Test: {final_score_test.item()}, macro F1 Train: {macro_f1_train}, macro F1 Test: {macro_f1_test}, 1-TPR Gap Train: {inv_macro_tpr_gap_train}, 1-TPR Gap Test: {inv_macro_tpr_gap_test}')
            # print(f'Epoch {epoch + 1}, Loss: {loss.item()},  macro F1 Train: {macro_f1_train}, macro F1 Test: {macro_f1_test}')# Final Score Train: {final_score_train.item()}, Final Score Test: {final_score_test.item()}, macro F1 Train: {macro_f1_train}, macro F1 Test: {macro_f1_test}, 1-TPR Gap Train: {inv_macro_tpr_gap_train}, 1-TPR Gap Test: {inv_macro_tpr_gap_test}')


# 4. Make Predictions and Evaluate with final_score
# -------------------------------------------------
            
with torch.no_grad():
    model.eval()
    Y_pred_probs = model(X_test_tensor) # dim = 28
    Y_pred_tensor = torch.argmax(Y_pred_probs, dim=1)  # dim = 1 (Get the class with the highest probability)
    Y_pred_one_hot = torch.nn.functional.one_hot(Y_pred_tensor, num_classes=28)  # dim = 28
 
    macro_f1 = get_macro_f1(Y_test_tensor, Y_pred_tensor)
    inv_macro_tpr_gap = 1 - get_macro_tpr_gap(Y_test_tensor, Y_pred_probs, S_test_tensor)
    final_score = get_final_score(Y_test_tensor, Y_pred_probs, S_test_tensor)
    print(f'Final Evaluation Score: {final_score.item()} Macro F1: {macro_f1.item()} 1-TPR_gap: { inv_macro_tpr_gap.item() }')

Epoch 100, Loss: 0.6180952787399292, Final Score Train: 0.8383526802062988, Final Score Test: 0.7733933925628662, macro F1 Train: 0.727813242975664, macro F1 Test: 0.6287169097435105, 1-TPR Gap Train: 0.9488921761512756, 1-TPR Gap Test: 0.9180698394775391
Epoch 200, Loss: 0.5957030057907104, Final Score Train: 0.8599814772605896, Final Score Test: 0.7825250029563904, macro F1 Train: 0.7688718539032557, macro F1 Test: 0.6406831513698007, 1-TPR Gap Train: 0.9510911107063293, 1-TPR Gap Test: 0.92436683177948
Epoch 300, Loss: 0.5893559455871582, Final Score Train: 0.8680845499038696, Final Score Test: 0.7799464464187622, macro F1 Train: 0.7814826795012992, macro F1 Test: 0.6342193246928722, 1-TPR Gap Train: 0.9546864628791809, 1-TPR Gap Test: 0.9256735444068909
Epoch 400, Loss: 0.5860427618026733, Final Score Train: 0.8719273209571838, Final Score Test: 0.7806618213653564, macro F1 Train: 0.7879128081575509, macro F1 Test: 0.6328553288903299, 1-TPR Gap Train: 0.9559418559074402, 1-TPR Gap 

In [26]:
# SANS MINI-BATCH
def train_NN_with_custom_loss_no_mini_batch(model, optimizer, X_train_tensor, Y_train_tensor, S_train_tensor, X_test_tensor, Y_test_tensor, S_test_tensor):

    # 2. Paramètres pour l'arrêt précoce
    # -------------------------------
    patience = 10  # Nombre d'époques à attendre après la dernière amélioration de la perte de validation
    best_loss = None
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
    
        outputs_train = model(X_train_tensor)
    
        loss = soft_final_score_loss(Y_train_one_hot.float(), outputs_train, S_train_tensor)
        #loss = soft_macro_f1_loss(Y_train_one_hot.float(), outputs_train) 
        # loss = get_macro_tpr_gap(Y_train_one_hot.float(), outputs_train, S_train_tensor )

        loss.backward()
        optimizer.step()

        model.eval()
        
        # 1. Vérifier si la perte de validation s'est améliorée (arret précoce)

        # Calculate metrics for test data
        outputs_test = model(X_test_tensor)

        # Evaluate predictions on test (validation) data
        final_score_test = get_final_score(Y_test_tensor, outputs_test, S_test_tensor)
        outputs_test = model(X_test_tensor)
        
        if best_loss is None or final_score_test < best_loss:
            best_loss = final_score_test
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(f'Arrêt précoce après {epoch+1} époques')
                break  # Arrêter l'entraînement

        # 2. Impression de l'apprentissage et des scores train et test
        if (epoch + 1) % 100 == 0:
            
            with torch.no_grad():
                # Calculate metrics for training data
                outputs_train = model(X_train_tensor) # probabilities
                # Evaluate predictions on training data
                final_score_train = get_final_score(Y_train_tensor, outputs_train, S_train_tensor)
                macro_f1_train = get_macro_f1(Y_train_tensor, outputs_train)
                inv_macro_tpr_gap_train = 1 - get_macro_tpr_gap(Y_train_tensor, outputs_train, S_train_tensor)
            
                # Calculate metrics for test data
                outputs_test = model(X_test_tensor)
                # Evaluate predictions on training data
                final_score_test = get_final_score(Y_test_tensor, outputs_test, S_test_tensor)
                macro_f1_test = get_macro_f1(Y_test_tensor, outputs_test)
                inv_macro_tpr_gap_test = 1 - get_macro_tpr_gap(Y_test_tensor, outputs_test, S_test_tensor)

                print(f'Epoch {epoch + 1}, Loss: {loss.item()}, Final Score Train: {final_score_train.item()}, Final Score Test: {final_score_test.item()}, macro F1 Train: {macro_f1_train}, macro F1 Test: {macro_f1_test}, 1-TPR Gap Train: {inv_macro_tpr_gap_train}, 1-TPR Gap Test: {inv_macro_tpr_gap_test}')
            
    # 4. Make Predictions and Evaluate with final_score
    # -------------------------------------------------
            
    with torch.no_grad():
        model.eval()
        Y_pred_probs = model(X_test_tensor) # dim = 28 (Probabilities for each class)
        Y_pred_tensor = torch.argmax(Y_pred_probs, dim=1)  # dim = 1 (Get the class with the highest probability)
 
        macro_f1 = get_macro_f1(Y_test_tensor, Y_pred_tensor)
        inv_macro_tpr_gap = 1 - get_macro_tpr_gap(Y_test_tensor, Y_pred_probs, S_test_tensor)
        final_score = get_final_score(Y_test_tensor, Y_pred_probs, S_test_tensor)
        print(f'Final Evaluation Score: {final_score.item()} Macro F1: {macro_f1.item()} 1-TPR_gap: { inv_macro_tpr_gap.item() }')

    return model, Y_pred_probs, Y_pred_tensor, final_score, macro_f1, inv_macro_tpr_gap

In [29]:
#################################################
#          TEST DES PARAMETRES
################################################


# 1. Define the model and optimizer and train
# --------------------------------------------------

model = nn.Sequential(
    nn.Linear(768, 28),  # Assuming 768 input features and 28 classes
    #nn.LogSoftmax(dim=1)
    )  # LogSoftmax for multi-class classification

learning_rate=0.01
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
num_epochs = 10000 

# 2. Train the model with the custom loss function final_eval
# -----------------------------------------------------------
name = 'NN-28-28_Adam'+'_lr_'+str(learning_rate)
print('\n\n Starting to train model', name)
model_trained, Y_pred_probs, Y_pred_tensor, final_score, macro_f1, inv_macro_tpr_gap = train_NN_with_custom_loss_no_mini_batch(model,optim.Adam(model.parameters(), lr=learning_rate), X_train_tensor, Y_train_tensor, S_train_tensor, X_test_tensor, Y_test_tensor, S_test_tensor)
#Res.loc[i]=[name,optimizer,learning_rate,final_score, macro_f1, inv_macro_tpr_gap]
#save_Y_pred_tofile(X_test_true_tensor, model_trained,name)




 Starting to train model NN-28-28_Adam_lr_0.01
Arrêt précoce après 12 époques
Final Evaluation Score: 0.6730861067771912 Macro F1: 0.4465380708164603 1-TPR_gap: 0.8996341228485107


**FUNCTION FOR NN WITH CUSTOM LOSS**
---

In [21]:
# AVEC MINI BATCH

def train_NN_with_custom_loss(model, optimizer, X_train_tensor, Y_train_tensor, S_train_tensor, X_test_tensor, Y_test_tensor, S_test_tensor):

    # 1. Convertir les tensors en datasets puis en DataLoader pour gérer les mini-batchs
    train_dataset = TensorDataset(X_train_tensor, Y_train_tensor, S_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    test_dataset = TensorDataset(X_test_tensor, Y_test_tensor, S_test_tensor)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    
    # 2. Paramètres pour l'arrêt précoce
    # -------------------------------
    patience = 10  # Nombre d'époques à attendre après la dernière amélioration de la perte de validation
    best_loss = None
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        
        model.train()
        train_loss = 0.0
        
        # 1/ exécuter les minibatches et recupérer la loss moyenne
        for X_batch, Y_batch, S_batch in train_loader:
            #if epoch==0 : print('dim de X_batch Y_batch et S_batch',X_batch.size(),Y_batch.size(),S_batch.size())
            model.train()
            optimizer.zero_grad()
            
            outputs_train = model(X_batch)
            #print('Y_batch',Y_batch.size(),'outputs_train',outputs_train.size())
            Y_batch_one_hot = torch.nn.functional.one_hot(Y_batch, num_classes=Y_train.nunique())
            loss = soft_final_score_loss(Y_batch_one_hot, outputs_train, S_batch)
            # loss = soft_macro_f1_loss(Y_train_one_hot.float(), outputs_train) 
            # loss = get_macro_tpr_gap(Y_train_one_hot.float(), outputs_train, S_train_tensor )

            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        print('boucle mini batch terminée')
        # Average loss pour l'epoch (après boucle mini-batchs)
        train_loss = train_loss / len(train_loader)       
        
        # 2. Vérifier si la perte de validation s'est améliorée (arret précoce)

        # Evaluation sur le jeu de données de test
        model.eval()
        test_loss = 0.0
        
        with torch.no_grad():
            for X_batch_test, Y_batch_test, S_batch_test in test_loader:
                outputs_test = model(X_batch_test)
                Y_batch_test_one_hot = torch.nn.functional.one_hot(Y_batch_test, num_classes=Y_train.nunique())
                loss_test = soft_final_score_loss(Y_batch_test_one_hot, outputs_test, S_batch)
                test_loss += loss_test.item()
                
        #average_test_loss = running_loss_test / len(test_loader)
        test_loss = test_loss / len(test_loader)
        print('fin boucle mini batch test')    
        # Evaluate predictions on test (validation) data
        #final_score_test = get_final_score(Y_test_tensor, outputs_test, S_test_tensor)
        #outputs_test = model(X_test_tensor)
        
        if best_loss is None or test_loss < best_loss:
            best_loss = test_loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(f'Arrêt précoce après {epoch+1} époques')
                break  # Arrêter l'entraînement
        print('fin eval early ending') 
        # 3. Impression de l'apprentissage et des scores train et test
        if (epoch + 1) % 100 == 0:
            
            with torch.no_grad():
                # Calculate metrics for training data
                outputs_train = model(X_train_tensor) # probabilities
                # Evaluate predictions on training data
                final_score_train = get_final_score(Y_train_tensor, outputs_train, S_train_tensor)
                macro_f1_train = get_macro_f1(Y_train_tensor, outputs_train)
                inv_macro_tpr_gap_train = 1 - get_macro_tpr_gap(Y_train_tensor, outputs_train, S_train_tensor)
            
                # Calculate metrics for test data
                outputs_test = model(X_test_tensor)
                # Evaluate predictions on training data
                final_score_test = get_final_score(Y_test_tensor, outputs_test, S_test_tensor)
                macro_f1_test = get_macro_f1(Y_test_tensor, outputs_test)
                inv_macro_tpr_gap_test = 1 - get_macro_tpr_gap(Y_test_tensor, outputs_test, S_test_tensor)

                print(f'Epoch {epoch + 1}, Loss: {loss.item()}, Final Score Train: {final_score_train.item()}, Final Score Test: {final_score_test.item()}, macro F1 Train: {macro_f1_train}, macro F1 Test: {macro_f1_test}, 1-TPR Gap Train: {inv_macro_tpr_gap_train}, 1-TPR Gap Test: {inv_macro_tpr_gap_test}')
            
    # 4. Make Predictions and Evaluate with final_score
    # -------------------------------------------------
            
    with torch.no_grad():
        model.eval()
        Y_pred_probs = model(X_test_tensor) # dim = 28 (Probabilities for each class)
        Y_pred_tensor = torch.argmax(Y_pred_probs, dim=1)  # dim = 1 (Get the class with the highest probability)
 
        macro_f1 = get_macro_f1(Y_test_tensor, Y_pred_tensor)
        inv_macro_tpr_gap = 1 - get_macro_tpr_gap(Y_test_tensor, Y_pred_probs, S_test_tensor)
        final_score = get_final_score(Y_test_tensor, Y_pred_probs, S_test_tensor)
        print(f'Final Evaluation Score: {final_score.item()} Macro F1: {macro_f1.item()} 1-TPR_gap: { inv_macro_tpr_gap.item() }')

    return model, Y_pred_probs, Y_pred_tensor, final_score, macro_f1, inv_macro_tpr_gap

In [44]:
# AVEC MINI BATCH

def train_NN_with_custom_loss(model, optimizer, batch_size, X_train_tensor, Y_train_tensor, S_train_tensor, X_test_tensor, Y_test_tensor, S_test_tensor):

    # 1. Convertir les tensors en datasets puis en DataLoader pour gérer les mini-batchs
    train_dataset = TensorDataset(X_train_tensor, Y_train_one_hot, S_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    test_dataset = TensorDataset(X_test_tensor, Y_test_one_hot, S_test_tensor)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    
    # 2. Paramètres pour l'arrêt précoce
    # -------------------------------
    patience = 10  # Nombre d'époques à attendre après la dernière amélioration de la perte de validation
    best_loss = None
    early_ending = None
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        
        model.train()
        train_loss = 0.0
        
        # 1/ exécuter les minibatches et recupérer la loss moyenne
        for X_batch, Y_batch, S_batch in train_loader:
            # Y_batch est one hot
            
            model.train()
            optimizer.zero_grad()
            outputs_train = model(X_batch)
            loss = soft_final_score_loss(Y_batch, outputs_train, S_batch)
            loss.backward()
            optimizer.step()

            # save mini-batch loss
            train_loss += loss.item()
        
        # Average loss pour l'epoch (après boucle mini-batchs)
        train_loss = train_loss / len(train_loader)       
        
        # 2. Vérifier si la perte de validation s'est améliorée (arret précoce)

        # Evaluation sur le jeu de données de test
        model.eval()
        test_loss = 0.0
        
        with torch.no_grad():
            for X_batch_test, Y_batch_test, S_batch_test in test_loader:
                outputs_test = model(X_batch_test)
                #Y_batch_test_one_hot = torch.nn.functional.one_hot(Y_batch_test, num_classes=Y_train.nunique())
                loss_test = soft_final_score_loss(Y_batch_test, outputs_test, S_batch_test)
                test_loss += loss_test.item()
                
        #average_test_loss = running_loss_test / len(test_loader)
        test_loss = test_loss / len(test_loader)
       
        # check if improvement in loss (compared to last epoch)
        if best_loss is None or test_loss < best_loss:
            best_loss = test_loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(f'Arrêt précoce après {epoch+1} époques')
                early_ending = epoch + 1
                break  # Arrêter l'entraînement
        
        # 3. Impression de l'apprentissage et des scores train et test
        if epoch==0 or (epoch+1) % 10 == 0:
            
            with torch.no_grad():
                
                # Calculate metrics for training data
                outputs_train = model(X_train_tensor) # probabilities
                # Evaluate predictions on training data
                final_score_train = get_final_score(Y_train_tensor, outputs_train, S_train_tensor)
                macro_f1_train = get_macro_f1(Y_train_tensor, outputs_train)
                inv_macro_tpr_gap_train = 1 - get_macro_tpr_gap(Y_train_tensor, outputs_train, S_train_tensor)
            
                # Calculate metrics for test data
                outputs_test = model(X_test_tensor)
                # Evaluate predictions on training data
                final_score_test = get_final_score(Y_test_tensor, outputs_test, S_test_tensor)
                macro_f1_test = get_macro_f1(Y_test_tensor, outputs_test)
                inv_macro_tpr_gap_test = 1 - get_macro_tpr_gap(Y_test_tensor, outputs_test, S_test_tensor)

                print(f'Epoch {epoch+1}, Loss: {loss.item()}, Final Score Train: {final_score_train.item()}, Final Score Test: {final_score_test.item()}, macro F1 Train: {macro_f1_train}, macro F1 Test: {macro_f1_test}, 1-TPR Gap Train: {inv_macro_tpr_gap_train}, 1-TPR Gap Test: {inv_macro_tpr_gap_test}')
            
    # 4. Make Predictions and Evaluate with final_score
    # -------------------------------------------------
            
    with torch.no_grad():
        model.eval()
        Y_pred_probs = model(X_test_tensor) # dim = 28 (Probabilities for each class)
        Y_pred_tensor = torch.argmax(Y_pred_probs, dim=1)  # dim = 1 (Get the class with the highest probability)
 
        macro_f1 = get_macro_f1(Y_test_tensor, Y_pred_tensor)
        inv_macro_tpr_gap = 1 - get_macro_tpr_gap(Y_test_tensor, Y_pred_probs, S_test_tensor)
        final_score = get_final_score(Y_test_tensor, Y_pred_probs, S_test_tensor)
        print(f'Final Evaluation Score: {final_score.item()} Macro F1: {macro_f1.item()} 1-TPR_gap: { inv_macro_tpr_gap.item() }')

    return model, Y_pred_probs, Y_pred_tensor, final_score, macro_f1, inv_macro_tpr_gap, early_ending

In [43]:
#################################################
#          TEST DES PARAMETRES
################################################


# 1. Define the model and optimizer and train
# --------------------------------------------------

model = nn.Sequential(
    nn.Linear(768, 28),  # Assuming 768 input features and 28 classes
    nn.ReLU(),  # Adding a ReLU activation function
    nn.Linear(28, 28),
    )  # Additional layer for complexity
'''    model = nn.Sequential(
    nn.Linear(768, 28),  # Assuming 768 input features and 28 classes
    #nn.LogSoftmax(dim=1)
    )  # LogSoftmax for multi-class classification'''

batch_size = 56
learning_rate=0.01
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
num_epochs = 1000

# 2. Train the model with the custom loss function final_eval
# -----------------------------------------------------------
name = 'NN-28-28_Adam'+'_lr_'+str(learning_rate)+'_batch_size_'+str(batch_size)
print('\n\n Starting to train model', name)
model_trained, Y_pred_probs, Y_pred_tensor, final_score, macro_f1, inv_macro_tpr_gap, early_ending = train_NN_with_custom_loss(model,optim.Adam(model.parameters(), lr=learning_rate), batch_size, X_train_tensor, Y_train_tensor, S_train_tensor, X_test_tensor, Y_test_tensor, S_test_tensor)
#Res.loc[i]=[name,optimizer,learning_rate,batch_size, early_ending, final_score, macro_f1, inv_macro_tpr_gap]
save_Y_pred_tofile(X_test_true_tensor, model_trained,name)




 Starting to train model NN-28-28_Adam_lr_0.01batch_size_56
Epoch 1, Loss: 0.771858811378479, Final Score Train: 0.688298225402832, Final Score Test: 0.6836358904838562, macro F1 Train: 0.4329074591321616, macro F1 Test: 0.42239134838052556, 1-TPR Gap Train: 0.9436889886856079, 1-TPR Gap Test: 0.9448804259300232
Epoch 10, Loss: 0.8224817514419556, Final Score Train: 0.7283227443695068, Final Score Test: 0.7060960531234741, macro F1 Train: 0.5039018027253503, macro F1 Test: 0.4758285708485192, 1-TPR Gap Train: 0.9527437090873718, 1-TPR Gap Test: 0.9363635182380676
Arrêt précoce après 16 époques
Final Evaluation Score: 0.7095040082931519 Macro F1: 0.47337439777047774 1-TPR_gap: 0.9456336498260498


(tensor([18, 21, 18,  ..., 22,  2, 19]),
 tensor([[ -39.2941,   33.9956,    5.6496,  ...,  -77.1098,  -20.6030,
           -29.1662],
         [-131.5958,  -76.7651,  -32.4897,  ...,    9.1425,  -46.1435,
           -14.5496],
         [ -34.3485,    8.2786,  -46.7784,  ...,  -52.5115,    0.5027,
           -14.1258],
         ...,
         [ -78.5277, -132.9139,  -11.4870,  ...,  -29.2087,  -31.6271,
           -21.0487],
         [  56.9629,  -15.3802,  177.5195,  ...,  -50.6951,  -32.4422,
           -26.2471],
         [   2.3273,  -37.0764,    5.4587,  ...,   12.8139,  -65.0023,
           -11.5120]], grad_fn=<AddmmBackward0>))

In [48]:
#################################################
#          BOUCLE HYPERPARAMETRES
################################################


# 1. Define the model and optimizer and train
# --------------------------------------------------

model = nn.Sequential(
    nn.Linear(768, 28),  # Assuming 768 input features and 28 classes
    nn.ReLU(),  # Adding a ReLU activation function
    nn.Linear(28, 28),  # Additional layer for complexity
    #nn.LogSoftmax(dim=1))  # LogSoftmax for multi-class classification
    )

optimizer_dict = {'Momentum' : optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9),
                'NAG': optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True),
                'Adam': optim.Adam(model.parameters(), lr=learning_rate),
                'Adagrad': optim.Adagrad(model.parameters(), lr=learning_rate, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10),
                 'SGD': optim.SGD(model.parameters(), lr=learning_rate)
                }
lr_list = [0.1, 0.05, 0.01, 0.005, 0.001]
batch_size_list = [28,56,128,256,1024]
num_epochs = 10000 

# 2. Train the model with the custom loss function final_eval
# -----------------------------------------------------------
Res=pd.DataFrame(columns=['model','optimizer','lr','batch_size','early_ending', 'final_score','macro_f1','macro_tpr_gap'])
i=0
for opt_name, optimizer in optimizer_dict.items():
    for learning_rate in lr_list:
        for batch_size in batch_size_list:
            name = 'NN-28-28_Adam'+'_lr_'+str(learning_rate)+'_batch_size_'+str(batch_size)
            print('\n\n Starting to train model', name)
            model_trained, Y_pred_probs, Y_pred_tensor, final_score, macro_f1, inv_macro_tpr_gap , early_ending= train_NN_with_custom_loss(model, optimizer, batch_size, X_train_tensor, Y_train_tensor, S_train_tensor, X_test_tensor, Y_test_tensor, S_test_tensor)
            Res.loc[i]=[name,optimizer,learning_rate,batch_size,early_ending,final_score, macro_f1, inv_macro_tpr_gap]
            save_Y_pred_tofile(X_test_true_tensor, model_trained,name)
        i+=1




 Starting to train model NN-28-28_Adam_lr_0.1_batch_size_28
Epoch 1, Loss: 0.8861488103866577, Final Score Train: 0.6009126901626587, Final Score Test: 0.59725421667099, macro F1 Train: 0.2493369680682859, macro F1 Test: 0.24110996910830043, 1-TPR Gap Train: 0.952488362789154, 1-TPR Gap Test: 0.9533984661102295


In [None]:
#ENTRAINEMENT SUR TOUT LE MODELE

# 2. Train the model with the custom loss function final_eval
# -----------------------------------------------------------
Res=pd.DataFrame(columns=['model','optimizer','lr','alpha','final_score','macro_f1','macro_tpr_gap'])
i=0
for opt_name, optimizer in optimizer_dict.items():
    for learning_rate in lr_list:
        for i in range(1,10):
            alpha=i
            name = 'all'+opt_name+'_lr_'+str(learning_rate)+'_alpha_'+str(i)
            print('\n\n Starting to train model', name)
            model_trained, Y_pred_probs, Y_pred_tensor, final_score, macro_f1, inv_macro_tpr_gap = train_NN_with_custom_loss(model, optimizer, alpha, X_tensor, Y_tensor, S_tensor, X_test_tensor, Y_test_tensor, S_test_tensor)
            Res.loc[i]=[name,optimizer,learning_rate,alpha,final_score, macro_f1, inv_macro_tpr_gap]
            save_Y_pred_tofile(X_test_true_tensor, model_trained,name)
            i+=1

In [None]:
model = nn.Sequential(
    nn.Linear(768, 2048),  # Couche d'entrée à la première couche cachée
    nn.ReLU(),  # Fonction d'activation ReLU
    nn.Dropout(p=0.5),  # Dropout avec une probabilité de désactivation de 50%
    nn.Linear(2048, 512),  # De la première couche cachée à la deuxième couche cachée
    nn.ReLU(),  # Une autre fonction d'activation ReLU après la deuxième couche cachée
    nn.Dropout(p=0.5),  # Un autre dropout après la deuxième couche cachée
    nn.Linear(512, 28),  # De la deuxième couche cachée à la couche de sortie
    nn.LogSoftmax(dim=1)  # LogSoftmax pour la classification multiclasse
)

learning_rate = 0.01
num_epochs = 20000 

name = 'NN2048-512-28-dropout_Adam'+'_lr_'+str(learning_rate)+'_alpha_5'
print('\n\n Starting to train model', name)
model_trained, Y_pred_probs, Y_pred_tensor, final_score, macro_f1, inv_macro_tpr_gap = train_NN_with_custom_loss(model,optim.Adam(model.parameters(), lr=learning_rate) , 5, X_train_tensor, Y_train_tensor, S_train_tensor, X_test_tensor, Y_test_tensor, S_test_tensor)
Res.loc[i]=[name,optimizer,learning_rate,alpha,final_score, macro_f1, inv_macro_tpr_gap]
save_Y_pred_tofile(X_test_true_tensor, model_trained,name)

In [None]:
model = nn.Sequential(
    nn.Linear(768, 2048),  # Assuming 768 input features and 28 classes
    nn.ReLU(),  # Adding a ReLU activation function
    nn.Linear(2048,512),  # Assuming 768 input features and 28 classes
    nn.Linear(512,28),  
    nn.Linear(28, 28),  # Additional layer for complexity
    nn.LogSoftmax(dim=1))  # LogSoftmax for multi-class classification

learning_rate = 0.01
num_epochs = 20000 

name = 'NN2048-512-28_Adam'+'_lr_'+str(learning_rate)+'_alpha_5'
print('\n\n Starting to train model', name)
model_trained, Y_pred_probs, Y_pred_tensor, final_score, macro_f1, inv_macro_tpr_gap = train_NN_with_custom_loss(model,optim.Adam(model.parameters(), lr=learning_rate) , 5, X_train_tensor, Y_train_tensor, S_train_tensor, X_test_tensor, Y_test_tensor, S_test_tensor)
Res.loc[i]=[name,optimizer,learning_rate,alpha,final_score, macro_f1, inv_macro_tpr_gap]
save_Y_pred_tofile(X_test_true_tensor, model_trained,name)

In [None]:
path_pkl = ''

with open(path_pkl + 'RESULTS_11-03-2024.pkl', 'wb') as f:
   pickle.dump(Res, f)

#path_pkl = 'pkl_files/'
#train = pd.read_pickle(path_pkl + 'train_pp.pkl')

**2. REGRESSION WITH CUSTOM LOSS macro F1**
---

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score
import numpy as np

# Assuming model, optimizer, X_train_tensor, Y_train_one_hot, X_test_tensor, Y_test are already defined

# Convert Y_test to one-hot encoding if it's not already one-hot encoded
# This is necessary for consistency in our loss function calculations
Y_test_tensor = torch.tensor(Y_test.values, dtype=torch.int64) if isinstance(Y_test, pd.Series) else torch.from_numpy(Y_test).long()
Y_test_one_hot = torch.nn.functional.one_hot(Y_test_tensor, num_classes=28)


# Define the model using nn.Sequential
model = nn.Sequential(
    nn.Linear(768, 28),  # Assuming 768 input features and 28 classes
    nn.ReLU(),  # Adding a ReLU activation function
    nn.Linear(28, 28),  # Additional layer for complexity
    nn.LogSoftmax(dim=1)  # LogSoftmax for multi-class classification
)

# Define an optimizer
optimizer = optim.SGD(model.parameters(), lr=0.1)

num_epochs = 10000  # Example number of epochs

for epoch in range(num_epochs):
    optimizer.zero_grad()  # Zero the gradients
    
    # Forward pass on the training data
    outputs_train = model(X_train_tensor)
    loss_train = macro_soft_f1_loss(Y_train_one_hot.float(), outputs_train)
    
    # Backward pass and optimize
    loss_train.backward()
    optimizer.step()
    
    # No gradient computation needed for evaluation
    with torch.no_grad():
        model.eval()  # Set the model to evaluation mode
        
        # Forward pass on the validation data
        outputs_test = model(X_test_tensor)
        
        # Calculate the exact macro F1 score for both training and validation data
        f1_train = calculate_exact_macro_f1(Y_train_one_hot.float(), outputs_train)
        f1_test = calculate_exact_macro_f1(Y_test_one_hot.float(), outputs_test)
        
        model.train()  # Set the model back to training mode
    
    # Print loss and F1 score
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss_train.item():.4f}, macro F1 Train: {f1_train:.4f}, macro F1 Test: {f1_test:.4f}')

In [None]:
import pandas as pd
from sklearn.metrics import classification_report
import torch

# Assuming model is already trained and X_test is a DataFrame

# Convert X_test to a PyTorch tensor
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)

# Make predictions
with torch.no_grad():  # We do not need gradient computation for prediction
    model.eval()  # Set the model to evaluation mode
    Y_pred_probs = model(X_test_tensor)
    Y_pred = torch.argmax(Y_pred_probs, dim=1)  # Get the class with the highest probability

# Convert Y_pred to a DataFrame
Y_pred_df = pd.DataFrame(Y_pred.numpy(), columns=['Predicted'])

# Evaluate Y_pred compared to Y_test (assuming Y_test is a numpy array or a pandas Series)
print(classification_report(Y_test, Y_pred_df['Predicted']))

# If you want to use the exact F1 score for evaluation, you can directly use it from sklearn.metrics
from sklearn.metrics import f1_score
print("Exact F1 Score (micro):", f1_score(Y_test, Y_pred_df['Predicted'],average = 'micro'))  # 'weighted' for multi-class
print("Exact F1 Score (macro):", f1_score(Y_test, Y_pred_df['Predicted'], average='macro'))  # 'weighted' for multi-class

# Returning Y_pred as a DataFrame makes sense for further analysis or submission
#return Y_pred_df

**CUSTON LOSS FUNCTION TRP GAP**
---

In [None]:
import torch

def gap_TPR(y_true, y_pred, protected_attribute):
    """
    Calculate the average TPR gap for each class across protected groups.
    
    Args:
    - y_true: Tensor of true labels, one-hot encoded.
    - y_pred: Tensor of predicted logits (before softmax).
    - protected_attribute: Tensor indicating group membership for each instance.
    
    Returns:
    - Average TPR gap across all classes.
    """
    # Apply softmax to get probabilities
    y_pred_probs = torch.softmax(y_pred, dim=1)
    
    # Convert one-hot labels to class indices for gathering
    y_true_indices = torch.argmax(y_true, dim=1)
    
    # Initialize TPR storage
    tpr_gaps = []
    
    # Iterate over each class
    num_classes = y_true.shape[1]
    for class_idx in range(num_classes):
        # Calculate TPR for the current class across all groups
        tpr_list = []
        
        # Calculate overall TPR for the current class
        overall_mask = y_true_indices == class_idx
        overall_tpr = torch.sum((y_pred_probs[:, class_idx] > 0.5) & overall_mask).float() / torch.sum(overall_mask).float()
        
        # Calculate TPR for each protected group
        for group_val in protected_attribute.unique():
            group_mask = (protected_attribute == group_val) & overall_mask
            group_tpr = torch.sum((y_pred_probs[:, class_idx] > 0.5) & group_mask).float() / torch.sum(group_mask).float()
            tpr_list.append(group_tpr)
        
        # Calculate TPR gap for the current class and store it
        tpr_gaps.append(torch.abs(torch.tensor(tpr_list) - overall_tpr))
    
    # Calculate the average TPR gap across all classes
    avg_tpr_gap = torch.mean(torch.stack(tpr_gaps))
    
    return avg_tpr_gap

In [None]:
print(type(Y_test),Y_test.shape)
print(type(Y_pred_probs),Y_pred_probs.shape)
get_macro_tpr_gap(Y_test,Y_pred_probs,S_test)

<class 'pandas.core.series.Series'> (5550,)
<class 'torch.Tensor'> torch.Size([5550, 28])


TypeError: argmax(): argument 'input' (position 1) must be Tensor, not Series