# Results of Frisch et al. co-clustering
Dataset with same matching row id = user id and col id = item id

In [1]:
import os
import pickle
import pandas as pd
import numpy as np
import re

from sklearn.cluster import KMeans
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import normalized_mutual_info_score

import sys
sys.path.append('./fair_taucc/tauCC/src')
from fairness_metrics import balance_gen, balance_chierichetti, KL_fairness_error


root = os.getcwd()
root

'./fair_taucc/algorithms/C-Fairness-RecSys/reproducibility_study/Frisch_et_al'

In [2]:
# Dataset

DATASET = "movielens-1m"
SENSITIVE = "age"
TRUE_LABEL = "age"
TRUE_LABEL_DIM = "rows"

"""
DATASET = "yelp"
SENSITIVE = "gender"
TRUE_LABEL = "restaurant_type"
TRUE_LABEL_DIM = "cols"
"""

"""
DATASET = "amazon"
SENSITIVE = "gender"
TRUE_LABEL = "preferred_words_by_category"
TRUE_LABEL_DIM = "cols"
"""

"""
DATASET = "lfw"
SENSITIVE = "gender"
TRUE_LABEL = "person_ids" #true labels per calcolare ARI, AMI, NMI
TRUE_LABEL_DIM = "rows"
"""

if "movielens" in DATASET:
    dataset_path = f"./fair_taucc/datasets/movielens/{DATASET}"
else:
    dataset_path = f"./fair_taucc/datasets/{DATASET}"
    
Sx = np.load(dataset_path + f"/{SENSITIVE}.npy", allow_pickle=True).astype(int)

if TRUE_LABEL != " ":
    true_labels = np.load(dataset_path + f"/{TRUE_LABEL}.npy", allow_pickle=True).astype(int)
    if true_labels.ndim != 1:
        true_labels = true_labels.reshape(-1)

# Frisch et al. results
result_path = root + f"/results/{DATASET}/{SENSITIVE}/lbm_fair"
baseline_path = root + f"/results/{DATASET}/{SENSITIVE}/lbm_baseline"

In [3]:
def get_pkl_files_with_os(directory):
    absolute_path = []
    filenames = []
    for file in os.listdir(directory):
        if file.endswith(".pkl"):
            filenames.append(file)
            absolute_path.append(os.path.join(directory, file))
    return absolute_path, filenames

In [4]:
baseline_path

'./fair_taucc/algorithms/C-Fairness-RecSys/reproducibility_study/Frisch_et_al/results/movielens-1m/age/lbm_baseline'

# Best run of Vanilla LBM

### Results of LBM baseline

In [5]:
# Find the best run of Vanilla LBM
pkl_abspath, pkl_filenames = get_pkl_files_with_os(baseline_path)
exec_times = pd.read_csv(baseline_path + "/time.csv", sep=",")["time"].to_numpy()
total_runs = len(pkl_filenames)

if not os.path.exists(baseline_path + "/results.csv"):
    with open(baseline_path + "/results.csv", "a") as file:
        file.write(f"run;row_clus;col_clus;NLL;NMI_true_labels;AMI_true_labels;ARI_true_labels;balance_chierichetti;balance_bera;KL_fairness_error;time\n")


print("***LBM Ordinal***")
for filename, path in zip(pkl_filenames, pkl_abspath):
    match = re.search(r"run_(\d+)", filename)
    if match:
        run = int(match.group(1))
        print("run: ", run)
    else:
        raise ValueError(f"Run not found in {filename}")
        
    with open(path, "rb") as f:
        data = pickle.load(f)
        
    if "nll" in data:
        nll = data["nll"]
    else:
        nll = None
    K_rows = data["nq"]
    K_cols = data["nl"]
    
    print("Run Kmeans on tau1...")
    kmeans_rows = KMeans(n_clusters=K_rows).fit(data["model"]["tau_1"])
    print("Kmeans on tau1 terminated.")
    print("Run Kmeans on tau2...")
    kmeans_cols = KMeans(n_clusters=K_cols).fit(data["model"]["tau_2"])
    print("Kmeans on tau2 terminated.")
    
    row_labels = kmeans_rows.labels_
    col_labels = kmeans_cols.labels_
    
    np.save(baseline_path + f"/run_{run}_row_clustering.npy", row_labels)
    np.save(baseline_path + f"/run_{run}_col_clustering.npy", col_labels)
    
    bera = balance_gen(Sx, row_labels)
    chierichetti = balance_chierichetti(Sx, row_labels)
    kl_error = KL_fairness_error(row_labels, K_rows, Sx)
    
    time = exec_times[run-1]
    
    if TRUE_LABEL_DIM == "cols":
        NMI_true_labels = normalized_mutual_info_score(true_labels, col_labels)
        AMI_true_labels = adjusted_mutual_info_score(true_labels, col_labels)
        ARI_true_labels = adjusted_rand_score(true_labels, col_labels)
    else:
        NMI_true_labels = normalized_mutual_info_score(true_labels, row_labels)
        AMI_true_labels = adjusted_mutual_info_score(true_labels, row_labels)
        ARI_true_labels = adjusted_rand_score(true_labels, row_labels)
    
    with open(baseline_path + "/results.csv", "a") as file:
        file.write(f"{run};{K_rows};{K_cols};{nll};{NMI_true_labels};{AMI_true_labels};{ARI_true_labels};{chierichetti};{bera};{kl_error};{time}\n")
    
    

***LBM Ordinal***
run:  2
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  10
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  8
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  6
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  9
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  1
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  4
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  3
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  7
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  5
Run Kmeans on tau1.

### Best run of LBM baselin

In [6]:
df_vanilla = pd.read_csv(baseline_path + "/results.csv", sep=";")
df_vanilla

Unnamed: 0,run,row_clus,col_clus,NLL,NMI_true_labels,AMI_true_labels,ARI_true_labels,balance_chierichetti,balance_bera,KL_fairness_error,time
0,2,25,25,,0.009704,0.007656,-0.006828,0.022222,0.106077,0.890538,9119.704376
1,10,25,25,,0.009856,0.007784,-0.00696,0.026667,0.158505,0.893785,8808.395654
2,8,25,25,,0.008606,0.006609,-0.004863,0.038462,0.218888,0.708673,8811.531039
3,6,25,25,,0.009004,0.006942,-0.007114,0.046875,0.243352,0.797382,9108.066045
4,9,25,25,,0.008061,0.006024,-0.004719,0.076923,0.40743,0.642087,8886.033014
5,1,25,25,,0.008016,0.005999,-0.004848,0.071429,0.386634,0.603396,9046.431015
6,4,25,25,,0.008913,0.006875,-0.003952,0.053333,0.299782,0.804723,9057.284409
7,3,25,25,,0.008153,0.006144,-0.003041,0.097087,0.475516,0.441249,9107.657846
8,7,25,25,,0.008524,0.006509,-0.00456,0.060976,0.328332,0.615059,8958.898691
9,5,25,25,,0.008164,0.006104,-0.006789,0.05,0.250726,0.783166,8930.930307


In [7]:
# Best run
if df_vanilla["NLL"].tolist()[0] == "None":
    id_row = df_vanilla["NMI_true_labels"].argmax()
    print("NLL None")
else:
    id_row = df_vanilla["NLL"].argmin()
    print("NLL Not None")

best_run_row = df_vanilla.iloc[id_row]
best_run = int(best_run_row["run"])
print("best run of LBM baseline: ", best_run)

NLL None
best run of LBM baseline:  10


In [8]:
mean_vanilla = df_vanilla.groupby(["row_clus", "col_clus"]).mean().drop(["run"],axis=1)
mean_vanilla

Unnamed: 0_level_0,Unnamed: 1_level_0,NMI_true_labels,AMI_true_labels,ARI_true_labels,balance_chierichetti,balance_bera,KL_fairness_error,time
row_clus,col_clus,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
25,25,0.0087,0.006664,-0.005368,0.054397,0.287524,0.718006,8983.493239


In [9]:
var_vanilla = df_vanilla.groupby(["row_clus", "col_clus"]).var().drop(["run"],axis=1)
var_vanilla

Unnamed: 0_level_0,Unnamed: 1_level_0,NMI_true_labels,AMI_true_labels,ARI_true_labels,balance_chierichetti,balance_bera,KL_fairness_error,time
row_clus,col_clus,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
25,25,4.427937e-07,4.247434e-07,2e-06,0.000533,0.013216,0.020546,14665.713284


In [10]:
std_vanilla = df_vanilla.groupby(["row_clus", "col_clus"]).std().drop(["run"],axis=1)
std_vanilla

Unnamed: 0_level_0,Unnamed: 1_level_0,NMI_true_labels,AMI_true_labels,ARI_true_labels,balance_chierichetti,balance_bera,KL_fairness_error,time
row_clus,col_clus,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
25,25,0.000665,0.000652,0.001443,0.02309,0.11496,0.143338,121.102078


In [11]:
aggregated_vanilla = pd.DataFrame()

for key in mean_vanilla.keys():
    aggregated_vanilla[f"{key}_mean"] = mean_vanilla[key].values

for key in std_vanilla.keys():
    aggregated_vanilla[f"{key}_std"] = std_vanilla[key].values
    
for key in var_vanilla.keys():
    aggregated_vanilla[f"{key}_var"] = var_vanilla[key].values
    
aggregated_vanilla

Unnamed: 0,NMI_true_labels_mean,AMI_true_labels_mean,ARI_true_labels_mean,balance_chierichetti_mean,balance_bera_mean,KL_fairness_error_mean,time_mean,NMI_true_labels_std,AMI_true_labels_std,ARI_true_labels_std,...,balance_bera_std,KL_fairness_error_std,time_std,NMI_true_labels_var,AMI_true_labels_var,ARI_true_labels_var,balance_chierichetti_var,balance_bera_var,KL_fairness_error_var,time_var
0,0.0087,0.006664,-0.005368,0.054397,0.287524,0.718006,8983.493239,0.000665,0.000652,0.001443,...,0.11496,0.143338,121.102078,4.427937e-07,4.247434e-07,2e-06,0.000533,0.013216,0.020546,14665.713284


In [12]:
aggregated_vanilla.to_csv(baseline_path + "/aggregated.csv", index=False)

# Parity LBM vs LBM baseline

In [13]:
result_path

'./fair_taucc/algorithms/C-Fairness-RecSys/reproducibility_study/Frisch_et_al/results/movielens-1m/age/lbm_fair'

In [14]:
baseline_row_labels = np.load(baseline_path + f"/run_{best_run}_row_clustering.npy")
baseline_col_labels = np.load(baseline_path + f"/run_{best_run}_col_clustering.npy")

pkl_abspath, pkl_filenames = get_pkl_files_with_os(result_path)
exec_times = pd.read_csv(result_path + "/time.csv", sep=",")["time"].to_numpy()
total_runs = len(pkl_filenames)

if not os.path.exists(result_path + "/results.csv"):
    with open(result_path + "/results.csv", "a") as file:
        file.write(f"run;row_clus;col_clus;NLL;NMI_true_labels;AMI_true_labels;ARI_true_labels;NMI_rows;AMI_rows;ARI_rows;NMI_cols;AMI_cols;ARI_cols;balance_chierichetti;balance_bera;KL_fairness_error;time\n")

print("***Parity LBM***")

for filename, path in zip(pkl_filenames, pkl_abspath):

    match = re.search(r"run_(\d+)", filename)
    if match:
        run = int(match.group(1))
        print("run: ", run)
    else:
        raise ValueError(f"Run not found in {filename}")
        
    with open(path, "rb") as f:
        data = pickle.load(f)
    
    #nll = data["nll"]
    nll = None
    K_rows = data["nq"]
    K_cols = data["nl"]
    
    print("Run Kmeans on tau1...")
    kmeans_rows = KMeans(n_clusters=K_rows).fit(data["model"]["tau_1"])
    print("Kmeans on tau1 terminated.")
    print("Run Kmeans on tau2...")
    kmeans_cols = KMeans(n_clusters=K_cols).fit(data["model"]["tau_2"])
    print("Kmeans on tau2 terminated.")
    
    row_labels = kmeans_rows.labels_
    col_labels = kmeans_cols.labels_
    
    np.save(result_path + f"/run_{run}_row_clustering.npy", row_labels)
    np.save(result_path + f"/run_{run}_col_clustering.npy", col_labels)
    
    bera = balance_gen(Sx, row_labels)
    chierichetti = balance_chierichetti(Sx, row_labels)
    kl_error = KL_fairness_error(row_labels, K_rows, Sx)
    
    time = exec_times[run-1]
    
    if TRUE_LABEL_DIM == "cols":
        NMI_true_labels = normalized_mutual_info_score(true_labels, col_labels)
        AMI_true_labels = adjusted_mutual_info_score(true_labels, col_labels)
        ARI_true_labels = adjusted_rand_score(true_labels, col_labels)
    else:
        NMI_true_labels = normalized_mutual_info_score(true_labels, row_labels)
        AMI_true_labels = adjusted_mutual_info_score(true_labels, row_labels)
        ARI_true_labels = adjusted_rand_score(true_labels, row_labels)
    
    NMI_rows = normalized_mutual_info_score(baseline_row_labels, row_labels)
    AMI_rows = adjusted_mutual_info_score(baseline_row_labels, row_labels)
    ARI_rows = adjusted_rand_score(baseline_row_labels, row_labels)
    
    NMI_cols = normalized_mutual_info_score(baseline_col_labels, col_labels)
    AMI_cols = adjusted_mutual_info_score(baseline_col_labels, col_labels)
    ARI_cols = adjusted_rand_score(baseline_col_labels, col_labels)
        
    with open(result_path + "/results.csv", "a") as file:
        file.write(f"{run};{K_rows};{K_cols};{nll};{NMI_true_labels};{AMI_true_labels};{ARI_true_labels};{NMI_rows};{AMI_rows};{ARI_rows};{NMI_cols};{AMI_cols};{ARI_cols};{chierichetti};{bera};{kl_error};{time}\n")
    

***Parity LBM***
run:  3
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  6
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  10
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  8
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  2
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  1
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  4
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  5
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  7
Run Kmeans on tau1...
Kmeans on tau1 terminated.
Run Kmeans on tau2...
Kmeans on tau2 terminated.
run:  9
Run Kmeans on tau1..

In [15]:
df_fair = pd.read_csv(result_path + "/results.csv", sep=";")
df_fair

Unnamed: 0,run,row_clus,col_clus,NLL,NMI_true_labels,AMI_true_labels,ARI_true_labels,NMI_rows,AMI_rows,ARI_rows,NMI_cols,AMI_cols,ARI_cols,balance_chierichetti,balance_bera,KL_fairness_error,time
0,3,25,25,,0.00436,0.001981,-0.007024,0.052568,0.03561,0.030259,0.136182,0.112017,0.238769,0,0.0,inf,9835.946497
1,6,25,25,,0.007226,0.004141,-0.021022,0.067568,0.051993,0.053237,0.131038,0.102377,0.153334,0,0.0,inf,9969.189223
2,10,25,25,,0.005122,0.002517,-0.010448,0.059084,0.042133,0.040152,0.127677,0.104469,0.212915,0,0.0,inf,7943.385755
3,8,25,25,,0.005273,0.002537,-0.014892,0.063409,0.048379,0.055897,0.135723,0.108735,0.14706,0,0.0,inf,9659.374012
4,2,25,25,,0.00344,0.000975,-0.007007,0.057066,0.040725,0.03389,0.132107,0.108667,0.213046,0,0.0,inf,9803.633262
5,1,25,25,,0.00627,0.003742,-0.011779,0.064353,0.047655,0.043304,0.132974,0.108002,0.228028,0,0.0,inf,9872.444815
6,4,25,25,,0.007674,0.004845,-0.021354,0.068465,0.052221,0.049802,0.129703,0.104154,0.222758,0,0.0,inf,9889.571162
7,5,25,25,,0.006521,0.003417,-0.018614,0.06701,0.051111,0.043493,0.110047,0.082367,0.080999,0,0.0,inf,9988.145977
8,7,25,25,,0.004208,0.001887,-0.006858,0.052989,0.035918,0.030351,0.137821,0.112614,0.244205,0,0.0,inf,9703.260414
9,9,25,25,,0.005758,0.003293,-0.008806,0.054472,0.037398,0.034619,0.127643,0.105443,0.207506,0,0.0,inf,9633.917366


In [16]:
df_fair.drop(["run"], axis=1, inplace=True)
df_fair

Unnamed: 0,row_clus,col_clus,NLL,NMI_true_labels,AMI_true_labels,ARI_true_labels,NMI_rows,AMI_rows,ARI_rows,NMI_cols,AMI_cols,ARI_cols,balance_chierichetti,balance_bera,KL_fairness_error,time
0,25,25,,0.00436,0.001981,-0.007024,0.052568,0.03561,0.030259,0.136182,0.112017,0.238769,0,0.0,inf,9835.946497
1,25,25,,0.007226,0.004141,-0.021022,0.067568,0.051993,0.053237,0.131038,0.102377,0.153334,0,0.0,inf,9969.189223
2,25,25,,0.005122,0.002517,-0.010448,0.059084,0.042133,0.040152,0.127677,0.104469,0.212915,0,0.0,inf,7943.385755
3,25,25,,0.005273,0.002537,-0.014892,0.063409,0.048379,0.055897,0.135723,0.108735,0.14706,0,0.0,inf,9659.374012
4,25,25,,0.00344,0.000975,-0.007007,0.057066,0.040725,0.03389,0.132107,0.108667,0.213046,0,0.0,inf,9803.633262
5,25,25,,0.00627,0.003742,-0.011779,0.064353,0.047655,0.043304,0.132974,0.108002,0.228028,0,0.0,inf,9872.444815
6,25,25,,0.007674,0.004845,-0.021354,0.068465,0.052221,0.049802,0.129703,0.104154,0.222758,0,0.0,inf,9889.571162
7,25,25,,0.006521,0.003417,-0.018614,0.06701,0.051111,0.043493,0.110047,0.082367,0.080999,0,0.0,inf,9988.145977
8,25,25,,0.004208,0.001887,-0.006858,0.052989,0.035918,0.030351,0.137821,0.112614,0.244205,0,0.0,inf,9703.260414
9,25,25,,0.005758,0.003293,-0.008806,0.054472,0.037398,0.034619,0.127643,0.105443,0.207506,0,0.0,inf,9633.917366


In [17]:
mean_fair = df_fair.groupby(["row_clus", "col_clus"]).mean()
mean_fair

Unnamed: 0_level_0,Unnamed: 1_level_0,NMI_true_labels,AMI_true_labels,ARI_true_labels,NMI_rows,AMI_rows,ARI_rows,NMI_cols,AMI_cols,ARI_cols,balance_chierichetti,balance_bera,KL_fairness_error,time
row_clus,col_clus,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
25,25,0.005585,0.002934,-0.01278,0.060698,0.044314,0.0415,0.130091,0.104884,0.194862,0,0.0,inf,9629.886848


In [18]:
std_fair = df_fair.groupby(["row_clus", "col_clus"]).std()
std_fair

Unnamed: 0_level_0,Unnamed: 1_level_0,NMI_true_labels,AMI_true_labels,ARI_true_labels,NMI_rows,AMI_rows,ARI_rows,NMI_cols,AMI_cols,ARI_cols,balance_chierichetti,balance_bera,KL_fairness_error,time
row_clus,col_clus,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
25,25,0.001364,0.001169,0.005809,0.00622,0.006725,0.009303,0.007857,0.008588,0.051674,0.0,0.0,,604.882887


In [19]:
var_fair = df_fair.groupby(["row_clus", "col_clus"]).var()
var_fair

Unnamed: 0_level_0,Unnamed: 1_level_0,NMI_true_labels,AMI_true_labels,ARI_true_labels,NMI_rows,AMI_rows,ARI_rows,NMI_cols,AMI_cols,ARI_cols,balance_chierichetti,balance_bera,KL_fairness_error,time
row_clus,col_clus,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
25,25,2e-06,1e-06,3.4e-05,3.9e-05,4.5e-05,8.7e-05,6.2e-05,7.4e-05,0.00267,0,0.0,,365883.307203


In [20]:
aggregated = pd.DataFrame()
aggregated

In [21]:
for key in mean_fair.keys():
    aggregated[f"{key}_mean"] = mean_fair[key].values

for key in std_fair.keys():
    aggregated[f"{key}_std"] = std_fair[key].values
    
for key in var_fair.keys():
    aggregated[f"{key}_var"] = var_fair[key].values

In [22]:
aggregated

Unnamed: 0,NMI_true_labels_mean,AMI_true_labels_mean,ARI_true_labels_mean,NMI_rows_mean,AMI_rows_mean,ARI_rows_mean,NMI_cols_mean,AMI_cols_mean,ARI_cols_mean,balance_chierichetti_mean,...,NMI_rows_var,AMI_rows_var,ARI_rows_var,NMI_cols_var,AMI_cols_var,ARI_cols_var,balance_chierichetti_var,balance_bera_var,KL_fairness_error_var,time_var
0,0.005585,0.002934,-0.01278,0.060698,0.044314,0.0415,0.130091,0.104884,0.194862,0,...,3.9e-05,4.5e-05,8.7e-05,6.2e-05,7.4e-05,0.00267,0,0.0,,365883.307203


In [23]:
result_path

'./fair_taucc/algorithms/C-Fairness-RecSys/reproducibility_study/Frisch_et_al/results/movielens-1m/age/lbm_fair'

In [24]:
aggregated.to_csv(result_path + "/aggregated.csv", index=False)