In [57]:
import os

In [67]:
import pandas as pd

class ResultsReader:
    """Load an experiment-results CSV and expose filtered / relative / averaged views of it.

    On construction the CSV at ``file_path`` is read with pandas and pre-processed
    (see ``_proccess_df``). Every column that is neither one of ``FILTERABLE`` nor
    ``ID`` is treated as a metric column.

    Row filters are set with ``set_filter`` and the columns returned by the public
    getters are chosen with ``set_columns_shown``.
    """

    REPS = "reps"
    # Columns removed during pre-processing (in addition to every "VAR|" column).
    DROP_COLUMNS = ["time", 'bias mit ML method', REPS]

    ID = "id"

    DATASET = "data"
    ML = "ML method"
    ATTR = "sensitive attrs"
    BIAS_MIT = "bias mitigation"
    OTHER = "other"
    # Columns that may be used as row filters and as grouping keys for the means.
    FILTERABLE = [DATASET, ML, ATTR, BIAS_MIT, OTHER]

    VAR_PREFIX = "VAR|"
    FYP_VAE = "FYP VAE"
    # Default value of the BIAS_MIT column used as the baseline for relative metrics.
    BASE = "No Bias Mitigation"

    def __init__(self, file_path):
        """Read and pre-process the CSV at ``file_path``.

        Raises:
            Exception: whatever ``pandas.read_csv`` raised if the file cannot be loaded.
        """
        self.file_path = file_path
        self.df = None
        self.metrics = []
        self.filters = {}

        self.read_csv()
        self.columns_shown = self.FILTERABLE + self.metrics

    def read_csv(self):
        """Load ``self.file_path`` and store the pre-processed frame in ``self.df``.

        A load failure is reported on stdout and then re-raised, so the caller sees
        the real cause instead of a follow-up error on an undefined variable.
        """
        try:
            raw_df = pd.read_csv(self.file_path)
        except Exception as e:
            print(f"Error: Unable to open '{self.file_path}'", e)
            raise
        print(f"File '{self.file_path}' successfully loaded as DataFrame.")

        self.df = self._proccess_df(raw_df)

    def _proccess_df(self, df):
        """Keep only rows with ``reps == 1``, drop bookkeeping columns, record metric names.

        Drops every column whose name contains ``VAR_PREFIX`` plus ``DROP_COLUMNS``
        and sets ``self.metrics`` to the remaining columns that are not in
        ``FILTERABLE`` and are not ``ID``.

        Returns:
            The pre-processed DataFrame.
        """
        df = df[df[self.REPS] == 1]
        var_cols = df.filter(like=self.VAR_PREFIX, axis=1).columns.tolist()
        df = df.drop(columns=var_cols + self.DROP_COLUMNS)

        non_metric_cols = self.FILTERABLE + [self.ID]
        self.metrics = [col for col in df.columns if col not in non_metric_cols]

        return df

    def change_other_to_losses(self):
        """Replace each value of the "other" column with just the losses-used list text."""
        self.df[self.OTHER] = self.df[self.OTHER].apply(self._get_losses_used)

    def _get_losses_used(self, other):
        """Return the text between ``losses_used=[`` and the next ``]`` in ``other``.

        Strings without that marker are cut at their first ``]`` (or returned whole
        if they contain none). Non-string values (e.g. NaN) are returned unchanged.
        """
        if not isinstance(other, str):
            return other
        other = other.split("losses_used=[")[-1]
        other = other.split("]")[0]
        return other

    def get_all_column_values(self, column_name) -> list:
        """Return the distinct values of ``column_name`` in the (unfiltered) data."""
        return self.df[column_name].unique().tolist()

    def set_filter(self, column_name, values: list):
        """Restrict rows to those whose ``column_name`` value is in ``values``.

        Args:
            column_name: one of ``FILTERABLE``.
            values: allowed values; a single scalar (e.g. one string) is accepted
                and treated as a one-element list.

        Raises:
            RuntimeError: if ``column_name`` is not in ``FILTERABLE``.
        """
        if column_name not in self.FILTERABLE:
            raise RuntimeError("invalid filter column name", column_name)
        # `Series.isin` rejects scalars, so normalise them to a list here.
        if not pd.api.types.is_list_like(values):
            values = [values]
        # Copy so later mutation of the caller's list does not change the filter.
        self.filters[column_name] = list(values)

    def clear_filters(self):
        """Remove all row filters."""
        self.filters = {}

    def set_columns_shown(self, metrics = None, other_columns_shown = None):
        """Choose the columns returned by the public getters.

        Args:
            metrics: metric column names; defaults to all metric columns.
            other_columns_shown: non-metric column names shown before the metrics;
                defaults to ``FILTERABLE``.
        """
        if metrics is None:
            metrics = self.metrics
        if other_columns_shown is None:
            other_columns_shown = self.FILTERABLE
        self.columns_shown = list(other_columns_shown) + list(metrics)

    def get_filtered_df(self):
        """FILTERED,
        returns all columns based on the row filters"""
        df = self.df.copy()

        for col_name, allowed_vals in self.filters.items():
            df = df[df[col_name].isin(allowed_vals)]

        return df

    def get_filtered_metrics(self):
        """ FILTERED, SELECTED COLUMNS,
        returns selected columns based on the row filters.
        by default returns all filterable columns and all metric columns."""
        return self.get_filtered_df()[self.columns_shown]

    def get_relative_metrics(self, base = None):
        """ FILTERED, SELECTED COLUMNS, RELATIVE
        returns metric values with respect to the given "base" bias mitigation method of the same experiment.
        returns selected columns based on the row filters.
        by default returns all filterable columns and all metric columns.
        """
        if base is None:
            base = self.BASE
        return self._get_relative_metrics(base)[self.columns_shown]

    def get_mean_metrics(self, merge_all_others = True):
        """ FILTERED, SELECTED COLUMNS, MEAN
        returns metric values averaged over all runs of the experiment with the same config.
        returns selected columns based on the row filters.
        by default returns all filterable columns and all metric columns.

        Raises NotImplementedError when ``merge_all_others`` is false.
        """
        if not merge_all_others:
            raise NotImplementedError()

        df = self.get_filtered_df()
        # take the mean over rows sharing the same config, i.e. the same FILTERABLE values
        return self._get_mean_metrics(df)[self.columns_shown]

    def get_mean_relative_metrics(self, base = None, merge_all_others = True):
        """ FILTERED, SELECTED COLUMNS, MEAN, RELATIVE
        returns metric values with respect to the given "base" bias mitigation method of the same experiment.
        returns metric values averaged over all runs of the experiment with the same config.
        returns selected columns based on the row filters.
        by default returns all filterable columns and all metric columns.

        Raises NotImplementedError when ``merge_all_others`` is false.
        """
        if base is None:
            base = self.BASE
        if not merge_all_others:
            raise NotImplementedError()

        df = self._get_relative_metrics(base)
        # take the mean over rows sharing the same config, i.e. the same FILTERABLE values
        return self._get_mean_metrics(df)[self.columns_shown]

    def _get_mean_metrics(self, df):
        """Average the numeric columns of ``df`` within each group of ``FILTERABLE`` values.

        ``numeric_only=True`` keeps non-numeric leftovers (e.g. a string ``id``
        column) from making ``mean`` raise.
        """
        return df.groupby(self.FILTERABLE).mean(numeric_only=True).reset_index()

    def _get_relative_metrics(self, base):
        """Return the filtered rows with each metric replaced by ``value - base value``.

        The base row of a given row is the first row of the *unfiltered* data with
        the same ``ID``, ``DATASET``, ``ML`` and ``ATTR`` whose ``BIAS_MIT`` equals
        ``base``; it is looked up in ``self.df`` so it is found even when the
        current filters hide it.

        Raises:
            IndexError: if some filtered row has no matching base row.
        """
        # drop=True: do not leak the old index into the result as an "index" column.
        df = self.get_filtered_df().reset_index(drop=True)
        if df.empty or not self.metrics:
            return df

        keys = [self.ID, self.DATASET, self.ML, self.ATTR]
        base_rows = self.df.loc[
            self.df[self.BIAS_MIT] == base, keys + self.metrics
        ].drop_duplicates(subset=keys, keep="first")

        # One left merge instead of a full-frame scan per row. Base keys are unique,
        # so `matched` has exactly one row per row of `df`, in the same order.
        matched = df[keys].merge(base_rows, on=keys, how="left", indicator=True)
        missing = matched["_merge"] != "both"
        if missing.any():
            first_missing = df.loc[missing, keys].iloc[0].to_dict()
            raise IndexError(
                f"no '{base}' base row found for experiment {first_missing}"
            )

        df[self.metrics] = df[self.metrics] - matched[self.metrics]
        return df
    
    
    
    
            
        


In [68]:
# Results CSV lives in the "results" directory next to the notebook.
results_filename = "before_after_mask_proc_sex"
results_file = os.path.join("results", f"{results_filename}.csv")

# Constructing the reader loads and pre-processes the CSV.
reader = ResultsReader(results_file)

# List the distinct values available in every filterable column.
for col in ResultsReader.FILTERABLE:
    print(
        reader.get_all_column_values(col)
    )


File 'results/before_after_mask_proc_sex.csv' successfully loaded as DataFrame.
['Compas Dataset', 'Adult Dataset']
['MLPClassifier', 'NaiveBayes']
["['sex']"]
['No Bias Mitigation', 'FairMask Bias Mitigation', 'FYP VAE', 'Reweighing Bias Mitigation', 'FairBalance Bias Mitigation']
['{}', '{\'my model config\': "VAEMaskConfig(epochs=1250, latent_dim=10, mask_values=None vae_layers=(75, 60, 30, 15), lr=0.011, \\nlosses_used=[\'Latent sens ADV loss\', \'Sensitive KL loss\', \'Reconstruction loss\', \'KL divergence loss\'])Latent sens ADV loss: {\'weight\': 0.11, \'lr\': 0.05, \'optimizer\': \'Adam\', \'layers\': (30, 30), \'input_dim\': 9}, Sensitive KL loss: {\'weight\': 9000, \'sens_col_ids\': [7]}, Reconstruction loss: {\'weight\': 15}, KL divergence loss: {\'weight\': 0.005}"}', '{\'my model config\': "VAEMaskConfig(epochs=1250, latent_dim=25, mask_values=None vae_layers=(75, 60, 30, 15), lr=0.011, \\nlosses_used=[\'Latent sens ADV loss\', \'Sensitive KL loss\', \'Reconstruction loss

In [69]:
# Shorten the "other" column to just the losses used, so group labels stay readable.
reader.change_other_to_losses()

# Start from a clean filter state and keep only the Adult dataset rows.
reader.clear_filters()
reader.set_filter(ResultsReader.DATASET, ['Adult Dataset'])
# Optional: restrict to the baseline method only.
#reader.set_filter(ResultsReader.BIAS_MIT, [ResultsReader.BASE])

# Mean change of every metric relative to the default baseline (ResultsReader.BASE).
reader.get_mean_relative_metrics()

Unnamed: 0,data,ML method,sensitive attrs,bias mitigation,other,accuracy,precision,recall,sex|[AOD] Average Odds Difference,sex|[EOD] Equal Opportunity Difference,sex|[SPD] Statistical Parity Difference,sex|[DI_FM] Disparate Impact the way it was implemented in FairMask,[SF] Statistical Parity Subgroup Fairness,[DF] Differential Fairness
0,Adult Dataset,MLPClassifier,['sex'],FYP VAE,"'Flipped ADV loss', 'Pos Y vec loss', 'Reconst...",-0.045041,0.017229,-0.323327,-0.024629,-0.003472,-0.11779,-0.622256,-0.025981,-0.299207
1,Adult Dataset,MLPClassifier,['sex'],FYP VAE,"'Latent sens ADV loss', 'Pos Y vec loss', 'Rec...",-0.054836,-0.045824,-0.314726,0.015325,0.079055,-0.164788,-1.970416,-0.036374,-0.979111
2,Adult Dataset,MLPClassifier,['sex'],FYP VAE,"'Latent sens ADV loss', 'Sensitive KL loss', '...",-0.061038,-0.034768,-0.372349,0.011711,0.082347,-0.166024,-1.835265,-0.036811,-0.91302
3,Adult Dataset,MLPClassifier,['sex'],FYP VAE,"'Pos Y vec loss', 'Reconstruction loss', 'KL d...",-0.046607,-0.005212,-0.292876,-0.033818,-0.013126,-0.092108,-0.234161,-0.02052,-0.115762
4,Adult Dataset,MLPClassifier,['sex'],FYP VAE,"'Reconstruction loss', 'KL divergence loss'",-0.050936,-0.065894,-0.259652,-0.046874,-0.017316,-0.122984,-1.448982,-0.027223,-0.637884
5,Adult Dataset,MLPClassifier,['sex'],FYP VAE,"'Sensitive KL loss', 'Pos Y vec loss', 'Recons...",-0.051428,-0.021212,-0.321838,-0.038044,0.006543,-0.155676,-1.735319,-0.034593,-0.798024
6,Adult Dataset,MLPClassifier,['sex'],FairMask Bias Mitigation,{},-0.001448,-0.000291,-0.00952,0.029048,0.051016,0.008341,0.343297,0.001848,0.103926
7,Adult Dataset,MLPClassifier,['sex'],No Bias Mitigation,{},0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Adult Dataset,NaiveBayes,['sex'],FYP VAE,"'Flipped ADV loss', 'Pos Y vec loss', 'Reconst...",-0.023826,-0.07496,-0.274435,-0.131085,-0.079221,-0.214482,-1.276637,-0.047223,-0.586131
9,Adult Dataset,NaiveBayes,['sex'],FYP VAE,"'Latent sens ADV loss', 'Pos Y vec loss', 'Rec...",-0.098741,-0.088886,-0.145375,-0.187195,-0.172146,-0.350615,-1.980398,-0.077387,-1.004156


In [61]:

"""
what do I need for experiment setup????
currently under the assumption that only 1 rep results are considered

- set filter based on: bias mits, datasets, sens attrs, ml method, others
- get all available: bias mits, datasets, sens attrs, ml method, others
- get all available metric col names:
- based on the set filters get all requested metric columns (default metrics)
    (print also all filter columns?)
    
- based on the set filters get change in all requested columns compared to base (give base name)

MEAN OR ALL?????


"""

'\nwhat do I need for experiment setup????\ncurrently under the asumption that only 1 rep results are considered\n\n- set filter based on: bias mits, datasets, sens attrs, ml method, others\n- get all available: bias mits, datasets, sens attrs, ml method, others\n- get all available metric col names:\n- based on the set filters get all requested metric columns (default metrics)\n    (print also all filter columns?)\n    \n- based on the set filters get change in all requested columns compared to base (give base name)\n\nMEAN OR ALL?????\n\n\n'