In [278]:
from experiment_utils import *
import os

In [279]:
import pandas as pd

class ResultsReader:
    DROP_COLUMNS = ["time", 'bias mit ML method', "reps"]
    REPS = "reps"
    
    ID = "id"
    
    DATASET = "data"
    ML = "ML method"
    ATTR = "sensitive attrs"
    BIAS_MIT = "bias mitigation"
    OTHER = "other"
    FILTERABLE = [DATASET, ML, ATTR, BIAS_MIT, OTHER]
    
    VAR_PREFIX = "VAR|"  
    FYP_VAE = "FYP VAE"
    BASE = "No Bias Mitigation"
    def __init__(self, file_path):
        self.file_path = file_path
        self.df = None
        self.metrics = []
        
        
        self.read_csv()
        self.filters = {}
        self.columns_shown = self.FILTERABLE + self.metrics

    def read_csv(self):
        try:
            df = pd.read_csv(self.file_path)
            print(f"File '{self.file_path}' successfully loaded as DataFrame.")
        except Exception as e:
            print(f"Error: Unable to open '{self.file_path}'", e)
            
        self.df = self._proccess_df(df)
            
    def _proccess_df(self, df):
        # remove time and reps columns, keep only reps = 1
        df = df[df[self.REPS] == 1]
        var_cols = df.filter(like=self.VAR_PREFIX, axis=1).columns.tolist()
        cols_to_drop = var_cols + self.DROP_COLUMNS
        df = df.drop(columns=cols_to_drop, axis=1)
    
        non_metric_cols = self.FILTERABLE + [self.ID]
        print
        self.metrics = [col for col in df.columns if col not in non_metric_cols]
        
        return df

    def change_other_to_losses(self):
        # edit the "other" col values to have just the lossed used"
        self.df[self.OTHER] = self.df[self.OTHER].apply(self._get_losses_used)
        
    def _get_losses_used(self, other):
        other = other.split("losses_used=[")[-1]
        other = other.split("]")[0]
        return other
    
    def get_all_column_values(self, column_name) -> list:
        return self.df[column_name].unique().tolist()
        
    def set_filter(self, column_name, values: list):
        if column_name not in self.FILTERABLE:
            raise RuntimeError("invalid filter column name", column_name)
        self.filters[column_name] = values
        
    def clear_filters(self):
        self.filters = {}

    def set_columns_shown(self, metrics = None, other_columns_shown = None):
        if metrics is None:
            metrics = self.metrics
        if other_columns_shown is None:
            other_columns_shown = self.FILTERABLE
        self.columns_shown = other_columns_shown + metrics
        
    def get_filtered_df(self):
        """FILTERED,
        returns all columns based on the row filters"""
        df = self.df.copy()
        
        for col_name, allowed_vals in self.filters.items():
            df = df[df[col_name].isin(allowed_vals)]
            
        return df
    
    def get_filtered_metrics(self):
        """ FILTERED, SELECTED COLUMNS,
        returns selected columns based on the row filters.
        by default returns all filterable columns and all metric columns."""
        return self.get_filtered_df()[self.columns_shown]
        
    def get_relative_metrics(self, base = None):
        """ FILTERED, SELECTED COLUMNS, RELATIVE
        returns metric values with respect to the given "base" bias mitigation method of the same experiment.
        returns selected columns based on the row filters.
        by default returns all filterable columns and all metric columns.
        """
        if base is None:
            base = self.BASE
        return self._get_relative_metrics(base)[self.columns_shown]
    
    def get_mean_metrics(self, merge_all_others = True):
        """ FILTERED, SELECTED COLUMNS, MEAN
        returns metric values averaged over all runs of the experiment with the same config.
        returns selected columns based on the row filters.
        by default returns all filterable columns and all metric columns.
        """
        if not merge_all_others:
            raise NotImplementedError()
        
        df = self.get_filtered_df()
        # take mean over sema config aka "other col value"
        return self._get_mean_metrics(df)[self.columns_shown]
    
    def get_mean_relative_metrics(self, base = None, merge_all_others = True):
        """ FILTERED, SELECTED COLUMNS, MEAN, RELATIVE
        returns metric values with respect to the given "base" bias mitigation method of the same experiment.
        returns metric values averaged over all runs of the experiment with the same config.
        returns selected columns based on the row filters.
        by default returns all filterable columns and all metric columns.
        """
        if base is None:
            base = self.BASE
        if not merge_all_others:
            raise NotImplementedError()
        
        df = self._get_relative_metrics(base)
        # take mean over sema config aka "other col value"
        return self._get_mean_metrics(df)[self.columns_shown]
    
    def _get_mean_metrics(self, df):
        mean_df = df.groupby(self.FILTERABLE).mean().reset_index()
        return mean_df

    def _get_relative_metrics(self, base):
        df = self.get_filtered_df().reset_index()
        
        for index, row in df.iterrows():
            base_row = self.df[
                (self.df[self.ID] == row[self.ID]) & (self.df[self.DATASET] == row[self.DATASET]) & 
                (self.df[self.ML] == row[self.ML]) & (self.df[self.ATTR] == row[self.ATTR]) & 
                (self.df[self.BIAS_MIT] == base)
                ]
            for metric in self.metrics:
                df.loc[index, metric] = row[metric] - base_row[metric].values[0]

        return df
    
    

In [280]:
results_filename = "before_after_mask_proc_sex"
results_file = os.path.join("results",results_filename +".csv")


reader = ResultsReader(results_file)

for col in ResultsReader.FILTERABLE:
    print(reader.get_all_column_values(col))


Error: Unable to open 'results\before_after_mask_proc_sex.csv' [Errno 2] No such file or directory: 'results\\before_after_mask_proc_sex.csv'


UnboundLocalError: local variable 'df' referenced before assignment

In [None]:
reader.set_filter(ResultsReader.DATASET, ['Adult Dataset'])
#reader.change_other_to_losses()
reader.get_relative_metrics()

Unnamed: 0,data,ML method,sensitive attrs,bias mitigation,other,accuracy,precision,recall,sex|[AOD] Average Odds Difference,sex|[EOD] Equal Opportunity Difference,sex|[SPD] Statistical Parity Difference,sex|[DI_FM] Disparate Impact the way it was implemented in FairMask,[SF] Statistical Parity Subgroup Fairness,[DF] Differential Fairness
0,Adult Dataset,LogisticRegression,['sex'],Reweighing Bias Mitigation,{},-0.002149,0.00309,-0.019737,0.185604,0.31269,-0.094566,-4.513176,-0.021505,-1.04043
1,Adult Dataset,LogisticRegression,['sex'],FairBalance Bias Mitigation,{},-0.051888,-0.179171,0.210526,0.145096,0.288091,-0.026992,-4.990197,-0.006138,-1.255442
2,Adult Dataset,LogisticRegression,['sex'],FairMask Bias Mitigation,{},0.000307,-0.000316,0.002632,0.008557,0.016129,-0.002634,-0.445456,-0.000599,-0.065958
3,Adult Dataset,LogisticRegression,['sex'],No Bias Mitigation,{},0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Adult Dataset,LogisticRegression,['sex'],FYP VAE,"EOD no longer ABS! {'my model config': ""VAEMas...",-0.040835,-0.125562,-0.021053,0.345856,0.56482,-0.195333,-5.797631,-0.044421,-1.776367
5,Adult Dataset,LogisticRegression,['sex'],Reweighing Bias Mitigation,{},-0.006755,-0.000313,-0.045627,0.17142,0.274856,-0.100577,-3.340709,-0.021737,-0.936093
6,Adult Dataset,LogisticRegression,['sex'],FairBalance Bias Mitigation,{},-0.04759,-0.161988,0.21673,0.130655,0.244651,-0.029806,-3.644557,-0.006442,-1.088053
7,Adult Dataset,LogisticRegression,['sex'],FairMask Bias Mitigation,{},0.000307,-0.000894,0.003802,0.020006,0.037836,-0.00628,-0.597105,-0.001357,-0.115011
8,Adult Dataset,LogisticRegression,['sex'],No Bias Mitigation,{},0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Adult Dataset,LogisticRegression,['sex'],FYP VAE,"EOD no longer ABS! {'my model config': ""VAEMas...",-0.038686,-0.077441,-0.139417,0.151496,0.193694,-0.144477,-3.871962,-0.031224,-1.219103


In [None]:

"""
what do I need for experiment setup????
currently under the asumption that only 1 rep results are considered

- set filter based on: bias mits, datasets, sens attrs, ml method, others
- get all available: bias mits, datasets, sens attrs, ml method, others
- get all available metric col names:
- based on the set filters get all requested metric columns (default metrics)
    (print also all filter columns?)
    
- based on the set filters get change in all requested columns compared to base (give base name)

MEAN OR ALL?????


"""

'\nwhat do I need for experiment setup????\ncurrently under the asumption that only 1 rep results are considered\n\n- set filter based on: bias mits, datasets, sens attrs, ml method, others\n- get all available: bias mits, datasets, sens attrs, ml method, others\n- get all available metric col names:\n- based on the set filters get all requested metric columns (default metrics)\n    (print also all filter columns?)\n    \n- based on the set filters get change in all requested columns compared to base (give base name)\n\nMEAN OR ALL?????\n\n\n'