In [254]:
from experiment_utils import *
import os

In [255]:
import pandas as pd

class ResultsReader:
    """Load an experiment-results CSV and expose filtered / baseline-relative views.

    On construction the CSV at ``file_path`` is read, reduced to rows with
    ``reps == 1``, and stripped of the ``VAR|`` columns and ``DROP_COLUMNS``.
    Every remaining column that is neither filterable nor the id column is
    treated as a metric.

    Attributes:
        file_path: path of the CSV that was loaded.
        df: the processed DataFrame (see ``_proccess_df``).
        metrics: names of the metric columns found in ``df``.
        filters: mapping ``column name -> allowed values`` applied by
            ``get_filtered_df``.
        columns_shown: columns returned by ``get_filtered_metrics`` and
            ``get_relative_metrics``.
    """

    # Columns removed from the loaded frame (in addition to the VAR| columns).
    DROP_COLUMNS = ["time", 'bias mit ML method', "reps"]
    REPS = "reps"

    # Column used to pair a row with its baseline row in _get_relative_metrics.
    ID = "id"

    DATASET = "data"
    ML = "ML method"
    ATTR = "sensitive attrs"
    BIAS_MIT = "bias mitigation"
    OTHER = "other"
    # The only columns accepted by set_filter.
    FILTERABLE = [DATASET, ML, ATTR, BIAS_MIT, OTHER]

    # Columns whose name contains this substring are dropped on load.
    VAR_PREFIX = "VAR|"
    # Not referenced inside this class; kept for callers.
    FYP_VAE = "FYP VAE"
    # Default value of the "bias mitigation" column identifying baseline rows.
    BASE = "No Bias Mitigation"

    def __init__(self, file_path):
        """Read and pre-process the CSV at ``file_path``.

        Raises:
            Exception: whatever ``pandas.read_csv`` raises if the file cannot
                be loaded (re-raised by ``read_csv``).
        """
        self.file_path = file_path
        self.df = None
        self.metrics = []

        self.read_csv()
        self.filters = {}
        self.columns_shown = self.FILTERABLE + self.metrics

    def read_csv(self):
        """Load ``self.file_path`` into ``self.df`` (processed by ``_proccess_df``).

        A load failure is reported and then re-raised; previously execution
        continued and failed on an unbound local variable instead.
        """
        try:
            df = pd.read_csv(self.file_path)
        except Exception as e:
            print(f"Error: Unable to open '{self.file_path}'", e)
            raise
        print(f"File '{self.file_path}' successfully loaded as DataFrame.")

        self.df = self._proccess_df(df)

    def _proccess_df(self, df):
        """Keep only ``reps == 1`` rows, drop bookkeeping columns, record metrics.

        Sets ``self.metrics`` to every remaining column that is not in
        ``FILTERABLE`` and is not the ``ID`` column.

        Returns:
            The reduced DataFrame.
        """
        # keep only reps = 1, then remove the VAR|, time and reps columns
        df = df[df[self.REPS] == 1]
        var_cols = df.filter(like=self.VAR_PREFIX, axis=1).columns.tolist()
        df = df.drop(columns=var_cols + self.DROP_COLUMNS)

        non_metric_cols = self.FILTERABLE + [self.ID]
        self.metrics = [col for col in df.columns if col not in non_metric_cols]

        return df

    def change_other_to_losses(self):
        """Replace each "other" value with just its ``losses_used=[...]`` content.

        Mutates ``self.df`` in place.
        """
        self.df[self.OTHER] = self.df[self.OTHER].apply(self._get_losses_used)

    def _get_losses_used(self, other):
        """Return the text between ``losses_used=[`` and the next ``]``.

        A string without ``losses_used=[`` is cut at its first ``]`` (or
        returned whole if it has none). Non-string values (e.g. NaN from an
        empty CSV cell) are returned unchanged.
        """
        if not isinstance(other, str):
            return other
        other = other.split("losses_used=[")[-1]
        other = other.split("]")[0]
        return other

    def get_all_column_values(self, column_name) -> list:
        """Return the distinct values of ``column_name`` in the processed frame."""
        return self.df[column_name].unique().tolist()

    def set_filter(self, column_name, values: list):
        """Restrict ``column_name`` to ``values`` in subsequent filtered views.

        Raises:
            RuntimeError: if ``column_name`` is not one of ``FILTERABLE``.
        """
        if column_name not in self.FILTERABLE:
            raise RuntimeError("invalid filter column name", column_name)
        self.filters[column_name] = values

    def clear_filters(self):
        """Remove every filter set via ``set_filter``."""
        self.filters = {}

    def get_filtered_df(self):
        """Return a copy of ``self.df`` containing only rows that pass all filters."""
        df = self.df.copy()

        for col_name, allowed_vals in self.filters.items():
            df = df[df[col_name].isin(allowed_vals)]

        return df

    def set_columns_shown(self, metrics = None, other_columns_shown = None):
        """Choose the columns returned by the ``get_*_metrics`` methods.

        Args:
            metrics: metric columns to show; defaults to all of ``self.metrics``.
            other_columns_shown: non-metric columns to show; defaults to
                ``FILTERABLE``. They are placed before the metric columns.
        """
        if metrics is None:
            metrics = self.metrics
        if other_columns_shown is None:
            other_columns_shown = self.FILTERABLE
        # list(...) so that tuples or other iterables are accepted as well.
        self.columns_shown = list(other_columns_shown) + list(metrics)

    def get_filtered_metrics(self):
        """Return the filtered rows, restricted to ``self.columns_shown``."""
        return self.get_filtered_df()[self.columns_shown]

    # NOTE: the defaults below use the class-scope name BASE. Writing
    # ResultsReader.BASE here raises NameError on a fresh kernel because the
    # class name is not bound until the class body has finished executing.
    def get_relative_metrics(self, base = BASE):
        """Return filtered rows with each metric expressed as a delta from ``base``.

        Args:
            base: value of the "bias mitigation" column identifying baseline rows.

        Raises:
            IndexError: if a filtered row has no baseline row with the same id.
        """
        return self._get_relative_metrics(base)[self.columns_shown]

    def get_mean_relative_metrics(self, base = BASE, merge_all_others = True):
        """Placeholder for averaged relative metrics; currently returns ``None``.

        Raises:
            NotImplementedError: if ``merge_all_others`` is false.
        """
        if not merge_all_others:
            raise NotImplementedError()

        # TODO: take the mean over the same config, i.e. the "other" column value
        return None

    def _get_relative_metrics(self, base):
        """Subtract each row's baseline metric values from its own.

        The baseline of a row is the first row of the *unfiltered* frame that
        has the same ``ID`` and whose "bias mitigation" value equals ``base``.

        Returns:
            The filtered frame (after ``reset_index()``, so the old index is
            kept as a column) with the metric columns replaced by deltas.

        Raises:
            IndexError: if some filtered row has no matching baseline row.
        """
        df = self.get_filtered_df().reset_index()
        if df.empty or not self.metrics:
            return df

        # One baseline row per id, looked up once instead of once per row.
        is_base = self.df[self.BIAS_MIT] == base
        base_metrics = (
            self.df.loc[is_base, [self.ID] + self.metrics]
            .drop_duplicates(subset=self.ID, keep="first")
            .set_index(self.ID)
        )

        has_base = df[self.ID].isin(base_metrics.index)
        if not has_base.all():
            missing = df.loc[~has_base, self.ID].unique().tolist()
            raise IndexError(f"no '{base}' baseline row found for id(s) {missing}")

        # Align the baseline values to the filtered rows, then subtract in bulk.
        base_values = base_metrics.reindex(df[self.ID].to_numpy())[self.metrics].to_numpy()
        df[self.metrics] = df[self.metrics].to_numpy() - base_values

        return df
    
    
    
    
            
        


In [256]:
# Locate the results CSV inside the local "results" directory.
results_filename = "before_after_mask_proc_sex_new"
results_file = os.path.join("results", f"{results_filename}.csv")

# Loading prints a confirmation line (see ResultsReader.read_csv).
reader = ResultsReader(results_file)

# List the distinct values available in every filterable column,
# one printed list per column, in FILTERABLE order.
for col in ResultsReader.FILTERABLE:
    print(reader.get_all_column_values(col))


File 'results\before_after_mask_proc_sex_new.csv' successfully loaded as DataFrame.
['Compas Dataset', 'Adult Dataset']
['LogisticRegression']
["['sex']"]
['Reweighing Bias Mitigation', 'FairBalance Bias Mitigation', 'FairMask Bias Mitigation', 'No Bias Mitigation', 'FYP VAE']
['{}', '{\'my model config\': "VAEMaskConfig(epochs=1250, latent_dim=10, mask_values=None vae_layers=(75, 60, 30, 15), lr=0.011, \\nlosses_used=[\'Sensitive KL loss\', \'Pos Y vec loss\', \'Reconstruction loss\', \'KL divergence loss\'])Sensitive KL loss: {\'weight\': 9000, \'sens_col_ids\': [7]}, Pos Y vec loss: {\'weight\': 1200000}, Reconstruction loss: {\'weight\': 15}, KL divergence loss: {\'weight\': 0.005}"}', '{\'my model config\': "VAEMaskConfig(epochs=1250, latent_dim=25, mask_values=None vae_layers=(75, 60, 30, 15), lr=0.011, \\nlosses_used=[\'Sensitive KL loss\', \'Pos Y vec loss\', \'Reconstruction loss\', \'KL divergence loss\'])Sensitive KL loss: {\'weight\': 9000, \'sens_col_ids\': [33]}, Pos Y ve

In [257]:
# Select the mitigations to compare against the baseline. Filtering to the
# baseline itself ('No Bias Mitigation') would only produce all-zero deltas;
# these three values correspond to the rows in the recorded output below.
reader.set_filter(
    ResultsReader.BIAS_MIT,
    [
        'Reweighing Bias Mitigation',
        'FairBalance Bias Mitigation',
        'FairMask Bias Mitigation',
    ],
)
reader.change_other_to_losses()
# get_filtered_metrics() accepts no `relative_to` argument (it raised TypeError);
# baseline-relative values come from get_relative_metrics(base=...).
reader.get_relative_metrics(base=ResultsReader.BASE)

Unnamed: 0,data,ML method,sensitive attrs,bias mitigation,other,accuracy,precision,recall,sex|[AOD] Average Odds Difference,sex|[EOD] Equal Opportunity Difference,sex|[SPD] Statistical Parity Difference,sex|[DI_FM] Disparate Impact the way it was implemented in FairMask,[SF] Statistical Parity Subgroup Fairness,[DF] Differential Fairness
0,Compas Dataset,LogisticRegression,['sex'],Reweighing Bias Mitigation,{},0.009736,0.010215,0.002513,-0.192918,-0.062023,-0.175973,-0.208389,-0.02653,-0.252389
1,Compas Dataset,LogisticRegression,['sex'],FairBalance Bias Mitigation,{},-0.001391,0.024474,-0.065327,-0.174391,0.011364,-0.217404,-0.272161,-0.032776,-0.318421
2,Compas Dataset,LogisticRegression,['sex'],FairMask Bias Mitigation,{},0.002782,0.009748,-0.017588,-0.139773,-0.079545,-0.120301,-0.12904,-0.018137,-0.163629
3,Adult Dataset,LogisticRegression,['sex'],Reweighing Bias Mitigation,{},-0.000921,0.009931,-0.022843,-0.095782,-0.075224,-0.088772,-2.207857,-0.019611,-0.745421
4,Adult Dataset,LogisticRegression,['sex'],FairBalance Bias Mitigation,{},-0.042063,-0.159113,0.219543,-0.124889,-0.10449,-0.00678,-2.304576,-0.001498,-0.795145
5,Adult Dataset,LogisticRegression,['sex'],FairMask Bias Mitigation,{},0.000307,-0.000993,0.003807,-0.014087,-0.026087,-0.00466,-0.291786,-0.001029,-0.071973


In [258]:

"""
what do I need for experiment setup????
currently under the assumption that only 1 rep results are considered

- set filter based on: bias mits, datasets, sens attrs, ml method, others
- get all available: bias mits, datasets, sens attrs, ml method, others
- get all available metric col names:
- based on the set filters get all requested metric columns (default metrics)
    (print also all filter columns?)
    
- based on the set filters get change in all requested columns compared to base (give base name)

MEAN OR ALL?????


"""

'\nwhat do I need for experiment setup????\ncurrently under the asumption that only 1 rep results are considered\n\n- set filter based on: bias mits, datasets, sens attrs, ml method, others\n- get all available: bias mits, datasets, sens attrs, ml method, others\n- get all available metric col names:\n- based on the set filters get all requested metric columns (default metrics)\n    (print also all filter columns?)\n    \n- based on the set filters get change in all requested columns compared to base (give base name)\n\nMEAN OR ALL?????\n\n\n'