In [None]:
!pip install alibi[ray]


: 

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np

from utils.df_loader import load_adult_df, load_compas_df, load_german_df, load_diabetes_df, load_breast_cancer_df, load_electricity_df
from utils.preprocessing import preprocess_df,get_columns_type
from sklearn.model_selection import train_test_split
from utils.dice import generate_dice_result, process_results
from utils.models import train_three_models, evaluation_test, save_three_models, load_three_models
from utils.save import save_result_as_csv

# Disable pandas' chained-assignment (SettingWithCopy) warning for this session.
pd.options.mode.chained_assignment = None

print('TF version: ', tf.__version__)
print('Eager execution enabled: ', tf.executing_eagerly())  # True by default in TF 2.x

# Seed Python's `random`, NumPy and TensorFlow in a single call so that reruns
# are reproducible (a superset of seeding only TensorFlow and NumPy).
seed = 123
tf.keras.utils.set_random_seed(seed)


TF version:  2.15.0
Eager execution enabled:  True


In [5]:
#### Select dataset ####

# Map every supported dataset name to the function that loads it.
dataset_loaders = {
    'electricity': load_electricity_df,
    'adult': load_adult_df,
    'german': load_german_df,
    'compas': load_compas_df,
    'diabetes': load_diabetes_df,
    'breast_cancer': load_breast_cancer_df,
}

dataset_name = 'adult'  # any key of `dataset_loaders`

if dataset_name not in dataset_loaders:
    # ValueError subclasses Exception, so existing `except Exception` handlers still work.
    raise ValueError(
        f"Unsupported dataset: {dataset_name!r}. "
        f"Choose one of {sorted(dataset_loaders)}."
    )
dataset_loading_fn = dataset_loaders[dataset_name]

In [6]:
#### Load dataframe info.
# Run preprocess_df on the selected loader and keep the resulting DfInfo bundle;
# the bare expression on the last line displays it.
df_info = preprocess_df(dataset_loading_fn)
df_info

DfInfo(df=       age         workclass   education      marital-status  \
0       39         State-gov   Bachelors       Never-married   
1       50  Self-emp-not-inc   Bachelors  Married-civ-spouse   
2       38           Private     HS-grad            Divorced   
3       53           Private        11th  Married-civ-spouse   
4       28           Private   Bachelors  Married-civ-spouse   
...    ...               ...         ...                 ...   
32556   27           Private  Assoc-acdm  Married-civ-spouse   
32557   40           Private     HS-grad  Married-civ-spouse   
32558   58           Private     HS-grad             Widowed   
32559   22           Private     HS-grad       Never-married   
32560   52      Self-emp-inc     HS-grad  Married-civ-spouse   

              occupation   relationship   race     sex  capital-gain  \
0           Adm-clerical  Not-in-family  White    Male          2174   
1        Exec-managerial        Husband  White    Male             0   
2    

In [18]:
### Separate into train and test sets (80% / 20%).
train_df, test_df = train_test_split(
    df_info.dummy_df,
    train_size=0.8,
    shuffle=True,
    random_state=seed,
)

In [19]:
# Preview the training split (the notebook display truncates long frames).
train_df

Unnamed: 0,date,period,nswprice,nswdemand,vicprice,vicdemand,transfer,target
3114,0.460334,0.361702,0.105589,0.512645,0.006992,0.591403,0.201188,1
7704,0.912172,0.021277,0.044710,0.220470,0.002879,0.312015,0.528121,1
17147,0.880581,0.021277,0.084995,0.434841,0.005388,0.422320,0.609510,1
8080,0.893899,0.170213,0.044834,0.290539,0.002690,0.415588,0.780978,0
7016,0.867130,0.340426,0.040442,0.506694,0.002709,0.324184,0.442615,1
...,...,...,...,...,...,...,...,...
15377,0.013495,0.787234,0.053714,0.668997,0.003467,0.422915,0.432556,0
21602,0.428963,0.468085,0.079138,0.350491,0.003467,0.422915,0.432556,1
17730,0.876023,0.404255,0.034179,0.557870,0.001867,0.537804,0.673982,1
15725,0.871997,0.468085,0.034179,0.503868,0.001695,0.275246,0.940557,0


In [29]:
# Preview the test split (the notebook display truncates long frames).
test_df

Unnamed: 0,date,period,nswprice,nswdemand,vicprice,vicdemand,transfer,target
11086,0.460865,0.723404,0.142074,0.606665,0.009412,0.438374,0.426612,1
12114,0.013053,1.000000,0.050069,0.534216,0.003467,0.422915,0.432556,1
2277,0.014247,0.680851,0.042124,0.494942,0.003467,0.422915,0.432556,0
21817,0.437768,0.021277,0.049321,0.249926,0.003467,0.422915,0.432556,0
18012,0.013716,0.744681,0.047483,0.502380,0.003467,0.422915,0.432556,0
...,...,...,...,...,...,...,...,...
21693,0.030795,0.595745,0.115372,0.534811,0.003467,0.422915,0.432556,1
13622,0.009867,0.361702,0.049819,0.644749,0.003467,0.422915,0.432556,0
10376,0.013141,0.404255,0.043152,0.421749,0.003467,0.422915,0.432556,0
8542,0.867395,0.212766,0.021498,0.093127,0.000841,0.120663,0.941015,0


In [20]:
# Number of rows in the test split.
len(test_df)

4617

In [21]:
# Number of rows in the training split.
len(train_df)

18467

In [22]:
# Bare reference (not a call): only displays the function's repr/signature.
# NOTE(review): looks like leftover interactive inspection — consider removing.
get_columns_type

<function utils.preprocessing.get_columns_type(df)>

In [23]:
# Column names grouped by detected type ('integer', 'float', 'string').
df_info.columns_type

{'integer': [],
 'float': ['date',
  'period',
  'nswprice',
  'nswdemand',
  'vicprice',
  'vicdemand',
  'transfer'],
 'string': ['target']}

In [24]:
# Number of numerical columns recorded in df_info.
len(df_info.numerical_cols)

7

In [28]:
# List the categorical column names recorded in df_info.
df_info.categorical_cols


['target']

In [25]:
# Number of entries in df_info.ohe_feature_names
# (presumably the feature names after one-hot encoding — confirm in utils.preprocessing).
len(df_info.ohe_feature_names)

7

In [26]:
# Inspect df_info.cat_to_ohe_cat (presumably categorical column -> one-hot
# column names; confirm in utils.preprocessing).
df_info.cat_to_ohe_cat

{}

In [312]:
# Count of categorical columns recorded in df_info.
len(df_info.categorical_cols)

1

In [27]:
# List the categorical column names recorded in df_info.
# NOTE(review): same expression as an earlier cell in this notebook — likely redundant.
df_info.categorical_cols

['target']

In [31]:
# Alias df_info.scaled_df (plain reference, not a copy) and display it.
scaled_cf_df = df_info.scaled_df
scaled_cf_df

Unnamed: 0,date,period,nswprice,nswdemand,vicprice,vicdemand,transfer,target
0,0.429715,0.106383,0.042996,0.166022,0.003467,0.422915,0.432556,N
1,0.455909,0.574468,0.073094,0.578846,0.005164,0.720093,0.185186,Y
2,0.442724,0.702128,0.064556,0.528563,0.004285,0.545054,0.433013,N
3,0.881333,0.468085,0.034023,0.348557,0.002081,0.280684,0.690900,Y
4,0.885138,0.489362,0.036236,0.518001,0.002462,0.582859,0.235482,N
...,...,...,...,...,...,...,...,...
23079,0.911331,0.319149,0.047171,0.486760,0.003018,0.542983,0.438042,N
23080,0.474183,0.808511,0.028415,0.303184,0.001753,0.136199,0.873342,N
23081,0.912128,0.446809,0.045146,0.350788,0.002867,0.265924,0.687243,Y
23082,0.885403,0.914894,0.028166,0.434692,0.001843,0.418954,0.567901,N


In [30]:
# Take the target value of the row labelled 0, pass it (wrapped in a list) to the
# target label encoder's inverse_transform, and unwrap the single result.
# NOTE(review): the scaled_df preview shows target values as 'N'/'Y' strings —
# confirm inverse_transform expects that form rather than encoded integers.
df_info.target_label_encoder.inverse_transform([scaled_cf_df.loc[0, df_info.target_name]])[0]