In [1]:
import json

import numpy as np
import pandas as pd
from numpyencoder import NumpyEncoder

from util.nn_model import NNModel
from util.evaluator import *

# Kept after the wildcard import on purpose, so these two names cannot be shadowed by it.
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [2]:
# Load the processed Hepatitis C table and create the scaler that is reused for
# the train split, the test split and the reference range below.
data = pd.read_csv("../../data/Hepatitis/HepatitisC_dataset_processed.csv")
standard_sc = preprocessing.StandardScaler()

# Features are every column except 'Category'; 'Category' is the label.
X = data.drop(['Category'],axis=1)
y = data["Category"]
# 70/30 split, stratified on the label, with a fixed seed for reproducibility.
train_x, test_x, train_y, test_y = train_test_split(X,y,test_size=0.3,random_state=42,stratify=y)

# The scaler is fitted on the training split only and then applied to the test
# split; both are cast to float32.
train_x=standard_sc.fit_transform(train_x).astype(np.float32)
test_x=standard_sc.transform(test_x).astype(np.float32)

# Labels as int64 NumPy arrays.
train_y = train_y.to_numpy().astype(np.int64)
test_y = test_y.to_numpy().astype(np.int64)

# Two saved checkpoints are loaded; only `model` is used in the cells visible here.
# NOTE(review): `model1` is not referenced afterwards in the visible part of the notebook.
model = NNModel("../../train/hepatitis/Hepatitis_model_simple.pt")
model1 = NNModel("../../train/hepatitis/Hepatitis_model_simple_v1.pt")

# obtain true negative set of test set
# i.e. test rows whose label is 0 and which `model` also predicts as 0.
# The rows are selected through a Python set, so their order follows the set's
# iteration order and is not guaranteed to be ascending.
# NOTE(review): the result is named `abnormal_test` although it is built from
# class 0 -- confirm which class the label encoding treats as "abnormal".
idx = np.where(test_y == 0)[0]
pred_y = model.predict(test_x)
idx1 = np.where(pred_y == 0)[0]
tn_idx = set(idx).intersection(idx1)
abnormal_test = test_x[list(tn_idx)]

# obtain true positive set of train set
# i.e. training rows whose label is 1 and which `model` also predicts as 1.
# NOTE(review): despite its name, `normal_test` is sliced from `train_x`, not `test_x`.
idx2 = np.where(train_y == 1)[0]
pred_ty = model.predict(train_x)
idx3 = np.where(pred_ty == 1)[0]
tp_idx = set(idx2).intersection(idx3)
normal_test = train_x[list(tp_idx)]

# set the normal range
# A 2 x 10 array in raw (unscaled) feature units; presumably row 0 holds the
# lower bounds and row 1 the upper bounds of each feature -- TODO confirm.
# NOTE(review): column index 6 has the same value (5.368) in both rows, which
# gives a zero-width range -- confirm this is intended and not a copy/paste slip.
normal_range = np.array([[35.6, 30, 10, 10, 0, 5.32, 5.368, 59, 10, 66],
    [46, 120, 35, 35, 21, 12.92, 5.368, 84, 42, 87]])
# Bring the range into the same standardized space as train_x / test_x.
normal_range = standard_sc.transform(normal_range).astype(np.float32)
# NOTE(review): the standardized bounds are scaled by 0.3; the reason for this
# factor is not visible in this notebook -- confirm.
normal_range = normal_range * 0.3

# initialize the evaluator
# It receives the scaled training features and the selected `normal_test` rows.
# `abnormal_test` and `normal_range` are not passed to it here.
evaluator = Evaluator(train_x, normal_test)



In [3]:
def run_cfmss(json_path):
    """Recompute the diversity metrics of a CFMSS result file and store them in place.

    The JSON document at ``json_path`` is loaded, the metrics are recomputed
    with the notebook-level ``evaluator`` and the same file is rewritten with
    the keys ``diversity``, ``diversity2``, ``count_diversity`` and
    ``count_diversity2`` replaced (or added when they did not exist yet).

    Layout read from the document:
      * ``data[0][0]`` -- its length is used as the feature dimension ``d``.
      * ``cf``  -- a list of groups; every element of a group is a dict whose
        ``'cf'`` entry holds counterfactual values.
      * ``cf2`` -- a list of groups aligned with ``cf`` by position; every
        element of a group holds counterfactual values directly.
    Groups are paired with ``zip``, so surplus groups in the longer of the two
    lists are ignored.

    Args:
        json_path: Path of the JSON result file; it is read and then overwritten.

    Returns:
        None. The previously stored and the recomputed ``diversity`` /
        ``diversity2`` lists are printed for comparison (``None`` is printed
        for a stored list that does not exist yet).

    Raises:
        KeyError: if ``data``, ``cf`` or ``cf2`` is missing from the document.
        TypeError: if a metric value cannot be serialized; the file on disk is
            left untouched in that case.
    """
    with open(json_path) as f:
        cfmss_json = json.load(f)

    d = len(cfmss_json['data'][0][0])

    diversity_list, diversity2_list = [], []
    count_diversity_list, count_diversity2_list = [], []

    for cfs, _cfs in zip(cfmss_json['cf'], cfmss_json['cf2']):
        # Flatten whatever nesting the stored counterfactuals have into (n, d).
        cfs = np.reshape([item['cf'] for item in cfs], (-1, d))
        diversity_list.append(evaluator.diversity(cfs))
        count_diversity_list.append(evaluator.count_diversity(cfs))

        _cfs = np.reshape(list(_cfs), (-1, d))
        # Shows how many second-stage counterfactuals each group has (may be 0).
        print(_cfs.shape)
        diversity2_list.append(evaluator.diversity(_cfs))
        count_diversity2_list.append(evaluator.count_diversity(_cfs))

    # Old vs. new values; .get() keeps a first run (no stored metrics yet)
    # from failing with KeyError.
    print(cfmss_json.get('diversity'))
    print(diversity_list)

    print(cfmss_json.get('diversity2'))
    print(diversity2_list)

    cfmss_json['diversity'] = diversity_list
    cfmss_json['diversity2'] = diversity2_list
    cfmss_json['count_diversity'] = count_diversity_list
    cfmss_json['count_diversity2'] = count_diversity2_list

    # Serialize before opening the file for writing: opening with "w" truncates
    # it immediately, so an encoding error would otherwise wipe the results.
    serialized = json.dumps(cfmss_json, cls=NumpyEncoder)
    with open(json_path, "w") as f:
        f.write(serialized)

def run(json_path):
    """Recompute the diversity metrics of a baseline result file and store them in place.

    The JSON document at ``json_path`` is loaded, the metrics are recomputed
    with the notebook-level ``evaluator`` and the same file is rewritten with
    the keys ``diversity``, ``diversity2``, ``count_diversity`` and
    ``count_diversity2`` replaced (or added when they did not exist yet).

    Layout read from the document:
      * ``data[0][0]`` -- its length is used as the feature dimension ``d``.
      * ``cf`` -- a list of groups; every element of a group is a dict that
        carries both a ``'cf'`` and a ``'cf2'`` entry with counterfactual values.

    Args:
        json_path: Path of the JSON result file; it is read and then overwritten.

    Returns:
        None. The previously stored and the recomputed ``diversity`` /
        ``diversity2`` lists are printed for comparison (``None`` is printed
        for a stored list that does not exist yet).

    Raises:
        KeyError: if ``data`` or ``cf`` is missing from the document, or a
            group element lacks ``'cf'`` / ``'cf2'``.
        TypeError: if a metric value cannot be serialized; the file on disk is
            left untouched in that case.
    """
    with open(json_path) as f:
        cfmss_json = json.load(f)

    d = len(cfmss_json['data'][0][0])

    diversity_list, diversity2_list = [], []
    count_diversity_list, count_diversity2_list = [], []

    for cfs_list in cfmss_json['cf']:
        # Flatten whatever nesting the stored counterfactuals have into (n, d).
        cfs = np.reshape([result['cf'] for result in cfs_list], (-1, d))
        diversity_list.append(evaluator.diversity(cfs))
        count_diversity_list.append(evaluator.count_diversity(cfs))

        _cfs = np.reshape([result['cf2'] for result in cfs_list], (-1, d))
        diversity2_list.append(evaluator.diversity(_cfs))
        count_diversity2_list.append(evaluator.count_diversity(_cfs))

    # Old vs. new values; .get() keeps a first run (no stored metrics yet)
    # from failing with KeyError.
    print(cfmss_json.get('diversity'))
    print(diversity_list)

    print(cfmss_json.get('diversity2'))
    print(diversity2_list)

    cfmss_json['diversity'] = diversity_list
    cfmss_json['diversity2'] = diversity2_list
    cfmss_json['count_diversity'] = count_diversity_list
    cfmss_json['count_diversity2'] = count_diversity2_list

    # Serialize before opening the file for writing: opening with "w" truncates
    # it immediately, so an encoding error would otherwise wipe the results.
    serialized = json.dumps(cfmss_json, cls=NumpyEncoder)
    with open(json_path, "w") as f:
        f.write(serialized)

In [4]:
# JSON result files, one per method name embedded in the file name.
# The paths are relative, so they resolve against the notebook's working directory.
# Each file is read and then overwritten by run_cfmss() / run().
cfmss_path = 'Hepatitis_cfmss.json'
cfproto_path = 'Hepatitis_cfproto.json'
dice_path = 'Hepatitis_dice.json'
gs_path = 'Hepatitis_growingsphere.json'
plaincf_path = 'Hepatitis_plaincf.json'

In [5]:
# The CFMSS file has its own layout (separate 'cf' / 'cf2' lists) and therefore
# its own updater; the remaining result files share one layout and go through run().
run_cfmss(cfmss_path)
for baseline_path in (cfproto_path, dice_path, gs_path, plaincf_path):
    run(baseline_path)

(1, 10)
(3, 10)
(5, 10)
(1, 10)
(1, 10)
(1, 10)
(1, 10)
(1, 10)
(2, 10)
(1, 10)
(0, 10)
(1, 10)
(2, 10)
(5, 10)
(1, 10)
(2, 10)
(1, 10)
(2, 10)
(2, 10)
(1, 10)
[0.03479232022045804, 0.09916202777834471, 0.0648786420050576, 0.1294098894005689, 0.13739285748152297, 0.03259569516372558, -1, -1, -1, 0.11888775862509689, -1, 0.04969517144561184, 0.11607364264839976, 0.18381163226720063, -1, 0.08276135114345445, -1, 0.37087394742351176, 0.15470839809956707, 0.3233145505218759]
[0.3479232022045804, 0.9916202777834471, 0.648786420050576, 1.294098894005689, 1.3739285748152297, 0.3259569516372558, -1, -1, -1, 1.1888775862509688, -1, 0.4969517144561184, 1.1607364264839977, 1.8381163226720063, -1, 0.8276135114345445, -1, 3.708739474235118, 1.5470839809956707, 3.233145505218759]
[-1, 0.0877113322458741, 0.09726830084802703, -1, -1, -1, -1, -1, 0.14603636808545903, -1, -1, -1, 0.14225725913241935, 0.18381163226720063, -1, 0.14319602055461417, -1, 0.39143500692694755, 0.06568261260907263, -1]
[-1, 0.