In [60]:
# %pip install pi-heaan

In [1]:
import piheaan as heaan
from piheaan.math import sort
from piheaan.math import approx # for piheaan math function
import math
import numpy as np
import pandas as pd
import os

from sklearn.metrics import recall_score, roc_auc_score, accuracy_score
import numpy as np
from sklearn.metrics import confusion_matrix

In [2]:
# set parameter
params = heaan.ParameterPreset.FGb
context = heaan.make_context(params) # context has paramter information
heaan.make_bootstrappable(context) # make parameter bootstrapable

In [63]:
# # create and save keys
# key_file_path = "./keys"

# sk = heaan.SecretKey(context) # create secret key
# os.makedirs(key_file_path, mode=0o775, exist_ok=True)
# sk.save(key_file_path+"/secretkey.bin") # save secret key

# key_generator = heaan.KeyGenerator(context, sk) # create public key
# key_generator.gen_common_keys()
# key_generator.save(key_file_path+"/") # save public key

In [3]:
# Load pre-exisisting secret/public key
# When a key is created, it can be used again to save a new key without creating a new one
key_file_path = "./keys"

secret_key = heaan.SecretKey(context,key_file_path+"/secretkey.bin") # load secret key
public_key = heaan.KeyPack(context, key_file_path+"/") # load public key
public_key.load_enc_key()
public_key.load_mult_key()

eval = heaan.HomEvaluator(context,public_key) # to load piheaan basic function
dec = heaan.Decryptor(context) # for decrypt
enc = heaan.Encryptor(context) # for encrypt

In [5]:
# log_slots is used for the number of slots per ciphertext
# It depends on the parameter used (ParameterPreset)
# The number '15' is the value for maximum number of slots,
# but you can also use a smaller number (ex. 2, 3, 5, 7 ...)
# The actual number of slots in the ciphertext is calculated as below.
log_slots = 15
num_slots = 2**log_slots

In [10]:
import pandas as pd

train = pd.read_csv('is_train_data.csv')
test = pd.read_csv('is_test_data.csv')

In [11]:
train['loan_label'].value_counts()

0    450234
1     21481
Name: loan_label, dtype: int64

In [12]:
test['loan_label'].value_counts()

0    133445
1      1185
Name: loan_label, dtype: int64

In [13]:
# loan_label이 0인 데이터에서 무작위로 1000개 샘플 선택
train_0 = train[train['loan_label'] == 0].sample(n=1000, random_state=42)

# loan_label이 1인 데이터에서 무작위로 1000개 샘플 선택
train_1 = train[train['loan_label'] == 1].sample(n=1000, random_state=42)

# 선택된 데이터 합치기
df = pd.concat([train_0, train_1])

In [14]:
df

Unnamed: 0,log_inc,log_loan_amnt,int_rate,term,installment,purpose,grade,loan_label,emp_length,home_ownership,dti,last_fico_range_high,last_fico_range_low,total_acc,delinq_2yrs
279940,12.409013,9.677214,0.1612,60,388.90,0,2,0,7,0,30.46,604.0,600.0,35.0,0.0
338512,11.225243,9.680344,0.1774,60,404.04,0,2,0,4,0,25.12,719.0,715.0,29.0,0.0
448250,9.372459,9.998798,0.0756,36,684.95,0,0,0,2,0,107.14,759.0,755.0,20.0,0.0
166655,11.790557,9.903488,0.0819,60,407.35,0,0,0,1,0,5.88,784.0,780.0,11.0,0.0
170615,11.002100,10.158130,0.1797,60,654.73,0,3,0,7,0,22.02,734.0,730.0,23.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188765,11.695247,9.740969,0.1640,60,417.03,1,2,1,1,0,16.69,514.0,510.0,30.0,0.0
199744,12.111762,9.825526,0.1033,60,396.09,1,1,1,10,0,12.56,674.0,670.0,57.0,0.0
116152,10.827746,9.210340,0.1171,36,330.76,3,1,1,0,1,9.69,554.0,550.0,34.0,1.0
406894,10.463103,8.987197,0.1430,60,187.40,0,2,1,2,1,26.51,739.0,735.0,26.0,0.0


In [15]:
def normalize_data(arr):
    S = 0
    for i in range(len(arr)):
        S += arr[i]
    return [arr[i] / S for i in range(len(arr))]

In [17]:
# preprocessing data
# convert only on columns that are not 0 ~ 1 to values 0 ~ 1
train_n = df.shape[0]

X = [0] * 14
X[0] = normalize_data(df['log_inc'].values)
X[1] = normalize_data(df['log_loan_amnt'].values)
X[2] = normalize_data(df['int_rate'].values)
X[3] = list(df['purpose'].values)
X[4] = list(df['grade'].values)
X[5] = normalize_data(df['emp_length'].values)
X[6] = normalize_data(df['dti'].values)
X[7] = normalize_data(df['last_fico_range_high'].values)
X[8] = normalize_data(df['last_fico_range_low'].values)
X[9] = normalize_data(df['home_ownership'].values)
X[10] = normalize_data(df['total_acc'].values)
X[11] = list(df['delinq_2yrs'].values)
X[12] = normalize_data(df['term'].values)
X[13] = normalize_data(df['installment'].values)

Y = list(df['loan_label'].values)

In [18]:
print(train_n)

2000


In [None]:
# 데이터 분할 및 암호화
# X 데이터를 log_slots를 기반으로 여러 부분으로 나눔
# 각 데이터 부분을 별도로 암호화
num_slots = 2**log_slots  # 슬롯의 총 개수 계산
num_parts = (train_n + num_slots - 1) // num_slots  # 필요한 부분의 수 계산

for part in range(num_parts):
    start_idx = part * num_slots
    end_idx = min((part + 1) * num_slots, train_n)

    msg_X = heaan.Message(log_slots)
    ctxt_X = heaan.Ciphertext(context)

    for idx in range(start_idx, end_idx):
        # X의 해당 요소를 복소수 형태로 변환하여 할당
        msg_X[idx - start_idx] = X[idx // train_n][idx % train_n]

    enc.encrypt(msg_X, public_key, ctxt_X)
#     ctxt_X.save(key_file_path + f"/ctxt_X_part_{part}.bin")


In [20]:
num_parts_y = (len(Y) + num_slots - 1) // num_slots  # 필요한 부분의 수 계산

for part in range(num_parts_y):
    start_idx = part * num_slots
    end_idx = min((part + 1) * num_slots, len(Y))

    msg_Y = heaan.Message(log_slots)
    ctxt_Y = heaan.Ciphertext(context)

    for idx in range(start_idx, end_idx):
        # Y의 해당 요소를 복소수 형태로 변환하여 할당
        msg_Y[idx - start_idx] = Y[idx]
    enc.encrypt(msg_Y, public_key, ctxt_Y)
#     ctxt_Y.save(key_file_path + f"/ctxt_Y_part_{part}.bin")

In [77]:
# ctxt_X_parts

[(level: 12, log(num slots): 15, data: [ (0.000553+0.000000j), (0.000500+0.000000j), (0.000418+0.000000j), (0.000526+0.000000j), (0.000490+0.000000j), ..., (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j) ])]

In [78]:
# ctxt_Y_parts

[(level: 12, log(num slots): 15, data: [ (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j), ..., (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j) ])]

In [22]:
# initial value beta
beta = 2 * np.random.rand(15) - 1
print("beta : ", beta)
print()
msg_beta = heaan.Message(log_slots)
ctxt_beta = heaan.Ciphertext(context)

for i in range(14):
    for j in range(train_n):
        msg_beta[train_n*i + j] = beta[i+1]
for j in range(train_n):
    msg_beta[14*train_n + j] = beta[0]

enc.encrypt(msg_beta, public_key, ctxt_beta)
print("msg_beta : ", msg_beta)

beta :  [-0.21015197  0.76136137  0.58101882  0.82158062  0.96089553  0.64234132
 -0.80838403  0.71629563  0.65529596 -0.07197904 -0.18064009  0.88150466
 -0.69015922  0.11770393  0.26879135]

msg_beta :  [ (0.761361+0.000000j), (0.761361+0.000000j), (0.761361+0.000000j), (0.761361+0.000000j), (0.761361+0.000000j), ..., (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j), (0.000000+0.000000j) ]


In [23]:
def step(learning_rate, ctxt_X, ctxt_Y, ctxt_beta, n, log_slots, context, eval):
    '''
    ctxt_X, ctxt_Y : data for training
    ctxt_beta : initial value beta
    n : the number of row in train_data
    '''
    ctxt_rot = heaan.Ciphertext(context)
    ctxt_tmp = heaan.Ciphertext(context)

    ## step1
    # beta0
    ctxt_beta0 = heaan.Ciphertext(context)
    eval.left_rotate(ctxt_beta, 14*n, ctxt_beta0)

    # compute  ctxt_tmp = beta1*x1 + beta2*x2 + ... + beta12*x12 + beta0
    ctxt_tmp = heaan.Ciphertext(context)
    eval.mult(ctxt_beta, ctxt_X, ctxt_tmp)

    for i in range(3):
        eval.left_rotate(ctxt_tmp, n*2**(2-i), ctxt_rot)
        eval.add(ctxt_tmp, ctxt_rot, ctxt_tmp)
    eval.add(ctxt_tmp, ctxt_beta0, ctxt_tmp)

    msg_mask = heaan.Message(log_slots)
    for i in range(n):
        msg_mask[i] = 1
    eval.mult(ctxt_tmp, msg_mask, ctxt_tmp)



    ## step2
    # compute sigmoid
    approx.sigmoid(eval, ctxt_tmp, ctxt_tmp, 8.0)
    eval.bootstrap(ctxt_tmp, ctxt_tmp)
    msg_mask = heaan.Message(log_slots)
    # if sigmoid(0) -> return 0.5
    for i in range(n, num_slots):
        msg_mask[i] = 0.5
    eval.sub(ctxt_tmp, msg_mask, ctxt_tmp)

    ## step3
    # compute  (learning_rate/n) * (y_(j) - p_(j))
    ctxt_d = heaan.Ciphertext(context)
    eval.sub(ctxt_Y, ctxt_tmp, ctxt_d)
    eval.mult(ctxt_d, learning_rate / n, ctxt_d)

    eval.right_rotate(ctxt_d, 14*n, ctxt_tmp) # for beta0
    for i in range(3):
        eval.right_rotate(ctxt_d, n * 2**i, ctxt_rot)
        eval.add(ctxt_d, ctxt_rot, ctxt_d)
    eval.add(ctxt_d, ctxt_tmp, ctxt_d)

    ## step4
    # compute  (learning_rate/n) * (y_(j) - p_(j)) * x_(j)
    ctxt_X_j = heaan.Ciphertext(context)
    msg_X0 = heaan.Message(log_slots)
    for i in range(14*n, 15*n):
        msg_X0[i] = 1
    eval.add(ctxt_X, msg_X0, ctxt_X_j)
    eval.mult(ctxt_X_j, ctxt_d, ctxt_d)

    ## step5
    # compute  Sum_(all j) (learning_rate/n) * (y_(j) - p_(j)) * x_(j)
    for i in range(14):
        eval.left_rotate(ctxt_d, 2**(13-i), ctxt_rot)
        eval.add(ctxt_d, ctxt_rot, ctxt_d)
    msg_mask = heaan.Message(log_slots)

    for i in range(14):
        msg_mask[i * n] = 1
    eval.mult(ctxt_d, msg_mask, ctxt_d)

    for i in range(14):
        eval.right_rotate(ctxt_d, 2**i, ctxt_rot)
        eval.add(ctxt_d, ctxt_rot, ctxt_d)

    ## step6
    # update beta
    eval.add(ctxt_beta, ctxt_d, ctxt_d)
    return ctxt_d

In [24]:
# randomly assign learning_rate
learning_rate = 0.15
num_steps = 100

ctxt_next = heaan.Ciphertext(context)
eval.add(ctxt_beta, 0, ctxt_next)
# for i in range(num_steps):
#     print("=== Step", i, "===")
#     # estimate beta_hat using function 'step' for 100 iteration
#     ctxt_next = step(0.2, ctxt_X, ctxt_Y, ctxt_next, train_n, log_slots, context, eval)
#     print(ctxt_next)

for i in range(num_steps):
    print("=== Step", i, "===")
    ctxt_next = step(learning_rate, ctxt_X, ctxt_Y, ctxt_next, train_n, log_slots, context, eval)
    print(ctxt_next)
    # if (i + 1) % 20 == 0: # 매 20번째 스텝마다 학습률을 조정
    #     learning_rate *= 0.95

=== Step 0 ===
(level: 9, log(num slots): 15, data: [ (0.800604+0.000000j), (0.800604+0.000000j), (0.800604+0.000000j), (0.800604+0.000000j), (0.800604+0.000000j), ..., (0.039245+0.000000j), (0.039245+0.000000j), (0.039245+0.000000j), (0.039245+0.000000j), (0.039245+0.000000j) ])
=== Step 1 ===
(level: 9, log(num slots): 15, data: [ (0.830091+0.000000j), (0.830091+0.000000j), (0.830091+0.000000j), (0.830091+0.000000j), (0.830091+0.000000j), ..., (0.068734+0.000000j), (0.068734+0.000000j), (0.068734+0.000000j), (0.068734+0.000000j), (0.068734+0.000000j) ])
=== Step 2 ===
(level: 9, log(num slots): 15, data: [ (0.852346+0.000000j), (0.852346+0.000000j), (0.852346+0.000000j), (0.852346+0.000000j), (0.852346+0.000000j), ..., (0.090991+0.000000j), (0.090991+0.000000j), (0.090991+0.000000j), (0.090991+0.000000j), (0.090991+0.000000j) ])
=== Step 3 ===
(level: 9, log(num slots): 15, data: [ (0.869254+0.000000j), (0.869254+0.000000j), (0.869254+0.000000j), (0.869254+0.000000j), (0.869254+0.000

(level: 9, log(num slots): 15, data: [ (0.959839+0.000000j), (0.959839+0.000000j), (0.959839+0.000000j), (0.959839+0.000000j), (0.959839+0.000000j), ..., (0.198537+0.000000j), (0.198537+0.000000j), (0.198537+0.000000j), (0.198537+0.000000j), (0.198537+0.000000j) ])
=== Step 31 ===
(level: 9, log(num slots): 15, data: [ (0.961492+0.000000j), (0.961492+0.000000j), (0.961492+0.000000j), (0.961492+0.000000j), (0.961492+0.000000j), ..., (0.200191+0.000000j), (0.200191+0.000000j), (0.200191+0.000000j), (0.200191+0.000000j), (0.200191+0.000000j) ])
=== Step 32 ===
(level: 9, log(num slots): 15, data: [ (0.963133+0.000000j), (0.963133+0.000000j), (0.963133+0.000000j), (0.963133+0.000000j), (0.963133+0.000000j), ..., (0.201834+0.000000j), (0.201834+0.000000j), (0.201834+0.000000j), (0.201834+0.000000j), (0.201834+0.000000j) ])
=== Step 33 ===
(level: 9, log(num slots): 15, data: [ (0.964762+0.000000j), (0.964762+0.000000j), (0.964762+0.000000j), (0.964762+0.000000j), (0.964762+0.000000j), ..., 

(level: 9, log(num slots): 15, data: [ (1.005980+0.000000j), (1.005980+0.000000j), (1.005980+0.000000j), (1.005980+0.000000j), (1.005980+0.000000j), ..., (0.244727+0.000000j), (0.244727+0.000000j), (0.244727+0.000000j), (0.244727+0.000000j), (0.244727+0.000000j) ])
=== Step 62 ===
(level: 9, log(num slots): 15, data: [ (1.007304+0.000000j), (1.007304+0.000000j), (1.007304+0.000000j), (1.007304+0.000000j), (1.007304+0.000000j), ..., (0.246053+0.000000j), (0.246053+0.000000j), (0.246053+0.000000j), (0.246053+0.000000j), (0.246053+0.000000j) ])
=== Step 63 ===
(level: 9, log(num slots): 15, data: [ (1.008619+0.000000j), (1.008619+0.000000j), (1.008619+0.000000j), (1.008619+0.000000j), (1.008619+0.000000j), ..., (0.247369+0.000000j), (0.247369+0.000000j), (0.247369+0.000000j), (0.247369+0.000000j), (0.247369+0.000000j) ])
=== Step 64 ===
(level: 9, log(num slots): 15, data: [ (1.009924+0.000000j), (1.009924+0.000000j), (1.009924+0.000000j), (1.009924+0.000000j), (1.009924+0.000000j), ..., 

(level: 9, log(num slots): 15, data: [ (1.041790+0.000000j), (1.041790+0.000000j), (1.041790+0.000000j), (1.041790+0.000000j), (1.041790+0.000000j), ..., (0.280579+0.000000j), (0.280579+0.000000j), (0.280579+0.000000j), (0.280579+0.000000j), (0.280579+0.000000j) ])
=== Step 92 ===
(level: 9, log(num slots): 15, data: [ (1.042854+0.000000j), (1.042854+0.000000j), (1.042854+0.000000j), (1.042854+0.000000j), (1.042854+0.000000j), ..., (0.281644+0.000000j), (0.281644+0.000000j), (0.281644+0.000000j), (0.281644+0.000000j), (0.281644+0.000000j) ])
=== Step 93 ===
(level: 9, log(num slots): 15, data: [ (1.043910+0.000000j), (1.043910+0.000000j), (1.043910+0.000000j), (1.043910+0.000000j), (1.043910+0.000000j), ..., (0.282701+0.000000j), (0.282701+0.000000j), (0.282701+0.000000j), (0.282701+0.000000j), (0.282701+0.000000j) ])
=== Step 94 ===
(level: 9, log(num slots): 15, data: [ (1.044958+0.000000j), (1.044958+0.000000j), (1.044958+0.000000j), (1.044958+0.000000j), (1.044958+0.000000j), ..., 

In [26]:
# prepare test data for evaluation
# convert only on columns that are not 0 ~ 1 to values 0 ~ 1
# loan_label이 0인 데이터에서 무작위로 5000개 샘플 선택
test_0 = test[test['loan_label'] == 0].sample(n=400, random_state=42)

# loan_label이 1인 데이터에서 무작위로 5000개 샘플 선택
test_1 = test[test['loan_label'] == 1].sample(n=300, random_state=42)

# 선택된 데이터 합치기
df = pd.concat([test_0, test_1])
# df = pd.DataFrame(csv_test)
test_n = df.shape[0]

X_test = [0] * 14
X_test[0] = normalize_data(df['log_inc'].values)
X_test[1] = normalize_data(df['log_loan_amnt'].values)
X_test[2] = normalize_data(df['int_rate'].values)
X_test[3] = list(df['purpose'].values)
X_test[4] = list(df['grade'].values)
X_test[5] = normalize_data(df['emp_length'].values)
X_test[6] = normalize_data(df['dti'].values)
X_test[7] = normalize_data(df['last_fico_range_high'].values)
X_test[8] = normalize_data(df['last_fico_range_low'].values)
X_test[9] = normalize_data(df['home_ownership'].values)
X_test[10] = normalize_data(df['total_acc'].values)
X_test[11] = list(df['delinq_2yrs'].values)
X_test[12] = normalize_data(df['term'].values)
X_test[13] = normalize_data(df['installment'].values)

Y_test = list(df['loan_label'].values)

msg_X_test = heaan.Message(log_slots)
ctxt_X_test = heaan.Ciphertext(context)

for i in range(14):
    for j in range(test_n):
        msg_X_test[test_n*i + j] = X_test[i][j]
enc.encrypt(msg_X_test, public_key, ctxt_X_test)

In [27]:
def compute_sigmoid(ctxt_X, ctxt_beta, n, log_slots, eval, context, num_slots):
    '''
    ctxt_X : data for evaluation
    ctxt_beta : estimated beta from function 'step'
    n : the number of row in test_data
    '''
    ctxt_rot = heaan.Ciphertext(context)
    ctxt_tmp = heaan.Ciphertext(context)

    # beta0
    ctxt_beta0 = heaan.Ciphertext(context)
    eval.left_rotate(ctxt_beta, 15*n, ctxt_beta0)

    # compute x * beta + beta0
    ctxt_tmp = heaan.Ciphertext(context)
    eval.mult(ctxt_beta, ctxt_X, ctxt_tmp)

    for i in range(3):
        eval.left_rotate(ctxt_tmp, n*2**(2-i), ctxt_rot)
        eval.add(ctxt_tmp, ctxt_rot, ctxt_tmp)
    eval.add(ctxt_tmp, ctxt_beta0, ctxt_tmp)

    msg_mask = heaan.Message(log_slots)
    for i in range(n):
        msg_mask[i] = 1
    eval.mult(ctxt_tmp, msg_mask, ctxt_tmp)

    # compute sigmoid
    approx.sigmoid(eval, ctxt_tmp, ctxt_tmp, 8.0)
    eval.bootstrap(ctxt_tmp, ctxt_tmp)
    msg_mask = heaan.Message(log_slots)
    for i in range(n, num_slots):
        msg_mask[i] = 0.5
    eval.sub(ctxt_tmp, msg_mask, ctxt_tmp)

    return ctxt_tmp

In [28]:
# accuracy
ctxt_infer = compute_sigmoid(ctxt_X_test, ctxt_next, test_n, log_slots, eval, context, num_slots)

res = heaan.Message(log_slots)
dec.decrypt(ctxt_infer, secret_key, res)
cnt = 0
for i in range(test_n):
    if res[i].real >= 0.6:
        if Y_test[i] == 1:
            cnt += 1
    else:
        if Y_test[i] == 0:
            cnt += 1
print("Accuracy : ", cnt / test_n)


Accuracy :  0.5285714285714286


In [30]:
ctxt_infer = compute_sigmoid(ctxt_X_test, ctxt_next, test_n, log_slots, eval, context, num_slots)

res = heaan.Message(log_slots)
dec.decrypt(ctxt_infer, secret_key, res)


# 예측 라벨을 저장할 리스트
y_pred = []

# 예측 라벨 계산
for i in range(test_n):
    if res[i].real > 0.65:
        y_pred.append(1)
    else:
        y_pred.append(0)
print(y_pred)


[0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 

In [31]:

def calculate_g_mean(y_test, y_pred):
    # 혼동 행렬 계산
    cm = confusion_matrix(y_test, y_pred)
    # True Positive (TP), True Negative (TN), False Positive (FP), False Negative (FN) 추출
    TN, FP, FN, TP = cm.ravel()

    # 민감도(재현율) 계산
    sensitivity_positive = TP / (TP + FN) if (TP + FN) != 0 else 0
    sensitivity_negative = TN / (TN + FP) if (TN + FP) != 0 else 0

    # G-mean 계산
    g_mean = (sensitivity_positive * sensitivity_negative) ** 0.5
    return g_mean

accuracy = accuracy_score(Y_test, y_pred)
# 실제 라벨과 예측 라벨을 기반으로 recall_score 계산
recall = recall_score(Y_test, y_pred)
g_mean = calculate_g_mean(Y_test, y_pred)
# ROC AUC score 계산
roc_auc = roc_auc_score(Y_test, [res[i].real for i in range(test_n)])

print(f"Accuracy : {accuracy}")
print(f"Recall Score: {recall}")
print(f"G-mean: {g_mean}")
print(f"ROC AUC Score: {roc_auc}")


Accuracy : 0.5285714285714286
Recall Score: 0.6933333333333334
G-mean: 0.5299056519796709
ROC AUC Score: 0.554


In [None]:
# Save the model
ctxt_next.save("logistic_HE.bin")