In [19]:
import os
import warnings

import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')  # suppress non-critical warnings

from sklearn.metrics import accuracy_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import hamming_loss
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.utils.class_weight import compute_sample_weight

In [20]:
def predictresult(predict_df, n_samples=106, n_folds=5):
    """Yield one ``(y_test, y_score)`` pair per fold from a stacked prediction table.

    ``predict_df`` is read as ``n_folds`` consecutive groups of
    ``2 * (n_samples + 1)`` rows. Within a group the first row is skipped,
    the next ``n_samples`` rows are yielded as ``y_test``, one more row is
    skipped, and the last ``n_samples`` rows are yielded as ``y_score``.
    The skipped rows are presumably per-fold header lines written by the
    prediction step -- TODO confirm against the code that produces the CSV.

    Parameters
    ----------
    predict_df : pandas.DataFrame
        Stacked table; only positional row slicing (``iloc``) is used.
    n_samples : int, default 106
        Number of rows in each label block and each score block.
    n_folds : int, default 5
        Number of groups to read. The defaults reproduce the previously
        hard-coded layout (groups of 214 rows, starting rows 0..856).

    Yields
    ------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(y_test, y_score)`` for one fold, each re-indexed from 0. If
        ``predict_df`` is shorter than the requested layout the slices are
        simply shorter (or empty), as with any ``iloc`` slice.
    """
    # One skipped row precedes each of the two blocks in a group.
    group_rows = 2 * (n_samples + 1)

    for fold in range(n_folds):
        labels_start = fold * group_rows + 1
        scores_start = labels_start + n_samples + 1

        # reset_index returns a fresh frame, so the caller never holds a view
        # of predict_df and no in-place mutation of a slice is needed.
        y_test = predict_df.iloc[labels_start:labels_start + n_samples].reset_index(drop=True)
        y_score = predict_df.iloc[scores_start:scores_start + n_samples].reset_index(drop=True)

        yield y_test, y_score

In [21]:
def sample_weight(y_test):
    """Return class-balanced sample weights, one column per label.

    For every column ``j`` of ``y_test`` the weights are obtained from
    ``compute_sample_weight(class_weight='balanced', y=y_test[:, j])``.

    Parameters
    ----------
    y_test : numpy.ndarray
        2-D array indexed as ``y_test[:, j]``; ``y_test.shape[1]`` is the
        number of label columns.

    Returns
    -------
    numpy.ndarray
        The per-column weight vectors stacked and transposed, so that column
        ``j`` of the result holds the weights computed from column ``j`` of
        ``y_test``.
    """
    n_labels = y_test.shape[1]
    per_label = [
        compute_sample_weight(class_weight='balanced', y=y_test[:, col])
        for col in range(n_labels)
    ]
    # per_label is label-major; transpose so it lines up with y_test.
    return np.array(per_label).T

def bieva(Eva_p,pred,Threshold):
    """Append per-label binary metrics, aggregated over folds, to a CSV file.

    For every ``(y_test, y_score)`` fold in ``pred`` the scores are binarised
    with ``Threshold`` and, for each label column, class-balanced weighted
    accuracy, precision, recall and F1 are computed (each rounded to 4
    decimals). The per-fold values are then averaged over folds (rounded to
    2 decimals) and their standard deviation is taken (rounded to 4
    decimals).

    Parameters
    ----------
    Eva_p : str
        Prefix that is concatenated with ``'BinaryEvaluation.csv'`` to form
        the output path, so a directory prefix must end with a separator.
    pred : iterable of (y_test, y_score)
        Each element must support ``.astype`` and convert to a 2-D array of
        shape ``(n_samples, n_labels)``.
    Threshold : float
        Scores strictly greater than this value are predicted as 1.

    Raises
    ------
    ValueError
        If ``pred`` produces no per-label metrics (no folds or no labels).

    Notes
    -----
    The file is opened with ``mode='a'``: every call appends another block
    (including a header row), and ``Threshold`` itself is not written to the
    file.
    """
    fold_metrics = []
    n_labels = None

    for y_test, y_score in pred:
        y_test = np.array(y_test.astype('int'))
        y_score = np.array(y_score.astype('float'))
        y_pred = np.where(y_score > Threshold, 1, 0)
        sw = sample_weight(y_test)
        n_labels = y_test.shape[1]

        for i in range(n_labels):
            truth, guess, weights = y_test[:, i], y_pred[:, i], sw[:, i]
            fold_metrics.append([
                round(accuracy_score(truth, guess, sample_weight=weights), 4),
                round(precision_score(truth, guess, sample_weight=weights), 4),
                round(recall_score(truth, guess, sample_weight=weights), 4),
                round(f1_score(truth, guess, sample_weight=weights), 4),
            ])

    if not fold_metrics:
        raise ValueError('bieva: no folds/labels to evaluate')

    # Rows were appended fold-major, label-minor; let numpy infer the fold
    # count instead of hard-coding the (folds, labels) layout.
    BReva_arr = np.array(fold_metrics).reshape(-1, n_labels, 4)
    BRmean_arr = np.around(np.mean(BReva_arr, 0), 2)
    BRstd_arr = np.around(np.std(BReva_arr, 0), 4)
    BR = np.concatenate((BRmean_arr, BRstd_arr), axis=1)
    BR_df = pd.DataFrame(BR,columns=['Accuracy_m','Precision_m','Recall_m','F1 Score_m','Accuracy_s','Precision_s','Recall_s','F1 Score_s'])
    BR_df.to_csv(Eva_p+'BinaryEvaluation.csv',mode='a')

def auprc(Eva_p,pred):
    """Append per-label AUPR and ROC-AUC, aggregated over folds, to a CSV file.

    For every ``(y_test, y_score)`` fold in ``pred`` and every label column,
    the class-balanced weighted average precision and ROC AUC are computed.
    The per-fold values are averaged over folds (rounded to 2 decimals) and
    their standard deviation is taken (rounded to 4 decimals).

    Parameters
    ----------
    Eva_p : str
        Prefix that is concatenated with ``'AUC_AUPR.csv'`` to form the
        output path, so a directory prefix must end with a separator.
    pred : iterable of (y_test, y_score)
        ``y_test`` must expose ``.columns`` (used as the row index of the
        output) and both elements must support ``.astype`` and convert to a
        2-D array of shape ``(n_samples, n_labels)``.

    Raises
    ------
    ValueError
        If ``pred`` produces no per-label scores (no folds or no labels).

    Notes
    -----
    The file is opened with ``mode='a'``, so every call appends another block
    including a header row.
    """
    fold_scores = []
    y_label = None
    n_labels = None

    for y_test, y_score in pred:
        # Row labels of the output come from the most recent fold's columns.
        y_label = y_test.columns
        y_test = np.array(y_test.astype('int'))
        y_score = np.array(y_score.astype('float'))
        sw = sample_weight(y_test)
        n_labels = y_test.shape[1]

        for i in range(n_labels):
            truth, scores, weights = y_test[:, i], y_score[:, i], sw[:, i]
            average_precision = average_precision_score(truth, scores, sample_weight=weights)
            AUC = roc_auc_score(truth, scores, sample_weight=weights)
            # NOTE(review): per-fold values are rounded to 2 decimals before
            # the mean/std are taken, so the 4-decimal std below is computed
            # from already-rounded inputs. Kept as-is to preserve the
            # reported numbers -- confirm whether this is intended.
            fold_scores.append([round(average_precision, 2), round(AUC, 2)])

    if not fold_scores:
        raise ValueError('auprc: no folds/labels to evaluate')

    # Rows were appended fold-major, label-minor; let numpy infer the fold
    # count instead of hard-coding the (folds, labels) layout.
    ROC_arr = np.array(fold_scores).reshape(-1, n_labels, 2)
    ROCmean_arr = np.around(np.mean(ROC_arr, 0), 2)
    ROCstd_arr = np.around(np.std(ROC_arr, 0), 4)
    ROC = np.concatenate((ROCmean_arr, ROCstd_arr), axis=1)
    ROC_df = pd.DataFrame(ROC,index=y_label,columns=['AUPR_m','AUC_m','AUPR_s','AUC_s'])
    ROC_df.to_csv(Eva_p+'AUC_AUPR.csv',mode='a')


def hamloss(Eva_p,pred,Threshold):
    """Append the mean and std of the weighted Hamming loss over folds to a CSV.

    For every ``(y_test, y_score)`` fold in ``pred`` the scores are binarised
    with ``Threshold`` and a weighted Hamming loss is computed as

        sum(w * (y_test != y_pred)) / sum(w)

    where ``w`` is the per-sample, per-label class-balanced weight matrix
    returned by ``sample_weight``.

    The loss is computed directly rather than via
    ``sklearn.metrics.hamming_loss``: that function takes ``sample_weight``
    as a keyword-only argument documented as one weight per sample, whereas
    the weights here are a 2-D matrix (one weight per sample and label), and
    the previous positional call does not bind to ``sample_weight`` reliably
    across scikit-learn versions.

    Parameters
    ----------
    Eva_p : str
        Prefix that is concatenated with ``'HammingLoss.csv'`` to form the
        output path, so a directory prefix must end with a separator.
    pred : iterable of (y_test, y_score)
        Each element must support ``.astype`` and convert to a 2-D array of
        shape ``(n_samples, n_labels)``.
    Threshold : float
        Scores strictly greater than this value are predicted as 1.

    Notes
    -----
    The mean is rounded to 2 decimals and the std to 4. The file is opened
    with ``mode='a'``: every call appends another block (including a header
    row), and ``Threshold`` itself is not written to the file.
    """
    fold_losses = []
    for y_test, y_score in pred:
        y_test = np.array(y_test.astype('int'))
        y_score = np.array(y_score.astype('float'))
        y_pred = np.where(y_score > Threshold, 1, 0)
        sw = sample_weight(y_test)

        # Element-wise weighted fraction of wrongly predicted labels.
        mismatch = (y_test != y_pred)
        fold_losses.append(np.sum(sw * mismatch) / np.sum(sw))

    HM_mean = round(np.mean(fold_losses), 2)
    HM_std = round(np.std(fold_losses), 4)
    HM = np.array([HM_mean, HM_std]).reshape(1, 2)
    HM_df = pd.DataFrame(HM,columns=['Hamming Loss_m','Hamming Loss_s'])
    HM_df.to_csv(Eva_p+'HammingLoss.csv',mode='a')

In [22]:
# Directory holding the per-model prediction CSVs; per-model result folders
# are created underneath it.
# NOTE(review): absolute, machine-specific path -- consider moving it to a
# configuration cell or environment variable.
predict_dir = '/home/dqw_lyt/LYT_Task2/script/ISTH_RJ_mmc_result/predict/94D/f1_loo_2_3/'

# Prediction files to evaluate, one per multi-label model.
model_c = ['test_BinaryRelevance.csv',
            'test_ClassifierChain.csv',
            'test_LabelPowerset.csv',
            'test_MLkNN.csv',
            'test_RakelD.csv']

# Decision thresholds evaluated by the threshold-dependent metrics.
THRESHOLD = [0.5,0.4,0.35,0.3,0.25]

for csv in model_c:
    predict_p = os.path.join(predict_dir, csv)
    predict_df = pd.read_csv(predict_p, header=None)
    # Strip the leading 'test_' (5 chars) and trailing '.csv' (4 chars).
    model_n = csv[5:-4]

    # The evaluation functions build their output path as Eva_p + file name,
    # so the trailing separator is required.
    Eva_p = predict_dir + model_n + '/'
    # Make sure the per-model output folder exists before anything is written.
    os.makedirs(Eva_p, exist_ok=True)

    # predictresult returns a one-shot generator, so a fresh one is created
    # for every evaluation pass.
    # The evaluators open their CSVs with mode='a', so re-running this cell
    # appends further rows to existing result files.
    for Threshold in THRESHOLD:
        pred = predictresult(predict_df)
        bieva(Eva_p, pred, Threshold)

    pred = predictresult(predict_df)
    auprc(Eva_p, pred)

    for Threshold in THRESHOLD:
        pred = predictresult(predict_df)
        hamloss(Eva_p, pred, Threshold)