In [1]:
import math
import re, os

import numpy as np

In [2]:
# Move the working directory up one level. The change is relative to the
# current directory, so re-running this cell climbs one more level each time.
os.chdir(os.path.pardir)

In [3]:
# Prediction file to analyse, resolved against the current working directory.
# The components are passed separately so os.path.join inserts the separator
# of the host OS (identical result on Windows, a real nested path elsewhere).
filepath = os.path.join(os.getcwd(), 'ML_Result', 'LightGBM', 'ZScale', 'zscale_pre_cv.txt')

In [22]:
def read_pre_res(filepath):
    """Load per-fold prediction records from a tab-separated text file.

    A line containing the substring ``'fold '`` starts a new fold. Every
    following non-blank line is split on tab characters and becomes one row
    of that fold. Blank lines are skipped. Before the first fold header,
    lines starting with ``#`` are ignored as preamble.

    Parameters
    ----------
    filepath : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        ``float64`` array built from the nested ``folds -> rows -> fields``
        lists, so it is indexed ``[fold, row, column]`` when the nesting is
        rectangular. An input without any fold header gives an empty array.

    Raises
    ------
    ValueError
        If a data row appears before the first fold header, or if NumPy
        cannot convert the collected fields to ``float64``.
    """
    folds = []
    # The context manager closes the file; no explicit close() is needed.
    with open(filepath) as f:
        for line_no, raw in enumerate(f, start=1):
            if 'fold ' in raw:
                folds.append([])
                continue
            line = raw.rstrip()
            if line == '':
                # A blank line carries no record. Storing a placeholder for it
                # would make the nested list ragged and break the conversion.
                continue
            if not folds:
                if line.startswith('#'):
                    continue
                raise ValueError(
                    'line %d of %r holds data before any fold header: %r'
                    % (line_no, filepath, line)
                )
            folds[-1].append(line.split('\t'))
    # NOTE(review): np.array needs a rectangular nesting, so folds with
    # differing row counts are not supported by this conversion.
    return np.array(folds).astype(np.float64)

In [23]:
# Parse the prediction file into a float array of per-fold records.
zscale_pre_ind = read_pre_res(filepath)

In [24]:
def calculate_cutoff(data, sp_value):
    """Derive one score cutoff per fold from the negative samples' scores.

    In each fold, rows whose first column equals ``0.0`` are the negatives.
    With ``all_n`` negatives, ``tn = int(sp_value * all_n)`` and
    ``fp = all_n - tn``. The cutoff is the score (second column) found at
    position ``fp`` of the negatives sorted by descending score.

    Parameters
    ----------
    data : iterable of 2-D array-like
        One ``[row, column]`` table per fold; column 0 is the label and
        column 1 the score.
    sp_value : float
        Target specificity, between 0 and 1 inclusive.

    Returns
    -------
    list
        One cutoff per fold, in fold order.

    Raises
    ------
    ValueError
        If ``sp_value`` is outside ``[0, 1]`` or a fold has no negatives.
    """
    if not 0.0 <= sp_value <= 1.0:
        # Outside this range the position below would turn negative and wrap
        # around, or run past the end of the array.
        raise ValueError('sp_value must be within [0, 1], got %r' % (sp_value,))

    cutoffs = []
    for fold_index, fold in enumerate(data):
        rows = np.asarray(fold, dtype=np.float64)
        if rows.ndim == 2:
            neg_scores = rows[rows[:, 0] == 0.0, 1]
        else:
            # An empty fold has no second axis to slice.
            neg_scores = np.empty(0, dtype=np.float64)
        all_n = len(neg_scores)
        if all_n == 0:
            raise ValueError(
                'fold %d has no negative samples (label 0.0); cannot derive a cutoff'
                % fold_index
            )
        tn = int(sp_value * all_n)
        fp = all_n - tn
        # sp_value == 0 gives fp == all_n, one past the last index. Clamp to
        # the lowest negative score so that every negative is at or above it.
        position = min(fp, all_n - 1)
        descending = neg_scores[np.argsort(-neg_scores)]
        # NOTE(review): if the consumer predicts positive for
        # ``score >= cutoff``, the negative sitting exactly on the cutoff is
        # itself a false positive -- confirm this is the intended convention.
        cutoffs.append(descending[position])
    return cutoffs

In [25]:
# Target specificity used to place each fold's cutoff.
sp = 0.65
# One cutoff per fold, taken from the scores of that fold's negative samples.
a = calculate_cutoff(zscale_pre_ind,sp)

In [26]:
# Compute performance metrics
def calculate_metrics(labels, scores, cutoff=0.5, po_label=1):
    """Compute binary-classification metrics at a fixed score cutoff.

    A sample is predicted positive when ``score >= cutoff``. A sample is
    truly positive when its label equals ``po_label``; any other label
    counts as negative.

    Parameters
    ----------
    labels : sequence
        True labels, indexed in step with ``scores``.
    scores : sequence
        Prediction scores; its length decides how many samples are read.
    cutoff : float, optional
        Decision threshold (default 0.5). Echoed back under ``'Cutoff'``.
    po_label : optional
        Label value treated as the positive class (default 1).

    Returns
    -------
    dict
        Keys ``'SN'``, ``'SP'``, ``'ACC'``, ``'MCC'``, ``'Recall'``,
        ``'Precision'``, ``'F1-score'`` and ``'Cutoff'``, in that order.
        A metric whose denominator is zero is the string ``'NA'``.
    """
    tp, tn, fp, fn = 0, 0, 0, 0
    for i in range(len(scores)):
        predicted_positive = scores[i] >= cutoff
        if labels[i] == po_label:
            if predicted_positive:
                tp += 1
            else:
                fn += 1
        elif predicted_positive:
            fp += 1
        else:
            tn += 1

    def _ratio(numerator, denominator):
        # Shared zero-denominator guard so every metric degrades to 'NA' alike.
        return numerator / denominator if denominator != 0 else 'NA'

    sensitivity = _ratio(tp, tp + fn)
    # The counts are Python ints, so this product cannot overflow.
    mcc_denominator = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
    if mcc_denominator != 0:
        # math.sqrt replaces np.math.sqrt: np.math was only an alias of the
        # stdlib module and no longer exists in NumPy 2.0.
        mcc = (tp * tn - fp * fn) / math.sqrt(mcc_denominator)
    else:
        mcc = 'NA'

    # Key order is kept stable for callers that iterate the dict.
    return {
        'SN': sensitivity,
        'SP': _ratio(tn, fp + tn),
        # ACC used to divide unguarded and crashed on empty input.
        'ACC': _ratio(tp + tn, tp + fn + tn + fp),
        'MCC': mcc,
        'Recall': sensitivity,
        'Precision': _ratio(tp, tp + fp),
        'F1-score': _ratio(2 * tp, 2 * tp + fp + fn),
        'Cutoff': cutoff,
    }

def calculate_metrics_list(data, cutoffs, label_column=0, score_column=2, po_label=1):
    """Compute metrics for every fold and, for several folds, their mean/std.

    Parameters
    ----------
    data : iterable of 2-D numpy.ndarray
        One ``[row, column]`` table per fold.
    cutoffs : sequence
        Cutoff for each fold, indexed in step with ``data``.
    label_column : int, optional
        Column of each fold holding the true label (default 0).
    score_column : int, optional
        Column of each fold holding the prediction score (default 2).
    po_label : optional
        Label value treated as the positive class (default 1).

    Returns
    -------
    list of dict
        One ``calculate_metrics`` dict per fold. With more than one fold,
        two dicts are appended: the per-key mean and the per-key (population)
        standard deviation. Both are taken over the folds where that metric
        is numeric and are ``'NA'`` when it is numeric in no fold. No folds
        gives an empty list.
    """
    metrics_list = [
        calculate_metrics(fold[:, label_column], fold[:, score_column],
                          cutoff=cutoffs[index], po_label=po_label)
        for index, fold in enumerate(data)
    ]
    if len(metrics_list) <= 1:
        # Nothing to aggregate for zero or one fold.
        return metrics_list

    mean_dict = {}
    std_dict = {}
    for key in metrics_list[0]:
        # Undefined metrics are reported as the string 'NA'. Mixing them into
        # the array would turn it into a string array and break sum()/std().
        numeric = np.array(
            [fold_metrics[key] for fold_metrics in metrics_list
             if not isinstance(fold_metrics[key], str)],
            dtype=np.float64,
        )
        if numeric.size:
            mean_dict[key] = numeric.sum() / numeric.size
            std_dict[key] = numeric.std()
        else:
            mean_dict[key] = 'NA'
            std_dict[key] = 'NA'
    metrics_list.append(mean_dict)
    metrics_list.append(std_dict)
    return metrics_list

In [27]:
# Per-fold metrics at the cutoffs in `a`, reading labels from column 0 and
# scores from column 1; with more than one fold, a mean dict and a std dict
# are appended at the end of the list.
aaa = calculate_metrics_list(zscale_pre_ind, a, label_column=0, score_column=1, po_label=1)

In [28]:
def save_prediction_metrics_list(metrics_list, output):
    """Write a list of metric dicts to a tab-separated text file.

    The header row lists the keys of the first dict. Each following row is a
    label followed by that dict's own values, in its own key order.

    * One dict: the header starts with ``Result`` and the row is labelled
      ``value``.
    * Several dicts: the header starts with ``Fold``. The last two rows are
      labelled ``mean`` and ``std``; the rows before them are numbered from 1.

    Parameters
    ----------
    metrics_list : list of dict
        Metric dicts to write.
    output : str
        Path of the file to create or overwrite.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``metrics_list`` is empty. This is checked before the file is
        opened, so an existing file is not truncated.
    """
    total = len(metrics_list)
    if total == 0:
        raise ValueError('metrics_list is empty; nothing to write to %r' % (output,))

    if total == 1:
        corner = 'Result'
        row_labels = ['value']
    else:
        corner = 'Fold'
        row_labels = ['%d' % (index + 1) for index in range(total - 2)] + ['mean', 'std']

    # The context manager closes the file; no explicit close() is needed.
    with open(output, 'w') as f:
        f.write(corner + ''.join('\t%s' % key for key in metrics_list[0]) + '\n')
        for label, metrics in zip(row_labels, metrics_list):
            f.write(label + ''.join('\t%s' % metrics[key] for key in metrics) + '\n')
    return None

In [29]:
# Destination file for the metrics table.
# NOTE(review): hard-coded absolute Windows path on a personal drive; consider
# building it from a configurable output directory.
output = r'D:\研一下学期\生信群论文及文档\尉溪林\a.txt'

In [30]:
# Write the metrics table as tab-separated text to `output`.
save_prediction_metrics_list(aaa, output)