In [1]:
import numpy as np
import os
from sklearn.metrics import roc_curve, auc
from sklearn import metrics
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
from itertools import cycle
from scipy import interp

In [2]:
file_dir = "./score数据统计/"
save_figure_dir = "./ROC曲线图片/"
methods = ["SVM","KNN","DT","MLP","RVM","RandomForest"]
class_name = ["class2","class3","class4"]
k_fold = [0,1,2,3,4,5,6,7,8,9]
data_name = ["predicted_test_label","predicted_test_score","true_test_label","predicted_train_label","true_train_label"]

In [3]:
# 读取所有数据
data_dict = {}
for data_type in data_name:
    data_dict[data_type] = {}
    for method in methods:
#         print("Currently Processing "+method)
        data_dict[data_type][method] = {}
        for class_id in class_name:
#             print("\t"+class_id+" is processing...")
            folder_dir = file_dir+"/"+method+"/"+class_id
            data_dict[data_type][method][class_id] = {}
            for k in k_fold:
                filename = method+"-"+data_type+"-k"+str(k)+".npy"
                file_path = os.path.join(folder_dir,filename)
                data = np.load(file_path)
                data_dict[data_type][method][class_id][str(k)+"-fold"] = data
print("Done.")

Done.


In [70]:
# 绘制每个分类器的每种分类方式的roc曲线（其中包括10折ROC，标准直线，多分类的micro-average和macro-average）
def plot_roc_curve_perclf_perclass(data_dict,method,class_id,k_fold,path):
#     plt.figure(figsize=(15,7))
    plt.plot([0,1],[0,1],linestyle="--",lw=2,color='r',label='Luck',alpha=0.8)
    # 获得当前分类个数
    class_num = int(class_id[-1])
    for k in k_fold:
        true_label = data_dict["true_test_label"][method][class_id][str(k)+"-fold"]
        preds = data_dict["predicted_test_score"][method][class_id][str(k)+"-fold"]
        # 对标签进行二值化
        true_label = label_binarize(true_label, classes = range(class_num))
        # 二分类
        if class_num == 2:
            fpr, tpr, threshold = metrics.roc_curve(true_label,preds[:,1])
            roc_auc = metrics.auc(fpr,tpr)
            # 对每一折的每一个类绘制roc曲线
            plt.plot(fpr,tpr,lw=1,alpha=0.3,color="blue",label = 'ROC fold %d(area=%0.2f)' % (k, roc_auc))
        # 三分类和四分类
        else:
            # 统计
            fpr = {}
            tpr = {}
            roc_auc = {}
            for i in range(class_num):
                fpr[i], tpr[i], threshold = metrics.roc_curve(true_label[:,i],preds[:,i])
                roc_auc[i] = metrics.auc(fpr[i],tpr[i])
            # 方法2：计算micro-average ROC 曲线
            fpr["micro"],tpr["micro"], threshold = metrics.roc_curve(true_label.ravel(),preds.ravel())
            roc_auc["micro"] = metrics.auc(fpr["micro"],tpr["micro"])
            # 方法1：计算macro-average ROC曲线
            all_fpr = np.unique(np.concatenate([fpr[i] for i in range(class_num)]))
            mean_tpr = np.zeros_like(all_fpr)
            for i in range(class_num):
                mean_tpr += interp(all_fpr,fpr[i],tpr[i])
            mean_tpr /= class_num
            fpr["macro"] = all_fpr
            tpr["macro"] = mean_tpr
            roc_auc["macro"] = auc(fpr["macro"],tpr["macro"])
            # 绘制macro的ROC曲线和micro的ROC曲线
            plt.plot(fpr["micro"],tpr["micro"],color="blue",lw=2,label='micro-average ROC fold %d(area=%0.2f)' % (k,roc_auc["micro"]))
            plt.plot(fpr["macro"],tpr["macro"],color="black",lw=2,label='macro-average ROC fold %d(area=%0.2f)' % (k,roc_auc["macro"]))
            # 对每一折的每一个类绘制roc曲线
            colors = cycle(["darkorange","limegreen","cornflowerblue","pink"])
            for i, color in zip(range(class_num),colors):
                plt.plot(fpr[i],tpr[i],lw=1,alpha=0.3,color=color,label = 'ROC fold %d of class %d(area=%0.2f)' % (k, i, roc_auc[i]))
    plt.xlim([-0.01,1.01])
    plt.ylim([-0.01,1.01])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic -- '+ method+" ( "+str(class_num)+" categories )")
    if class_num == 2:
        plt.legend(bbox_to_anchor=(1.5,0.5),loc='center right',ncol=1)
    if class_num == 3:
        plt.legend(bbox_to_anchor=(3.2,0.5),loc='center right',ncol=3)
    if class_num == 4:
        plt.legend(bbox_to_anchor=(3.9,0.5),loc='center right',ncol=4)
#     plt.show()
    store_path = path+"ROC_"+method+"("+str(class_num)+" categories)"+".png"
    plt.savefig(store_path,bbox_inches='tight')
    plt.close()
    
for method in methods:
    for class_id in class_name:
        plot_roc_curve_perclf_perclass(data_dict,method,class_id,k_fold,save_figure_dir)

  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr

  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])


In [6]:
# 绘制每个分类器的每种分类方式的roc曲线（包括10折的mean_roc）
def plot_mean_roc_perclf_perclass(data_dict,method,class_id,k_fold,path):
    plt.figure(figsize=(2.75,1.96),dpi=300)
    plt.plot([0,1],[0,1],linestyle="--",lw=2,color='r',label='Luck',alpha=0.8)
    # 获得当前分类个数
    class_num = int(class_id[-1])
    # 用于二分类
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0,1,100)
    # 用于三分类和四分类
    micro_tprs = []
    micro_aucs = []
    micro_mean_fpr = np.linspace(0,1,100)
    macro_tprs = []
    macro_aucs = []
    macro_mean_fpr = np.linspace(0,1,100)
    for k in k_fold:
        true_label = data_dict["true_test_label"][method][class_id][str(k)+"-fold"]
        preds = data_dict["predicted_test_score"][method][class_id][str(k)+"-fold"]
        # 对标签进行二值化
        true_label = label_binarize(true_label, classes = range(class_num))
        # 二分类
        if class_num == 2:
            fpr, tpr, threshold = metrics.roc_curve(true_label,preds[:,1])
            roc_auc = metrics.auc(fpr,tpr)
            # interp:插值 把结果添加到tprs列表中
            tprs.append(interp(mean_fpr,fpr,tpr))
            tprs[-1][0] = 0.0
            aucs.append(roc_auc)
            # 对每一折的每一个类绘制roc曲线
#             plt.plot(fpr,tpr,lw=1,alpha=0.3,label = 'ROC fold %d(area=%0.2f)' % (k, roc_auc))
        # 三分类和四分类
        else:
            # 统计
            fpr = {}
            tpr = {}
            roc_auc = {}
            for i in range(class_num):
                fpr[i], tpr[i], threshold = metrics.roc_curve(true_label[:,i],preds[:,i])
                roc_auc[i] = metrics.auc(fpr[i],tpr[i])
            # 方法2：计算micro-average ROC 曲线
            fpr["micro"],tpr["micro"], threshold = metrics.roc_curve(true_label.ravel(),preds.ravel())
            roc_auc["micro"] = metrics.auc(fpr["micro"],tpr["micro"])
            # interp:插值 把结果添加到micro_tprs列表中
            micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
            micro_tprs[-1][0] = 0.0
            micro_aucs.append(roc_auc["micro"])
            
            # 方法1：计算macro-average ROC曲线
            all_fpr = np.unique(np.concatenate([fpr[i] for i in range(class_num)]))
            mean_tpr = np.zeros_like(all_fpr)
            for i in range(class_num):
                mean_tpr += interp(all_fpr,fpr[i],tpr[i])
            mean_tpr /= class_num
            fpr["macro"] = all_fpr
            tpr["macro"] = mean_tpr
            roc_auc["macro"] = auc(fpr["macro"],tpr["macro"])
            # interp:插值 把结果添加到micro_tprs列表中
            macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
            macro_tprs[-1][0] = 0.0
            macro_aucs.append(roc_auc["macro"])
            # 绘制每一折的macro的ROC曲线和micro的ROC曲线
#             plt.plot(fpr["micro"],tpr["micro"],lw=1,alpha=0.3,label='micro-average ROC fold %d(area=%0.2f)' % (k,roc_auc["micro"]))
#             plt.plot(fpr["macro"],tpr["macro"],lw=1,alpha=0.3,label='macro-average ROC fold %d(area=%0.2f)' % (k,roc_auc["macro"]))
    plt.xlim([-0.01,1.01])
    plt.ylim([-0.01,1.01])
    plt.xlabel('False Positive Rate',fontdict= {'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
    plt.ylabel('True Positive Rate',fontdict= {'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
    plt.yticks(fontproperties='Times New Roman', size=12, weight='bold')  #设置大小及加粗
    plt.xticks(fontproperties='Times New Roman', size=12, weight='bold')
#     plt.tick_params(labelsize=18)
#     plt.title('Receiver Operating Characteristic -- '+ method+" ( "+str(class_num)+" categories )")
#     plt.rcParams.update({'font.size': 14})
    # 设置坐标轴粗细
#     ax=plt.gca();#获得坐标轴的句柄
#     ax.spines['bottom'].set_linewidth(2);###设置底部坐标轴的粗细
#     ax.spines['left'].set_linewidth(2);####设置左边坐标轴的粗细
#     ax.spines['right'].set_linewidth(2);###设置右边坐标轴的粗细
#     ax.spines['top'].set_linewidth(2);####设置上部坐标轴的粗细
    if class_num == 2:
        # 绘制二分类的mean roc曲线
        mean_tpr = np.mean(tprs,axis=0)
        mean_tpr[-1] = 1.0
        mean_auc = metrics.auc(mean_fpr,mean_tpr)
        plt.plot(mean_fpr,mean_tpr,color="darkorange",lw=4,alpha=0.8,label='Mean ROC(area=%0.2f)'%mean_auc)
#         plt.legend(bbox_to_anchor=(1.5,0.5),loc='center right',ncol=1)
        plt.legend(loc='lower right',prop={'family' : 'Times New Roman', 'size' : 9,'weight':'bold'})
    else:
        # 绘制三分类和四分类的mean roc曲线
        micro_mean_tpr = np.mean(micro_tprs,axis=0)
        micro_mean_tpr[-1]=1.0
        micro_mean_auc = metrics.auc(micro_mean_fpr,micro_mean_tpr)
        plt.plot(micro_mean_fpr,micro_mean_tpr,color="darkorange",lw=4,alpha=0.8,label='Mean Micro-Average ROC(area=%0.2f)'%micro_mean_auc)
        macro_mean_tpr = np.mean(macro_tprs,axis=0)
        macro_mean_tpr[-1]=1.0
        macro_mean_auc = metrics.auc(macro_mean_fpr,macro_mean_tpr)
        plt.plot(macro_mean_fpr,macro_mean_tpr,color="cornflowerblue",lw=4,alpha=0.8,label='Mean Macro-Average ROC(area=%0.2f)'%macro_mean_auc)
        if class_num == 3:
            plt.legend(loc='lower right',prop={'family' : 'Times New Roman', 'size' : 9,'weight':'bold'})
#             plt.legend(bbox_to_anchor=(2.45,0.5),loc='center right',ncol=2)
        if class_num == 4:
            plt.legend(loc='lower right',prop={'family' : 'Times New Roman', 'size' : 9,'weight':'bold'})
#             plt.legend(bbox_to_anchor=(2.45,0.5),loc='center right',ncol=2)
#     plt.show()
    store_path = path+"Mean_ROC_"+method+"("+str(class_num)+" categories)"+".svg"
    plt.savefig(store_path,bbox_inches='tight',dpi=300)
    plt.close()

for method in methods:
    for class_id in class_name:
        plot_mean_roc_perclf_perclass(data_dict,method,class_id,k_fold,save_figure_dir)

  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["m

  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["m

  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr

  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr

In [7]:
# 同一分类器不同分类方式的比较
def plot_preclf_allclass(data_dict,methods,class_name,k_fold,path):
    for method in methods:
        plt.plot([0,1],[0,1],linestyle="--",lw=2,color='r',label='Luck',alpha=1)
        for class_id in class_name:
            # 获得当前分类个数
            class_num = int(class_id[-1])
            # 用于二分类
            tprs = []
            aucs = []
            mean_fpr = np.linspace(0,1,100)
            # 用于三分类和四分类
            micro_tprs = []
            micro_aucs = []
            micro_mean_fpr = np.linspace(0,1,100)
            macro_tprs = []
            macro_aucs = []
            macro_mean_fpr = np.linspace(0,1,100)
            for k in k_fold:
                true_label = data_dict["true_test_label"][method][class_id][str(k)+"-fold"]
                preds = data_dict["predicted_test_score"][method][class_id][str(k)+"-fold"]
                # 对标签进行二值化
                true_label = label_binarize(true_label, classes = range(class_num))
                # 二分类
                if class_num == 2:
                    fpr, tpr, threshold = metrics.roc_curve(true_label,preds[:,1])
                    roc_auc = metrics.auc(fpr,tpr)
                    # interp:插值 把结果添加到tprs列表中
                    tprs.append(interp(mean_fpr,fpr,tpr))
                    tprs[-1][0] = 0.0
                    aucs.append(roc_auc)
                    # 对每一折的每一个类绘制roc曲线
                    # plt.plot(fpr,tpr,lw=1,alpha=0.3,label = 'ROC fold %d(area=%0.2f)' % (k, roc_auc))
                # 三分类和四分类
                else:
                    # 统计
                    fpr = {}
                    tpr = {}
                    roc_auc = {}
                    for i in range(class_num):
                        fpr[i], tpr[i], threshold = metrics.roc_curve(true_label[:,i],preds[:,i])
                        roc_auc[i] = metrics.auc(fpr[i],tpr[i])
                    # 方法2：计算micro-average ROC 曲线
                    fpr["micro"],tpr["micro"], threshold = metrics.roc_curve(true_label.ravel(),preds.ravel())
                    roc_auc["micro"] = metrics.auc(fpr["micro"],tpr["micro"])
                    # interp:插值 把结果添加到micro_tprs列表中
                    micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
                    micro_tprs[-1][0] = 0.0
                    micro_aucs.append(roc_auc["micro"])

                    # 方法1：计算macro-average ROC曲线
                    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(class_num)]))
                    mean_tpr = np.zeros_like(all_fpr)
                    for i in range(class_num):
                        mean_tpr += interp(all_fpr,fpr[i],tpr[i])
                    mean_tpr /= class_num
                    fpr["macro"] = all_fpr
                    tpr["macro"] = mean_tpr
                    roc_auc["macro"] = auc(fpr["macro"],tpr["macro"])
                    # interp:插值 把结果添加到micro_tprs列表中
                    macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
                    macro_tprs[-1][0] = 0.0
                    macro_aucs.append(roc_auc["macro"])
                    # 绘制每一折的macro的ROC曲线和micro的ROC曲线
                    # plt.plot(fpr["micro"],tpr["micro"],lw=1,alpha=0.3,label='micro-average ROC fold %d(area=%0.2f)' % (k,roc_auc["micro"]))
                    # plt.plot(fpr["macro"],tpr["macro"],lw=1,alpha=0.3,label='macro-average ROC fold %d(area=%0.2f)' % (k,roc_auc["macro"]))
            if class_num == 2:
                # 绘制二分类的mean roc曲线
                mean_tpr = np.mean(tprs,axis=0)
                mean_tpr[-1] = 1.0
                mean_auc = metrics.auc(mean_fpr,mean_tpr)
                plt.plot(mean_fpr,mean_tpr,color="blue",lw=2,alpha=0.8,label='Mean ROC of two classifications(area=%0.2f)'%mean_auc)
            else:
                # 绘制三分类和四分类的mean roc曲线
                micro_mean_tpr = np.mean(micro_tprs,axis=0)
                micro_mean_tpr[-1]=1.0
                micro_mean_auc = metrics.auc(micro_mean_fpr,micro_mean_tpr)
                macro_mean_tpr = np.mean(macro_tprs,axis=0)
                macro_mean_tpr[-1]=1.0
                macro_mean_auc = metrics.auc(macro_mean_fpr,macro_mean_tpr)
                if class_num == 3:
                    plt.plot(micro_mean_fpr,micro_mean_tpr,color="darkgreen",lw=2,alpha=0.8,label='Mean Micro-Average ROC of three classifications(area=%0.2f)'%micro_mean_auc)
                    plt.plot(macro_mean_fpr,macro_mean_tpr,color="darkorange",lw=2,alpha=0.8,label='Mean Macro-Average ROC of three classifications(area=%0.2f)'%macro_mean_auc)
                if class_num == 4:
                    plt.plot(micro_mean_fpr,micro_mean_tpr,color="slategray",lw=2,alpha=0.8,label='Mean Micro-Average ROC of four classifications(area=%0.2f)'%micro_mean_auc)
                    plt.plot(macro_mean_fpr,macro_mean_tpr,color="pink",lw=2,alpha=0.8,label='Mean Macro-Average ROC of four classifications(area=%0.2f)'%macro_mean_auc)
        plt.xlim([-0.01,1.01])
        plt.ylim([-0.01,1.01])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic -- '+ method+" Classifier Multiclass Comparison")
        plt.legend(bbox_to_anchor=(2.05,0.5),loc='center right',ncol=1)
        # plt.show()
        store_path = path+method+" Classifier Multicalss Comparison"+".png"
        plt.savefig(store_path,bbox_inches='tight')
        plt.close()
        
plot_preclf_allclass(data_dict,methods,class_name,k_fold,save_figure_dir)

  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["m

  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["m

  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["m

## classifier comparison

In [17]:
# 同一分类方式不同分类器的比较
def plot_perclass_allclf(data_dict,methods,class_name,k_fold,path):
    for class_id in class_name:
        plt.figure(figsize=(7,5),dpi=300)
        plt.plot([0,1],[0,1],linestyle="--",lw=2,color='r',label='Luck',alpha=1)
        # 获得当前分类个数
        class_num = int(class_id[-1])
        colors = ["blue","darkgreen","darkorange","slategray","pink"]
        for method in methods:
            # 用于二分类
            tprs = []
            aucs = []
            mean_fpr = np.linspace(0,1,100)
            # 用于三分类和四分类
            micro_tprs = []
            micro_aucs = []
            micro_mean_fpr = np.linspace(0,1,100)
            macro_tprs = []
            macro_aucs = []
            macro_mean_fpr = np.linspace(0,1,100)
            for k in k_fold:
                true_label = data_dict["true_test_label"][method][class_id][str(k)+"-fold"]
                preds = data_dict["predicted_test_score"][method][class_id][str(k)+"-fold"]
                # 对标签进行二值化
                true_label = label_binarize(true_label, classes = range(class_num))
                # 二分类
                if class_num == 2:
                    fpr, tpr, threshold = metrics.roc_curve(true_label,preds[:,1])
                    roc_auc = metrics.auc(fpr,tpr)
                    # interp:插值 把结果添加到tprs列表中
                    tprs.append(interp(mean_fpr,fpr,tpr))
                    tprs[-1][0] = 0.0
                    aucs.append(roc_auc)
                    # 对每一折的每一个类绘制roc曲线
                    # plt.plot(fpr,tpr,lw=1,alpha=0.3,label = 'ROC fold %d(area=%0.2f)' % (k, roc_auc))
                # 三分类和四分类
                else:
                    # 统计
                    fpr = {}
                    tpr = {}
                    roc_auc = {}
                    for i in range(class_num):
                        fpr[i], tpr[i], threshold = metrics.roc_curve(true_label[:,i],preds[:,i])
                        roc_auc[i] = metrics.auc(fpr[i],tpr[i])
                    # 方法2：计算micro-average ROC 曲线
                    fpr["micro"],tpr["micro"], threshold = metrics.roc_curve(true_label.ravel(),preds.ravel())
                    roc_auc["micro"] = metrics.auc(fpr["micro"],tpr["micro"])
                    # interp:插值 把结果添加到micro_tprs列表中
                    micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
                    micro_tprs[-1][0] = 0.0
                    micro_aucs.append(roc_auc["micro"])

                    # 方法1：计算macro-average ROC曲线
                    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(class_num)]))
                    mean_tpr = np.zeros_like(all_fpr)
                    for i in range(class_num):
                        mean_tpr += interp(all_fpr,fpr[i],tpr[i])
                    mean_tpr /= class_num
                    fpr["macro"] = all_fpr
                    tpr["macro"] = mean_tpr
                    roc_auc["macro"] = auc(fpr["macro"],tpr["macro"])
                    # interp:插值 把结果添加到micro_tprs列表中
                    macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
                    macro_tprs[-1][0] = 0.0
                    macro_aucs.append(roc_auc["macro"])
                    # 绘制每一折的macro的ROC曲线和micro的ROC曲线
                    # plt.plot(fpr["micro"],tpr["micro"],lw=1,alpha=0.3,label='micro-average ROC fold %d(area=%0.2f)' % (k,roc_auc["micro"]))
                    # plt.plot(fpr["macro"],tpr["macro"],lw=1,alpha=0.3,label='macro-average ROC fold %d(area=%0.2f)' % (k,roc_auc["macro"]))
            if class_num == 2:
                # 绘制二分类的mean roc曲线
                mean_tpr = np.mean(tprs,axis=0)
                mean_tpr[-1] = 1.0
                mean_auc = metrics.auc(mean_fpr,mean_tpr)
                if method == "RandomForest":
                    plt.plot(mean_fpr,mean_tpr,lw=2,alpha=0.8,label='Mean ROC of '+'RF'+'(area=%0.2f)'%mean_auc)
                else:
                    plt.plot(mean_fpr,mean_tpr,lw=2,alpha=0.8,label='Mean ROC of '+method+'(area=%0.2f)'%mean_auc)
            else:
                # 绘制三分类和四分类的mean roc曲线
                micro_mean_tpr = np.mean(micro_tprs,axis=0)
                micro_mean_tpr[-1]=1.0
                micro_mean_auc = metrics.auc(micro_mean_fpr,micro_mean_tpr)
                macro_mean_tpr = np.mean(macro_tprs,axis=0)
                macro_mean_tpr[-1]=1.0
                macro_mean_auc = metrics.auc(macro_mean_fpr,macro_mean_tpr)
                if class_num == 3:
                    if method == "RandomForest":
                        plt.plot(micro_mean_fpr,micro_mean_tpr,lw=2,alpha=0.8,label='Micro '+'RF'+'(area=%0.2f)'%micro_mean_auc)
                        plt.plot(macro_mean_fpr,macro_mean_tpr,lw=2,alpha=0.8,label='Macro '+'RF'+'(area=%0.2f)'%macro_mean_auc)
                    else:
                        plt.plot(micro_mean_fpr,micro_mean_tpr,lw=2,alpha=0.8,label='Micro '+method+'(area=%0.2f)'%micro_mean_auc)
                        plt.plot(macro_mean_fpr,macro_mean_tpr,lw=2,alpha=0.8,label='Macro '+method+'(area=%0.2f)'%macro_mean_auc)
                if class_num == 4:
                    if method == "RandomForest":
                        plt.plot(micro_mean_fpr,micro_mean_tpr,lw=2,alpha=0.8,label='Micro '+'RF'+'(area=%0.2f)'%micro_mean_auc)
                        plt.plot(macro_mean_fpr,macro_mean_tpr,lw=2,alpha=0.8,label='Macro '+'RF''(area=%0.2f)'%macro_mean_auc)
                    else:
                        plt.plot(micro_mean_fpr,micro_mean_tpr,lw=2,alpha=0.8,label='Micro '+method+'(area=%0.2f)'%micro_mean_auc)
                        plt.plot(macro_mean_fpr,macro_mean_tpr,lw=2,alpha=0.8,label='Macro '+method+'(area=%0.2f)'%macro_mean_auc)
        plt.xlim([-0.01,1.01])
        plt.ylim([-0.01,1.01])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.xlabel('False Positive Rate',fontdict= {'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
        plt.ylabel('True Positive Rate',fontdict= {'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
        plt.yticks(fontproperties='Times New Roman', size=12, weight='bold')  #设置大小及加粗
        plt.xticks(fontproperties='Times New Roman', size=12, weight='bold')
        if class_num==2:
#             plt.title('Receiver Operating Characteristic -- '+ "Classifier Comparison Based On Two Classifications")
#             plt.legend(bbox_to_anchor=(0.5,-0.35),loc='lower center',ncol=2)
            plt.legend(loc='lower right',prop={'family' : 'Times New Roman', 'size' : 9,'weight':'bold'})
            store_path = path+"Classifier Comparison Based On Two Classifications"+".svg"
        if class_num==3:
#             plt.title('Receiver Operating Characteristic -- '+ "Classifier Comparison Based On Three Classifications")
#             plt.legend(bbox_to_anchor=(0.5,-0.5),loc='lower center',ncol=2)
            plt.legend(loc='lower right',prop={'family' : 'Times New Roman', 'size' : 9,'weight':'bold'},ncol=1)
            store_path = path+"Classifier Comparison Based On Three Classifications"+".svg"
        if class_num==4:
#             plt.title('Receiver Operating Characteristic -- '+ "Classifier Comparison Based On Four Classifications")
#             plt.legend(bbox_to_anchor=(0.5,-0.5),loc='lower center',ncol=2)
            plt.legend(loc='lower right',prop={'family' : 'Times New Roman', 'size' : 9,'weight':'bold'},ncol=1)
            store_path = path+"Classifier Comparison Based On Four Classifications"+".svg"
#         plt.show()
        plt.savefig(store_path,bbox_inches='tight',dpi=300)
        plt.close()
        
plot_perclass_allclf(data_dict,methods,class_name,k_fold,save_figure_dir)

  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))
  tprs.append(interp(mean_fpr,fpr,tpr))


  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr

  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr += interp(all_fpr,fpr[i],tpr[i])
  macro_tprs.append(interp(macro_mean_fpr,fpr["macro"],tpr["macro"]))
  micro_tprs.append(interp(micro_mean_fpr,fpr["micro"],tpr["micro"]))
  mean_tpr

### 绘制柱形图

In [4]:
# 准备训练集，验证集准确率数据
acc_data = np.loadtxt("acc_data_time_supplement.txt")
acc_dict = {}
algorithms = ["SIFT-BOW","iDT-BOW","C3D","C3D-Pre","Our Method"]
classifiers = ["SVM","KNN","DT","MLP","RF","RVM"]
choose_classifiers = ["SVM","KNN","DT","MLP","RF","RVM"]
acc_type = ["Train_Acc","Test_Acc","Time"]
row_index=0
for class_id in class_name:
    acc_dict[class_id] = {}
    for clf in classifiers:
        acc_dict[class_id][clf] = {}
        for alg in algorithms:
            acc_dict[class_id][clf][alg] = {}
            for col_index, step in enumerate(acc_type):
                acc_dict[class_id][clf][alg][step] = acc_data[row_index,col_index]
            row_index += 1
acc_dict

{'class2': {'SVM': {'SIFT-BOW': {'Train_Acc': 0.716,
    'Test_Acc': 0.664,
    'Time': 1.3867},
   'iDT-BOW': {'Train_Acc': 0.752, 'Test_Acc': 0.695, 'Time': 173.434},
   'C3D': {'Train_Acc': 0.589, 'Test_Acc': 0.548, 'Time': 0.0077},
   'C3D-Pre': {'Train_Acc': 0.904, 'Test_Acc': 0.897, 'Time': 0.0077},
   'Our Method': {'Train_Acc': 0.853, 'Test_Acc': 0.845, 'Time': 0.1631}},
  'KNN': {'SIFT-BOW': {'Train_Acc': 0.814, 'Test_Acc': 0.695, 'Time': 1.3857},
   'iDT-BOW': {'Train_Acc': 0.79, 'Test_Acc': 0.69, 'Time': 173.4244},
   'C3D': {'Train_Acc': 0.589, 'Test_Acc': 0.548, 'Time': 0.0077},
   'C3D-Pre': {'Train_Acc': 0.904, 'Test_Acc': 0.897, 'Time': 0.0077},
   'Our Method': {'Train_Acc': 0.784, 'Test_Acc': 0.697, 'Time': 0.1635}},
  'DT': {'SIFT-BOW': {'Train_Acc': 0.766, 'Test_Acc': 0.683, 'Time': 1.3735},
   'iDT-BOW': {'Train_Acc': 0.765, 'Test_Acc': 0.701, 'Time': 173.7096},
   'C3D': {'Train_Acc': 0.589, 'Test_Acc': 0.548, 'Time': 0.0077},
   'C3D-Pre': {'Train_Acc': 0.904, 'T

### Algorithms Comparison

In [19]:
# 绘制训练集，验证集准确率图
for class_id in class_name:
    plt.figure(figsize=(12,8),dpi=100)
    train = [[],[],[],[]]  # 每一个元素代表一个算法
    test = [[],[],[],[]]
    for clf in choose_classifiers:
        algors = ["SIFT-BOW","iDT-BOW","C3D","Our Method"]
        for index,alg in enumerate(algors):
            train[index].append(acc_dict[class_id][clf][alg]["Train_Acc"])
            test[index].append(acc_dict[class_id][clf][alg]["Test_Acc"])
    # 画图 -- 每种分类方式下的算法比较
    label_name = ["SIFT-BOW","iDT-BOW","C3D","Our Method"]
    fig, ax = plt.subplots(figsize=(7,5),dpi=300)
    all_width = 1.2  # 八个条柱的总宽度
    width = all_width/4  # 一个条柱的宽度
    onset = width / 2  # 偶数组：偏移量是半个条柱的宽度
    # 创建x坐标
#     sift_train_x,sift_test_x = [x-(onset+3*width) for x in range(0,12,2)],[x-(onset+2*width) for x in range(0,12,2)]
#     idt_train_x,idt_test_x = [x-(onset+1*width) for x in range(0,12,2)],[x-(onset+0*width) for x in range(0,12,2)]
#     c3d_train_x,c3d_test_x = [x+(onset+0*width) for x in range(0,12,2)],[x+(onset+1*width) for x in range(0,12,2)]
#     our_train_x,our_test_x = [x+(onset+2*width) for x in range(0,12,2)],[x+(onset+3*width) for x in range(0,12,2)]
    sift_test_x = [x-(onset+1*width) for x in range(0,12,2)]
    idt_test_x = [x-(onset+0*width) for x in range(0,12,2)]
    c3d_test_x = [x+(onset+0*width) for x in range(0,12,2)]
    our_test_x = [x+(onset+1*width) for x in range(0,12,2)]
    # 绘图
#     ax.bar(sift_train_x,train[0], width=width, color="mediumseagreen",alpha=1,label=label_name[0]+"_Train")
    ax.bar(sift_test_x,test[0], width=width, color="mediumseagreen",alpha=1,label=label_name[0]+"_Test")
#     ax.bar(idt_train_x,train[1], width=width, color="deepskyblue",alpha=1,label=label_name[1]+"_Train")
    ax.bar(idt_test_x,test[1], width=width, color="deepskyblue",alpha=1,label=label_name[1]+"_Test")
#     ax.bar(c3d_train_x,train[2], width=width, color="gold",alpha=1,label=label_name[2]+"_Train")
    ax.bar(c3d_test_x,test[2], width=width, color="gold",alpha=1,label=label_name[2]+"_Test")
#     ax.bar(our_train_x,train[3], width=width, color="darkorange",alpha=1,label=label_name[3]+"_Train")
    ax.bar(our_test_x,test[3], width=width, color="darkorange",alpha=1,label=label_name[3]+"_Test")
    # 修改刻度数量，并且修正为名称
    ax.set_xticks(range(0,12,2))
    ax.set_xticklabels(choose_classifiers)
    ax.set_ylim([0,1])
    ax.set_ylabel('Accuarcy')
#     ax.set_xlabel('Classifiers Name')
#     ax.set_xlabel('Classifiers Name',fontdict= {'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
    ax.set_ylabel('Accuracy',fontdict= {'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
    plt.yticks(fontproperties='Times New Roman', size=12, weight='bold')  #设置大小及加粗
    plt.xticks(fontproperties='Times New Roman', size=12, weight='bold')
    if class_id == "class2":
#         ax.set_title("Algorithms Comparison Based On Two Classifications")
        store_path = save_figure_dir+"Algorithms Comparison Based On Two Classifications"+".svg"
    if class_id == "class3":
#         ax.set_title("Algorithms Comparison Based On Three Classifications")
        store_path = save_figure_dir+"Algorithms Comparison Based On Three Classifications"+".svg"
    if class_id == "class4":
#         ax.set_title("Algorithms Comparison Based On Four Classifications")
        store_path = save_figure_dir+"Algorithms Comparison Based On Four Classifications"+".svg"
#     ax.legend(bbox_to_anchor=(1.4,0.5),loc='center right',ncol=1)
    ax.legend(bbox_to_anchor=(0.5,1),loc='upper center',ncol=4,prop={'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
    plt.savefig(store_path,bbox_inches='tight')
    plt.close()

<Figure size 1200x800 with 0 Axes>

<Figure size 1200x800 with 0 Axes>

<Figure size 1200x800 with 0 Axes>

## classification comparison

In [5]:
# 绘制训练集，验证集准确率图
for clf in choose_classifiers:
    train = [[],[],[],[]]  # 每一个元素代表一个算法
    test = [[],[],[],[]]
    for class_id in class_name:
        algors = ["SIFT-BOW","iDT-BOW","C3D","Our Method"]
        for index,alg in enumerate(algors):
            train[index].append(acc_dict[class_id][clf][alg]["Train_Acc"])
            test[index].append(acc_dict[class_id][clf][alg]["Test_Acc"])
    # 画图 -- 每种分类方式下的算法比较
    label_name = ["SIFT-BOW","iDT-BOW","C3D","Our Method"]
    classifications = ["Binary Classification","Triple Classification","Quadruple Classification"]
#    plt.figure(figsize=(7,5),dpi=300)
    fig, ax = plt.subplots(figsize=(7,5),dpi=300)
    all_width = 1.2  # 八个条柱的总宽度
    width = all_width/4  # 一个条柱的宽度
    onset = width / 2  # 偶数组：偏移量是半个条柱的宽度
    # 创建x坐标
#     sift_train_x,sift_test_x = [x-(onset+3*width) for x in range(0,6,2)],[x-(onset+2*width) for x in range(0,6,2)]
#     idt_train_x,idt_test_x = [x-(onset+1*width) for x in range(0,6,2)],[x-(onset+0*width) for x in range(0,6,2)]
#     c3d_train_x,c3d_test_x = [x+(onset+0*width) for x in range(0,6,2)],[x+(onset+1*width) for x in range(0,6,2)]
#     our_train_x,our_test_x = [x+(onset+2*width) for x in range(0,6,2)],[x+(onset+3*width) for x in range(0,6,2)]
    sift_test_x = [x-(onset+1*width) for x in range(0,6,2)]
    idt_test_x = [x-(onset+0*width) for x in range(0,6,2)]
    c3d_test_x = [x+(onset+0*width) for x in range(0,6,2)]
    our_test_x = [x+(onset+1*width) for x in range(0,6,2)]
    # 绘图
#     ax.bar(sift_train_x,train[0], width=width, color="mediumseagreen",alpha=1,label=label_name[0]+"_Train")
    ax.bar(sift_test_x,test[0], width=width, color="mediumseagreen",alpha=1,label=label_name[0]+"_Test")
#     ax.bar(idt_train_x,train[1], width=width, color="deepskyblue",alpha=1,label=label_name[1]+"_Train")
    ax.bar(idt_test_x,test[1], width=width, color="deepskyblue",alpha=1,label=label_name[1]+"_Test")
#     ax.bar(c3d_train_x,train[2], width=width, color="gold",alpha=1,label=label_name[2]+"_Train")
    ax.bar(c3d_test_x,test[2], width=width, color="gold",alpha=1,label=label_name[2]+"_Test")
#     ax.bar(our_train_x,train[3], width=width, color="darkorange",alpha=1,label=label_name[3]+"_Train")
    ax.bar(our_test_x,test[3], width=width, color="darkorange",alpha=1,label=label_name[3]+"_Test")
    # 修改刻度数量，并且修正为名称
    ax.set_xticks(range(0,6,2))
    ax.set_xticklabels(classifications)
    ax.set_ylim([0,1])
    ax.set_ylabel('Accuarcy')
#     ax.set_xlabel('Classifications')
#     ax.set_xlabel('Classifications',fontdict= {'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
    ax.set_ylabel('Accuracy',fontdict= {'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
    plt.yticks(fontproperties='Times New Roman', size=12, weight='bold')  #设置大小及加粗
    plt.xticks(fontproperties='Times New Roman', size=12, weight='bold')
#     plt.rcParams.update({'font.size': 8})
#     ax.set_title("Classifications Comparison Based On "+clf)
    store_path = save_figure_dir+"Classifications Comparison Based On "+clf+".eps"
#     ax.legend(bbox_to_anchor=(1.4,0.5),loc='center right',ncol=1)
    ax.legend(bbox_to_anchor=(0.5,1),loc='upper center',ncol=4,prop={'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
    plt.savefig(store_path,bbox_inches='tight')
    plt.close()

The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.


### Algorithms and classifiers comparison -- 柱状图

In [62]:
# 对test数据进行降序排序
# 绘制训练集，验证集准确率图
import matplotlib.patches as mpatches
for class_id in class_name:
    train = {}
    test = {}
    for clf in choose_classifiers:
        algors = ["SIFT-BOW","iDT-BOW","C3D","Our Method"]
        for index,alg in enumerate(algors):
            if alg=="Our Method":
                alg = "ST Feature"
                train[alg+"-"+clf] = acc_dict[class_id][clf]["Our Method"]["Train_Acc"]
                test[alg+"-"+clf] = acc_dict[class_id][clf]["Our Method"]["Test_Acc"]
            else:
                train[alg+"-"+clf] = acc_dict[class_id][clf][alg]["Train_Acc"]
                test[alg+"-"+clf] = acc_dict[class_id][clf][alg]["Test_Acc"]
    # 对train test进行排序
    train_sort = sorted(train.items(),key=lambda d:d[1],reverse=True)
    test_sort = sorted(test.items(),key=lambda d:d[1],reverse=True)
    train_sort_data = [item[1] for item in train_sort]
    train_sort_label = [item[0] for item in train_sort]
    test_sort_data = [item[1] for item in test_sort]
    test_sort_label = [item[0] for item in test_sort]
    # 对 our method 颜色进行特殊标记
    colors_train = ["deepskyblue" for i in range(len(train_sort_label))]
    for index, name in enumerate(train_sort_label):
        if name.split('-')[0] == "ST Feature":
            colors_train[index] = "darkorange"     
    colors_test = ["deepskyblue" for i in range(len(test_sort_label))]
    for index, name in enumerate(test_sort_label):
        if name.split('-')[0] == "ST Feature":
            colors_test[index] = "darkorange"
    # 画图 -- 每种分类方式下的算法比较
    fig, ax = plt.subplots(figsize=(12,8),dpi=300)
    x = np.arange(0,24,1)
    ax.bar(x,test_sort_data,width=0.5,alpha=1,color=colors_test)
    ax.set_xticks(range(0,24,1))
    ax.set_xticklabels(test_sort_label,rotation=60,fontsize=7)
    ax.set_ylim([0,1])
    ax.set_ylabel('Accuarcy(Test)')
#     ax.set_xlabel('Algorithm-Classifier')
#     ax.set_xlabel('Algorithm-Classifier',fontdict= {'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
    ax.set_ylabel('Accuarcy(Test)',fontdict= {'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
    plt.yticks(fontproperties='Times New Roman', size=12, weight='bold')  #设置大小及加粗
    plt.xticks(fontproperties='Times New Roman', size=12, weight='bold')
    color = ["darkorange","deepskyblue"]
    labels = ["Our Method","Other Methods"]
    patches = [ mpatches.Patch(color=color[i], label="{:s}".format(labels[i]) ) for i in range(len(color)) ]
#     plt.rcParams.update({'font.size': 12})
    if class_id == "class2":
#         ax.set_title("Algorithms and Classifier Comparison Based On Two Classifications")
        store_path = save_figure_dir+"Algorithms and Classifier Comparison Based On Two Classifications"+".png"
    if class_id == "class3":
#         ax.set_title("Algorithms and Classifier Comparison Based On Three Classifications")
        store_path = save_figure_dir+"Algorithms and Classifier Comparison Based On Three Classifications"+".png"
    if class_id == "class4":
#         ax.set_title("Algorithms and Classifier Comparison Based On Four Classifications")
        store_path = save_figure_dir+"Algorithms and Classifier Comparison Based On Four Classifications"+".png"
#     ax.legend(bbox_to_anchor=(1.4,0.5),loc='center right',ncol=1)
    ax.legend(handles=patches,loc='upper right',prop={'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
    plt.savefig(store_path,bbox_inches='tight')
    plt.close()

### Algorithms and classifiers comparison -- 折线图

In [5]:
# 对test数据进行降序排序
# 绘制训练集，验证集准确率图
import matplotlib.patches as mpatches
line_chart_data = {}
for class_id in class_name:
    train = {}
    test = {}
    for clf in choose_classifiers:
        algors = ["SIFT-BOW","iDT-BOW","C3D","Our Method"]
        for index,alg in enumerate(algors):
            if alg=="Our Method":
                alg = "ST Feature"
                train[alg+"-"+clf] = acc_dict[class_id][clf]["Our Method"]["Train_Acc"]
                test[alg+"-"+clf] = acc_dict[class_id][clf]["Our Method"]["Test_Acc"]
            else:
                train[alg+"-"+clf] = acc_dict[class_id][clf][alg]["Train_Acc"]
                test[alg+"-"+clf] = acc_dict[class_id][clf][alg]["Test_Acc"]
    line_chart_data[class_id] = test
line_chart_data

{'class2': {'SIFT-BOW-SVM': 0.664,
  'iDT-BOW-SVM': 0.695,
  'C3D-SVM': 0.548,
  'ST Feature-SVM': 0.845,
  'SIFT-BOW-KNN': 0.695,
  'iDT-BOW-KNN': 0.69,
  'C3D-KNN': 0.548,
  'ST Feature-KNN': 0.697,
  'SIFT-BOW-DT': 0.683,
  'iDT-BOW-DT': 0.701,
  'C3D-DT': 0.548,
  'ST Feature-DT': 0.868,
  'SIFT-BOW-MLP': 0.673,
  'iDT-BOW-MLP': 0.687,
  'C3D-MLP': 0.548,
  'ST Feature-MLP': 0.731,
  'SIFT-BOW-RF': 0.693,
  'iDT-BOW-RF': 0.714,
  'C3D-RF': 0.548,
  'ST Feature-RF': 0.829,
  'SIFT-BOW-RVM': 0.689,
  'iDT-BOW-RVM': 0.69,
  'C3D-RVM': 0.548,
  'ST Feature-RVM': 0.788},
 'class3': {'SIFT-BOW-SVM': 0.513,
  'iDT-BOW-SVM': 0.438,
  'C3D-SVM': 0.333,
  'ST Feature-SVM': 0.624,
  'SIFT-BOW-KNN': 0.54,
  'iDT-BOW-KNN': 0.486,
  'C3D-KNN': 0.333,
  'ST Feature-KNN': 0.52,
  'SIFT-BOW-DT': 0.451,
  'iDT-BOW-DT': 0.455,
  'C3D-DT': 0.333,
  'ST Feature-DT': 0.593,
  'SIFT-BOW-MLP': 0.538,
  'iDT-BOW-MLP': 0.453,
  'C3D-MLP': 0.333,
  'ST Feature-MLP': 0.521,
  'SIFT-BOW-RF': 0.503,
  'iDT-BOW-

In [8]:
# 对test依据class2进行排序
test_sort = sorted(line_chart_data['class2'].items(),key=lambda d:d[1],reverse=True)
test_sort_label = [item[0] for item in test_sort]
test_sort_data = [[],[],[]]
for index, class_id in enumerate(class_name):
    for name in test_sort_label:
        test_sort_data[index].append(line_chart_data[class_id][name])

# 对每一类画图
# 对 our method 颜色进行特殊标记
markers_test = ["o" for i in range(len(test_sort_label))]
for index, name in enumerate(test_sort_label):
    if name.split('-')[0] == "ST Feature":
        markers_test[index] = "*"
marker_size_test = [10 for i in range(len(test_sort_label))]
for index, name in enumerate(test_sort_label):
    if name.split('-')[0] == "ST Feature":
        marker_size_test[index] = 100
        
line_color = ['darkorange','deepskyblue','limegreen']
marker_color_test = [['w' for i in range(len(test_sort_label))] for i in range(3)]
for i in range(3):
    for index, name in enumerate(test_sort_label):
        marker_color_test[i][index] = line_color[i]
        if name.split('-')[0] == "ST Feature":
            marker_color_test[i][index] = 'r'

# 画图 -- 每种分类方式下的算法比较
fig, ax = plt.subplots(figsize=(7,5),dpi=300)
x = np.arange(0,24,1)
label_plot_name = ['Binary Classification','Triple Classification','Quadruple Classification']
# for index in range(3):
ax1 = ax.plot(x,test_sort_data[0],color=line_color[0],marker='.',zorder=1,label=label_plot_name[0])
ax2 = ax.plot(x,test_sort_data[1],color=line_color[1],marker='.',zorder=1,label=label_plot_name[1])
ax3 = ax.plot(x,test_sort_data[2],color=line_color[2],marker='.',zorder=1,label=label_plot_name[2])
# 图例1
# legend1 = ax.legend(loc='upper right',prop={'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})

# 绘制散点图
for index in range(3):
    for xp,yp,m,s,color in zip(x,test_sort_data[index],markers_test,marker_size_test,marker_color_test[index]):
        ax.scatter(xp, yp, marker=m,color=color,s=s,zorder=2)        

ax.set_xticks(range(0,24,1))
ax.set_xticklabels(test_sort_label,rotation=60,fontsize=7)
ax.set_ylim([0,1])
ax.set_ylabel('Accuarcy(Test)')
#     ax.set_xlabel('Algorithm-Classifier')
#     ax.set_xlabel('Algorithm-Classifier',fontdict= {'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
ax.set_ylabel('Accuarcy(Test)',fontdict= {'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
plt.yticks(fontproperties='Times New Roman', size=12, weight='bold')  #设置大小及加粗
plt.xticks(fontproperties='Times New Roman', size=12, weight='bold')
color = ['r']
labels = ["ST Feature"]
marker = ['*']
marker_size = [10,5,5,5]
patches = ax.plot([],[],marker=marker[0], color=color[0], ms=marker_size[0], ls="", label="{:s}".format(labels[0]))
store_path = save_figure_dir+"Algorithms and Classifier Comparison"+".svg"

lns =ax1+ax2+ax3+patches
labs = [l.get_label() for l in lns]
ax.legend(lns,labs,loc='upper right',prop={'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
# ax.legend(handles=patches,loc='lower right',prop={'family' : 'Times New Roman', 'size' : 12,'weight':'bold'})
# ax.add_artist(legend1)
plt.savefig(store_path,bbox_inches='tight')
plt.close()

In [65]:
# 绘制图 -- 时间比较
for class_id in class_name:
    time= {}
    for clf in choose_classifiers:
        algors = ["SIFT-BOW","iDT-BOW","C3D","Our Method"]
        for index,alg in enumerate(algors):
            time[alg+"-"+clf] = acc_dict[class_id][clf][alg]["Time"]
    # 对train test进行排序
    time_sort = sorted(time.items(),key=lambda d:d[1],reverse=True)
    time_sort_data = [item[1] for item in time_sort]
    time_sort_label = [item[0] for item in time_sort]
    # 对 our method 颜色进行特殊标记
    colors_time = ["deepskyblue" for i in range(len(time_sort_label))]
    for index, name in enumerate(time_sort_label):
        if name.split('-')[0] == "Our Method":
            colors_time[index] = "darkorange" 
            
    # 画图 -- 每种分类方式下的算法比较
    fig, (ax1, ax2) = plt.subplots(1,2,sharey=True)
    ax1.barh(range(len(time_sort_data)),time_sort_data,height=0.5,alpha=1,color=colors_time)
    ax2.barh(range(len(time_sort_data)),time_sort_data,height=0.5,alpha=1,color=colors_time)
    # 设置区间
    if class_id == "class2":
        ax1.set_xlim(0,1.7)
        ax2.set_xlim(171,174)
    if class_id == "class3":
        ax1.set_xlim(0,1.7)
        ax2.set_xlim(172,175)
    if class_id == "class4":
        ax1.set_xlim(0,1.7)
        ax2.set_xlim(177,180)
    # 隐藏ax和ax2的间隔
    ax1.spines['right'].set_visible(False)
    ax2.spines['left'].set_visible(False)
    # 设置倾斜度
    d=0.85
    # 绘制断裂处的标记
    kwargs = dict(marker=[(-1,-d),(1,d)],linestyle='none',color='black',clip_on=False)
    ax1.plot([1,1],[1,0],transform=ax1.transAxes,**kwargs)
    ax2.plot([0,0],[0,1],transform=ax2.transAxes,**kwargs)
#     # 为柱状图右侧添加标签
#     for index, rect in enumerate(b):
#         w = rect.get_width()
#         ax.text(w,rect.get_y()+rect.get_height()/2,"%0.4f"%w,ha="left",va="center")
    
    ax1.set_yticks(range(0,12,1))
    ax1.set_yticklabels(time_sort_label,rotation=0,fontsize=7)
#     ax.set_xlim([0,5])

    

    ax.set_xlabel('Time/Seconds')
    ax.set_ylabel('Algorithm-Classifier')
    if class_id == "class2":
        ax.set_title("Time Comparison Based On Two Classifications")
        store_path = save_figure_dir+"Time Comparison Based On Two Classifications"+".png"
    if class_id == "class3":
        ax.set_title("Time Comparison Based On Three Classifications")
        store_path = save_figure_dir+"Time Comparison Based On Three Classifications"+".png"
    if class_id == "class4":
        ax.set_title("Time Comparison Based On Four Classifications")
        store_path = save_figure_dir+"Time Comparison Based On Four Classifications"+".png"
    ax.legend(bbox_to_anchor=(1.4,0.5),loc='center right',ncol=1)
    plt.savefig(store_path,bbox_inches='tight')
    plt.close()

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
