# 模型评估

In [None]:
import os
import pickle
import torch
import warnings
from torch.utils.data import DataLoader
from torchvision import transforms
from utils import ISIC2018Dataset, Evaluation, load_model, plot_confusion_matrix, plot_roc_curves, plot_losses, Logger
warnings.filterwarnings('ignore')

# Pick the GPU when CUDA is available; otherwise fall back to the CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
DEVICE = torch.device(_device_name)

### 数据准备
我将对各个训练好的模型在训练集、验证集、测试集上进行评估，因而所有的图片都进行以下相同的预处理
- 最小边放缩至224
- 中心裁剪大小为[224, 224]的区域
- 转换成PyTorch使用的tensor形式
- 应用标准化（按通道减去均值并除以标准差）

In [None]:
# Data preprocessing: every split goes through the same deterministic
# evaluation-time pipeline (resize -> center crop -> tensor -> normalize).
BATCH_SIZE = 32
NUM_WORKERS = 4

_eval_steps = [
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
test_trans = transforms.Compose(_eval_steps)


def _build_dataset(csv_file_path, img_dir):
    """Create an ISIC2018Dataset that applies the shared evaluation transform."""
    return ISIC2018Dataset(csv_file_path=csv_file_path,
                           img_dir=img_dir,
                           transform=test_trans)


def _build_loader(dataset):
    """Wrap a dataset in a DataLoader using the notebook-wide batch settings."""
    return DataLoader(dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)


train_dataset = _build_dataset(
    './data/ISIC2018/Train_GroundTruth.csv',
    './data/ISIC2018/ISIC2018_Task3_Training_Input',
)
valid_dataset = _build_dataset(
    './data/ISIC2018/ISIC2018_Task3_Validation_GroundTruth.csv',
    './data/ISIC2018/ISIC2018_Task3_Validation_Input',
)
# The test split reads its images from the training-input directory.
# NOTE(review): presumably the test set was held out from the official
# training images -- confirm against the script that produced the CSV files.
test_dataset = _build_dataset(
    './data/ISIC2018/Test_GroundTruth.csv',
    './data/ISIC2018/ISIC2018_Task3_Training_Input',
)

# One loader per split, created in train / valid / test order.
train_iter, valid_iter, test_iter = (
    _build_loader(split) for split in (train_dataset, valid_dataset, test_dataset)
)

### 模型评估
为了评估OHEM算法对类别不平衡（类别倾斜）问题的缓解效果，我选择了如下评价指标：
- accuracy
- balanced accuracy
- precision
- recall
- f1 score
- confusion matrix
- roc curves(roc-auc)

同时我还绘制了训练过程中损失和准确率的变化曲线

### 对照组评估
对照组即不使用任何应对类别倾斜的方法，按照正常的训练流程训练得到的模型。

In [None]:
# Evaluate the control-group runs: for every run id, plot the recorded
# loss/accuracy curves, load the saved model, and report metrics, the
# confusion matrix and ROC curves on the train, valid and test loaders.
RUN_FOLDER = './run/control'
LOGGER = Logger(run_folder=RUN_FOLDER, title='test')
LOGGER.info("Evaluation for control")
RUN_IDS = ['0001', '0002', '0003']

evaluation = Evaluation(device=DEVICE, categories=test_dataset.categories)

for run_id in RUN_IDS:
    # Read the loss/accuracy history for this run. The context manager
    # closes the file handle, which a bare pickle.load(open(...)) leaks.
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # point this at files produced by our own training runs.
    with open(os.path.join(RUN_FOLDER, run_id+'obj.pkl'), 'rb') as history_file:
        loss_acc = pickle.load(history_file)
    plot_losses([loss_acc['train_loss'], loss_acc['train_acc'], loss_acc['valid_loss'], loss_acc['valid_acc']],
                title=f"control {run_id} loss and acc",
                legend=["train loss", "train acc", "valid loss", "valid acc"],
                filename=os.path.join(RUN_FOLDER, "images", f"control-{run_id}-loss.png"))

    model = load_model(os.path.join(RUN_FOLDER, "models", run_id+"model.pkl"),
                       device=DEVICE)
    for dataset, data_loader in zip(['train', 'valid', 'test'], [train_iter, valid_iter, test_iter]):
        LOGGER.info(f"model {run_id} for {dataset} dataset")
        report = evaluation.get_report(model=model, data_loader=data_loader)
        LOGGER.info(f"\n{report}")
        result = evaluation.evaluate(metric=['acc', 'b_acc', 'precision', 'recall', 'f1_score', 'c_matrix', 'roc_curves'],
                                     model=model, data_loader=data_loader)
        LOGGER.info(f"acc:       {result['acc']:.4f}\n"
                    f"b_acc:     {result['b_acc']:.4f}\n"
                    f"precision: {result['precision']}\n"
                    f"recall:    {result['recall']}\n"
                    f"f1_score:  {result['f1_score']}")
        # The category labels are taken from test_dataset for every split.
        plot_confusion_matrix(result["c_matrix"], test_dataset.categories,
                              title=f"control {run_id} confusion matrix for {dataset}",
                              filename=os.path.join(RUN_FOLDER, "images", f"{run_id+dataset}-cm.png"))
        # result["roc_curves"] is indexed 0..2 and passed positionally.
        plot_roc_curves(result["roc_curves"][0],
                        result["roc_curves"][1],
                        result["roc_curves"][2],
                        categories=test_dataset.categories,
                        filename=os.path.join(RUN_FOLDER, "images", f"{run_id+dataset}-roc_curve.png"))


### 实验组评估
实验组在训练过程中使用了ohem算法，希望能够应对类别倾斜的问题。

In [None]:
# Evaluate the experimental (ohem) runs: for every run id, plot the
# recorded loss/accuracy curves, load the saved model, and report metrics,
# the confusion matrix and ROC curves on the train, valid and test loaders.
RUN_FOLDER = './run/ohem'
LOGGER = Logger(run_folder=RUN_FOLDER, title='test')
LOGGER.info("Evaluation for ohem")
RUN_IDS = ['0001', '0002', '0003', '0004', '0005', '0006']

evaluation = Evaluation(device=DEVICE, categories=test_dataset.categories)

for run_id in RUN_IDS:
    # Read the loss/accuracy history for this run. The context manager
    # closes the file handle, which a bare pickle.load(open(...)) leaks.
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # point this at files produced by our own training runs.
    with open(os.path.join(RUN_FOLDER, run_id+'obj.pkl'), 'rb') as history_file:
        loss_acc = pickle.load(history_file)
    plot_losses([loss_acc['train_loss'], loss_acc['train_acc'], loss_acc['valid_loss'], loss_acc['valid_acc']],
                title=f"ohem {run_id} loss and acc",
                legend=["train loss", "train acc", "valid loss", "valid acc"],
                filename=os.path.join(RUN_FOLDER, "images", f"ohem-{run_id}-loss.png"))

    model = load_model(os.path.join(RUN_FOLDER, "models", run_id+"model.pkl"),
                       device=DEVICE)
    for dataset, data_loader in zip(['train', 'valid', 'test'], [train_iter, valid_iter, test_iter]):
        LOGGER.info(f"model {run_id} for {dataset} dataset")
        report = evaluation.get_report(model=model, data_loader=data_loader)
        LOGGER.info(f"\n{report}")
        result = evaluation.evaluate(metric=['acc', 'b_acc', 'precision', 'recall', 'f1_score', 'c_matrix', 'roc_curves'],
                                     model=model, data_loader=data_loader)
        LOGGER.info(f"acc:       {result['acc']:.4f}\n"
                    f"b_acc:     {result['b_acc']:.4f}\n"
                    f"precision: {result['precision']}\n"
                    f"recall:    {result['recall']}\n"
                    f"f1_score:  {result['f1_score']}")
        # The category labels are taken from test_dataset for every split.
        plot_confusion_matrix(result["c_matrix"], test_dataset.categories,
                              title=f"ohem {run_id} confusion matrix for {dataset}",
                              filename=os.path.join(RUN_FOLDER, "images", f"{run_id+dataset}-cm.png"))
        # result["roc_curves"] is indexed 0..2 and passed positionally.
        plot_roc_curves(result["roc_curves"][0],
                        result["roc_curves"][1],
                        result["roc_curves"][2],
                        categories=test_dataset.categories,
                        filename=os.path.join(RUN_FOLDER, "images", f"{run_id+dataset}-roc_curve.png"))

### 模型评估
为了评估OHEM算法对类别不平衡（类别倾斜）问题的缓解效果，我选择了如下评价指标：
- accuracy
- balanced accuracy
- precision
- recall
- f1 score
- confusion matrix
- roc curves(roc-auc)

同时我还绘制了训练过程中损失和准确率的变化曲线

### 对照组评估
对照组即不使用任何应对类别倾斜的方法，按照正常的训练流程训练得到的模型。

In [None]:
# Evaluate the control-group runs: for every run id, plot the recorded
# loss/accuracy curves, load the saved model, and report metrics, the
# confusion matrix and ROC curves on the train, valid and test loaders.
# NOTE(review): this cell repeats a control-evaluation cell that appears
# earlier in the notebook and writes to the same output filenames --
# confirm whether both copies are needed.
RUN_FOLDER = './run/control'
LOGGER = Logger(run_folder=RUN_FOLDER, title='test')
LOGGER.info("Evaluation for control")
RUN_IDS = ['0001', '0002', '0003']

evaluation = Evaluation(device=DEVICE, categories=test_dataset.categories)

for run_id in RUN_IDS:
    # Read the loss/accuracy history for this run. The context manager
    # closes the file handle, which a bare pickle.load(open(...)) leaks.
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # point this at files produced by our own training runs.
    with open(os.path.join(RUN_FOLDER, run_id+'obj.pkl'), 'rb') as history_file:
        loss_acc = pickle.load(history_file)
    plot_losses([loss_acc['train_loss'], loss_acc['train_acc'], loss_acc['valid_loss'], loss_acc['valid_acc']],
                title=f"control {run_id} loss and acc",
                legend=["train loss", "train acc", "valid loss", "valid acc"],
                filename=os.path.join(RUN_FOLDER, "images", f"control-{run_id}-loss.png"))

    model = load_model(os.path.join(RUN_FOLDER, "models", run_id+"model.pkl"),
                       device=DEVICE)
    for dataset, data_loader in zip(['train', 'valid', 'test'], [train_iter, valid_iter, test_iter]):
        LOGGER.info(f"model {run_id} for {dataset} dataset")
        report = evaluation.get_report(model=model, data_loader=data_loader)
        LOGGER.info(f"\n{report}")
        result = evaluation.evaluate(metric=['acc', 'b_acc', 'precision', 'recall', 'f1_score', 'c_matrix', 'roc_curves'],
                                     model=model, data_loader=data_loader)
        LOGGER.info(f"acc:       {result['acc']:.4f}\n"
                    f"b_acc:     {result['b_acc']:.4f}\n"
                    f"precision: {result['precision']}\n"
                    f"recall:    {result['recall']}\n"
                    f"f1_score:  {result['f1_score']}")
        # The category labels are taken from test_dataset for every split.
        plot_confusion_matrix(result["c_matrix"], test_dataset.categories,
                              title=f"control {run_id} confusion matrix for {dataset}",
                              filename=os.path.join(RUN_FOLDER, "images", f"{run_id+dataset}-cm.png"))
        # result["roc_curves"] is indexed 0..2 and passed positionally.
        plot_roc_curves(result["roc_curves"][0],
                        result["roc_curves"][1],
                        result["roc_curves"][2],
                        categories=test_dataset.categories,
                        filename=os.path.join(RUN_FOLDER, "images", f"{run_id+dataset}-roc_curve.png"))


### 实验组评估
实验组在训练过程中使用了ohem算法，希望能够应对类别倾斜的问题。

In [None]:
# Evaluate the experimental (ohem) runs: for every run id, plot the
# recorded loss/accuracy curves, load the saved model, and report metrics,
# the confusion matrix and ROC curves on the train, valid and test loaders.
# NOTE(review): this cell repeats an ohem-evaluation cell that appears
# earlier in the notebook and writes to the same output filenames --
# confirm whether both copies are needed.
RUN_FOLDER = './run/ohem'
LOGGER = Logger(run_folder=RUN_FOLDER, title='test')
LOGGER.info("Evaluation for ohem")
RUN_IDS = ['0001', '0002', '0003', '0004', '0005', '0006']

evaluation = Evaluation(device=DEVICE, categories=test_dataset.categories)

for run_id in RUN_IDS:
    # Read the loss/accuracy history for this run. The context manager
    # closes the file handle, which a bare pickle.load(open(...)) leaks.
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # point this at files produced by our own training runs.
    with open(os.path.join(RUN_FOLDER, run_id+'obj.pkl'), 'rb') as history_file:
        loss_acc = pickle.load(history_file)
    plot_losses([loss_acc['train_loss'], loss_acc['train_acc'], loss_acc['valid_loss'], loss_acc['valid_acc']],
                title=f"ohem {run_id} loss and acc",
                legend=["train loss", "train acc", "valid loss", "valid acc"],
                filename=os.path.join(RUN_FOLDER, "images", f"ohem-{run_id}-loss.png"))

    model = load_model(os.path.join(RUN_FOLDER, "models", run_id+"model.pkl"),
                       device=DEVICE)
    for dataset, data_loader in zip(['train', 'valid', 'test'], [train_iter, valid_iter, test_iter]):
        LOGGER.info(f"model {run_id} for {dataset} dataset")
        report = evaluation.get_report(model=model, data_loader=data_loader)
        LOGGER.info(f"\n{report}")
        result = evaluation.evaluate(metric=['acc', 'b_acc', 'precision', 'recall', 'f1_score', 'c_matrix', 'roc_curves'],
                                     model=model, data_loader=data_loader)
        LOGGER.info(f"acc:       {result['acc']:.4f}\n"
                    f"b_acc:     {result['b_acc']:.4f}\n"
                    f"precision: {result['precision']}\n"
                    f"recall:    {result['recall']}\n"
                    f"f1_score:  {result['f1_score']}")
        # The category labels are taken from test_dataset for every split.
        plot_confusion_matrix(result["c_matrix"], test_dataset.categories,
                              title=f"ohem {run_id} confusion matrix for {dataset}",
                              filename=os.path.join(RUN_FOLDER, "images", f"{run_id+dataset}-cm.png"))
        # result["roc_curves"] is indexed 0..2 and passed positionally.
        plot_roc_curves(result["roc_curves"][0],
                        result["roc_curves"][1],
                        result["roc_curves"][2],
                        categories=test_dataset.categories,
                        filename=os.path.join(RUN_FOLDER, "images", f"{run_id+dataset}-roc_curve.png"))