# 12/2/19

Extracting the confusion matrix for the drain detector trained on the small validation set partition.

In [None]:
import os
import os.path as osp
import json
from functools import partial
os.chdir('/lfs/1/gangus/repositories/pytorch-classification/drain_detector')

import torch
import torch.nn as nn
import sklearn.metrics as skl
import numpy as np
import pandas as pd
from tqdm import tqdm

import emmental
from emmental import Meta
from emmental.data import EmmentalDataLoader
from emmental.learner import EmmentalLearner
from emmental.model import EmmentalModel
from emmental.scorer import Scorer
from emmental.task import EmmentalTask

from dataset import DrainDetectionDataset
import modules
from util import ce_loss, output

In [None]:
# Experiment directory; its config.json is loaded in the setup cell below.
experiment_dir = (
    'experiments/drain_detection/cxr_seg/pretrain_chexnet/1'
)
# Emmental log directory of the run whose saved checkpoint is loaded below.
emmental_dir = (
    'experiments/drain_detection/cxr_seg/pretrain_chexnet/_emmental_logs/2019_11_18/09_09_48/64ab5a7b'
)

In [3]:
# Initialise Emmental, then pick which split's settings from config.json are reused below.
emmental.init()
split = 'valid'

# Load the experiment configuration stored in the experiment directory.
config_path = osp.join(experiment_dir, 'config.json')
with open(config_path) as config_file:
    config = json.load(config_file)

# Task metadata: both dicts are keyed by task name further down.
task_to_label_dict, task_to_cardinality_dict = (
    config['task_to_label_dict'],
    config['task_to_cardinality_dict'],
)

# Per-split dataset / dataloader settings (indexed with `split` below).
dataset_configs, dataloader_configs = config['dataset_configs'], config['dataloader_configs']

# Names of the encoder/decoder classes in `modules` plus their constructor kwargs.
encoder_class, encoder_args = config['encoder_class'], config['encoder_args']
decoder_class, decoder_args = config['decoder_class'], config['decoder_args']

# Checkpoint saved by the training run; judging by its filename it is presumably the
# best-validation-ROC-AUC model — confirm against the training logs.
checkpoint_name = 'best_model_drain_drain-detection-dataset_valid_roc_auc.pth'
model_config = dict(
    model_path=osp.join(emmental_dir, checkpoint_name),
    device=0,
    dataparallel=True,
)

# Hand Emmental a shallow copy of the settings rather than the notebook's own dict.
Meta.update_config(config={'model_config': dict(model_config)})

# The dataset is built with split_str='all' while reusing the constructor arguments
# and dataloader settings that config.json stores for the `split` ('valid') entry.
dataset_kwargs = dataset_configs[split]['args']
ds = DrainDetectionDataset(split_str='all', **dataset_kwargs)

dataloader_kwargs = dataloader_configs[split]
dl = EmmentalDataLoader(
    task_to_label_dict=task_to_label_dict,
    dataset=ds,
    split=split,
    **dataloader_kwargs,
)



# A single encoder instance is created once and placed in every task's module pool.
encoder_module = getattr(modules, encoder_class)(**encoder_args)


def _build_task(task_name):
    """Assemble the EmmentalTask for ``task_name``.

    The task flow sends the ``image`` field of the input through the shared
    encoder and feeds the encoder's first output to a decoder created for
    this task, sized by ``task_to_cardinality_dict[task_name]``.
    """
    decoder_key = f'decoder_module_{task_name}'
    decoder_module = getattr(modules, decoder_class)(
        task_to_cardinality_dict[task_name], **decoder_args
    )
    return EmmentalTask(
        name=task_name,
        module_pool=nn.ModuleDict(
            {'encoder_module': encoder_module, decoder_key: decoder_module}
        ),
        task_flow=[
            {'name': 'encoder_module', 'module': 'encoder_module', 'inputs': [('_input_', 'image')]},
            {'name': decoder_key, 'module': decoder_key, 'inputs': [('encoder_module', 0)]},
        ],
        # Loss and output helpers take the task name as their first argument.
        loss_func=partial(ce_loss, task_name),
        output_func=partial(output, task_name),
        scorer=Scorer(metrics=['accuracy', 'roc_auc', 'precision', 'recall', 'f1']),
    )


tasks = [_build_task(task_name) for task_name in task_to_label_dict.keys()]
model = EmmentalModel(name='drain-detection-model', tasks=tasks)

[2019-12-02 14:35:40,657][INFO] emmental.meta:106 - Setting logging directory to: /tmp/2019_12_02/14_35_40/d124e521
[2019-12-02 14:35:40,679][INFO] emmental.meta:60 - Loading Emmental default config from /lfs/1/gangus/repositories/pytorch-classification/.emmental/src/emmental/emmental-default-config.yaml.
[2019-12-02 14:35:40,682][INFO] emmental.meta:160 - Updating Emmental config from user provided config.
[2019-12-02 14:35:40,980][INFO] emmental.data:52 - Auto generate uids for dataset drain-detection-dataset under _uids_.
[2019-12-02 14:35:44,578][INFO] root:62 - Loaded 606/606 pretrained parameters
[2019-12-02 14:35:44,588][INFO] emmental.task:48 - Created task: drain
[2019-12-02 14:35:44,613][INFO] emmental.model:71 - Moving model to GPU (cuda:0).
[2019-12-02 14:35:44,617][INFO] emmental.model:57 - Created emmental model drain-detection-model that contains task {'drain'}.
[2019-12-02 14:35:44,618][INFO] emmental.model:71 - Moving model to GPU (cuda:0).


In [4]:
# Restore the trained weights when a checkpoint path has been configured.
checkpoint_path = Meta.config["model_config"]["model_path"]
if checkpoint_path:
    model.load(checkpoint_path)

[2019-12-02 14:35:44,978][INFO] emmental.model:518 - [drain-detection-model] Model loaded from experiments/drain_detection/cxr_seg/pretrain_chexnet/_emmental_logs/2019_11_18/09_09_48/64ab5a7b/best_model_drain_drain-detection-dataset_valid_roc_auc.pth
[2019-12-02 14:35:44,979][INFO] emmental.model:71 - Moving model to GPU (cuda:0).


In [5]:
# Run the loaded model over the `dl` dataloader; `res['probs']` is read in the next cell.
# Slow: the recorded run took about 17 minutes (5606 iterations).
res = model.predict(dl, return_preds=True)

100%|██████████| 5606/5606 [17:19<00:00,  5.40it/s]


In [6]:
# Keep the model's probabilities for the 'drain' task and persist a copy to disk
# (np.save appends the .npy extension to the given name).
y_prob = res['probs']['drain']
y_prob_array = np.array(y_prob)
np.save('y_prob_all_chexnet', y_prob_array)

In [None]:
# model.score([dl])

### 2.) Getting labels for the entire CheXNet dataset

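Here we create a copy of `nih_labels.csv`, add a dummy `drain` column (for compatibility with `DrainDetectionDataset`), then run the model. The cell below then fills `drain_weak` with column 1 of the predicted probabilities and `drain` with that probability thresholded at 0.5, and writes the result to `all.csv`.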

In [23]:
# Attach the drain detector's outputs to the NIH label table and write it out as all.csv.
labels_df = pd.read_csv('/dfs/scratch1/senwu/mmtl/emmental-tutorials/chexnet/data/nih_labels.csv')

# Convert to an ndarray locally so this cell works in top-to-bottom order even when
# `y_prob` is still a plain sequence; column 1 is used as the drain score
# (presumably P(drain present) — confirm the class order).
drain_prob = np.asarray(y_prob)[:, 1]

# NOTE(review): the assignment below is positional. It assumes the dataloader visited
# the examples in the same row order as nih_labels.csv (no shuffling) — TODO confirm.
assert len(drain_prob) == len(labels_df), (
    f'{len(drain_prob)} predictions for {len(labels_df)} label rows'
)

drain_threshold = 0.5  # probabilities strictly above this become hard label 1
labels_df['drain_weak'] = drain_prob
labels_df['drain'] = (drain_prob > drain_threshold).astype(int)

# to_csv keeps its default index=True, so the row index is written as an extra
# unnamed first column.
# NOTE(review): this overwrites all.csv — presumably the file that
# DrainDetectionDataset(split_str='all') reads; confirm before re-running.
labels_df.to_csv('/lfs/1/gangus/repositories/pytorch-classification/drain_detector/data/by-patient-id/split/all.csv')

In [8]:
# Make sure y_prob is an ndarray so the column slicing and argmax used on it work.
y_prob = np.array(y_prob)

In [19]:
# Hard prediction per row: index of the largest probability along axis 1.
y_pred = np.argmax(y_prob, axis=1)
# Display column 1 of the probability array.
y_prob[:, 1]

array([0.09097441, 0.00350022, 0.8352091 , ..., 0.00164293, 0.09175233,
       0.04183945], dtype=float32)

In [24]:
# Show the label table; the added `drain_weak` and `drain` columns appear at the far right.
labels_df

Unnamed: 0,Image Index,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,Cardiomegaly,Emphysema,Effusion,Hernia,...,Atelectasis,Pneumothorax,Pleural_Thickening,Pneumonia,Fibrosis,Edema,Consolidation,fold,drain_weak,drain
0,00000001_000.png,0,1,058Y,M,PA,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,train,0.090974,0
1,00000001_001.png,1,1,058Y,M,PA,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,train,0.003500,0
2,00000001_002.png,2,1,058Y,M,PA,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,train,0.835209,1
3,00000002_000.png,0,2,081Y,M,PA,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,train,0.026913,0
4,00000003_000.png,0,3,081Y,F,PA,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,train,0.013373,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112115,00030801_001.png,1,30801,039Y,M,PA,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,train,0.999795,1
112116,00030802_000.png,0,30802,029Y,M,PA,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,val,0.700383,1
112117,00030803_000.png,0,30803,042Y,F,PA,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,train,0.001643,0
112118,00030804_000.png,0,30804,030Y,F,PA,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,train,0.091752,0
