# Notebook for testing view classification

In [None]:
import sys
import os
import pandas as pd
import datetime
import numpy as np

projectdir = os.path.dirname(os.getcwd())
sys.path.append(os.path.join(projectdir, 'src'))

from d00_utils.db_utils import dbReadWriteClassification, dbReadWriteViews, dbReadWriteClean
import d02_intermediate.download_dcm as download_dcm
import d03_classification.predict_views as predict_views
from d00_utils.log_utils import setup_logging
from d03_classification.evaluate_views import evaluate_views

# Notebook-wide logger; the cells and functions below use it to record progress.
logger = setup_logging(__name__, 'notebook_logger')

from sklearn.metrics import confusion_matrix, multilabel_confusion_matrix

In [None]:
# Run configuration: image directory, model and run date select which stored
# predictions are evaluated.
img_dir = "train_split100_downsampleby20"
model_name = "view_23_e5_class_11-Mar-2018"
date_run = datetime.date(2019, 8, 14)
study_filter = None  # no restriction to a subset of studies

In [None]:
# NOTE: test_evaluate_views is defined in a later cell of this notebook, so
# that cell has to be executed first. The configured study_filter is passed
# through instead of a hard-coded None so the configuration is actually used.
test_evaluate_views(img_dir, model_name, date_run, study_filter=study_filter, if_exists="replace")

In [None]:
# Read back the 'evaluation' table; as the last expression of the cell the
# returned table is what the notebook displays.
io_class = dbReadWriteClassification()
io_class.get_table('evaluation')

In [None]:
# Classify every frame in the image directory and store the per-frame class
# probabilities in the 'test_probabilities_frames' table.
img_dir = os.path.join(
    os.path.expanduser('~/data'), '02_intermediate', 'train_split100_downsampleby20'
)
model_path = os.path.join(os.path.expanduser('~/models'), 'view_23_e5_class_11-Mar-2018')
if_exists = 'replace'
feature_dim = 1
label_dim = len(predict_views.view_classes)
model_name = os.path.basename(model_path)

probabilities = predict_views._classify(img_dir, feature_dim, label_dim, model_path)

# One 'output_<view>' column per view class; rows are keyed by file name.
df_columns = ['output_' + view for view in predict_views.view_classes]
df = pd.DataFrame.from_dict(probabilities, columns=df_columns, orient='index')
df = df.rename_axis('file_name').reset_index()

# Keep the part of the file name before the first '.', then take the study
# id from its second '_'-separated field.
df['file_name'] = df['file_name'].map(lambda name: name.split('.')[0])
df['study_id'] = df['file_name'].map(lambda name: name.split('_')[1])

# Metadata identifying this run.
df['model_name'] = model_name
df['date_run'] = datetime.datetime.now()
df['img_dir'] = os.path.basename(img_dir)

# Identifier columns first, class probabilities after.
cols = ['study_id', 'file_name', 'model_name', 'date_run', 'img_dir'] + df_columns
df = df.loc[:, cols]

io_classification = dbReadWriteClassification()
io_classification.save_to_db(df, 'test_probabilities_frames', if_exists)

logger.info(
    '{} prediction on frames with model {} (feature_dim={}).'.format(
        os.path.basename(img_dir), model_name, feature_dim
    )
)

In [None]:
# Aggregate the per-frame probabilities into one row per instance: the mean
# probability of every view class plus the number of frames that were averaged.
io_class = dbReadWriteClassification()
probabilities_frames = io_class.get_table('test_probabilities_frames')
# Drop the trailing '_<suffix>' (presumably the frame number - confirm) so that
# all frames of one instance share the same file_name.
probabilities_frames['file_name'] = probabilities_frames['file_name'].apply(
    lambda x: x.rsplit('_', 1)[0]
)

mean_cols = ['output_' + x for x in predict_views.view_classes]
agg_cols = dict.fromkeys(mean_cols, 'mean')
# Counting the frame id column yields the number of frames per instance.
agg_cols['test_probabilities_frame_id'] = 'count'

probabilities = (
    probabilities_frames.groupby(['study_id', 'file_name', 'model_name', 'date_run', 'img_dir'])
    .agg(agg_cols)
    .reset_index(drop=False)
    .rename(columns={'test_probabilities_frame_id': 'frame_count'})
)

# NOTE: if_exists is the global set in the frame-classification cell.
io_class.save_to_db(probabilities, 'test_probabilities_instances', if_exists)
# The message names the table that is actually written.
logger.info('Aggregated probabilities saved to table classification.test_probabilities_instances')


In [None]:
# Turn the instance-level probabilities into predicted view labels.
probabilities = io_class.get_table('test_probabilities_instances')

# Move the identifier columns into the index so that only the class
# probability columns take part in the arg-max below.
predictions = probabilities.drop(
    columns=['test_probabilities_instance_id', 'frame_count']
).set_index(['study_id', 'file_name', 'model_name', 'date_run', 'img_dir'])
# The column with the highest probability wins; strip its 'output_' prefix.
predictions['view23_pred'] = predictions.idxmax(axis=1).apply(lambda x: x.split('_', 1)[1])
# Map the predicted class onto the two view groupings defined in predict_views.
predictions['view4_dev'] = predictions['view23_pred'].map(predict_views.maps_dev)
predictions['view4_seg'] = predictions['view23_pred'].map(predict_views.maps_seg)

# Keep the predictions only and restore the identifiers as regular columns.
df = predictions[['view23_pred', 'view4_dev', 'view4_seg']].reset_index()

io_class.save_to_db(df, 'test_predictions', if_exists)
# The message names the table that is actually written.
logger.info('Predicted views saved to table classification.test_predictions')

In [None]:
# Attach the ground-truth view label to every stored prediction.
io_views = dbReadWriteViews()
io_class = dbReadWriteClassification()

# Ground truth comes from the instances_w_labels table of the views schema;
# align its column names and file-name format with the predictions table.
groundtruth = io_views.get_table('instances_w_labels')
groundtruth = groundtruth.rename(
    columns={'filename': 'file_name', 'studyidk': 'study_id', 'view': 'view_true'}
)
groundtruth['file_name'] = ('a_' + groundtruth['study_id'].astype(str)).str.cat(
    groundtruth['file_name'].astype(str), sep='_'
)
groundtruth = groundtruth.drop(columns=['sopinstanceuid', 'instanceidk'])
predictions = io_class.get_table('test_predictions')

# Left join: predictions without a ground-truth label are kept.
predict_truth = pd.merge(predictions, groundtruth, how='left', on=['file_name', 'study_id'])

In [None]:

def _groundtruth_views():
    """Join the stored view predictions with their ground-truth labels.

    Reads ``instances_w_labels`` through ``dbReadWriteViews`` and
    ``test_predictions`` through ``dbReadWriteClassification``, renames the
    label columns to the names used by the predictions table, rebuilds the
    file name as ``a_<study_id>_<file_name>`` and inner-joins both tables on
    ``file_name`` and ``study_id``.

    :return: merged DataFrame; the true view is in column ``view_true``

    """
    io_views = dbReadWriteViews()
    io_class = dbReadWriteClassification()

    # Ground-truth labels, renamed to match the predictions table.
    labels = io_views.get_table("instances_w_labels").rename(
        columns={"filename": "file_name", "studyidk": "study_id", "view": "view_true"}
    )
    labels["file_name"] = ("a_" + labels["study_id"].astype(str)).str.cat(
        labels["file_name"].astype(str), sep="_"
    )
    labels = labels.drop(columns=["sopinstanceuid", "instanceidk"])

    predicted = io_class.get_table("test_predictions")

    # Default (inner) join: only predictions that have a label are returned.
    return pd.merge(predicted, labels, on=["file_name", "study_id"])


def evaluate_view_map(img_dir, model_name, date_run, view_mapping, study_filter=None):
    """Compute per-view confusion-matrix rates for one classification run.

    The predictions joined with ground truth are restricted to the run
    identified by ``img_dir``, ``model_name`` and ``date_run`` and compared
    against ``view_true`` for the views a2c, a4c, plax and other.

    :param img_dir: ``img_dir`` value of the run to evaluate
    :param model_name: ``model_name`` value of the run to evaluate
    :param date_run: ``datetime.date`` compared with the date part of ``date_run``
    :param view_mapping: name of the prediction column to evaluate
    :param study_filter: optional dict ``{filter_name: study_ids}``; only its
        first entry is used. Rows are restricted to ``study_ids`` and
        ``filter_name`` is stored in the output. Any other value is stored
        as-is and no filtering takes place.
    :return: DataFrame with one row per view holding the run metadata and the
        tn/fp/fn/tp counts divided by the number of evaluated samples
    :raises ValueError: if no prediction matches the requested run and filter

    """
    view_labels = ["a2c", "a4c", "plax", "other"]

    predict_truth = _groundtruth_views()

    run_mask = (
        (predict_truth["img_dir"] == img_dir)
        & (predict_truth["model_name"] == model_name)
        & (pd.to_datetime(predict_truth["date_run"]).dt.date == date_run)
    )
    df = predict_truth.loc[run_mask, :]

    if isinstance(study_filter, dict):
        # Only the first entry is honoured: key = filter name, value = ids.
        filter_name, study_ids = list(study_filter.items())[0]
        df = df[df["study_id"].isin(study_ids)]
        study_filter = filter_name

    if df.empty:
        raise ValueError(
            "No predictions found for img_dir={!r}, model_name={!r}, "
            "date_run={!r}, study_filter={!r}".format(
                img_dir, model_name, date_run, study_filter
            )
        )

    mcm = multilabel_confusion_matrix(
        y_pred=df[view_mapping],
        y_true=df["view_true"],
        labels=view_labels,
    )

    # mcm has shape (n_labels, 2, 2) with each matrix laid out as
    # [[tn, fp], [fn, tp]], so flattening per label gives tn, fp, fn, tp.
    # Every 2x2 matrix sums to the sample count, hence the normalisation.
    df_mcm = pd.DataFrame(
        np.reshape(mcm, (len(view_labels), 4)) / np.sum(mcm[0]),
        columns=["tn", "fp", "fn", "tp"],
        index=view_labels,
    )

    eval_out = df_mcm.rename_axis("view").reset_index()

    eval_out["model_name"] = model_name
    eval_out["img_dir"] = img_dir
    # Positional access: after filtering, the row labelled 0 may be gone.
    eval_out["date_run"] = df["date_run"].iloc[0]
    eval_out["view_mapping"] = view_mapping
    eval_out["study_filter"] = study_filter

    # Put the five metadata columns in front of view/tn/fp/fn/tp.
    cols = list(eval_out.columns[5::]) + list(eval_out.columns[:5])
    eval_out = eval_out[cols]

    return eval_out


def test_evaluate_views(img_dir, model_name, date_run, study_filter=None, if_exists="append"):
    """Evaluate the stored predictions for both view mappings and save the result.

    For each of the mappings ``view4_dev`` and ``view4_seg`` the evaluation
    returned by ``evaluate_view_map`` is written to the ``evaluation`` table
    through ``dbReadWriteClassification``.

    :param img_dir: ``img_dir`` value of the run to evaluate
    :param model_name: ``model_name`` value of the run to evaluate
    :param date_run: ``datetime.date`` of the run to evaluate
    :param study_filter: optional study filter, forwarded to ``evaluate_view_map``
    :param if_exists: forwarded to ``save_to_db`` (default ``"append"``)

    """
    for view_mapping in ["view4_dev", "view4_seg"]:
        # Forward the caller's filter; it used to be replaced by None here.
        eval_out = evaluate_view_map(
            img_dir, model_name, date_run, view_mapping, study_filter=study_filter
        )

        io_class = dbReadWriteClassification()
        io_class.save_to_db(eval_out, "evaluation", if_exists)

        logger.info(
            "Evaluated {} {} {} {})".format(img_dir, model_name, date_run, view_mapping)
        )