In [None]:
# Enable IPython's autoreload extension. Mode 2 re-imports modules before each
# cell executes, so edits to the `cac` package are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [None]:
import os
from os.path import dirname, join, exists
from copy import deepcopy
from typing import List
import multiprocessing as mp
import torch
import numpy as np
import pandas as pd
from scipy.special import softmax
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns
from tqdm import tqdm
from IPython.display import display, HTML, clear_output, Markdown, Audio
from ipywidgets import HBox, Label, VBox, Dropdown, Layout, Output, Image

from cac.config import Config, DATA_ROOT
from cac.utils.logger import set_logger, color
from cac.utils.metrics import PrecisionAtRecall
from cac.data.dataloader import get_dataloader
from cac.analysis.classification import ClassificationAnalyzer

In [None]:
import warnings
# Install a blanket 'ignore' filter for the rest of the session.
# NOTE(review): this hides every warning category, including deprecation
# warnings raised by the libraries imported above.
warnings.simplefilter('ignore')

### Define inputs

In [None]:
# Experiment config file handed to Config below
# (presumably relative to the project's config root -- TODO confirm).
VERSION = 'experiments/covid-detection/v9_4_cough_adam_1e-4.yml'
# User name handed to Config together with VERSION.
USER = 'piyush'
# Epoch used as the analyzer checkpoint and to pick the 'epoch_<N>' columns
# that are read from the logs further down.
BEST_EPOCH = 99

In [None]:
# Batch size and `num_workers` value passed to get_dataloader for the 'val' split.
BATCH_SIZE = 10
NUM_WORKERS = 10

### Define config

In [None]:
# Build the experiment configuration from the VERSION file and USER defined above.
config = Config(VERSION, USER)

### Load data

In [None]:
# Dataloader for the 'val' split, created with shuffling and drop_last both
# disabled. The second value returned by get_dataloader is discarded.
# NOTE(review): val_dataloader is not referenced again in the cells visible here.
val_dataloader, _ = get_dataloader(
    config.data, 'val',
    BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=False,
    drop_last=False
)

### Initialize the analyzer module

In [None]:
# Analyzer for this experiment, pointed at the BEST_EPOCH checkpoint
# explicitly (load_best is off) and created with debug enabled.
analyzer = ClassificationAnalyzer(
    config,
    checkpoint=BEST_EPOCH,
    load_best=False,
    debug=True,
)

### Load epochwise logs

In [None]:
# Load the epoch-wise logs for the 'val' mode through the analyzer.
logs = analyzer.load_epochwise_logs(mode='val')

In [None]:
# Show which entries the logs contain; the cells below read
# 'predict_probs' and 'predict_labels'.
logs.keys()

In [None]:
def _check_predicted_labels(_predict_labels, _predict_probs, recall=0.9):
    """Add per-epoch predicted-label columns to ``_predict_labels`` in place.

    Nothing happens unless ``_predict_labels`` has exactly three columns
    (presumably meaning that no per-epoch label columns were logged --
    TODO confirm). Otherwise, for every column of ``_predict_probs`` whose
    name contains ``'epoch'``, the column-1 probabilities are thresholded at
    the value returned by ``PrecisionAtRecall(recall=recall)`` and the
    resulting 0/1 list is stored under the same column name.

    Args:
        _predict_labels: DataFrame with a ``'targets'`` column; modified in place.
        _predict_probs: DataFrame whose ``'epoch'`` columns hold one
            probability vector per row.
        recall: recall value forwarded to ``PrecisionAtRecall``.

    Returns:
        None.
    """
    if len(_predict_labels.columns) != 3:
        return

    ground_truth = torch.tensor(_predict_labels['targets'])
    epoch_names = [name for name in _predict_probs.columns if 'epoch' in name]

    for name in tqdm(epoch_names):
        stacked = np.stack(_predict_probs[name].values)
        # only for binary: column 1 is used as the score to threshold
        scores = torch.from_numpy(stacked)[:, 1]
        metric = PrecisionAtRecall(recall=recall)
        _, _, cutoff = metric(ground_truth, scores)
        _predict_labels[name] = scores.ge(cutoff).int().tolist()

In [None]:
# Table of logged probabilities: has a 'targets' column and 'epoch_<N>' columns
# (both are read below).
predict_probs = logs['predict_probs']

In [None]:
# Table of logged labels. This is a reference to the entry inside `logs`,
# not a copy.
predict_labels = logs['predict_labels']

In [None]:
# Display the logged labels table before any per-epoch columns are filled in.
logs['predict_labels']


In [None]:
# Fills in per-epoch label columns in place when the table only has three
# columns; because `predict_labels` is the object stored in `logs`, the new
# columns are visible through logs['predict_labels'] as well.
_check_predicted_labels(predict_labels, predict_probs)

In [None]:
# Stack the BEST_EPOCH probability rows into one 2-D array and keep column 1
# (the same column that _check_predicted_labels thresholds).
predict_proba = np.stack(predict_probs['epoch_{}'.format(BEST_EPOCH)].values)[:, 1]

In [None]:
# Ground-truth targets as a NumPy array, taken from the probabilities table.
targets = predict_probs['targets'].values

In [None]:
# Read the column from `logs` instead of from `predict_labels` itself: this
# statement rebinds `predict_labels` from the DataFrame to a NumPy array, so
# indexing the old name by column would fail if the cell were run a second
# time. `logs['predict_labels']` is the same DataFrame object, so the result
# of the first run is unchanged.
predict_labels = np.stack(logs['predict_labels']['epoch_{}'.format(BEST_EPOCH)].values)

In [None]:
# Sanity check: the three arrays are expected to have one entry per row.
targets.shape, predict_proba.shape, predict_labels.shape

In [None]:
# NOTE(review): shows the same three shapes as the previous cell, only in a
# different order.
predict_proba.shape, predict_labels.shape, targets.shape

In [None]:
# Display the BEST_EPOCH predicted labels (now a NumPy array, no longer a DataFrame).
predict_labels

In [None]:
from sklearn.metrics import accuracy_score
from scipy.stats import mode

# `.indices` maps every unique_id to the *positional* row numbers of its group.
# `predict_labels` and `targets` are plain NumPy arrays at this point, so they
# must be indexed by position; `.groups` returns index *labels*, which only
# coincide with positions when the frame has a default RangeIndex.
# NOTE(review): `targets` comes from the probabilities table while the grouping
# comes from the labels table -- this assumes both share the same row order.
groups = logs['predict_labels'].groupby('unique_id').indices
agreements = {}
accuracy = {}

for _id in groups:
    print(_id)
    indices = groups[_id]

    _predictions = predict_labels[indices]
    _targets = targets[indices]

    # every row that belongs to one unique_id must carry the same target
    assert len(np.unique(_targets)) == 1
    # Only the first group is processed for now; the following cells inspect
    # its `_predictions`. The per-group aggregation below is still disabled.
    break
#     agreements[_id] = mode(_predictions).count[0]
#     accuracy[_id] = np.round(accuracy_score(_targets, _predictions), 3)

In [None]:
# Predicted labels of the single group handled by the loop above
# (the loop variable is still in scope after the `break`).
_predictions

In [None]:
# Number of rows in the group that agree with the most frequent prediction.
# `np.atleast_1d` keeps this working across SciPy versions: older releases
# return a length-1 array for `count`, whereas SciPy >= 1.11 returns a bare
# scalar for 1-D input, which cannot be indexed with [0].
np.atleast_1d(mode(_predictions).count)[0]