In [None]:
%matplotlib inline
import os
import numpy as np
import pandas as pd
# Plot Images
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
plt.style.use('ggplot')
import matplotlib
# Set GPU usage
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Plotly 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [None]:
cd ..

In [None]:
# Import aip-eval
# CNNEvaluator is the single entry point used below: the evaluation run, the
# result tables and all plots are methods called on one CNNEvaluator instance.
from studio.evaluation.keras.evaluators import CNNEvaluator

In [None]:
# Model locations -- empty placeholders, fill them in before running.
# `model_path` is handed to CNNEvaluator(model_path=...) further down.
model_path = ''
# `ensemble_models_dir` is not referenced by any cell visible in this notebook section.
ensemble_models_dir = ''

In [None]:
# Data directories -- empty placeholders, fill them in before running.
# `directory_test` is passed as `data_dir` to `evaluator.evaluate(...)` below.
# It was never defined anywhere in the notebook, so the evaluation cell raised
# a NameError on a fresh kernel; it is declared here next to the other inputs.
directory_test = ''
# `dataframe_test_path` is passed as `dataframe_path` to `evaluator.evaluate(...)`.
dataframe_test_path = ''

In [None]:
# Model Map: a JSON file loaded into a DataFrame. The `diagnosis_id` and
# `diagnosis_name` columns are read here to build the concept list.
model_map_path = '/data/datasets/macroscopic/141_way_april_2020/141_way_diagnosis_df.json'
model_map_df = pd.read_json(model_map_path)

# One {'id', 'label'} dict per row of the model map, in row order.
concepts_list = [
    {'id': diagnosis['diagnosis_id'], 'label': diagnosis['diagnosis_name']}
    for _row_index, diagnosis in model_map_df.iterrows()
]

In [None]:
# Build the evaluator from the concept list and the model path configured above.
# Every later cell calls methods on this `evaluator` object.
evaluator = CNNEvaluator(
    concepts=concepts_list,
    model_path=model_path,
    batch_size=32,
    verbose=1,
)

In [None]:
# Run the evaluation on the test inputs and keep the returned value in `results`.
# The confusion matrix is requested with its text annotations turned off, and
# `top_k` is set to 10.
# NOTE(review): `directory_test` must be defined in an earlier cell before this runs.
results = evaluator.evaluate(
    data_dir=directory_test,
    dataframe_path=dataframe_test_path,
    custom_crop=True,
    confusion_matrix=True,
    show_confusion_matrix_text=False,
    top_k=10,
)

### Metrics

In [None]:
# Results in the default mode of `show_results` (presumably the averaged
# metrics, going by the variable name -- confirm), with `round_decimals=4`.
results_average = evaluator.show_results(round_decimals=4)

In [None]:
# Display the value returned by `evaluator.show_results(round_decimals=4)`.
results_average

In [None]:
# Same call in 'individual' mode (presumably one entry per class, going by the
# variable name -- confirm), with `round_decimals=4`.
results_classes = evaluator.show_results('individual', round_decimals=4)

In [None]:
# Display the value returned by `evaluator.show_results('individual', round_decimals=4)`.
results_classes

In [None]:
# Persist both result modes under the same id ('model'): the 'average' mode goes
# to 'results_average' and the 'individual' mode to 'results_class'.
evaluator.save_results(
    id='model',
    csv_path='results_average',
    mode='average',
    round_decimals=4,
    show_id=True,
)
evaluator.save_results(
    id='model',
    csv_path='results_class',
    mode='individual',
    round_decimals=4,
    show_id=True,
)

### Accuracy and Sensitivity Plots

In [None]:
# Top-k accuracy plot, called with the method's default arguments.
evaluator.plot_top_k_accuracy()

In [None]:
# Top-k sensitivity by concept plot, called with the method's default arguments.
evaluator.plot_top_k_sensitivity_by_concept()

In [None]:
# Top-1 sensitivity plot. No `n_samples` is passed here, unlike the top-5 call
# in a later cell. The returned object is kept so it can be previewed next.
class_sensitivity = evaluator.plot_sensitivity_per_samples(
    title='Test Top-1 sensitivity per number of evaluation samples',
    top_k=1,
    percentage=False,
)

In [None]:
# Preview the first rows of the object returned by the top-1 sensitivity call.
class_sensitivity.head()

In [None]:
# Top-5 sensitivity plot, with the sample counts taken from the `n_samples`
# column of the model map.
# NOTE: this rebinds `class_sensitivity`, replacing the top-1 result from the
# earlier cell.
class_sensitivity = evaluator.plot_sensitivity_per_samples(
    n_samples=model_map_df['n_samples'].tolist(),
    title='Test Top-5 sensitivity per number of training samples',
    top_k=5,
    percentage=False,
)

In [None]:
# Preview the first rows of the object returned by the top-5 sensitivity call.
class_sensitivity.head()

### Confidence of the Classifier

In [None]:
# Plot the confidence interval; the call's return value is unpacked into three
# names (mean, lower, upper). None of them is used by later visible cells.
mean, lower, upper = evaluator.plot_confidence_interval()

### Histogram of Probabilities

In [None]:
# Probability histogram, called with the method's default arguments.
evaluator.plot_probability_histogram()

### Uncertainty

In [None]:
# Compute the uncertainty distribution; the return value is kept in
# `uncertainty` but is not used by later visible cells.
uncertainty = evaluator.compute_uncertainty_distribution()

### Threshold Impact

In [None]:
# Show the threshold impact using the evaluator's own `probabilities` and
# `labels` attributes (presumably populated by the `evaluate` call above -- confirm).
vals = evaluator.show_threshold_impact(evaluator.probabilities, evaluator.labels)

### Errors when most confident

In [None]:
# Plot 20 images in 'errors' mode of `plot_most_confident`.
evaluator.plot_most_confident('errors', n_images=20)

### Analysis of the classes with the most errors

In [None]:
# Fetch the confusion-matrix error keys as two values: `sorted_names` and
# `sorted_counts`. `sorted_names[0]` is inspected in the last cell.
# NOTE(review): the sort direction is not visible here -- confirm.
sorted_names, sorted_counts = evaluator.get_keys_confusion_matrix_errors()

In [None]:
# Confusion-matrix errors as a DataFrame-like object (it is written out with `.to_csv` next).
errors_cf_matrix = evaluator.get_errors_confusion_matrix_df()

In [None]:
# Write the errors table to 'classes_more_confused.csv' in the current working
# directory, without the index column.
errors_cf_matrix.to_csv('classes_more_confused.csv', index=False)

In [None]:
# Group image paths by prediction. The result is indexed in the next cell by an
# entry of `sorted_names`, then by the 'image_paths' key.
dict_paths = evaluator.get_image_paths_by_prediction(evaluator.probabilities, evaluator.labels, evaluator.concept_labels)

In [None]:
# Inspect the first entry of `sorted_names`: print its key, print how many image
# paths are stored for it, then plot 20 of those images.
top_confusion_key = sorted_names[0]
print(top_confusion_key)
top_confusion_paths = dict_paths[top_confusion_key]['image_paths']
print(len(top_confusion_paths))
evaluator.plot_images(n_images=20, image_paths=top_confusion_paths)