In [1]:
import os
import sys

sys.path.insert(0, '../src')

import numpy as np
import pandas as pd

from loader import ImagesDataset

In [2]:
# Single source of randomness for the notebook: SEED is reused further down
# (it is also handed to dataset.split) so reruns stay reproducible.
SEED = 11
rng = np.random.default_rng(seed=SEED)

In [3]:
# Location of the image dataset, relative to the notebook's working directory.
DATA_PATH = '../images'
# NOTE(review): SHAPE is not referenced anywhere else in the visible notebook —
# presumably the (height, width, channels) of the model inputs; confirm it is still needed.
SHAPE = (128, 128, 3)

# ImagesDataset comes from the project-local `loader` module (made importable via '../src').
# NOTE(review): preload=False / encode_labels=True presumably mean lazy image loading and
# integer-encoded labels — confirm against the ImagesDataset implementation.
dataset = ImagesDataset(path=DATA_PATH, preload=False, encode_labels=True)
# Size of the label encoder's `classes_` array — presumably one entry per distinct class.
n_classes = dataset.label_encoder.classes_.size

# SEED is passed as `shuffle`; assumes split() interprets it as a random seed so the
# split is reproducible — TODO verify against ImagesDataset.split.
dataset.split(shuffle=SEED)

100%|[38;2;76;175;80m████████████████████████████████████████████[0m| 9/9 [00:00<00:00, 444.33it/s][0m


In [4]:
# Class distribution of the test split: the distinct labels found at the test
# indices and the fraction of those samples that falls into each label.
test_labels = dataset.labels[dataset.test.indices]
labels, counts = np.unique(test_labels, return_counts=True)
total = counts.sum()
shares = counts / total

In [5]:
# Metric families. The tables built from them later use one row per class for the
# class-wise metrics and a single row for the general ones.
classwise_metrics = ['precision', 'recall']
general_metrics = ['accuracy', 'f1_score']
metrics_names = [*classwise_metrics, *general_metrics]

# Layout: metrics[<metric name>][<model name>] -> value(s) loaded for that model.
metrics = {name: dict() for name in metrics_names}

METRICS_PATH = '../metrics'
# Hidden entries (names starting with '.') are not metric files; iterate in sorted order
# so models are inserted in a stable, filename-determined order.
visible_files = (entry for entry in sorted(os.listdir(METRICS_PATH)) if not entry.startswith('.'))
for file in visible_files:
    file_path = os.path.join(METRICS_PATH, file)
    # NOTE: allow_pickle=True unpickles the file contents — only load metric files from a
    # trusted source. .item() extracts the single stored object (indexed by metric name below).
    metrics_dict = np.load(file_path, allow_pickle=True).item()

    # The model name is the second '_'-separated token of the part before the first '.',
    # e.g. '<prefix>_<MODEL>.<ext>' -> '<MODEL>'.
    stem = file.split('.')[0]
    model_name = stem.split('_')[1]
    for name in metrics_names:
        metrics[name][model_name] = metrics_dict[name]

In [6]:
html = ''

# CSS shared by every metrics table: enlarged monospace caption, right-aligned monospace body.
styles = [
    {
        'selector': "caption",
        'props': [
            ('font-family', 'monospace'),
            ("font-size", "150%")
        ]
    },
    {
        'selector': "tbody",
        'props': [
            ('font-family', 'monospace'),
            ('text-align', 'right'),
        ]
    },
]


def formatter(value):
    """Render a fraction as a percentage string with one decimal place.

    Plain string formatting is used instead of ``(value * 100).round(1)`` because a
    built-in Python ``float`` has no ``.round`` method; this version works for both
    NumPy scalars and Python floats.
    """
    return f"{value * 100:.1f}"


def _styler_to_html(styler):
    """Return the HTML markup of *styler* on both old and new pandas versions.

    ``Styler.render`` was deprecated in pandas 1.4 and removed in 2.0 in favour of
    ``Styler.to_html``; ``render`` is only used where ``to_html`` does not exist.
    """
    to_html = getattr(styler, 'to_html', None)
    if to_html is not None:
        return to_html()
    return styler.render()


for metrics_name in metrics_names:
    metrics_dict = metrics[metrics_name]

    if metrics_name in classwise_metrics:
        # One row per class label; 'shares' is the per-class frequency array.
        metrics_dict = {'shares': shares, **metrics_dict}
        index = labels
    else:
        # Single-row table: each scalar is wrapped in a list, and 'shares' holds the
        # uniform 1 / n_classes value instead of per-class frequencies.
        metrics_dict = {'shares': 1 / n_classes, **{key: [value] for key, value in metrics_dict.items()}}
        # Non-breaking spaces pad the row label.
        index = ['\u00A0' * 3 + 'altogether']

    df = pd.DataFrame(data=metrics_dict, index=index)
    # Colour cells on a fixed 0..1 scale so tables are comparable with each other.
    styled_df = (
        df.style
        .background_gradient('RdYlGn', vmin=0, vmax=1, axis=0)
        .format(formatter)
        .set_caption(metrics_name)
        .set_table_styles(styles)
    )
    display(styled_df)

    html += _styler_to_html(styled_df)

# Persist all tables, concatenated in display order, as one HTML report.
with open('../report.html', 'w') as f:
    f.write(html)

Unnamed: 0,shares,GUESS,LOGISTIC,VGG,EFFICIENTNET,KNN,LGBM
0,4.2,8.3,0.0,63.6,50.0,66.7,55.6
1,25.4,30.9,34.7,69.4,71.1,72.5,61.0
2,16.2,19.6,40.0,54.4,63.8,57.1,55.3
3,14.8,12.5,24.1,76.7,67.7,74.2,71.4
4,15.8,13.6,43.9,80.0,73.3,78.9,83.9
5,6.7,5.6,25.7,54.2,52.8,48.3,66.7
6,6.7,11.1,18.2,60.0,53.3,72.7,77.8
7,10.2,16.0,20.0,74.1,81.8,66.7,59.4


Unnamed: 0,shares,GUESS,LOGISTIC,VGG,EFFICIENTNET,KNN,LGBM
0,4.2,8.3,0.0,58.3,30.0,50.0,41.7
1,25.4,34.7,23.6,81.9,84.3,80.6,84.7
2,16.2,19.6,30.4,67.4,68.2,69.6,56.5
3,14.8,11.9,47.6,54.8,60.0,54.8,59.5
4,15.8,13.3,40.0,71.1,62.3,66.7,57.8
5,6.7,5.3,47.4,68.4,65.5,73.7,73.7
6,6.7,10.5,10.5,31.6,50.0,42.1,36.8
7,10.2,13.8,17.2,69.0,64.3,69.0,65.5


Unnamed: 0,shares,GUESS,LOGISTIC,VGG,EFFICIENTNET,KNN,LGBM
altogether,12.5,18.7,29.9,67.3,67.0,67.3,64.4


Unnamed: 0,shares,GUESS,LOGISTIC,VGG,EFFICIENTNET,KNN,LGBM
altogether,12.5,14.7,26.4,64.6,62.3,65.2,62.8


In [8]:
from IPython.display import HTML, display

# Styling for the footnote paragraphs; appended to each disclaimer.
paragraph_style = "<style>p { font-size: 20px}p { font-family: Courier; }</style>"

# The 'shares' column is computed from the labels at dataset.test.indices, so the footnote
# names the test split (it previously said "train", contradicting the computation).
disclaimer_0 = "<p>* values in 'shares' column are class frequencies in test dataset</p>" + paragraph_style
disclaimer_1 = "<p>** all values are normalized to [0, 100] range</p>" + paragraph_style
heading = disclaimer_0 + disclaimer_1
display(HTML(heading))