In [2]:
import numpy as np

import torch
import matplotlib.pyplot as plt
import seaborn as sns
import seml.database as db_utils
from pathlib import Path

import torch

from itertools import product

from statsmodels.stats.proportion import proportion_confint

import pandas as pd

import os

from utils import load_results

In [3]:
collection = 'invariance_sample_pred'


jk_config = {
    'username': 'schuchaj',
    'password': 'coolpassword',
    'host': 'hostofyourdatabase',
    'port': 27017,
    'db_name': 'nameofyourdatabase'
}

col = db_utils.get_collection(collection, mongodb_config=jk_config)

In [4]:
def get_experiments(col, restrictions={}):
    
    restrictions['status'] = 'COMPLETED'

    if col.count_documents(restrictions) == 0:
        raise ValueError('No matches!')

    exps = col.find(restrictions)
    
    return exps

In [5]:
def calc_accuracy(raw_results, abstain=False):
    targets = raw_results['targets']
    votes = raw_results['votes_pred']

    pred = votes.argmax(axis=1)

    if abstain:
        n_datapoints = targets.shape[0]

        n_samples = votes.sum(axis=1)
        n_majority_votes = votes[np.arange(n_datapoints), pred]
        p_lowers = proportion_confint(n_majority_votes, n_samples, alpha=0.001, method='beta')[0]

        pred[p_lowers <= 0.5] = -1

    return (pred == targets).mean()

In [6]:
def generate_exp_result_dict(exp):

    result_dict = {}
    
    result_dict['dataset'] = exp['config']['dataset']['name']
    result_dict['model'] = exp['config']['train_loading']['restrictions']['model']['model_type']

    overwrite_wrapper = exp['config']['overwrite_invariance_wrapper']

    if overwrite_wrapper:
        result_dict['wrapper'] = exp['config']['invariance_wrapper_params']['wrapper_type']
    else:
        result_dict['wrapper'] = exp['config']['train_loading']['restrictions']['model']['invariance_wrapper_params']['wrapper_type']

    result_dict['smoothed_training'] = exp['config']['train_loading']['restrictions']['training_params']['add_noise']

    result_dict['smoothing_std'] = exp['config']['sample_params']['std']

    result_dict['raw_results_file'] = exp['result']['save_file']

    raw_results = torch.load(result_dict['raw_results_file'])

    result_dict['accuracy'] = calc_accuracy(raw_results, abstain=False)
    result_dict['accuracy_w_abstains'] = calc_accuracy(raw_results, abstain=True)

    return result_dict

In [7]:
experiments = get_experiments(col)
results = load_results(
            generate_exp_result_dict,
            experiments,
            results_file='/nfs/staff-ssd/schuchaj/pointcloud_invariance_plots/neurips/smoothed_accuracy/raw_data',
            #overwrite=False
            )

In [8]:
results

Unnamed: 0,dataset,model,wrapper,smoothed_training,smoothing_std,raw_results_file,accuracy,accuracy_w_abstains
0,mnist,pointnet,ensemble_pca,False,0.01,/nfs/staff-ssd/schuchaj/pointcloud_invariance_...,0.971300,0.969700
1,mnist,pointnet,ensemble_pca,False,0.02,/nfs/staff-ssd/schuchaj/pointcloud_invariance_...,0.941700,0.933100
2,mnist,pointnet,ensemble_pca,False,0.03,/nfs/staff-ssd/schuchaj/pointcloud_invariance_...,0.888000,0.862400
3,mnist,pointnet,ensemble_pca,False,0.04,/nfs/staff-ssd/schuchaj/pointcloud_invariance_...,0.822100,0.772400
4,mnist,pointnet,ensemble_pca,False,0.05,/nfs/staff-ssd/schuchaj/pointcloud_invariance_...,0.737800,0.656700
...,...,...,...,...,...,...,...,...
141,modelnet40,dgcnn,ensemble_pca,True,0.70,/nfs/staff-ssd/schuchaj/pointcloud_invariance_...,0.154376,0.087520
142,modelnet40,dgcnn,ensemble_pca,True,0.75,/nfs/staff-ssd/schuchaj/pointcloud_invariance_...,0.123177,0.080632
143,modelnet40,dgcnn,ensemble_pca,True,0.80,/nfs/staff-ssd/schuchaj/pointcloud_invariance_...,0.107780,0.064830
144,modelnet40,dgcnn,ensemble_pca,True,0.90,/nfs/staff-ssd/schuchaj/pointcloud_invariance_...,0.066045,0.039303


In [21]:
def draw_data(results, abstain=False, legend_fontsize=7):
    results_smoothed = results.loc[results['smoothed_training'] == True]
    unique_combinations = results_smoothed[['dataset', 'model', 'wrapper']].drop_duplicates()

    stds = results_smoothed.groupby(['dataset', 'model', 'wrapper'])['smoothing_std'].apply(list).reset_index()

    if abstain:
        accuracies = results_smoothed.groupby(['dataset', 'model', 'wrapper'])['accuracy_w_abstains'].apply(list).reset_index()
        accuracies.rename(columns={'accuracy_w_abstains': 'accuracy'}, inplace=True)
        print(accuracies)
    else:
        accuracies = results_smoothed.groupby(['dataset', 'model', 'wrapper'])['accuracy'].apply(list).reset_index()

    sns.set_theme()
    pal = sns.color_palette('colorblind', 3)

    parameter_combinations = [
        ['mnist', 'pointnet', None, pal[0], '--'],
        ['modelnet40', 'pointnet', 'EnsPointNet', pal[0], '-'],
        ['modelnet40', 'pointnet_attention', 'AttnPointNet', pal[1], '-'],
        ['modelnet40', 'dgcnn', 'EnsDGCNN', pal[2], '-']
    ]

    fig, ax = plt.subplots(1, 1)

    for dataset, model, label, color, linestyle in parameter_combinations:
        print(pal[0])
        s = stds.loc[(stds['dataset'] == dataset) & (stds['model'] == model)]['smoothing_std'].iloc[0]
        a = accuracies.loc[(accuracies['dataset'] == dataset) & (accuracies['model'] == model)]['accuracy'].iloc[0]

        s.append(results.loc[(results['dataset'] == dataset) & (results['model'] == model)
                             & (results['smoothing_std'] == 0)]['smoothing_std'].iloc[0])

        a.append(results.loc[(results['dataset'] == dataset) & (results['model'] == model)
                             & (results['smoothing_std'] == 0)]['accuracy'].iloc[0])

        s = np.array(s)
        a = np.array(a)
        new_order = np.argsort(s)
        s = s[new_order]
        a = a[new_order]

        ax.plot(s,a, zorder=3, color=color, label=label, linestyle=linestyle)

    ax2 = ax.twinx()
    ax2.plot([], [], linestyle='--', color='black', label='MNIST')
    ax2.plot([], [], linestyle='-', color='black', label='ModelNet40')
    ax2.get_yaxis().set_visible(False)

    ax.set_xlim(0, 0.5)
    ax.set_ylim(0, 1)

    ax.set_xlabel('$\sigma$')
    ax.set_ylabel('Accuracy')
    
    ax2.legend(loc='upper right', fontsize=legend_fontsize)
    ax.legend(loc='lower left', fontsize=legend_fontsize)

        #a = accuracies(stds.loc[([results]['dataset'] == dataset) & (results['model'] == model)])

    #accuracies.loc[('dataset' == 'mnist') & ('model' == '']

    plt.savefig('/nfs/staff-ssd/schuchaj/pointcloud_invariance_plots/neurips/smoothed_accuracy/smoothed_accuracy', format='pgf')

In [22]:
draw_data(results)

(0.00392156862745098, 0.45098039215686275, 0.6980392156862745)
(0.00392156862745098, 0.45098039215686275, 0.6980392156862745)
(0.00392156862745098, 0.45098039215686275, 0.6980392156862745)
(0.00392156862745098, 0.45098039215686275, 0.6980392156862745)
