# Human Activity Recognition - SSL JOINT Results

Last Update : 31 July 2019

In [1]:
N_THREADS = 8  # handed to torch.set_num_threads in the environment cell
# Nota Bene : notebooks don't deallocate GPU memory; when this flag is True the
# environment cell blanks CUDA_VISIBLE_DEVICES before torch is imported.
IS_FORCE_CPU = True # can also be set in the trainer

## Environment

In [2]:
cd ..

/conv


In [3]:
%autosave 600
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# CENTER PLOTS
from IPython.core.display import HTML
display(HTML(""" <style> .output_png {display: table-cell; text-align: center; margin:auto; }
.prompt display:none;}  </style>"""))

import os
if IS_FORCE_CPU:
    os.environ['CUDA_VISIBLE_DEVICES'] = ""

import sys
sys.path.append("notebooks")

import numpy as np
import matplotlib.pyplot as plt
from functools import partial
import pandas as pd
import h5py


import torch
torch.set_num_threads(N_THREADS)

Autosaving every 600 seconds


# Dataset

In [4]:
from skssl.transformers.neuralproc.datasplit import CntxtTrgtGetter, GetRandomIndcs, get_all_indcs
from utils.data.tsdata import get_timeseries_dataset, SparseMultiTimeSeriesDataset

# Three context/target splitters. The min/max arguments of GetRandomIndcs are
# presumably fractions of the available points -- TODO confirm in skssl.

# Random subset as context, every point as target.
get_cntxt_trgt_test = CntxtTrgtGetter(
    contexts_getter=GetRandomIndcs(min_n_indcs=0.1, max_n_indcs=0.5),
    targets_getter=get_all_indcs,
    is_add_cntxts_to_trgts=False,  # don't add the context points to the targets
)

# Every point is both context and target (used as the validation collate below).
get_cntxt_trgt_feat = CntxtTrgtGetter(
    contexts_getter=get_all_indcs,
    targets_getter=get_all_indcs,
    is_add_cntxts_to_trgts=False,  # don't add the context points to the targets
)

# Random context and random target subsets (used as the training collate below).
get_cntxt_trgt = CntxtTrgtGetter(
    contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.99),
    targets_getter=GetRandomIndcs(min_n_indcs=0.5, max_n_indcs=0.99),
    is_add_cntxts_to_trgts=False,  # don't add the context points to the targets
)

In [5]:
# Load the "har" time-series dataset with split="both"
# (presumably train and test together -- TODO confirm in utils.data.tsdata).
data_both = get_timeseries_dataset("har")(split="both")

def cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=False):
    """Build a DataLoader ``collate_fn`` that adds a context/target split.

    Parameters
    ----------
    get_cntxt_trgt : callable
        Called as ``get_cntxt_trgt(X, y)`` on the collated inputs; must return
        four values, stored as ``"X"``, ``"y"``, ``"X_trgt"`` and ``"y_trgt"``.
    is_repeat_batch : bool, optional
        If True, ``X``, ``y`` and the collated targets are concatenated with
        themselves along dim 0 before the split is computed.

    Returns
    -------
    callable
        Collate function taking a list of ``(inputs_dict, target)`` samples and
        returning the default-collated batch with the four entries above set.
    """
    def mycollate(batch):
        # Shortest first dimension among all entries whose key contains "X".
        lengths = []
        for sample in batch:
            for key, value in sample[0].items():
                if "X" in key:
                    lengths.append(value.size(0))
        shortest = min(lengths)

        # Keep only the first `shortest` steps of every entry
        # (assumes the points were already shuffled upstream).
        trimmed = []
        for sample in batch:
            inputs = {key: value[:shortest, ...] for key, value in sample[0].items()}
            trimmed.append((inputs, sample[1]))

        collated = torch.utils.data.dataloader.default_collate(trimmed)
        features = collated[0]

        X, y = features["X"], features["y"]

        if is_repeat_batch:
            X = torch.cat((X, X), dim=0)
            y = torch.cat((y, y), dim=0)
            # targets are duplicated as well so they stay aligned with X / y
            collated[1] = torch.cat((collated[1], collated[1]), dim=0)

        features["X"], features["y"], features["X_trgt"], features["y_trgt"] = get_cntxt_trgt(X, y)

        return collated

    return mycollate

In [6]:
X_DIM = 1  # 1D spatial input (although actually 2 but the first is for sparse channels)
Y_DIM = data_both.data.shape[-1] # multiple channels: size of the last axis of the data array
N_TARGETS = len(np.unique(data_both.targets))  # number of distinct values found in the targets

# Values swept by the training loops below; they are rendered as int(value * 100)
# percentages in the run names (e.g. 0.01 -> "lab1%").
sampling_percentages = [1]
label_percentages = [0.01, 0.1]

## Model

In [7]:
import torch.nn as nn
from skssl.transformers import GlobalNeuralProcess, NeuralProcessLoss, AttentiveNeuralProcess, NeuralProcessSSLLoss
from skssl.utils.helpers import rescale_range
from skssl.predefined import UnetCNN, CNN, MLP, SparseSetConv, SetConv, MlpRBF, GaussianRBF, BatchSparseSetConv
from skssl.transformers.neuralproc.datasplit import precomputed_cntxt_trgt_split
from utils.helpers import count_parameters

In [8]:
from copy import deepcopy

# Registry mapping a model name to a zero-argument factory (a `partial`).
models = {}

# Lower bound on the predicted std; also passed as `min_sigma` to the SSL loss later on.
min_std = 5e-3

# 1D U-Net factory, used below as the `TmpSelfAttn` component.
unet = partial(
    UnetCNN,
    Conv=torch.nn.Conv1d,
    Pool=torch.nn.MaxPool1d,
    upsample_mode="linear",
    n_layers=18,
    is_double_conv=True,
    is_depth_separable=True,
    Normalization=torch.nn.BatchNorm1d,
    is_chan_last=True,
    bottleneck=None,
    kernel_size=7,
    max_nchannels=256,
    is_force_same_bottleneck=True,
    _is_summary=True,
)

# Shared constructor arguments for both GlobalNeuralProcess variants.
kwargs = {
    "x_dim": X_DIM,
    "y_dim": Y_DIM,
    "min_std": min_std,
    "n_tmp_queries": 128,
    "r_dim": 64,
    "keys_to_tmp_attn": partial(SetConv, RadialBasisFunc=GaussianRBF),
    "TmpSelfAttn": unet,
    "tmp_to_queries_attn": partial(SetConv, RadialBasisFunc=GaussianRBF),
    "is_skip_tmp": False,
    "is_use_x": False,
    "get_cntxt_trgt": precomputed_cntxt_trgt_split,
    "is_encode_xy": False,
    # MLP head mapping 256 + 4 * Y_DIM input features to the N_TARGETS classes
    "Classifier": partial(
        MLP,
        input_size=256 + Y_DIM * 4,
        output_size=N_TARGETS,
        dropout=0.,
        hidden_size=128,
        n_hidden_layers=3,
        is_res=True,
    ),
}

models["ssl_classifier_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs)

# Same configuration with the classifier switched off.
kwargs_bis = deepcopy(kwargs)
kwargs_bis.update(Classifier=None)

models["transformer_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs_bis)

In [9]:
from utils.helpers import count_parameters

# Instantiate every registered factory once and report its parameter count.
for name, build_model in models.items():
    n_param = count_parameters(build_model())
    print(name, "- N Param:", n_param)

ssl_classifier_gnp_large_shared_bottleneck - N Param: 1078238
transformer_gnp_large_shared_bottleneck - N Param: 1006936


In [10]:
def load_pretrained_(models, sampling_percentages):
    """Re-create both models and warm-start the classifier from the saved transformer.

    Works in place on ``models`` (hence the trailing underscore): the
    ``ssl_classifier_*`` and ``transformer_*`` entries are replaced by freshly
    instantiated ``GlobalNeuralProcess`` modules. For every sampling percentage
    the transformer is then loaded through ``train_models_(..., is_retrain=False)``
    from ``chckpnt_dirname_old`` and its weights are copied over the matching
    entries of the classifier model's state dict.

    Relies on notebook globals: ``kwargs``, ``deepcopy``, ``GlobalNeuralProcess``,
    ``train_models_`` and ``chckpnt_dirname_old``.

    Parameters
    ----------
    models : dict
        Model registry, modified in place.
    sampling_percentages : iterable of float
        Fractions used to build the checkpoint name ``"{int(100 * p)}%har"``.
        With several values, each loaded checkpoint overwrites the same
        classifier weights, so the last one wins.

    Returns
    -------
    None
    """
    # ALREADY INITIALIZE (instantiate, not just a factory) TO BE ABLE TO LOAD
    models["ssl_classifier_gnp_large_shared_bottleneck"] = GlobalNeuralProcess(**kwargs)

    kwargs_bis = deepcopy(kwargs)
    kwargs_bis["Classifier"] = None

    models["transformer_gnp_large_shared_bottleneck"] = GlobalNeuralProcess(**kwargs_bis)

    # load all transformers
    for sampling_perc in sampling_percentages:
        for k, m in models.items():
            if "transformer" not in k:
                continue

            out = train_models_({"{}%har".format(int(sampling_perc*100)):
                                 (None, None)},
                                {k: m},
                                chckpnt_dirname=chckpnt_dirname_old,
                                seed=None,
                                is_retrain=False)

            pretrained_model = out[list(out.keys())[0]].module_

            # Start from the classifier's own state dict so that parameters the
            # transformer lacks keep their fresh initialisation, then overwrite
            # the shared ones with the pretrained weights.
            ssl_model = models[k.replace("transformer", "ssl_classifier")]
            model_dict = ssl_model.state_dict()
            model_dict.update(pretrained_model.state_dict())
            ssl_model.load_state_dict(model_dict)

# Hyperparameter Optimisation


In [11]:
from ntbks_helpers import train_models_
from skorch.dataset import CVSplit
from utils.data.ssldata import get_train_dev_test_ssl
import random

N_EPOCHS = 100  # passed as max_epochs to train_models_
BATCH_SIZE = 32
IS_RETRAIN = False # if false load precomputed
# Directory that load_pretrained_ reads the pretrained transformer from.
chckpnt_dirname_old="results/challenge/har/"
# Directory passed to train_models_ for the fine-tuning runs below.
chckpnt_dirname="results/challenge/har_new/"

from skssl.utils.helpers import HyperparameterInterpolator

# Steps per epoch are derived from the size of the full ("both") dataset.
n_steps_per_epoch = len(data_both)//BATCH_SIZE
# Weight of the supervised loss; presumably goes linearly from 1 to 10 over all
# training steps -- TODO confirm. Every training cell below rebinds this name
# with an end value of 50 before training.
get_lambda_clf=HyperparameterInterpolator(1, 10, N_EPOCHS*n_steps_per_epoch, mode="linear")

# No Augment

In [12]:
from skorch.callbacks import Freezer, LRScheduler

# Collects whatever `train_models_` returns for every run; later cells add to it.
data_trainers = {}

for run in range(3):
    for sampling_perc in sampling_percentages:
        for label_perc in label_percentages:
            # Fresh models, classifier warm-started from the saved transformer.
            load_pretrained_(models, [sampling_perc])

            # Rebinds the global that the `get_lambda_sup` lambda below reads.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0, 10000),
                is_augment=False,
            )
            print(len(data_train))

            # Append the test set to the training data as unlabeled examples (target -1).
            data_train.data = np.concatenate((data_train.data, data_test.data), axis=0)
            data_train.targets = np.concatenate(
                (data_train.targets, -1 * np.ones_like(data_test.targets)), axis=0
            )
            data_train.indcs = np.concatenate((data_train.indcs, data_test.indcs), axis=0)

            data_trainers.update(
                train_models_(
                    {
                        "har{}%_lab{}%_run{}".format(
                            int(sampling_perc * 100), int(label_perc * 100), run
                        ): (data_train, data_test)
                    },
                    {
                        k + "_finetune_noaug": m
                        for k, m in models.items()
                        if "ssl_classifier" in k
                    },
                    criterion=partial(
                        NeuralProcessSSLLoss,
                        get_lambda_sup=lambda: get_lambda_clf(True),
                        n_max_elements=int(128 * sampling_perc),
                        # the labelled fraction is lower because the test set was concatenated
                        label_perc=(label_perc * data_train.n_train) / data_train.n_total,
                        min_sigma=min_std,
                        is_unsup_forall=False,
                        is_ssl_only=False,
                        get_lambda_unsup=lambda: 1,
                        # entropy and negative consistency do something similar
                        get_lambda_ent=lambda: 0.5,
                        get_lambda_neg_cons=lambda: 0.5,
                    ),
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(
                        get_cntxt_trgt, is_repeat_batch=True
                    ),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
7352

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug best epoch: 1 val_loss: 0.5792485174988745

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
7352

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug best epoch: 20 val_loss: 0.27427897649091754

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
7352

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finet

# All

In [13]:
from skorch.callbacks import Freezer, LRScheduler



for run in range(3):
    for sampling_perc in sampling_percentages:
        for label_perc in label_percentages:
            # Fresh models, classifier warm-started from the saved transformer.
            load_pretrained_(models, [sampling_perc])

            # Rebinds the global that the `get_lambda_sup` lambda below reads.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0, 10000),
                is_augment=True,
            )
            print(len(data_train))

            # Append the test set to the training data as unlabeled examples (target -1).
            data_train.data = np.concatenate((data_train.data, data_test.data), axis=0)
            data_train.targets = np.concatenate(
                (data_train.targets, -1 * np.ones_like(data_test.targets)), axis=0
            )
            data_train.indcs = np.concatenate((data_train.indcs, data_test.indcs), axis=0)

            data_trainers.update(
                train_models_(
                    {
                        "har{}%_lab{}%_run{}".format(
                            int(sampling_perc * 100), int(label_perc * 100), run
                        ): (data_train, data_test)
                    },
                    {
                        k + "_finetune": m
                        for k, m in models.items()
                        if "ssl_classifier" in k
                    },
                    criterion=partial(
                        NeuralProcessSSLLoss,
                        get_lambda_sup=lambda: get_lambda_clf(True),
                        n_max_elements=int(128 * sampling_perc),
                        # the labelled fraction is lower because the test set was concatenated
                        label_perc=(label_perc * data_train.n_train) / data_train.n_total,
                        min_sigma=min_std,
                        is_unsup_forall=False,
                        is_ssl_only=False,
                        get_lambda_unsup=lambda: 1,
                        # entropy and negative consistency do something similar
                        get_lambda_ent=lambda: 0.5,
                        get_lambda_neg_cons=lambda: 0.5,
                    ),
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(
                        get_cntxt_trgt, is_repeat_batch=True
                    ),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
14530

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune best epoch: 4 val_loss: 0.7066817718074813

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
12504

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune best epoch: 1 val_loss: 0.1426355674544359

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
14530

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har100%_lab1%_

## Without Neg Consistency

In [14]:
from skorch.callbacks import Freezer, LRScheduler


for run in range(3):
    for sampling_perc in sampling_percentages:
        for label_perc in label_percentages:
            # Fresh models, classifier warm-started from the saved transformer.
            load_pretrained_(models, [sampling_perc])

            # Rebinds the global that the `get_lambda_sup` lambda below reads.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0, 10000),
                is_augment=True,
            )

            # Append the test set to the training data as unlabeled examples (target -1).
            data_train.data = np.concatenate((data_train.data, data_test.data), axis=0)
            data_train.targets = np.concatenate(
                (data_train.targets, -1 * np.ones_like(data_test.targets)), axis=0
            )
            data_train.indcs = np.concatenate((data_train.indcs, data_test.indcs), axis=0)

            data_trainers.update(
                train_models_(
                    {
                        "har{}%_lab{}%_run{}".format(
                            int(sampling_perc * 100), int(label_perc * 100), run
                        ): (data_train, data_test)
                    },
                    {
                        k + "_finetune_nonegcons": m
                        for k, m in models.items()
                        if "ssl_classifier" in k
                    },
                    criterion=partial(
                        NeuralProcessSSLLoss,
                        get_lambda_sup=lambda: get_lambda_clf(True),
                        n_max_elements=int(128 * sampling_perc),
                        # the labelled fraction is lower because the test set was concatenated
                        label_perc=(label_perc * data_train.n_train) / data_train.n_total,
                        min_sigma=min_std,
                        is_unsup_forall=False,
                        is_ssl_only=False,
                        get_lambda_unsup=lambda: 1,
                        # ablation: entropy weight 1, negative consistency weight 0
                        get_lambda_ent=lambda: 1,
                        get_lambda_neg_cons=lambda: 0.0,
                    ),
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(
                        get_cntxt_trgt, is_repeat_batch=True
                    ),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons best epoch: 1 val_loss: 0.5842806758175149

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons best epoch: 1 val_loss: 0.16748589485590443

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finet

TODO: show the number of steps needed for convergence, because training without the unsupervised term is probably fine but does not keep improving.

# No Entropy

In [15]:
from skorch.callbacks import Freezer, LRScheduler


for run in range(3):
    for sampling_perc in sampling_percentages:
        for label_perc in label_percentages:
            # Fresh models, classifier warm-started from the saved transformer.
            load_pretrained_(models, [sampling_perc])

            # Rebinds the global that the `get_lambda_sup` lambda below reads.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0, 10000),
                is_augment=True,
            )

            # Append the test set to the training data as unlabeled examples (target -1).
            data_train.data = np.concatenate((data_train.data, data_test.data), axis=0)
            data_train.targets = np.concatenate(
                (data_train.targets, -1 * np.ones_like(data_test.targets)), axis=0
            )
            data_train.indcs = np.concatenate((data_train.indcs, data_test.indcs), axis=0)

            data_trainers.update(
                train_models_(
                    {
                        "har{}%_lab{}%_run{}".format(
                            int(sampling_perc * 100), int(label_perc * 100), run
                        ): (data_train, data_test)
                    },
                    {
                        k + "_finetune_noent": m
                        for k, m in models.items()
                        if "ssl_classifier" in k
                    },
                    criterion=partial(
                        NeuralProcessSSLLoss,
                        get_lambda_sup=lambda: get_lambda_clf(True),
                        n_max_elements=int(128 * sampling_perc),
                        # the labelled fraction is lower because the test set was concatenated
                        label_perc=(label_perc * data_train.n_train) / data_train.n_total,
                        min_sigma=min_std,
                        is_unsup_forall=False,
                        is_ssl_only=False,
                        get_lambda_unsup=lambda: 1,
                        # ablation: entropy weight 0, negative consistency weight 1
                        get_lambda_ent=lambda: 0,
                        get_lambda_neg_cons=lambda: 1.0,
                    ),
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(
                        get_cntxt_trgt, is_repeat_batch=True
                    ),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent best epoch: 1 val_loss: 0.6706024456477223

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent best epoch: 1 val_loss: 0.15668519885409757

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent ---

h

## No Unsup

In [16]:
from skorch.callbacks import Freezer, LRScheduler



for run in range(3):
    for sampling_perc in sampling_percentages:
        for label_perc in label_percentages:
            # Fresh models, classifier warm-started from the saved transformer.
            load_pretrained_(models, [sampling_perc])

            # Rebinds the global that the `get_lambda_sup` lambda below reads.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                seed=random.randint(0, 10000),
                dev_size=0,
                is_augment=True,
            )

            # Append the test set to the training data as unlabeled examples (target -1).
            data_train.data = np.concatenate((data_train.data, data_test.data), axis=0)
            data_train.targets = np.concatenate(
                (data_train.targets, -1 * np.ones_like(data_test.targets)), axis=0
            )
            data_train.indcs = np.concatenate((data_train.indcs, data_test.indcs), axis=0)

            data_trainers.update(
                train_models_(
                    {
                        "har{}%_lab{}%_run{}".format(
                            int(sampling_perc * 100), int(label_perc * 100), run
                        ): (data_train, data_test)
                    },
                    {
                        k + "_finetune_nounsup": m
                        for k, m in models.items()
                        if "ssl_classifier" in k
                    },
                    criterion=partial(
                        NeuralProcessSSLLoss,
                        get_lambda_sup=lambda: get_lambda_clf(True),
                        n_max_elements=int(128 * sampling_perc),
                        # the labelled fraction is lower because the test set was concatenated
                        label_perc=(label_perc * data_train.n_train) / data_train.n_total,
                        min_sigma=min_std,
                        is_unsup_forall=False,
                        is_ssl_only=False,
                        # ablation: unsupervised weight 0
                        get_lambda_unsup=lambda: 0,
                        # entropy and negative consistency do something similar
                        get_lambda_ent=lambda: 0.5,
                        get_lambda_neg_cons=lambda: 0.5,
                    ),
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(
                        get_cntxt_trgt, is_repeat_batch=True
                    ),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup best epoch: 1 val_loss: 0.6362027577476903

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup best epoch: 1 val_loss: 0.15670357431684714

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noun

# SSL Only

In [17]:
from skorch.callbacks import Freezer, LRScheduler


for run in range(3):
    for sampling_perc in sampling_percentages:
        for label_perc in label_percentages:
            # Fresh models, classifier warm-started from the saved transformer.
            load_pretrained_(models, [sampling_perc])

            # Rebinds the global that the `get_lambda_sup` lambda below reads.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                seed=random.randint(0, 10000),
                dev_size=0,
                is_augment=True,
            )

            # Append the test set to the training data as unlabeled examples (target -1).
            data_train.data = np.concatenate((data_train.data, data_test.data), axis=0)
            data_train.targets = np.concatenate(
                (data_train.targets, -1 * np.ones_like(data_test.targets)), axis=0
            )
            data_train.indcs = np.concatenate((data_train.indcs, data_test.indcs), axis=0)

            data_trainers.update(
                train_models_(
                    {
                        "har{}%_lab{}%_run{}".format(
                            int(sampling_perc * 100), int(label_perc * 100), run
                        ): (data_train, data_test)
                    },
                    {
                        k + "_finetune_sslonly": m
                        for k, m in models.items()
                        if "ssl_classifier" in k
                    },
                    criterion=partial(
                        NeuralProcessSSLLoss,
                        get_lambda_sup=lambda: get_lambda_clf(True),
                        n_max_elements=int(128 * sampling_perc),
                        # the labelled fraction is lower because the test set was concatenated
                        label_perc=(label_perc * data_train.n_train) / data_train.n_total,
                        min_sigma=min_std,
                        is_unsup_forall=False,
                        # ablation: SSL-only flag switched on
                        is_ssl_only=True,
                        get_lambda_unsup=lambda: 1,
                        # entropy and negative consistency do something similar
                        get_lambda_ent=lambda: 0.5,
                        get_lambda_neg_cons=lambda: 0.5,
                    ),
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(
                        get_cntxt_trgt, is_repeat_batch=True
                    ),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly best epoch: 1 val_loss: 0.6721114105882182

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly best epoch: 16 val_loss: 0.1699327095346933

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslo

# Sup Only 

In [18]:
from skorch.callbacks import Freezer, LRScheduler


# "Sup Only" ablation: for 3 runs over every (sampling_perc, label_perc) pair,
# train each `ssl_classifier*` entry of `models` on the labelled training rows only
# and store the result in `data_trainers` under the key
# "har<sampling>%_lab<label>%_run<run>" with the model suffix "_finetune_suponly".
for run in range(3):
    for sampling_perc in sampling_percentages:
        for label_perc in label_percentages:
            load_pretrained_(models, [sampling_perc])

            # handed to the loss through `get_lambda_sup`; presumably a linear schedule
            # from 1 to 50 over all training steps -- confirm in HyperparameterInterpolator
            get_lambda_clf = HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # NOTE(review): the split seed is random, so runs are not reproducible across kernels
            data_train, _, data_test = get_train_dev_test_ssl("har",
                                                              n_labels=label_perc,
                                                              data_perc=sampling_perc,
                                                              seed=random.randint(0,10000),
                                                              dev_size=0,
                                                              is_augment=True)

            # Keep only the labelled rows (unlabelled rows carry the target -1).
            # The mask is computed once, before `targets` is overwritten.
            is_labelled = (data_train.targets != -1).squeeze()
            data_train.data = data_train.data[is_labelled]
            data_train.targets = data_train.targets[is_labelled]
            # The sibling cells keep `indcs` in step with `data`/`targets` (they concatenate
            # all three along axis 0); filter it as well so rows stay aligned.
            # np.asarray mirrors the conversion np.concatenate applies in those cells.
            data_train.indcs = np.asarray(data_train.indcs)[np.asarray(is_labelled)]

            data_trainers.update(train_models_({"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run):
                                                (data_train, data_test)},
                                  {k + "_finetune_suponly" :m for k,m in models.items() if "ssl_classifier" in k},
                                  criterion=partial(NeuralProcessSSLLoss,
                                                    get_lambda_sup=lambda: get_lambda_clf(True),
                                                    n_max_elements=int(128*sampling_perc),
                                                    # NOTE(review): same formula as the cells that append the test split;
                                                    # confirm n_train / n_total still give the intended ratio here
                                                    label_perc=(label_perc * data_train.n_train)/data_train.n_total,
                                                    min_sigma=min_std,
                                                    is_unsup_forall=False,
                                                    is_ssl_only=True,
                                                    get_lambda_unsup=lambda: 0,
                                                    get_lambda_ent=lambda: 0.5,  # both do something similar
                                                    get_lambda_neg_cons=lambda: .5,
                                                    ),
                                  patience=15,
                                  chckpnt_dirname=chckpnt_dirname,
                                  max_epochs=N_EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  is_retrain=IS_RETRAIN,
                                  is_monitor_acc=True,
                                  callbacks=[],
                                  iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                                  iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                              ))



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly best epoch: 2 val_loss: 0.37116682323149275

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly best epoch: 2 val_loss: 0.11194615579274406

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup

# Supervised Vanilla (disabled)

In [19]:
# Disabled experiment: the whole cell body is wrapped in a triple-quoted string, so
# nothing below is executed; evaluating the cell only echoes the string itself.
# The text is a variant of the "Sup Only" loop using the model suffix
# "_finetune_sup_vanilla". Remove the surrounding quotes to re-enable it.
"""
from skorch.callbacks import Freezer, LRScheduler

for run in range(3):
    for sampling_perc in sampling_percentages:
        for label_perc in label_percentages:
            load_pretrained_(models, [sampling_perc])

            get_lambda_clf=HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                              n_labels=label_perc, 
                                                              data_perc=sampling_perc,
                                                              seed=random.randint(0,10000),
                                                              dev_size=0,
                                                              is_augment=True)

            # add test as unlabeled data
            data_train.data = data_train.data[(data_train.targets!=-1).squeeze()]
            data_train.targets = data_train.targets[(data_train.targets!=-1).squeeze()]

            data_trainers.update(train_models_({"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run):
                                                (data_train, data_test)}, 
                                  {k + "_finetune_sup_vanilla" :m for k,m in models.items() if "ssl_classifier" in k}, 
                                  criterion=partial(NeuralProcessSSLLoss, 
                                                    get_lambda_sup=lambda : 1,
                                                    n_max_elements=None,
                                                    label_perc=None, # label perc is lower ebcause cocnat to test
                                                    min_sigma=min_std,
                                                    is_unsup_forall=False,
                                                    is_ssl_only=True,
                                                    get_lambda_unsup=lambda: 0,
                                                    ),
                                    patience=15,
                                  chckpnt_dirname=chckpnt_dirname,
                                  max_epochs=N_EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  is_retrain=IS_RETRAIN,
                                               seed=None,
                                    is_monitor_acc=True,
                                  callbacks=[],
                                  iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),  
                                  iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                              ))
"""

'\nfrom skorch.callbacks import Freezer, LRScheduler\n\nfor run in range(3):\n    for sampling_perc in sampling_percentages:\n        for label_perc in label_percentages:\n            load_pretrained_(models, [sampling_perc])\n\n            get_lambda_clf=HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")\n\n            data_train, _, data_test = get_train_dev_test_ssl("har", \n                                                              n_labels=label_perc, \n                                                              data_perc=sampling_perc,\n                                                              seed=random.randint(0,10000),\n                                                              dev_size=0,\n                                                              is_augment=True)\n\n            # add test as unlabeled data\n            data_train.data = data_train.data[(data_train.targets!=-1).squeeze()]\n            data_train.targets = data_train.t

# No Lambda CLF (No Supervised Annealing)

In [20]:
from skorch.callbacks import Freezer, LRScheduler


# "No Lambda CLF" ablation: 3 runs over every (sampling_perc, label_perc) pair, with the
# supervised weight fixed to the constant 1. Results are stored in `data_trainers`
# under "har<sampling>%_lab<label>%_run<run>" with the model suffix "_finetune_nolambda".
for run in range(3):
    for sampling_perc in sampling_percentages:
        for label_perc in label_percentages:
            load_pretrained_(models, [sampling_perc])

            # NOTE(review): assigned as in the sibling cells but never referenced in this
            # cell, because `get_lambda_sup` below is `lambda: 1`.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear"
            )

            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0,10000),
                is_augment=True,
            )

            # Append the test split to the training set as unlabelled data:
            # its targets are replaced by -1, its data and indices are kept.
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate(
                [data_train.targets, -1*np.ones_like(data_test.targets)], axis=0
            )
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            run_key = "har{}%_lab{}%_run{}".format(
                int(sampling_perc*100), int(label_perc*100), run
            )
            finetune_models = {
                k + "_finetune_nolambda": m
                for k, m in models.items()
                if "ssl_classifier" in k
            }
            ssl_criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: 1,
                n_max_elements=int(128*sampling_perc),
                # label fraction is lower because the test split was concatenated to the training data
                label_perc=(label_perc * data_train.n_train)/data_train.n_total,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: .5,
            )

            data_trainers.update(
                train_models_(
                    {run_key: (data_train, data_test)},
                    finetune_models,
                    criterion=ssl_criterion,
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda best epoch: 4 val_loss: 0.7258332047496442

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda best epoch: 1 val_loss: 0.16846257951812668

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_

## Without Label Scaling

In [21]:
from skorch.callbacks import Freezer, LRScheduler


# "Without Label Scaling" ablation: 3 runs over every (sampling_perc, label_perc) pair
# with `label_perc=None` passed to the loss. Results are stored in `data_trainers`
# under "har<sampling>%_lab<label>%_run<run>" with the model suffix "_finetune_nolabscale".
for run in range(3):
    for sampling_perc in sampling_percentages:
        for label_perc in label_percentages:
            load_pretrained_(models, [sampling_perc])

            # handed to the loss through `get_lambda_sup`
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear"
            )

            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0,10000),
                is_augment=True,
            )

            # Append the test split to the training set as unlabelled data:
            # its targets are replaced by -1, its data and indices are kept.
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate(
                [data_train.targets, -1*np.ones_like(data_test.targets)], axis=0
            )
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            run_key = "har{}%_lab{}%_run{}".format(
                int(sampling_perc*100), int(label_perc*100), run
            )
            finetune_models = {
                k + "_finetune_nolabscale": m
                for k, m in models.items()
                if "ssl_classifier" in k
            }
            ssl_criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: get_lambda_clf(True),
                n_max_elements=int(128*sampling_perc),
                # presumably None is what switches label scaling off for this ablation;
                # confirm in NeuralProcessSSLLoss
                label_perc=None,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: 0.5,
            )

            data_trainers.update(
                train_models_(
                    {run_key: (data_train, data_test)},
                    finetune_models,
                    criterion=ssl_criterion,
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale best epoch: 1 val_loss: 1.3277767340450073

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale best epoch: 18 val_loss: 0.2315054115376984

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_f

## No Element Scaling

In [22]:
from skorch.callbacks import Freezer, LRScheduler


# "No Element Scaling" ablation: 3 runs over every (sampling_perc, label_perc) pair
# with `n_max_elements=None` passed to the loss. Results are stored in `data_trainers`
# under "har<sampling>%_lab<label>%_run<run>" with the model suffix "_finetune_noelemscale".
for run in range(3):
    for sampling_perc in sampling_percentages:
        for label_perc in label_percentages:

            load_pretrained_(models, [sampling_perc])

            # handed to the loss through `get_lambda_sup`
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear"
            )

            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0,10000),
                is_augment=True,
            )

            # Append the test split to the training set as unlabelled data:
            # its targets are replaced by -1, its data and indices are kept.
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate(
                [data_train.targets, -1*np.ones_like(data_test.targets)], axis=0
            )
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            run_key = "har{}%_lab{}%_run{}".format(
                int(sampling_perc*100), int(label_perc*100), run
            )
            finetune_models = {
                k + "_finetune_noelemscale": m
                for k, m in models.items()
                if "ssl_classifier" in k
            }
            ssl_criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: get_lambda_clf(True),
                # presumably None is what switches element scaling off for this ablation;
                # confirm in NeuralProcessSSLLoss
                n_max_elements=None,
                # label fraction is lower because the test split was concatenated to the training data
                label_perc=(label_perc * data_train.n_train)/data_train.n_total,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: 0.5,
            )

            data_trainers.update(
                train_models_(
                    {run_key: (data_train, data_test)},
                    finetune_models,
                    criterion=ssl_criterion,
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale best epoch: 1 val_loss: 0.5697207075480812

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale best epoch: 7 val_loss: 0.1779502336963541

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottlenec

## No Finetuning

In [23]:
from skorch.callbacks import Freezer, LRScheduler


# "No Finetuning" variant: the classifier entry of `models` is replaced by a fresh
# `partial(GlobalNeuralProcess, **kwargs)` and, unlike the other cells, `load_pretrained_`
# is not called inside the loop. Models keep their original names (empty suffix).
# NOTE(review): this overwrites an entry of the shared `models` dict, so re-running an
# earlier cell afterwards will pick up the replaced entry.
models["ssl_classifier_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs)

for run in range(3):
    for sampling_perc in sampling_percentages:
        for label_perc in label_percentages:

            # handed to the loss through `get_lambda_sup`
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear"
            )

            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0,10000),
                is_augment=True,
            )

            # Append the test split to the training set as unlabelled data:
            # its targets are replaced by -1, its data and indices are kept.
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate(
                [data_train.targets, -1*np.ones_like(data_test.targets)], axis=0
            )
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            run_key = "har{}%_lab{}%_run{}".format(
                int(sampling_perc*100), int(label_perc*100), run
            )
            # the empty suffix keeps every selected model under its original key
            selected_models = {
                k + "": m
                for k, m in models.items()
                if "ssl_classifier" in k
            }
            ssl_criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: get_lambda_clf(True),
                n_max_elements=int(128*sampling_perc),
                # label fraction is lower because the test split was concatenated to the training data
                label_perc=(label_perc * data_train.n_train)/data_train.n_total,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: 0.5,
            )

            data_trainers.update(
                train_models_(
                    {run_key: (data_train, data_test)},
                    selected_models,
                    criterion=ssl_criterion,
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck best epoch: 1 val_loss: 0.8911615639651959

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck best epoch: 2 val_loss: 0.2708716995238013

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck ---

har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck best epoch: 2 val_loss: 0.5388232102667948

--- Loading har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck ---

har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck best epoch: 5 val_loss: 0.1961393239824964

--- Loading har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck ---

har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck best epoch: 1 val_loss: 0.8988694104252315

--- Loading har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottle

In [24]:
def get_percentile_converge_epoch(history, percentile=0.01):
    """Return the first epoch whose training loss has covered (1 - percentile) of the total change.

    The total change is measured between the first and the *last* entry of
    `history` (not the minimum). With the default `percentile=0.01` this is the
    epoch at which 99% of the way from the initial to the final loss is reached.

    Parameters
    ----------
    history : sequence of mappings
        Each entry must provide the keys 'train_loss' and "epoch".
    percentile : float, optional
        Fraction of the initial-to-final loss change that may still be missing.

    Returns
    -------
    The "epoch" value of the first entry whose 'train_loss' is at or below the
    cutoff, or None when no entry qualifies (e.g. NaN losses).
    """
    first_loss = history[0]['train_loss']
    final_loss = history[-1]['train_loss']
    cutoff = first_loss + (final_loss - first_loss) * (1 - percentile)
    reached = (entry["epoch"] for entry in history if entry['train_loss'] <= cutoff)
    return next(reached, None)
    

In [35]:
import pandas as pd

# Build the HAR ablation table: final validation accuracy of every trainer in
# `data_trainers`, aggregated as mean / std over runs per (model, label percentage),
# then pivoted to one column per model and written to CSV.
# Keys of `data_trainers` are parsed as "har<sample>_lab<label>_run<run>/<model name>".
out = pd.DataFrame({k:v.history[-1]["valid_acc"] for k,v in data_trainers.items()}, index=["Accuracy"]
                  ).T.reset_index()#name="accuracy")
splitted = out["index"].str.split("/", expand = True)
out["meta"] = splitted[0]
out["models"] = splitted[1]

splitted2 = out["meta"].str.split("_run", expand = True)
out["meta"] = splitted2[0]
out["run"] = splitted2[1]

splitted3 = out["meta"].str.split("_lab", expand = True)
out["data sample"] = splitted3[0].str.split("har", expand = True)[1]
out["lab"] = splitted3[1]


out.drop(columns =["index"], inplace = True) 

# mean / std over the runs of each (model, label percentage) pair
out = out.groupby(["models", "lab"]).agg(["mean", "std"])

out.reset_index(drop=False, inplace=True)

# Map internal model names to the labels shown in the table.
# The "..._finetune_suponly" key used to appear twice in this dict; Python keeps only
# the last value of a duplicated key, so the shadowed "... Only Supervised Loss"
# entry has been removed and the effective label is unchanged.
# NOTE(review): `replace` matches cell values, so the "data sample" / "lab" entries
# presumably have no effect; the column rename happens further down.
out=out.replace({"ssl_classifier_gnp_large_shared_bottleneck_finetune":"Joint UnetNP", 
                      "ssl_classifier_gnp_large_shared_bottleneck":"... Pretrained", 
                      "ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug":"... No Oversampling",
                      "ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale":"... No Data Scaling",
                      "ssl_classifier_gnp_large_shared_bottleneck_finetune_noent":"... No Entropy Minimization",
                     "ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale": "... No Label Scaling",
                      "ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda":"... No Supervised Annealing",
                      "ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons":"... No Negative Consistency",
                      "ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup":"... No Unsupervised Loss",
                      "ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly":"... No NP Loss",
                 "ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly":"... Supervised Loss, No Rescaling",
                      "data sample":"Sample Percentage (%)",
                       "lab":"Label Percentage (%)"})


# strip the trailing "%" and convert the label percentage to int
out["lab"] = out["lab"].map(lambda x: int(str(x)[:-1]))


out=out.rename(columns={"models":"Models",  "lab":"Label Percentage (%)"})

#out = out[out.Models != "... Supervised Loss, No Rescaling"]

#out.to_csv("table_ablation_har.csv")

# NOTE: `df` is an alias of `out` (no copy), so the assignment below also changes `out`.
df = out
# join the mean and std strings of each row into one "<mean> +/-<std>" cell
df.Accuracy=df.Accuracy.astype(str).apply(' +/-'.join, axis=1)
df=df.droplevel(1, axis=1)
# drop the last column (presumably the now-duplicated "Accuracy" column -- confirm)
df = df.iloc[:,:-1]
df = df.pivot_table(index="Label Percentage (%)", columns="Models", values="Accuracy", aggfunc='first')

df.to_csv("results/tables/table_ablation_har.csv")

df

Models,... No Data Scaling,... No Entropy Minimization,... No Label Scaling,... No NP Loss,... No Negative Consistency,... No Oversampling,... No Supervised Annealing,... No Unsupervised Loss,... Pretrained,"... Supervised Loss, No Rescaling",Joint UnetNP
Label Percentage (%),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0.8823662481619726 +/-0.003628325965919103,0.8812351543942992 +/-0.0025618712030100964,0.893677185838706 +/-0.012967189672945594,0.8765976699468386 +/-0.004705948210092729,0.8849677638276213 +/-0.004450246708043437,0.8702635448478678 +/-0.0031345789979755935,0.8854202013346907 +/-0.011800295311159352,0.8716208573690759 +/-0.00463196363068507,0.8620065603438526 +/-0.01708925101078104,0.9040832485013008 +/-0.013515032337954007,0.883497341929646 +/-0.008286371635512803
10,0.9518154054971157 +/-0.0018892990712012139,0.9453681710213777 +/-0.001795555691255216,0.9533989367718583 +/-0.007567346989561189,0.9572446555819477 +/-0.0053329601783175095,0.9493269992082345 +/-0.001929501426222352,0.9347358896052483 +/-0.0016738659186686254,0.9565659993213438 +/-0.008332561355825331,0.952607171134487 +/-0.003737747232633912,0.9317950458092975 +/-0.006219987370147012,0.9639181088112205 +/-0.0049445748205845805,0.9519285148738831 +/-0.004754631728760044


In [30]:
# Displays the ablation table pivoted by label percentage (rows) and model (columns).
# NOTE(review): the cell above already assigns the result of this same pivot_table
# call to `df`, and this cell's execution count (30) is lower than that cell's (35),
# so it presumably ran against an earlier, un-pivoted `df` -- confirm that it still
# works (or drop it) under Restart Kernel -> Run All.
df.pivot_table(index="Label Percentage (%)", columns="Models", values="Accuracy", aggfunc='first')

Models,... No Data Scaling,... No Entropy Minimization,... No Label Scaling,... No NP Loss,... No Negative Consistency,... No Oversampling,... No Supervised Annealing,... No Unsupervised Loss,... Pretrained,"... Supervised Loss, No Rescaling",Joint UnetNP
Label Percentage (%),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0.8823662481619726 +/-0.003628325965919103,0.8812351543942992 +/-0.0025618712030100964,0.893677185838706 +/-0.012967189672945594,0.8765976699468386 +/-0.004705948210092729,0.8849677638276213 +/-0.004450246708043437,0.8702635448478678 +/-0.0031345789979755935,0.8854202013346907 +/-0.011800295311159352,0.8716208573690759 +/-0.00463196363068507,0.8620065603438526 +/-0.01708925101078104,0.9040832485013008 +/-0.013515032337954007,0.883497341929646 +/-0.008286371635512803
10,0.9518154054971157 +/-0.0018892990712012139,0.9453681710213777 +/-0.001795555691255216,0.9533989367718583 +/-0.007567346989561189,0.9572446555819477 +/-0.0053329601783175095,0.9493269992082345 +/-0.001929501426222352,0.9347358896052483 +/-0.0016738659186686254,0.9565659993213438 +/-0.008332561355825331,0.952607171134487 +/-0.003737747232633912,0.9317950458092975 +/-0.006219987370147012,0.9639181088112205 +/-0.0049445748205845805,0.9519285148738831 +/-0.004754631728760044


In [33]:
import pandas as pd

# Build the un-pivoted ablation table: for every (model, label percentage)
# pair, the mean and std over the trained runs of the final validation
# accuracy and of the two convergence-epoch measures.

# Metrics collected per trainer; these are also the only columns aggregated.
metric_cols = ["Accuracy", "Convergence Epoch", "99% Convergence Epoch"]

# Display names of the ablation variants, keyed by the raw model name.
# NOTE(review): the `_suponly` key was previously listed twice; only the last
# label ("... Supervised Loss, No Rescaling") ever took effect, so the shadowed
# "... Only Supervised Loss" entry is dropped. If a distinct model key was
# intended for that label, add it here.
model_display_names = {
    "ssl_classifier_gnp_large_shared_bottleneck_finetune": "Joint UnetNP",
    "ssl_classifier_gnp_large_shared_bottleneck": "... Pretrained",
    "ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug": "... No Oversampling",
    "ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale": "... No Data Scaling",
    "ssl_classifier_gnp_large_shared_bottleneck_finetune_noent": "... No Entropy Minimization",
    "ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale": "... No Label Scaling",
    "ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda": "... No Supervised Annealing",
    "ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons": "... No Negative Consistency",
    "ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup": "... No Unsupervised Loss",
    "ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly": "... No NP Loss",
    "ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly": "... Supervised Loss, No Rescaling",
}

# One row per trainer: last-epoch validation accuracy, length of the training
# history, and the value returned by `get_percentile_converge_epoch`.
out = pd.DataFrame(
    {k: [v.history[-1]["valid_acc"], len(v.history), get_percentile_converge_epoch(v.history)]
     for k, v in data_trainers.items()},
    index=metric_cols,
).T.reset_index()

# Parse the trainer key, which is split successively on "/", "_run", "_lab"
# and "har" (presumably "har<sample>_lab<label>_run<run>/<model>" - TODO confirm).
splitted = out["index"].str.split("/", expand=True)
out["meta"] = splitted[0]
out["models"] = splitted[1]

splitted2 = out["meta"].str.split("_run", expand=True)
out["meta"] = splitted2[0]
out["run"] = splitted2[1]

splitted3 = out["meta"].str.split("_lab", expand=True)
out["data sample"] = splitted3[0].str.split("har", expand=True)[1]
out["lab"] = splitted3[1]

out = out.drop(columns=["index"])

# Aggregate over runs. The metric columns are selected explicitly: the string
# helper columns ("meta", "run", "data sample") cannot be averaged, and
# pandas >= 2.0 raises a TypeError instead of silently dropping them.
out = out.groupby(["models", "lab"])[metric_cols].agg(["mean", "std"]).reset_index()

# Swap raw model names for their display names (applied after the groupby so
# the row order still follows the sorted raw names).
out = out.replace(model_display_names)

# Drop the last character of the label tag and cast to int
# (presumably a unit suffix such as "%" - TODO confirm).
out["lab"] = out["lab"].map(lambda x: int(str(x)[:-1]))

out = out.rename(columns={"models": "Models", "lab": "Label Percentage (%)"})

out.to_csv("results/tables/table_ablation_har_nopivot.csv")

out

Unnamed: 0_level_0,Models,Label Percentage (%),Accuracy,Accuracy,Convergence Epoch,Convergence Epoch,99% Convergence Epoch,99% Convergence Epoch
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std
0,... Pretrained,1,0.862007,0.017089,3.666667,3.785939,2.666667,2.081666
1,... Pretrained,10,0.931795,0.00622,8.666667,3.785939,1.333333,0.57735
2,Joint UnetNP,1,0.883497,0.008286,5.333333,3.21455,2.666667,0.57735
3,Joint UnetNP,10,0.951929,0.004755,8.0,7.0,4.333333,3.511885
4,... No Oversampling,1,0.870264,0.003135,17.333333,6.806859,9.0,5.0
5,... No Oversampling,10,0.934736,0.001674,13.333333,6.506407,1.666667,0.57735
6,... No Data Scaling,1,0.882366,0.003628,2.666667,0.57735,2.0,0.0
7,... No Data Scaling,10,0.951815,0.001889,8.666667,2.081666,2.0,0.0
8,... No Entropy Minimization,1,0.881235,0.002562,8.333333,12.701706,2.0,1.732051
9,... No Entropy Minimization,10,0.945368,0.001796,3.666667,1.527525,3.0,1.0
