# Human Activity Recognition - SSL JOINT Consistency

Last Update : 31 July 2019

In [1]:
# Number of CPU threads passed to torch.set_num_threads in the environment cell.
N_THREADS = 8
# Nota Bene : notebooks don't deallocate GPU memory
# When True, the environment cell blanks CUDA_VISIBLE_DEVICES before torch is imported.
IS_FORCE_CPU = False # can also be set in the trainer

## Environment

In [2]:
cd ..

/conv


In [3]:
%autosave 600
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# CENTER PLOTS (and hide the cell prompts)
from IPython.core.display import HTML
# Inject two CSS rules into the notebook page:
#   .output_png -> centre rendered PNG figures
#   .prompt     -> hide elements carrying the `prompt` class
# The second rule used to be written `.prompt display:none;}` (missing the
# opening brace), which is not a valid rule and was therefore never applied.
display(HTML(""" <style> .output_png {display: table-cell; text-align: center; margin:auto; }
.prompt {display:none;}  </style>"""))

import os
# Blank CUDA_VISIBLE_DEVICES when CPU execution is requested. This runs before
# the `import torch` further down in this cell -- presumably so that torch
# never gets to see a GPU; keep this ordering.
if IS_FORCE_CPU:
    os.environ['CUDA_VISIBLE_DEVICES'] = ""

import sys
# Make modules located in the `notebooks` directory importable (the working
# directory was moved one level up by the `cd ..` cell above).
sys.path.append("notebooks")

import numpy as np
import matplotlib.pyplot as plt
from functools import partial
import pandas as pd
import h5py


import torch
torch.set_num_threads(N_THREADS)

Autosaving every 600 seconds


# Dataset

In [4]:
from skssl.transformers.neuralproc.datasplit import CntxtTrgtGetter, GetRandomIndcs, get_all_indcs
from utils.data.tsdata import get_timeseries_dataset, SparseMultiTimeSeriesDataset

# The three getters below decide which points of a series are used as context
# and which as targets. min_n_indcs / max_n_indcs are presumably fractions of
# the available points -- TODO confirm against GetRandomIndcs.

# Random subset as context, every point as target.
# Not referenced by any other cell visible in this notebook.
get_cntxt_trgt_test = CntxtTrgtGetter(
    contexts_getter=GetRandomIndcs(min_n_indcs=0.1, max_n_indcs=0.5),
    targets_getter=get_all_indcs,
    is_add_cntxts_to_trgts=False,  # don't add the context points to the targets
)

# Every point is both context and target; used by the validation collate function.
get_cntxt_trgt_feat = CntxtTrgtGetter(
    contexts_getter=get_all_indcs,
    targets_getter=get_all_indcs,
    is_add_cntxts_to_trgts=False,  # don't add the context points to the targets
)

# Random context and random targets; used by the training collate function.
get_cntxt_trgt = CntxtTrgtGetter(
    contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.99),
    targets_getter=GetRandomIndcs(min_n_indcs=0.5, max_n_indcs=0.99),
    is_add_cntxts_to_trgts=False,  # don't add the context points to the targets
)

In [5]:
# HAR time-series dataset requested with split="both" (presumably train and
# test together -- TODO confirm). Used below to derive Y_DIM, N_TARGETS and the
# number of optimisation steps per epoch.
data_both = get_timeseries_dataset("har")(split="both")

def cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=False):
    """Build a `collate_fn` that batches samples and splits them into context / target sets.

    Parameters
    ----------
    get_cntxt_trgt : callable
        Called as `get_cntxt_trgt(X, y)` on the collated batch. Must return the
        4 values stored under the keys "X", "y", "X_trgt" and "y_trgt".

    is_repeat_batch : bool, optional
        If True, X, y and the labels are each concatenated with themselves
        along dim 0 before the context / target split is drawn.

    Returns
    -------
    mycollate : callable
        Takes a list of `(features_dict, label)` samples and returns the
        collated `[features_dict, labels]` list.
    """
    def mycollate(batch):
        # Shortest first dimension among all feature tensors whose key contains "X".
        shortest = min(tensor.size(0)
                       for sample in batch
                       for name, tensor in sample[0].items()
                       if "X" in name)

        # Keep only the first `shortest` entries of every feature tensor so the
        # samples can be stacked (assumes the entries are already randomized).
        cropped = []
        for sample in batch:
            features = {name: tensor[:shortest, ...] for name, tensor in sample[0].items()}
            cropped.append((features, sample[1]))

        collated = torch.utils.data.dataloader.default_collate(cropped)
        inputs = collated[0]
        X, y = inputs["X"], inputs["y"]

        if is_repeat_batch:
            X = torch.cat([X, X], dim=0)
            y = torch.cat([y, y], dim=0)
            # the labels have to be repeated as well
            collated[1] = torch.cat([collated[1], collated[1]], dim=0)

        X_cntxt, y_cntxt, X_trgt, y_trgt = get_cntxt_trgt(X, y)
        inputs["X"] = X_cntxt
        inputs["y"] = y_cntxt
        inputs["X_trgt"] = X_trgt
        inputs["y_trgt"] = y_trgt

        return collated

    return mycollate

In [6]:
X_DIM = 1  # 1D spatial input (although actually 2 but the first is for sparse channels)
Y_DIM = data_both.data.shape[-1] # multiple channels
# Number of distinct values found in the dataset targets (i.e. number of classes).
N_TARGETS = len(np.unique(data_both.targets))

# Candidate settings. The experiment cells visible in this notebook hard-code
# sampling_perc=1 and label_perc=0.1 rather than iterating over these lists.
sampling_percentages = [0.05, 0.1, 0.3, 0.5, 0.7, 1]
# NOTE(review): mixes integers (presumably absolute label counts) with
# fractions (presumably proportions of the data) -- confirm with get_train_dev_test_ssl.
label_percentages = [N_TARGETS, N_TARGETS*2, 0.01, 0.05, 0.1, 0.3, 0.5, 1]

## Model

In [7]:
import torch.nn as nn
from skssl.transformers import GlobalNeuralProcess, NeuralProcessLoss, AttentiveNeuralProcess, NeuralProcessSSLLoss
from skssl.utils.helpers import rescale_range
from skssl.predefined import UnetCNN, CNN, MLP, SparseSetConv, SetConv, MlpRBF, GaussianRBF, BatchSparseSetConv
from skssl.transformers.neuralproc.datasplit import precomputed_cntxt_trgt_split
from utils.helpers import count_parameters

In [8]:
from copy import deepcopy

# Registry of model factories, keyed by model name. Values are
# `partial(GlobalNeuralProcess, ...)` objects that build a fresh model when called.
models = {}

# Also handed to the SSL loss as `min_sigma` in the training cells.
min_std=5e-3

# Factory for the 1D U-Net used as `TmpSelfAttn` of the neural process.
unet = partial(UnetCNN,
               Conv=torch.nn.Conv1d,
               Pool=torch.nn.MaxPool1d,
               upsample_mode="linear",
               n_layers=18,
               is_double_conv=True,
               is_depth_separable=True,
               Normalization=torch.nn.BatchNorm1d,
               is_chan_last=True,
               bottleneck=None,
               kernel_size=7,
               max_nchannels=256,
              is_force_same_bottleneck=True,
               _is_summary=True,
              )

# Keyword arguments shared by both model variants.
# `get_cntxt_trgt=precomputed_cntxt_trgt_split`: presumably because the
# context / target split is already produced by the collate function -- TODO confirm.
# NOTE(review): the classifier `input_size` (256+Y_DIM*4) is presumably tied to
# `max_nchannels=256` of the U-Net summary -- verify before changing either.
kwargs = dict(x_dim=X_DIM, 
              y_dim=Y_DIM,
              min_std=min_std,
                n_tmp_queries=128,
                r_dim=64,
              keys_to_tmp_attn=partial(SetConv, RadialBasisFunc=GaussianRBF),
              TmpSelfAttn=unet,
              tmp_to_queries_attn=partial(SetConv, RadialBasisFunc=GaussianRBF),
              is_skip_tmp=False,
              is_use_x=False,
              get_cntxt_trgt=precomputed_cntxt_trgt_split,
              is_encode_xy=False,
             Classifier=partial(MLP, input_size=256+Y_DIM*4, output_size=N_TARGETS, 
                                dropout=0., hidden_size=128, n_hidden_layers=3, is_res=True))

# Variant with a classification head.
models["ssl_classifier_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs)

# Same configuration without the classification head.
kwargs_bis = deepcopy(kwargs)
kwargs_bis["Classifier"] = None

models["transformer_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs_bis)

In [9]:
from utils.helpers import count_parameters
# Instantiate every registered factory once and report its parameter count.
# NOTE: `v()` only works while `models` still holds factories; `load_pretrained_`
# later replaces the values by instantiated models, so run this cell before it.
for k,v in models.items():
    print(k, "- N Param:", count_parameters(v()))

ssl_classifier_gnp_large_shared_bottleneck - N Param: 1078238
transformer_gnp_large_shared_bottleneck - N Param: 1006936


In [10]:
def load_pretrained_(models, sampling_percentages):
    """Re-instantiate both models in place and warm-start the SSL classifier.

    The two entries of `models` are replaced by freshly constructed
    `GlobalNeuralProcess` instances (built from the notebook-level `kwargs`).
    For every sampling percentage, the corresponding transformer checkpoint is
    then loaded through `train_models_(..., is_retrain=False)` from
    `chckpnt_dirname_old`, and its weights are copied into the matching
    `ssl_classifier` model. Parameters that only exist in the classifier keep
    their fresh initialisation.

    Parameters
    ----------
    models : dict
        Model registry; modified in place.

    sampling_percentages : iterable of float
        Data sampling percentages whose "{int(perc*100)}%har" checkpoints are
        loaded. Every checkpoint is written into the same classifier, so with
        several percentages only the last one remains.

    Returns
    -------
    None
    """
    # ALREADY INITIALIZED TO BE ABLE TO LOAD
    models["ssl_classifier_gnp_large_shared_bottleneck"] = GlobalNeuralProcess(**kwargs)

    kwargs_bis = deepcopy(kwargs)
    kwargs_bis["Classifier"] = None

    models["transformer_gnp_large_shared_bottleneck"] = GlobalNeuralProcess(**kwargs_bis)

    # load all transformers
    for sampling_perc in sampling_percentages:
        for k, m in models.items():
            if "transformer" not in k:
                continue

            # No data is given: with is_retrain=False the trainer is only
            # expected to restore the stored checkpoint.
            out = train_models_({"{}%har".format(int(sampling_perc*100)): (None, None)},
                                {k: m},
                                chckpnt_dirname=chckpnt_dirname_old,
                                seed=None,
                                is_retrain=False)

            # A single dataset / model pair was requested, so take the only trainer.
            pretrained_model = list(out.values())[0].module_

            # Overwrite the weights shared with the transformer, keep the rest.
            classifier = models[k.replace("transformer", "ssl_classifier")]
            model_dict = classifier.state_dict()
            model_dict.update(pretrained_model.state_dict())
            classifier.load_state_dict(model_dict)

# Hyperparameter Optimisation


In [11]:
from ntbks_helpers import train_models_
from skorch.dataset import CVSplit
from utils.data.ssldata import get_train_dev_test_ssl
import random

# Training budget shared by every experiment cell below.
N_EPOCHS = 100 
BATCH_SIZE = 32
IS_RETRAIN = False # if false load precomputed
# Checkpoints of the pretrained transformers (read by load_pretrained_).
chckpnt_dirname_old="results/challenge/har/"
# Checkpoints of the fine-tuned SSL classifiers (used by the experiment cells).
chckpnt_dirname="results/challenge/har_new/"

from skssl.utils.helpers import HyperparameterInterpolator

# Number of full batches in one pass over `data_both`.
n_steps_per_epoch = len(data_both)//BATCH_SIZE
# Schedule for the weight of the supervised loss (presumably 1 -> 10, linearly,
# over the whole training run). Every experiment cell below rebinds this name
# with a 1 -> 50 schedule before training.
get_lambda_clf=HyperparameterInterpolator(1, 10, N_EPOCHS*n_steps_per_epoch, mode="linear")

# No Augment

In [12]:
from skorch.callbacks import Freezer, LRScheduler

# "No Augment" ablation: fine-tune the SSL classifier without data augmentation.
# Trainers of this experiment; the keys appear as "<dataset tag>/<model name>"
# in the log output and are parsed that way by the summary cell.
data_trainers = {}

# Three repetitions, each with a freshly drawn random split seed.
for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.1]:
            # Re-instantiate the models and warm-start the classifier from the pretrained transformer.
            load_pretrained_(models, [sampling_perc])

            # Rebinds the global looked up (at call time) by the `get_lambda_sup`
            # lambda below; presumably a linear 1 -> 50 schedule over all training steps.
            get_lambda_clf=HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # NOTE(review): the seed comes from an unseeded `random`, so the
            # label subset differs between executions of this cell.
            data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                              n_labels=label_perc, 
                                                              data_perc=sampling_perc, 
                                                              dev_size=0,
                                                              seed=random.randint(0,10000),
                                                              is_augment=False)
            # size of the training set before the test set is appended
            print(len(data_train))

            # add test as unlabeled data (target -1 marks an unlabeled example)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            # Positional arguments: {dataset tag: (train, validation)} and {model name: model}.
            # The test set doubles as validation set.
            data_trainers.update(train_models_({"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run): 
                                                (data_train, data_test)}, 
                                  {k + "_finetune_noaug" :m for k,m in models.items() if "ssl_classifier" in k}, 
                                  criterion=partial(NeuralProcessSSLLoss, 
                                                    get_lambda_sup=lambda: get_lambda_clf(True),
                                                    n_max_elements=int(128*sampling_perc),
                                                    label_perc=(label_perc * data_train.n_train)/data_train.n_total, # label perc is lower because the test set was concatenated
                                                    min_sigma=min_std,
                                                    is_unsup_forall=False,
                                                    is_ssl_only=False,
                                                    get_lambda_unsup=lambda: 1,
                                                     get_lambda_ent=lambda: 0.5,  # both do something similar
                                                     get_lambda_neg_cons=lambda: 0.5,
                                                    ),
                                    patience=15,
                                  chckpnt_dirname=chckpnt_dirname,
                                  max_epochs=N_EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  is_retrain=IS_RETRAIN,
                                    is_monitor_acc=True,
                                  callbacks=[],
                                  # training: random context / target split on a duplicated batch
                                  iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),  
                                  # validation: every point is context and target
                                  iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                              ))



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
7352

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug best epoch: 20 val_loss: 0.27427897649091754

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
7352

--- Loading har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug ---

har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug best epoch: 8 val_loss: 0.18001823478688497

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
7352

--- Loading har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck_f

In [13]:
import pandas as pd

# Validation accuracy stored in the last history entry of each trainer,
# indexed by the trainer key "<meta>/<model>".
# NOTE(review): history[-1] is presumably the last recorded epoch, which may
# differ from the "best epoch" shown in the training log -- confirm this is intended.
final_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(final_acc).reset_index(name="accuracy")

# Peel the key apart: "<meta>/<model>", then "..._run<run>", then "har<sample>_lab<lab>".
key_parts = out["index"].str.split("/", expand=True)
run_parts = key_parts[0].str.split("_run", expand=True)
lab_parts = run_parts[0].str.split("_lab", expand=True)

out["meta"] = run_parts[0]
out["models"] = key_parts[1]
out["run"] = run_parts[1]
out["data sample"] = lab_parts[0].str.split("har", expand=True)[1]
out["lab"] = lab_parts[1]

# The raw key is redundant once it has been split into columns.
out = out.drop(columns=["index"])

# Summary statistics over the runs of every (model, label %, data %) configuration.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug,10%,100%,3.0,0.934736,0.001674,0.932813,0.93417,0.935528,0.935697,0.935867


# All

In [14]:
from skorch.callbacks import Freezer, LRScheduler


# "All" setting: augmentation enabled and every loss term active.
# Trainers of this experiment; the keys appear as "<dataset tag>/<model name>"
# in the log output and are parsed that way by the summary cell.
data_trainers = {}

# Three repetitions, each with a freshly drawn random split seed.
for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.1]:
            # Re-instantiate the models and warm-start the classifier from the pretrained transformer.
            load_pretrained_(models, [sampling_perc])

            # Rebinds the global looked up (at call time) by the `get_lambda_sup`
            # lambda below; presumably a linear 1 -> 50 schedule over all training steps.
            get_lambda_clf=HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # NOTE(review): the seed comes from an unseeded `random`, so the
            # label subset differs between executions of this cell.
            data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                              n_labels=label_perc, 
                                                              data_perc=sampling_perc, 
                                                              dev_size=0,
                                                              seed=random.randint(0,10000),
                                                              is_augment=True)
            # size of the (augmented) training set before the test set is appended
            print(len(data_train))

            # add test as unlabeled data (target -1 marks an unlabeled example)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            # Positional arguments: {dataset tag: (train, validation)} and {model name: model}.
            # The test set doubles as validation set.
            data_trainers.update(train_models_({"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run): 
                                                (data_train, data_test)}, 
                                  {k + "_finetune" :m for k,m in models.items() if "ssl_classifier" in k}, 
                                  criterion=partial(NeuralProcessSSLLoss, 
                                                    get_lambda_sup=lambda: get_lambda_clf(True),
                                                    n_max_elements=int(128*sampling_perc),
                                                    label_perc=(label_perc * data_train.n_train)/data_train.n_total, # label perc is lower because the test set was concatenated
                                                    min_sigma=min_std,
                                                    is_unsup_forall=False,
                                                    is_ssl_only=False,
                                                    get_lambda_unsup=lambda: 1,
                                                     get_lambda_ent=lambda: 0.5,  # both do something similar
                                                     get_lambda_neg_cons=lambda: 0.5,
                                                    ),
                                    patience=15,
                                  chckpnt_dirname=chckpnt_dirname,
                                  max_epochs=N_EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  is_retrain=IS_RETRAIN,
                                    is_monitor_acc=True,
                                  callbacks=[],
                                  # training: random context / target split on a duplicated batch
                                  iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),  
                                  # validation: every point is context and target
                                  iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                              ))



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
12504

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune best epoch: 1 val_loss: 0.1426355674544359

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
12504

--- Loading har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune best epoch: 1 val_loss: 0.14788568104815167

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
12504

--- Loading har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har100%_la

In [15]:
import pandas as pd

# Validation accuracy stored in the last history entry of each trainer,
# indexed by the trainer key "<meta>/<model>".
# NOTE(review): history[-1] is presumably the last recorded epoch, which may
# differ from the "best epoch" shown in the training log -- confirm this is intended.
final_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(final_acc).reset_index(name="accuracy")

# Peel the key apart: "<meta>/<model>", then "..._run<run>", then "har<sample>_lab<lab>".
key_parts = out["index"].str.split("/", expand=True)
run_parts = key_parts[0].str.split("_run", expand=True)
lab_parts = run_parts[0].str.split("_lab", expand=True)

out["meta"] = run_parts[0]
out["models"] = key_parts[1]
out["run"] = run_parts[1]
out["data sample"] = lab_parts[0].str.split("har", expand=True)[1]
out["lab"] = lab_parts[1]

# The raw key is redundant once it has been split into columns.
out = out.drop(columns=["index"])

# Summary statistics over the runs of every (model, label %, data %) configuration.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune,10%,100%,3.0,0.951929,0.004755,0.947065,0.94961,0.952155,0.95436,0.956566


## Without Neg Consistency

In [16]:
from skorch.callbacks import Freezer, LRScheduler

# "Without Neg Consistency" ablation: the negative-consistency weight is 0 and
# the entropy weight is raised to 1.
# Trainers of this experiment; the keys appear as "<dataset tag>/<model name>"
# in the log output and are parsed that way by the summary cell.
data_trainers = {}

# Three repetitions, each with a freshly drawn random split seed.
for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.1]:
            # Re-instantiate the models and warm-start the classifier from the pretrained transformer.
            load_pretrained_(models, [sampling_perc])

            # Rebinds the global looked up (at call time) by the `get_lambda_sup`
            # lambda below; presumably a linear 1 -> 50 schedule over all training steps.
            get_lambda_clf=HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # NOTE(review): the seed comes from an unseeded `random`, so the
            # label subset differs between executions of this cell.
            data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                              n_labels=label_perc, 
                                                              data_perc=sampling_perc, 
                                                              dev_size=0,
                                                              seed=random.randint(0,10000),
                                                              is_augment=True)

            # add test as unlabeled data (target -1 marks an unlabeled example)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            # Positional arguments: {dataset tag: (train, validation)} and {model name: model}.
            # The test set doubles as validation set.
            data_trainers.update(train_models_({"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run):  
                                                (data_train, data_test)}, 
                                  {k + "_finetune_nonegcons" :m for k,m in models.items() if "ssl_classifier" in k}, 
                                  criterion=partial(NeuralProcessSSLLoss, 
                                                    get_lambda_sup=lambda: get_lambda_clf(True),
                                                    n_max_elements=int(128*sampling_perc),
                                                    label_perc=(label_perc * data_train.n_train)/data_train.n_total, # label perc is lower because the test set was concatenated
                                                    min_sigma=min_std,
                                                    is_unsup_forall=False,
                                                    is_ssl_only=False,
                                                    get_lambda_unsup=lambda: 1,
                                                     get_lambda_ent=lambda: 1,  # both do something similar
                                                     get_lambda_neg_cons=lambda: 0.,
                                                    ),
                                    patience=15,
                                  chckpnt_dirname=chckpnt_dirname,
                                  max_epochs=N_EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  is_retrain=IS_RETRAIN,
                                    is_monitor_acc=True,
                                  callbacks=[],
                                  # training: random context / target split on a duplicated batch
                                  iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),  
                                  # validation: every point is context and target
                                  iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                              ))



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons best epoch: 1 val_loss: 0.16748589485590443

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons ---

har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons best epoch: 2 val_loss: 0.1616656260042232

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck_fi

In [17]:
import pandas as pd

# Validation accuracy stored in the last history entry of each trainer,
# indexed by the trainer key "<meta>/<model>".
# NOTE(review): history[-1] is presumably the last recorded epoch, which may
# differ from the "best epoch" shown in the training log -- confirm this is intended.
final_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(final_acc).reset_index(name="accuracy")

# Peel the key apart: "<meta>/<model>", then "..._run<run>", then "har<sample>_lab<lab>".
key_parts = out["index"].str.split("/", expand=True)
run_parts = key_parts[0].str.split("_run", expand=True)
lab_parts = run_parts[0].str.split("_lab", expand=True)

out["meta"] = run_parts[0]
out["models"] = key_parts[1]
out["run"] = run_parts[1]
out["data sample"] = lab_parts[0].str.split("har", expand=True)[1]
out["lab"] = lab_parts[1]

# The raw key is redundant once it has been split into columns.
out = out.drop(columns=["index"])

# Summary statistics over the runs of every (model, label %, data %) configuration.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons,10%,100%,3.0,0.949327,0.00193,0.947743,0.948252,0.948761,0.950119,0.951476


# No Entropy

In [18]:
from skorch.callbacks import Freezer, LRScheduler

# "No Entropy" ablation: the entropy weight is 0 and the negative-consistency
# weight is raised to 1.
# Trainers of this experiment; the keys appear as "<dataset tag>/<model name>"
# in the log output and are parsed that way by the summary cell.
data_trainers = {}

# Three repetitions, each with a freshly drawn random split seed.
for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.1]:
            # Re-instantiate the models and warm-start the classifier from the pretrained transformer.
            load_pretrained_(models, [sampling_perc])

            # Rebinds the global looked up (at call time) by the `get_lambda_sup`
            # lambda below; presumably a linear 1 -> 50 schedule over all training steps.
            get_lambda_clf=HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # NOTE(review): the seed comes from an unseeded `random`, so the
            # label subset differs between executions of this cell.
            data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                              n_labels=label_perc, 
                                                              data_perc=sampling_perc, 
                                                              dev_size=0,
                                                              seed=random.randint(0,10000),
                                                              is_augment=True)

            # add test as unlabeled data (target -1 marks an unlabeled example)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            # Positional arguments: {dataset tag: (train, validation)} and {model name: model}.
            # The test set doubles as validation set.
            data_trainers.update(train_models_({"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run):
                                                (data_train, data_test)}, 
                                  {k + "_finetune_noent" :m for k,m in models.items() if "ssl_classifier" in k}, 
                                  criterion=partial(NeuralProcessSSLLoss, 
                                                    get_lambda_sup=lambda: get_lambda_clf(True),
                                                    n_max_elements=int(128*sampling_perc),
                                                    label_perc=(label_perc * data_train.n_train)/data_train.n_total, # label perc is lower because the test set was concatenated
                                                    min_sigma=min_std,
                                                    is_unsup_forall=False,
                                                    is_ssl_only=False,
                                                    get_lambda_unsup=lambda: 1,
                                                     get_lambda_ent=lambda: 0,  # both do something similar
                                                     get_lambda_neg_cons=lambda: 1.,
                                                    ),
                                    patience=15,
                                  chckpnt_dirname=chckpnt_dirname,
                                  max_epochs=N_EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  is_retrain=IS_RETRAIN,
                                    is_monitor_acc=True,
                                  callbacks=[],
                                  # training: random context / target split on a duplicated batch
                                  iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),  
                                  # validation: every point is context and target
                                  iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                              ))



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent best epoch: 1 val_loss: 0.15668519885409757

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent ---

har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent best epoch: 1 val_loss: 0.1925992108135756

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent ---

In [19]:
import pandas as pd

# Validation accuracy stored in the last history entry of each trainer,
# indexed by the trainer key "<meta>/<model>".
# NOTE(review): history[-1] is presumably the last recorded epoch, which may
# differ from the "best epoch" shown in the training log -- confirm this is intended.
final_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(final_acc).reset_index(name="accuracy")

# Peel the key apart: "<meta>/<model>", then "..._run<run>", then "har<sample>_lab<lab>".
key_parts = out["index"].str.split("/", expand=True)
run_parts = key_parts[0].str.split("_run", expand=True)
lab_parts = run_parts[0].str.split("_lab", expand=True)

out["meta"] = run_parts[0]
out["models"] = key_parts[1]
out["run"] = run_parts[1]
out["data sample"] = lab_parts[0].str.split("har", expand=True)[1]
out["lab"] = lab_parts[1]

# The raw key is redundant once it has been split into columns.
out = out.drop(columns=["index"])

# Summary statistics over the runs of every (model, label %, data %) configuration.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_noent,10%,100%,3.0,0.945368,0.001796,0.943332,0.94469,0.946047,0.946386,0.946725


## No Unsup

In [20]:
from skorch.callbacks import Freezer, LRScheduler

# "No Unsup" ablation: the unsupervised loss weight is 0; the entropy and
# negative-consistency weights stay at 0.5.
# Trainers of this experiment; the keys appear as "<dataset tag>/<model name>"
# in the log output and are parsed that way by the summary cell.
data_trainers = {}

# Three repetitions, each with a freshly drawn random split seed.
for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.1]:
            # Re-instantiate the models and warm-start the classifier from the pretrained transformer.
            load_pretrained_(models, [sampling_perc])

            # Rebinds the global looked up (at call time) by the `get_lambda_sup`
            # lambda below; presumably a linear 1 -> 50 schedule over all training steps.
            get_lambda_clf=HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # NOTE(review): the seed comes from an unseeded `random`, so the
            # label subset differs between executions of this cell.
            data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                              n_labels=label_perc, 
                                                              data_perc=sampling_perc, 
                                                              seed=random.randint(0,10000),
                                                              dev_size=0,
                                                              is_augment=True)

            # add test as unlabeled data (target -1 marks an unlabeled example)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            # Positional arguments: {dataset tag: (train, validation)} and {model name: model}.
            # The test set doubles as validation set.
            data_trainers.update(train_models_({"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run):
                                                (data_train, data_test)}, 
                                  {k + "_finetune_nounsup" :m for k,m in models.items() if "ssl_classifier" in k}, 
                                  criterion=partial(NeuralProcessSSLLoss, 
                                                    get_lambda_sup=lambda: get_lambda_clf(True),
                                                    n_max_elements=int(128*sampling_perc),
                                                    label_perc=(label_perc * data_train.n_train)/data_train.n_total, # label perc is lower because the test set was concatenated
                                                    min_sigma=min_std,
                                                    is_unsup_forall=False,
                                                    is_ssl_only=False,
                                                    get_lambda_unsup=lambda: 0,
                                                     get_lambda_ent=lambda: 0.5,  # both do something similar
                                                     get_lambda_neg_cons=lambda: .5,
                                                    ),
                                    patience=15,
                                  chckpnt_dirname=chckpnt_dirname,
                                  max_epochs=N_EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  is_retrain=IS_RETRAIN,
                                    is_monitor_acc=True,
                                  callbacks=[],
                                  # training: random context / target split on a duplicated batch
                                  iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),  
                                  # validation: every point is context and target
                                  iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                              ))



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup best epoch: 1 val_loss: 0.15670357431684714

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup ---

har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup best epoch: 1 val_loss: 0.16049837192358557

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_

In [21]:
import pandas as pd

# Last recorded validation accuracy of every trainer. Keys look like
# "har<data>%_lab<lab>%_run<run>/<model name>".
final_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(final_acc).reset_index(name="accuracy")

# Peel the key apart from the outside in:
# "<meta>/<model>"  ->  "<meta>_run<run>"  ->  "har<data>_lab<lab>".
key_parts = out["index"].str.split("/", expand=True)
meta_run = key_parts[0].str.split("_run", expand=True)
data_lab = meta_run[0].str.split("_lab", expand=True)

out["meta"] = meta_run[0]
out["models"] = key_parts[1]
out["run"] = meta_run[1]
out["data sample"] = data_lab[0].str.split("har", expand=True)[1]
out["lab"] = data_lab[1]

# The raw key is fully decomposed into the columns above.
out = out.drop(columns=["index"])

# Accuracy statistics across runs for each (model, label %, data %) setting.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup,10%,100%,3.0,0.952607,0.003738,0.948761,0.950797,0.952833,0.95453,0.956227


#  SSL Only

In [12]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

# Seed the stdlib RNG so the dataset seeds drawn with random.randint below are
# identical on every execution of this cell (the "Sup Only" cell does the same).
random.seed(1)

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.1]:
            load_pretrained_(models, [sampling_perc])

            # presumably schedules the supervised-loss weight between 1 and 50 over
            # N_EPOCHS*n_steps_per_epoch steps -- TODO confirm against HyperparameterInterpolator
            get_lambda_clf = HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            data_train, _, data_test = get_train_dev_test_ssl("har",
                                                              n_labels=label_perc,
                                                              data_perc=sampling_perc,
                                                              seed=random.randint(0, 10000),
                                                              dev_size=0,
                                                              is_augment=True)

            # add test as unlabeled data: its targets are replaced by -1
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            data_trainers.update(train_models_(
                # one dataset entry per run, keyed "har<data>%_lab<lab>%_run<run>"
                {"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run):
                 (data_train, data_test)},
                # only the SSL classifiers are fine-tuned, under a "_finetune_sslonly" suffix
                {k + "_finetune_sslonly": m for k, m in models.items() if "ssl_classifier" in k},
                criterion=partial(
                    NeuralProcessSSLLoss,
                    # the lambda looks up get_lambda_clf at call time, i.e. the
                    # interpolator created in the current loop iteration
                    get_lambda_sup=lambda: get_lambda_clf(True),
                    n_max_elements=int(128*sampling_perc),
                    # label perc is lower because the test set was concatenated
                    label_perc=(label_perc * data_train.n_train)/data_train.n_total,
                    min_sigma=min_std,
                    is_unsup_forall=False,
                    is_ssl_only=True,
                    get_lambda_unsup=lambda: 1,
                    get_lambda_ent=lambda: 0.5,  # both do something similar
                    get_lambda_neg_cons=lambda: .5,
                ),
                patience=15,
                chckpnt_dirname=chckpnt_dirname,
                max_epochs=N_EPOCHS,
                batch_size=BATCH_SIZE,
                is_retrain=IS_RETRAIN,
                seed=None,
                is_monitor_acc=True,
                callbacks=[],
                iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
            ))



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Training har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly ---



HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

  epoch    train_loss    valid_acc    valid_loss    cp      dur
-------  ------------  -----------  ------------  ----  -------
      1        [36m0.4417[0m       [32m0.9179[0m        [35m0.2222[0m     +  81.2856


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      2        [36m0.2254[0m       [32m0.9365[0m        [35m0.2180[0m     +  81.4742


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      3        0.2322       0.9175        0.3426        79.5648


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      4        [36m0.1684[0m       0.9233        0.4265        80.1608


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      5        0.1774       0.9199        0.3272        78.5329


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      6        0.3132       0.9342        [35m0.1853[0m        80.5541


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      7        [36m0.0831[0m       0.9332        0.3177        80.3634


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      8        0.2907       [32m0.9416[0m        0.2979     +  79.2275


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      9        [36m0.0259[0m       0.9410        0.2815        80.1413


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     10        0.1804       0.9332        0.4104        80.9457


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     11        [36m0.0036[0m       0.9403        0.4545        52.6508


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     12       [36m-0.0321[0m       0.9379        0.4363        53.5234


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     13        0.1342       0.9359        0.4372        56.1822


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     14       -0.0294       0.9355        0.4397        55.7584


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     15        0.6576       [32m0.9535[0m        0.2205     +  56.1551


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     16        0.1499       [32m0.9610[0m        [35m0.1699[0m     +  59.7283


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     17        0.0380       0.9484        0.3082        79.3126


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     18        0.1501       0.9508        0.2351        80.6645


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     19        0.2064       0.9545        0.2387        81.4052


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     20       -0.0122       0.9532        0.2655        79.7867


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     21        0.0504       0.9518        0.2843        80.6573


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     22        0.0338       0.9505        0.3636        77.3600


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     23        1.0224       0.9501        0.2616        79.1576


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     24        0.0465       0.9430        0.4965        80.8012


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     25        0.3215       0.9562        0.2582        80.0738


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     26        0.1287       0.9559        0.2612        81.2201


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     27        0.6983       0.9539        0.2628        79.9133


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     28        0.0938       0.9471        0.3304        78.7809


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     29        0.1093       0.9359        0.3704        80.5431


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     30        0.1500       0.9393        0.3903        79.6116


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

Stopping since valid_acc has not improved in the last 15 epochs.
Re-initializing optimizer.
har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly best epoch: 16 val_loss: 0.1699327095346933

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Training har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly ---



HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

  epoch    train_loss    valid_acc    valid_loss    cp      dur
-------  ------------  -----------  ------------  ----  -------
      1        [36m0.4851[0m       [32m0.9386[0m        [35m0.1679[0m     +  80.9580


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      2        [36m0.3073[0m       [32m0.9440[0m        0.1987     +  80.1387


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      3        [36m0.2316[0m       0.9365        0.2588        58.7042


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      4        [36m0.0852[0m       [32m0.9566[0m        [35m0.1664[0m     +  55.8342


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      5       [36m-0.0229[0m       0.9481        0.2144        54.8578


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      6        0.3217       0.9376        0.2315        56.0347


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      7        0.4279       [32m0.9596[0m        [35m0.1598[0m     +  55.8229


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      8        0.1866       0.9464        0.2456        53.9710


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

      9        0.1498       0.9505        0.1924        55.9046


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     10       -0.0129       0.9511        0.3378        54.3489


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     11       -0.0095       0.9539        0.2092        53.2954


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     12        0.0055       0.9454        0.3480        52.4598


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     13        0.8728       0.9423        0.2934        53.5952


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     14        0.3215       0.9488        0.3230        53.4625


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     15        0.0818       0.9522        0.2538        55.1835


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     16        0.0504       0.9460        0.3109        55.1996


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     17        0.1241       0.9321        0.3091        54.7527


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     18        0.0342       0.9427        0.2807        55.6552


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     19        0.1476       0.9539        0.2480        55.3252


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     20        0.0428       0.9365        0.4001        54.9211


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

     21        0.4555       0.9498        0.2313        55.4418


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

Stopping since valid_acc has not improved in the last 15 epochs.
Re-initializing optimizer.
har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly best epoch: 7 val_loss: 0.15977995857611565


In [13]:
import pandas as pd

# Last recorded validation accuracy of every trainer. Keys look like
# "har<data>%_lab<lab>%_run<run>/<model name>".
final_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(final_acc).reset_index(name="accuracy")

# Peel the key apart from the outside in:
# "<meta>/<model>"  ->  "<meta>_run<run>"  ->  "har<data>_lab<lab>".
key_parts = out["index"].str.split("/", expand=True)
meta_run = key_parts[0].str.split("_run", expand=True)
data_lab = meta_run[0].str.split("_lab", expand=True)

out["meta"] = meta_run[0]
out["models"] = key_parts[1]
out["run"] = meta_run[1]
out["data sample"] = data_lab[0].str.split("har", expand=True)[1]
out["lab"] = data_lab[1]

# The raw key is fully decomposed into the columns above.
out = out.drop(columns=["index"])

# Accuracy statistics across runs for each (model, label %, data %) setting.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly,10%,100%,2.0,0.960299,0.00096,0.95962,0.959959,0.960299,0.960638,0.960977


## Sup Only

In [14]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

# Fixed seed so the dataset seeds drawn with random.randint below are
# identical on every execution of this cell.
random.seed(1)

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.1]:
            load_pretrained_(models, [sampling_perc])

            # presumably schedules the supervised-loss weight between 1 and 50 over
            # N_EPOCHS*n_steps_per_epoch steps -- TODO confirm against HyperparameterInterpolator
            get_lambda_clf = HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            data_train, _, data_test = get_train_dev_test_ssl("har",
                                                              n_labels=label_perc,
                                                              data_perc=sampling_perc,
                                                              seed=random.randint(0, 10000),
                                                              dev_size=0,
                                                              is_augment=True)

            # Supervised-only baseline: keep only the labeled rows (target != -1).
            # The mask is computed once, before either attribute is overwritten.
            is_labeled = (data_train.targets != -1).squeeze()
            data_train.data = data_train.data[is_labeled]
            data_train.targets = data_train.targets[is_labeled]
            # NOTE(review): data_train.indcs is not filtered here, whereas the SSL-only
            # cell keeps it row-aligned with data/targets -- confirm whether it should
            # be masked with is_labeled as well.

            data_trainers.update(train_models_(
                # one dataset entry per run, keyed "har<data>%_lab<lab>%_run<run>"
                {"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run):
                 (data_train, data_test)},
                # only the SSL classifiers are fine-tuned, under a "_finetune_suponly" suffix
                {k + "_finetune_suponly": m for k, m in models.items() if "ssl_classifier" in k},
                criterion=partial(
                    NeuralProcessSSLLoss,
                    # the lambda looks up get_lambda_clf at call time, i.e. the
                    # interpolator created in the current loop iteration
                    get_lambda_sup=lambda: get_lambda_clf(True),
                    n_max_elements=int(128*sampling_perc),
                    # NOTE(review): expression carried over from the SSL-only cell, where the
                    # test set is concatenated to the training set; confirm it is still the
                    # intended value now that only labeled rows remain.
                    label_perc=(label_perc * data_train.n_train)/data_train.n_total,
                    min_sigma=min_std,
                    is_unsup_forall=False,
                    is_ssl_only=True,
                    get_lambda_unsup=lambda: 0,  # unsupervised term weighted by 0
                    get_lambda_ent=lambda: 0.5,  # both do something similar
                    get_lambda_neg_cons=lambda: .5,
                ),
                patience=15,
                chckpnt_dirname=chckpnt_dirname,
                max_epochs=N_EPOCHS,
                batch_size=BATCH_SIZE,
                is_retrain=IS_RETRAIN,
                is_monitor_acc=True,
                seed=None,
                callbacks=[],
                iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
            ))



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Training har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly ---



HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

  epoch    train_loss    valid_acc    valid_loss    cp      dur
-------  ------------  -----------  ------------  ----  -------
      1        [36m1.7205[0m       [32m0.9420[0m        [35m0.1594[0m     +  17.2882


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      2        [36m0.5487[0m       [32m0.9522[0m        [35m0.1119[0m     +  16.9470


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      3        [36m0.5105[0m       0.9352        0.2376        17.3083


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      4        0.5581       0.9491        0.1546        17.1064


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      5        [36m0.3509[0m       0.9427        0.2236        17.4736


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      6        [36m0.2475[0m       0.9447        0.2271        17.4863


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      7        [36m0.1133[0m       0.9450        0.2571        17.4543


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      8        0.1590       0.9253        0.4345        17.1062


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      9        0.5702       0.9440        0.1866        16.9203


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     10        0.2507       0.9477        0.1835        17.1786


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     11        0.3096       0.9403        0.2373        17.7751


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     12        0.4494       0.9498        0.2714        17.6188


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     13        0.1617       0.9498        0.2671        17.6785


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     14        0.1607       [32m0.9528[0m        0.2394     +  17.1189


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     15        [36m0.1072[0m       0.9477        0.3096        17.5728


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     16        0.1534       0.9318        0.3020        16.9922


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     17        0.7972       [32m0.9583[0m        0.1365     +  17.7162


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     18        0.1902       0.9498        0.1947        17.5865


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     19        0.2807       0.9379        0.2225        17.4033


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     20        0.2163       0.9420        0.3061        17.7230


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     21        0.3057       0.9233        0.5980        17.6642


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     22        1.0109       0.9284        0.3515        17.3006


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     23        0.6165       0.9352        0.3549        17.6430


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     24        0.2033       0.9355        0.3523        17.5848


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     25        0.1782       0.9382        0.3394        16.6851


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     26        0.1281       0.9345        0.3433        17.4630


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     27        [36m0.0849[0m       0.9437        0.2857        17.5177


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     28        0.1738       0.9413        0.3156        17.4152


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     29        0.8849       0.9542        0.3972        17.3130


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     30        2.1433       0.9376        0.4193        17.3452


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     31        0.2265       0.9365        0.3928        17.4978


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

Stopping since valid_acc has not improved in the last 15 epochs.
Re-initializing optimizer.
har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly best epoch: 2 val_loss: 0.11194615579274406

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Training har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly ---



HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

  epoch    train_loss    valid_acc    valid_loss    cp      dur
-------  ------------  -----------  ------------  ----  -------
      1        [36m2.0968[0m       [32m0.9393[0m        [35m0.1887[0m     +  17.6020


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      2        [36m0.5852[0m       0.9355        0.1984        17.3802


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      3        [36m0.3013[0m       0.9386        0.2158        17.3227


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      4        0.3896       [32m0.9549[0m        [35m0.1170[0m     +  16.8216


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      5        0.5356       0.9535        0.1525        17.2007


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      6        0.4578       0.9539        0.1842        17.3159


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      7        [36m0.1223[0m       [32m0.9593[0m        0.1819     +  17.3260


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      8        0.4661       [32m0.9661[0m        0.1204     +  17.0947


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      9        0.9383       0.9498        0.1822        16.5356


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     10        0.5322       0.9569        0.1519        17.2610


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     11        0.2184       0.9617        0.1346        17.2033


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     12        0.2378       0.9549        0.1781        17.5254


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     13        [36m0.1151[0m       0.9600        0.1333        17.7832


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     14        0.1597       0.9654        0.1217        17.4616


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     15        [36m0.0777[0m       0.9603        0.1510        17.4753


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     16        0.1305       0.9508        0.2105        17.0208


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     17        0.9957       0.9332        0.3301        16.4667


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     18        0.6338       0.9501        0.1676        16.9288


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     19        0.2291       0.9559        0.2176        17.2005


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     20        0.1676       0.9457        0.2701        16.5841


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     21        0.1567       0.9505        0.2601        16.9154


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     22        0.1856       0.9488        0.2931        16.0577


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

Stopping since valid_acc has not improved in the last 15 epochs.
Re-initializing optimizer.
har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly best epoch: 4 val_loss: 0.11700662759186091

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Training har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly ---



HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

  epoch    train_loss    valid_acc    valid_loss    cp      dur
-------  ------------  -----------  ------------  ----  -------
      1        [36m1.7188[0m       [32m0.9291[0m        [35m0.1610[0m     +  17.4609


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      2        [36m0.5189[0m       [32m0.9528[0m        [35m0.1379[0m     +  17.0388


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      3        [36m0.3240[0m       [32m0.9593[0m        0.1583     +  16.5276


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      4        [36m0.2981[0m       [32m0.9596[0m        [35m0.1262[0m     +  16.2472


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      5        0.5236       0.9216        0.3746        16.4081


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      6        [36m0.1333[0m       [32m0.9674[0m        [35m0.1170[0m     +  16.5778


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      7        [36m0.1246[0m       0.9664        0.1190        17.4434


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      8        0.4408       0.9471        0.1968        17.1569


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      9        0.2640       0.9437        0.2380        17.2770


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     10        0.1300       0.9562        0.1961        17.2334


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     11        [36m0.0856[0m       0.9589        0.2079        17.2196


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     12        0.1434       0.9610        0.1802        17.3788


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     13        0.1442       0.9467        0.3021        17.1873


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     14        0.2874       0.9545        0.1884        16.8735


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     15        0.5641       0.9491        0.1636        17.6431


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     16        0.3760       0.9566        0.1812        17.3251


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     17        0.1470       0.9539        0.2167        17.5550


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     18        0.0942       0.9576        0.2046        17.5022


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     19        0.0968       0.9600        0.2575        17.1090


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     20        0.1680       0.9494        0.2645        16.5031


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

Stopping since valid_acc has not improved in the last 15 epochs.
Re-initializing optimizer.
har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly best epoch: 6 val_loss: 0.1170268439825891


In [15]:
import pandas as pd

# Last recorded validation accuracy of every trainer. Keys look like
# "har<data>%_lab<lab>%_run<run>/<model name>".
final_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(final_acc).reset_index(name="accuracy")

# Peel the key apart from the outside in:
# "<meta>/<model>"  ->  "<meta>_run<run>"  ->  "har<data>_lab<lab>".
key_parts = out["index"].str.split("/", expand=True)
meta_run = key_parts[0].str.split("_run", expand=True)
data_lab = meta_run[0].str.split("_lab", expand=True)

out["meta"] = meta_run[0]
out["models"] = key_parts[1]
out["run"] = meta_run[1]
out["data sample"] = data_lab[0].str.split("har", expand=True)[1]
out["lab"] = data_lab[1]

# The raw key is fully decomposed into the columns above.
out = out.drop(columns=["index"])

# Accuracy statistics across runs for each (model, label %, data %) setting.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly,10%,100%,3.0,0.963918,0.004945,0.958263,0.962165,0.966067,0.966746,0.967424


# Sup Only No Scaling

In [14]:
from skorch.callbacks import Freezer, LRScheduler

# Supervised-only baseline ("_finetune_sup_vanilla"): every selected model is
# trained on the labelled training examples only, with constant loss weights
# (get_lambda_sup -> 1, get_lambda_unsup -> 0) and no label/element rescaling.
# Resulting trainers are collected in `data_trainers`, keyed by
# "har<data>%_lab<lab>%_run<run>/<model name>".
data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.1]:
            # reload the pretrained model(s) before each run
            load_pretrained_(models, [sampling_perc])

            # Not used by this cell's criterion (get_lambda_sup is the constant 1);
            # kept so the global is (re)defined exactly as in the sibling cells.
            get_lambda_clf = HyperparameterInterpolator(1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear")

            # a fresh random split seed is drawn for every run
            data_train, _, data_test = get_train_dev_test_ssl("har",
                                                              n_labels=label_perc,
                                                              data_perc=sampling_perc,
                                                              seed=random.randint(0, 10000),
                                                              dev_size=0,
                                                              is_augment=True)

            # Keep only the labelled examples (unlabelled ones carry target -1).
            # The mask is computed once and applied to every per-example array so
            # that they stay aligned with each other.
            # NOTE(review): `indcs` is treated as a per-example array here, as in
            # the sibling cells that concatenate it along axis 0 -- confirm.
            is_labelled = (data_train.targets != -1).squeeze()
            data_train.data = data_train.data[is_labelled]
            data_train.targets = data_train.targets[is_labelled]
            data_train.indcs = data_train.indcs[is_labelled]

            data_trainers.update(train_models_({"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run):
                                                (data_train, data_test)},
                                               {k + "_finetune_sup_vanilla": m for k, m in models.items() if "ssl_classifier" in k},
                                               criterion=partial(NeuralProcessSSLLoss,
                                                                 get_lambda_sup=lambda: 1,
                                                                 n_max_elements=None,
                                                                 label_perc=None,  # no label-percentage value is passed in this baseline
                                                                 min_sigma=min_std,
                                                                 is_unsup_forall=False,
                                                                 is_ssl_only=True,
                                                                 get_lambda_unsup=lambda: 0,
                                                                 ),
                                               patience=15,
                                               chckpnt_dirname=chckpnt_dirname,
                                               max_epochs=N_EPOCHS,
                                               batch_size=BATCH_SIZE,
                                               is_retrain=True,  # always retrain rather than honouring IS_RETRAIN
                                               seed=None,
                                               is_monitor_acc=True,
                                               callbacks=[],
                                               iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                                               iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                               ))


--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Training har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla ---



HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

  epoch    train_loss    valid_acc    valid_loss    cp      dur
-------  ------------  -----------  ------------  ----  -------
      1        [36m0.2326[0m       [32m0.9233[0m        [35m0.2089[0m     +  17.7597


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      2        [36m0.0824[0m       [32m0.9535[0m        [35m0.1454[0m     +  17.5394


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      3        [36m0.0482[0m       [32m0.9610[0m        [35m0.1342[0m     +  17.2043


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      4        0.0490       0.9481        0.1779        17.8247


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      5        [36m0.0459[0m       0.9372        0.2108        17.7624


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      6        [36m0.0389[0m       0.9477        0.1558        17.2868


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      7        0.0447       0.9494        0.1633        17.3184


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      8        [36m0.0306[0m       0.9562        0.1450        17.1968


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      9        [36m0.0279[0m       [32m0.9647[0m        [35m0.1194[0m     +  17.6493


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     10        [36m0.0196[0m       0.9627        0.1326        17.5911


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     11        0.0343       0.9467        0.2448        17.4581


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     12        0.0234       0.9488        0.1880        17.5720


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     13        0.0294       0.9623        0.1638        17.7644


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     14        0.0358       0.9572        0.1716        17.0836


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     15        0.0341       0.9457        0.2181        17.5609


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     16        0.0234       0.9430        0.2331        17.6432


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     17        [36m0.0189[0m       0.9369        0.2663        17.6876


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     18        0.0291       0.9301        0.2348        17.5801


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     19        0.0196       0.9437        0.2168        17.4684


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     20        0.0311       0.9348        0.3361        17.4953


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     21        0.0246       0.9359        0.2405        17.5650


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     22        0.0229       0.9430        0.3039        17.5389


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     23        0.0204       0.9379        0.2426        16.9236


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

Stopping since valid_acc has not improved in the last 15 epochs.
Re-initializing optimizer.
har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla best epoch: 9 val_loss: 0.11944592917293632

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Training har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla ---



HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

  epoch    train_loss    valid_acc    valid_loss    cp      dur
-------  ------------  -----------  ------------  ----  -------
      1        [36m0.2482[0m       [32m0.9430[0m        [35m0.1434[0m     +  16.0712


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      2        [36m0.0784[0m       [32m0.9518[0m        0.1468     +  15.9407


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      3        [36m0.0514[0m       [32m0.9586[0m        0.1504     +  16.0849


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      4        [36m0.0457[0m       [32m0.9647[0m        [35m0.1242[0m     +  15.9126


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      5        0.0478       0.9576        0.1478        16.3576


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      6        [36m0.0439[0m       0.9634        [35m0.1037[0m        15.7396


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      7        [36m0.0293[0m       0.9498        0.1807        16.3389


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      8        0.0361       0.9623        0.1621        16.1349


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      9        [36m0.0189[0m       [32m0.9667[0m        0.1342     +  16.2840


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     10        0.0258       0.9606        0.1274        16.0784


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     11        0.0231       0.9600        0.1396        16.2483


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     12        0.0299       0.9522        0.1935        16.1761


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     13        0.0309       0.9528        0.2608        16.2893


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     14        0.0236       0.9617        0.1389        15.9906


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     15        0.0274       0.9559        0.1984        16.2180


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     16        0.0240       0.9532        0.1846        16.2448


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     17        0.0195       0.9603        0.1329        15.8780


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     18        0.0284       0.9606        0.1548        16.3068


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     19        0.0225       0.9515        0.2241        15.8676


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     20        0.0345       0.9403        0.3249        16.1969


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     21        0.0526       0.9620        0.1884        16.2671


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     22        0.0358       0.9433        0.2477        15.8537


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     23        0.0269       0.9539        0.2722        15.8218


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

Stopping since valid_acc has not improved in the last 15 epochs.
Re-initializing optimizer.
har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla best epoch: 6 val_loss: 0.1036831834253959

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Training har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla ---



HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

  epoch    train_loss    valid_acc    valid_loss    cp      dur
-------  ------------  -----------  ------------  ----  -------
      1        [36m0.2588[0m       [32m0.9287[0m        [35m0.2280[0m     +  16.0761


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      2        [36m0.1147[0m       [32m0.9427[0m        [35m0.1895[0m     +  16.2848


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      3        [36m0.0690[0m       [32m0.9569[0m        [35m0.1292[0m     +  16.1193


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      4        [36m0.0460[0m       0.9481        0.2074        15.8959


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      5        [36m0.0316[0m       0.9501        0.2123        16.1078


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      6        0.0544       0.9511        0.1601        16.2500


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      7        0.0344       0.9372        0.2712        17.0520


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      8        0.0444       0.9525        0.2027        24.5794


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

      9        0.0319       0.9518        0.2220        24.6843


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     10        0.0367       0.9481        0.2474        24.5883


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     11        [36m0.0231[0m       0.9474        0.2332        24.8233


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     12        0.0432       0.9498        0.2309        25.2457


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     13        0.0297       0.9420        0.2838        24.5041


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     14        0.0239       0.9403        0.2933        25.1758


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     15        [36m0.0174[0m       0.9369        0.3352        24.5751


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     16        0.0406       0.9471        0.2295        24.6295


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

     17        [36m0.0167[0m       0.9528        0.2350        24.9286


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

Stopping since valid_acc has not improved in the last 15 epochs.
Re-initializing optimizer.
har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla best epoch: 3 val_loss: 0.12915067612861947


In [15]:
import pandas as pd

# Tabulate the validation accuracy stored in the last history entry of every
# trainer, then summarise it per (model, label percentage, data percentage).
# Trainer keys look like "har<data>%_lab<lab>%_run<run>/<model name>".
acc_by_key = {key: trainer.history[-1]["valid_acc"] for key, trainer in data_trainers.items()}
out = pd.Series(acc_by_key).reset_index(name="accuracy")

# "<meta>/<model name>"
meta_and_model = out["index"].str.split("/", expand=True)
out["meta"] = meta_and_model[0]
out["models"] = meta_and_model[1]

# "<dataset and label tag>_run<run id>"
tag_and_run = out["meta"].str.split("_run", expand=True)
out["meta"] = tag_and_run[0]
out["run"] = tag_and_run[1]

# "har<data sample>_lab<label percentage>"
data_and_lab = out["meta"].str.split("_lab", expand=True)
out["data sample"] = data_and_lab[0].str.split("har", expand=True)[1]
out["lab"] = data_and_lab[1]

# the raw key is fully decomposed at this point
out = out.drop(columns=["index"])

out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla,10%,100%,3.0,0.962787,0.005194,0.956905,0.960808,0.96471,0.965728,0.966746


# No Lambda CLF

In [24]:
from skorch.callbacks import Freezer, LRScheduler

# Ablation "_finetune_nolambda": same semi-supervised fine-tuning as the
# sibling cells, but the supervised weight is the constant 1 instead of the
# value returned by `get_lambda_clf`.
# Trainers are collected in `data_trainers`, keyed by
# "har<data>%_lab<lab>%_run<run>/<model name>".
data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.1]:
            load_pretrained_(models, [sampling_perc])

            # Defined as in the sibling cells; this cell's criterion never calls it.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0, 10000),
                is_augment=True,
            )

            # Append the test examples to the training set as unlabelled data:
            # their targets are replaced by -1.
            unlabelled_test_targets = -1 * np.ones_like(data_test.targets)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, unlabelled_test_targets], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            run_tag = "har{}%_lab{}%_run{}".format(
                int(sampling_perc * 100), int(label_perc * 100), run
            )
            finetune_models = {
                name + "_finetune_nolambda": model
                for name, model in models.items()
                if "ssl_classifier" in name
            }
            ssl_criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: 1,
                n_max_elements=int(128 * sampling_perc),
                # the label percentage is lower because the test set was
                # concatenated to the training data
                label_perc=(label_perc * data_train.n_train) / data_train.n_total,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: 0.5,
            )

            trained = train_models_(
                {run_tag: (data_train, data_test)},
                finetune_models,
                criterion=ssl_criterion,
                patience=15,
                chckpnt_dirname=chckpnt_dirname,
                max_epochs=N_EPOCHS,
                batch_size=BATCH_SIZE,
                is_retrain=IS_RETRAIN,
                is_monitor_acc=True,
                callbacks=[],
                iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
            )
            data_trainers.update(trained)



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda best epoch: 1 val_loss: 0.16846257951812668

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda ---

har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda best epoch: 1 val_loss: 0.14787215202754517

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck_finet

In [25]:
import pandas as pd

# Tabulate the validation accuracy stored in the last history entry of every
# trainer, then summarise it per (model, label percentage, data percentage).
# Trainer keys look like "har<data>%_lab<lab>%_run<run>/<model name>".
acc_by_key = {key: trainer.history[-1]["valid_acc"] for key, trainer in data_trainers.items()}
out = pd.Series(acc_by_key).reset_index(name="accuracy")

# "<meta>/<model name>"
meta_and_model = out["index"].str.split("/", expand=True)
out["meta"] = meta_and_model[0]
out["models"] = meta_and_model[1]

# "<dataset and label tag>_run<run id>"
tag_and_run = out["meta"].str.split("_run", expand=True)
out["meta"] = tag_and_run[0]
out["run"] = tag_and_run[1]

# "har<data sample>_lab<label percentage>"
data_and_lab = out["meta"].str.split("_lab", expand=True)
out["data sample"] = data_and_lab[0].str.split("har", expand=True)[1]
out["lab"] = data_and_lab[1]

# the raw key is fully decomposed at this point
out = out.drop(columns=["index"])

out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda,10%,100%,3.0,0.956566,0.008333,0.947404,0.953003,0.958602,0.961147,0.963692


## Without Label Scaling

In [26]:
from skorch.callbacks import Freezer, LRScheduler

# Ablation "_finetune_nolabscale": semi-supervised fine-tuning where the
# criterion receives label_perc=None; the supervised weight still comes from
# `get_lambda_clf`.
# Trainers are collected in `data_trainers`, keyed by
# "har<data>%_lab<lab>%_run<run>/<model name>".
data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.1]:
            load_pretrained_(models, [sampling_perc])

            # supervised-weight provider, queried by the criterion through a lambda
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0, 10000),
                is_augment=True,
            )

            # Append the test examples to the training set as unlabelled data:
            # their targets are replaced by -1.
            unlabelled_test_targets = -1 * np.ones_like(data_test.targets)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, unlabelled_test_targets], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            run_tag = "har{}%_lab{}%_run{}".format(
                int(sampling_perc * 100), int(label_perc * 100), run
            )
            finetune_models = {
                name + "_finetune_nolabscale": model
                for name, model in models.items()
                if "ssl_classifier" in name
            }
            ssl_criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: get_lambda_clf(True),
                n_max_elements=int(128 * sampling_perc),
                label_perc=None,  # no label percentage is passed in this ablation
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: 0.5,
            )

            trained = train_models_(
                {run_tag: (data_train, data_test)},
                finetune_models,
                criterion=ssl_criterion,
                patience=15,
                chckpnt_dirname=chckpnt_dirname,
                max_epochs=N_EPOCHS,
                batch_size=BATCH_SIZE,
                is_retrain=IS_RETRAIN,
                is_monitor_acc=True,
                callbacks=[],
                iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
            )
            data_trainers.update(trained)



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale best epoch: 18 val_loss: 0.2315054115376984

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale ---

har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale best epoch: 2 val_loss: 0.18339065070230307

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottlene

In [27]:
import pandas as pd

# Tabulate the validation accuracy stored in the last history entry of every
# trainer, then summarise it per (model, label percentage, data percentage).
# Trainer keys look like "har<data>%_lab<lab>%_run<run>/<model name>".
acc_by_key = {key: trainer.history[-1]["valid_acc"] for key, trainer in data_trainers.items()}
out = pd.Series(acc_by_key).reset_index(name="accuracy")

# "<meta>/<model name>"
meta_and_model = out["index"].str.split("/", expand=True)
out["meta"] = meta_and_model[0]
out["models"] = meta_and_model[1]

# "<dataset and label tag>_run<run id>"
tag_and_run = out["meta"].str.split("_run", expand=True)
out["meta"] = tag_and_run[0]
out["run"] = tag_and_run[1]

# "har<data sample>_lab<label percentage>"
data_and_lab = out["meta"].str.split("_lab", expand=True)
out["data sample"] = data_and_lab[0].str.split("har", expand=True)[1]
out["lab"] = data_and_lab[1]

# the raw key is fully decomposed at this point
out = out.drop(columns=["index"])

out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale,10%,100%,3.0,0.953399,0.007567,0.947743,0.949101,0.950458,0.956227,0.961995


## No Element Scaling

In [28]:
from skorch.callbacks import Freezer, LRScheduler

# Ablation "_finetune_noelemscale": semi-supervised fine-tuning where the
# criterion receives n_max_elements=None; label_perc and the supervised weight
# are set as in the sibling cells.
# Trainers are collected in `data_trainers`, keyed by
# "har<data>%_lab<lab>%_run<run>/<model name>".
data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.1]:
            load_pretrained_(models, [sampling_perc])

            # supervised-weight provider, queried by the criterion through a lambda
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0, 10000),
                is_augment=True,
            )

            # Append the test examples to the training set as unlabelled data:
            # their targets are replaced by -1.
            unlabelled_test_targets = -1 * np.ones_like(data_test.targets)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, unlabelled_test_targets], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            run_tag = "har{}%_lab{}%_run{}".format(
                int(sampling_perc * 100), int(label_perc * 100), run
            )
            finetune_models = {
                name + "_finetune_noelemscale": model
                for name, model in models.items()
                if "ssl_classifier" in name
            }
            ssl_criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: get_lambda_clf(True),
                n_max_elements=None,  # no element cap is passed in this ablation
                # the label percentage is lower because the test set was
                # concatenated to the training data
                label_perc=(label_perc * data_train.n_train) / data_train.n_total,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: 0.5,
            )

            trained = train_models_(
                {run_tag: (data_train, data_test)},
                finetune_models,
                criterion=ssl_criterion,
                patience=15,
                chckpnt_dirname=chckpnt_dirname,
                max_epochs=N_EPOCHS,
                batch_size=BATCH_SIZE,
                is_retrain=IS_RETRAIN,
                is_monitor_acc=True,
                callbacks=[],
                iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
            )
            data_trainers.update(trained)



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale best epoch: 7 val_loss: 0.1779502336963541

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale ---

har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale best epoch: 7 val_loss: 0.20060293440661897

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottl

In [29]:
import pandas as pd

# Tabulate the validation accuracy stored in the last history entry of every
# trainer, then summarise it per (model, label percentage, data percentage).
# Trainer keys look like "har<data>%_lab<lab>%_run<run>/<model name>".
acc_by_key = {key: trainer.history[-1]["valid_acc"] for key, trainer in data_trainers.items()}
out = pd.Series(acc_by_key).reset_index(name="accuracy")

# "<meta>/<model name>"
meta_and_model = out["index"].str.split("/", expand=True)
out["meta"] = meta_and_model[0]
out["models"] = meta_and_model[1]

# "<dataset and label tag>_run<run id>"
tag_and_run = out["meta"].str.split("_run", expand=True)
out["meta"] = tag_and_run[0]
out["run"] = tag_and_run[1]

# "har<data sample>_lab<label percentage>"
data_and_lab = out["meta"].str.split("_lab", expand=True)
out["data sample"] = data_and_lab[0].str.split("har", expand=True)[1]
out["lab"] = data_and_lab[1]

# the raw key is fully decomposed at this point
out = out.drop(columns=["index"])

out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale,10%,100%,3.0,0.951815,0.001889,0.949779,0.950967,0.952155,0.952833,0.953512


In [29]:
# For every trainer, report the most recent epoch whose history entry is
# flagged `valid_acc_best`, together with its validation loss and accuracy.
# Nothing is printed for a trainer that has no flagged epoch.
for name, trainer in data_trainers.items():
    n_epochs = len(trainer.history)
    # walk the history newest-first; `n_back` counts epochs from the end
    for n_back, record in enumerate(trainer.history[::-1]):
        if not record["valid_acc_best"]:
            continue
        print(name, "epoch:", n_epochs - n_back,
              "val_loss:", record["valid_loss"],
              "val_acc:", record["valid_acc"])
        break

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale epoch: 7 val_loss: 0.1779502336963541 val_acc: 0.9429928741092637
har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale epoch: 7 val_loss: 0.20060293440661897 val_acc: 0.9399389209365456
har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale epoch: 7 val_loss: 0.15404326194903015 val_acc: 0.9535120461486257


## No Finetuning

In [30]:
from skorch.callbacks import Freezer, LRScheduler

# "No finetuning" baseline: train the SSL classifier for 3 runs on 100% of the samples
# with label_perc=0.1, collecting every resulting trainer in `data_trainers`.
data_trainers = {}
# Rebind the SSL classifier entry to a fresh factory instead of reusing what was stored before.
# NOTE(review): presumably this is what drops the pretrained weights — confirm.
models["ssl_classifier_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs)

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.1]:

            # NOTE(review): presumably ramps the supervised-loss weight linearly from 1 to 50
            # over N_EPOCHS*n_steps_per_epoch steps — confirm against HyperparameterInterpolator.
            get_lambda_clf=HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # dev_size=0: the middle (dev) return value is ignored.
            # A new random seed is drawn for every run, so splits differ between executions
            # unless `random` is seeded elsewhere.
            data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                              n_labels=label_perc, 
                                                              data_perc=sampling_perc, 
                                                              dev_size=0,
                                                              seed=random.randint(0,10000),
                                                              is_augment=True)

            # add test as unlabeled data (its targets are replaced by -1)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            # Dataset key: "har<sample %>%_lab<label %>%_run<run>".
            data_trainers.update(train_models_({"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run): 
                                                (data_train, data_test)}, 
                                  # empty suffix: model names are kept as-is (the finetuning cells append "_finetune")
                                  {k + "" :m for k,m in models.items() if "ssl_classifier" in k}, 
                                  criterion=partial(NeuralProcessSSLLoss, 
                                                    get_lambda_sup=lambda: get_lambda_clf(True),
                                                    n_max_elements=int(128*sampling_perc),
                                                    label_perc=(label_perc * data_train.n_train)/data_train.n_total, # label percentage is lower because the test set was concatenated above
                                                    min_sigma=min_std,
                                                    is_unsup_forall=False,
                                                    is_ssl_only=False,
                                                    get_lambda_unsup=lambda: 1,
                                                     get_lambda_ent=lambda: 0.5,  # both do something similar
                                                     get_lambda_neg_cons=lambda: 0.5,
                                                    ),
                                    patience=15,
                                  chckpnt_dirname=chckpnt_dirname,
                                  max_epochs=N_EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  is_retrain=IS_RETRAIN,
                                    is_monitor_acc=True,
                                  callbacks=[],
                                  # training batches use random context/target subsets; validation uses all points
                                  iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),  
                                  iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                              ))



--- Loading har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck ---

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck best epoch: 2 val_loss: 0.2708716995238013

--- Loading har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck ---

har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck best epoch: 5 val_loss: 0.1961393239824964

--- Loading har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck ---

har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck best epoch: 2 val_loss: 0.19988441305480895


In [31]:
import pandas as pd

# One row per trainer key; "accuracy" is the `valid_acc` stored in the last history entry.
out = pd.Series(
    {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
).reset_index(name="accuracy")

# Peel the key apart from the outside in. Judging by the printed keys it looks like
# "har<sample>%_lab<labels>%_run<run>/<model>".
splitted = out["index"].str.split("/", expand=True)       # -> [meta, model]
splitted2 = splitted[0].str.split("_run", expand=True)    # -> [meta without run, run]
splitted3 = splitted2[0].str.split("_lab", expand=True)   # -> ["har<sample>%", "<labels>%"]

out["meta"] = splitted2[0]
out["models"] = splitted[1]
out["run"] = splitted2[1]
out["data sample"] = splitted3[0].str.split("har", expand=True)[1]
out["lab"] = splitted3[1]

# the raw key is fully decomposed by now
out = out.drop(columns=["index"])

# statistics of the accuracy over the runs of each (model, label %, sample %) setting
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck,10%,100%,3.0,0.931795,0.00622,0.925008,0.92908,0.933152,0.935188,0.937224


In [31]:
# 0.9644 with both supervised and unsupervised with finetuning

# For every trainer, report the most recent epoch flagged as a new best validation accuracy.
for k, t in data_trainers.items():
    # iterate from the last epoch backwards; `e` is the offset from the end
    for e, h in enumerate(reversed(t.history)):
        if not h["valid_acc_best"]:
            continue
        print(k, "epoch:", len(t.history) - e,
              "val_loss:", h["valid_loss"],
              "val_acc:", h["valid_acc"])
        break

har100%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck epoch: 2 val_loss: 0.2708716995238013 val_acc: 0.9121140142517815
har100%_lab10%_run1/ssl_classifier_gnp_large_shared_bottleneck epoch: 5 val_loss: 0.1961393239824964 val_acc: 0.9307770614183916
har100%_lab10%_run2/ssl_classifier_gnp_large_shared_bottleneck epoch: 2 val_loss: 0.19988441305480895 val_acc: 0.9270444519850696


# Evaluation Sampling Percentage

In [32]:
from skorch.callbacks import Freezer, LRScheduler

# Sampling-percentage sweep: for 3 runs and every entry of `sampling_percentages`,
# finetune the SSL classifier with label_perc=1 and collect the trainers in `data_trainers`.
data_trainers = {}

for run in range(3):
    for sampling_perc in sampling_percentages:
        for label_perc in [1]:
            # NOTE(review): presumably loads the transformer pretrained at this sampling
            # percentage into `models` (the cell output shows "--- Loading <p>%har/transformer_... ---") — confirm.
            load_pretrained_(models, [sampling_perc])

            # NOTE(review): presumably ramps the supervised-loss weight linearly from 1 to 50
            # over N_EPOCHS*n_steps_per_epoch steps — confirm against HyperparameterInterpolator.
            get_lambda_clf=HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # dev_size=0: the middle (dev) return value is ignored.
            # A new random seed is drawn for every run, so splits differ between executions
            # unless `random` is seeded elsewhere.
            data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                              n_labels=label_perc, 
                                                              data_perc=sampling_perc, 
                                                              dev_size=0,
                                                              seed=random.randint(0,10000),
                                                              is_augment=True)

            # add test as unlabeled data (its targets are replaced by -1)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            # Dataset key: "har<sample %>%_lab<label %>%_run<run>".
            data_trainers.update(train_models_({"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run): 
                                                (data_train, data_test)}, 
                                  # "_finetune" suffix keeps these results apart from the non-finetuned models
                                  {k + "_finetune" :m for k,m in models.items() if "ssl_classifier" in k}, 
                                  criterion=partial(NeuralProcessSSLLoss, 
                                                    get_lambda_sup=lambda: get_lambda_clf(True),
                                                    n_max_elements=int(128*sampling_perc),
                                                    label_perc=(label_perc * data_train.n_train)/data_train.n_total, # label percentage is lower because the test set was concatenated above
                                                    min_sigma=min_std,
                                                    is_unsup_forall=False,
                                                    is_ssl_only=False,
                                                    get_lambda_unsup=lambda: 1,
                                                     get_lambda_ent=lambda: 0.5,  # both do something similar
                                                     get_lambda_neg_cons=lambda: 0.5,
                                                    ),
                                    patience=15,
                                  chckpnt_dirname=chckpnt_dirname,
                                  max_epochs=N_EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  is_retrain=IS_RETRAIN,
                                    is_monitor_acc=True,
                                  callbacks=[],
                                  # training batches use random context/target subsets; validation uses all points
                                  iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),  
                                  iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                              ))



--- Loading 5%har/transformer_gnp_large_shared_bottleneck ---

5%har/transformer_gnp_large_shared_bottleneck best epoch: 98 val_loss: -2.164531707763672

--- Loading har5%_lab100%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har5%_lab100%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune best epoch: 5 val_loss: 0.6508865955121079

--- Loading 10%har/transformer_gnp_large_shared_bottleneck ---

10%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -2.98053588682008

--- Loading har10%_lab100%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har10%_lab100%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune best epoch: 11 val_loss: 0.37507563022431145

--- Loading 30%har/transformer_gnp_large_shared_bottleneck ---

30%har/transformer_gnp_large_shared_bottleneck best epoch: 87 val_loss: -4.5797279249117215

--- Loading har30%_lab100%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har30%_lab100%_run0/ssl_classifier

In [33]:
import pandas as pd

# One row per trainer key; "accuracy" is the `valid_acc` stored in the last history entry.
out = pd.Series(
    {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
).reset_index(name="accuracy")

# Peel the key apart from the outside in. Judging by the printed keys it looks like
# "har<sample>%_lab<labels>%_run<run>/<model>".
splitted = out["index"].str.split("/", expand=True)       # -> [meta, model]
splitted2 = splitted[0].str.split("_run", expand=True)    # -> [meta without run, run]
splitted3 = splitted2[0].str.split("_lab", expand=True)   # -> ["har<sample>%", "<labels>%"]

out["meta"] = splitted2[0]
out["models"] = splitted[1]
out["run"] = splitted2[1]
out["data sample"] = splitted3[0].str.split("har", expand=True)[1]
out["lab"] = splitted3[1]

# the raw key is fully decomposed by now
out = out.drop(columns=["index"])

# statistics of the accuracy over the runs of each (model, label %, sample %) setting
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune,100%,10%,3.0,0.902047,0.000706,0.901256,0.901765,0.902273,0.902443,0.902613
ssl_classifier_gnp_large_shared_bottleneck_finetune,100%,100%,3.0,0.976021,0.007296,0.968782,0.972345,0.975908,0.97964,0.983373
ssl_classifier_gnp_large_shared_bottleneck_finetune,100%,30%,3.0,0.957245,0.000588,0.956905,0.956905,0.956905,0.957414,0.957923
ssl_classifier_gnp_large_shared_bottleneck_finetune,100%,5%,3.0,0.820269,0.008146,0.810994,0.817272,0.823549,0.824907,0.826264
ssl_classifier_gnp_large_shared_bottleneck_finetune,100%,50%,3.0,0.963353,0.007907,0.95453,0.960129,0.965728,0.967764,0.9698
ssl_classifier_gnp_large_shared_bottleneck_finetune,100%,70%,3.0,0.975908,0.001796,0.97455,0.97489,0.975229,0.976586,0.977944


In [33]:
# For every trainer, report the most recent epoch flagged as a new best validation accuracy.
for k, t in data_trainers.items():
    # iterate from the last epoch backwards; `e` is the offset from the end
    for e, h in enumerate(reversed(t.history)):
        if not h["valid_acc_best"]:
            continue
        print(k, "epoch:", len(t.history) - e,
              "val_loss:", h["valid_loss"],
              "val_acc:", h["valid_acc"])
        break

har5%_lab100%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 5 val_loss: 0.6508865955121079 val_acc: 0.7553444180522565
har10%_lab100%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 11 val_loss: 0.37507563022431145 val_acc: 0.8961655921275874
har30%_lab100%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 10 val_loss: 0.23837404603831922 val_acc: 0.9491007804546997
har50%_lab100%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 1 val_loss: 0.1840641939482286 val_acc: 0.9474041398031897
har70%_lab100%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 10 val_loss: 0.0867358241843336 val_acc: 0.9752290464879538
har100%_lab100%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 32 val_loss: 0.09407840634266076 val_acc: 0.9799796403121819
har5%_lab100%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 4 val_loss: 0.67058725331976 val_acc: 0.7451645741431965
har10%_lab100%_run1/ssl_classi

# Evaluation Label Percentage

In [34]:
from skorch.callbacks import Freezer, LRScheduler

# Label-percentage sweep: for 3 runs and every entry of `label_percentages`, finetune the
# SSL classifier on 50% of the samples and collect the trainers in `data_trainers`.
data_trainers = {}

for run in range(3):
    for sampling_perc in [0.5]:
        for label_perc in label_percentages:
            # The fully-labelled setting was already computed by the previous cell,
            # so it must only be reloaded, never retrained.
            is_retrain = IS_RETRAIN
            if label_perc == 1:
                is_retrain = False

            # NOTE(review): presumably loads the transformer pretrained at this sampling
            # percentage into `models` — confirm.
            load_pretrained_(models, [sampling_perc])

            # NOTE(review): presumably ramps the supervised-loss weight linearly from 1 to 50
            # over N_EPOCHS*n_steps_per_epoch steps — confirm against HyperparameterInterpolator.
            get_lambda_clf = HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # dev_size=0: the middle (dev) return value is ignored.
            # A new random seed is drawn for every run, so splits differ between executions
            # unless `random` is seeded elsewhere.
            data_train, _, data_test = get_train_dev_test_ssl("har",
                                                              n_labels=label_perc,
                                                              data_perc=sampling_perc,
                                                              dev_size=0,
                                                              seed=random.randint(0,10000),
                                                              is_augment=True)

            # add test as unlabeled data (its targets are replaced by -1)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            # Dataset key: "har<sample %>%_lab<label %>%_run<run>".
            # NOTE(review): the recorded output contains keys such as "lab600%"/"lab1200%", so
            # `label_percentages` seems to hold values > 1 as well; they are formatted and scaled
            # below exactly like fractions — confirm this is intended.
            data_trainers.update(train_models_({"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run):
                                                (data_train, data_test)},
                                  # "_finetune" suffix keeps these results apart from the non-finetuned models
                                  {k + "_finetune" :m for k,m in models.items() if "ssl_classifier" in k},
                                  criterion=partial(NeuralProcessSSLLoss,
                                                    get_lambda_sup=lambda: get_lambda_clf(True),
                                                    n_max_elements=int(128*sampling_perc),
                                                    # label percentage is lower because the test set was concatenated above
                                                    label_perc=(label_perc * data_train.n_train)/data_train.n_total,
                                                    min_sigma=min_std,
                                                    is_unsup_forall=False,
                                                    is_ssl_only=False,
                                                    get_lambda_unsup=lambda: 1,
                                                    get_lambda_ent=lambda: 0.5,  # both do something similar
                                                    get_lambda_neg_cons=lambda: 0.5,
                                                    ),
                                  patience=15,
                                  chckpnt_dirname=chckpnt_dirname,
                                  max_epochs=N_EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  # per-iteration flag, so that label_perc == 1 is reloaded instead of retrained
                                  is_retrain=is_retrain,
                                  is_monitor_acc=True,
                                  callbacks=[],
                                  # training batches use random context/target subsets; validation uses all points
                                  iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                                  iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                  ))


--- Loading 50%har/transformer_gnp_large_shared_bottleneck ---

50%har/transformer_gnp_large_shared_bottleneck best epoch: 72 val_loss: -5.531349883959131

--- Loading har50%_lab600%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har50%_lab600%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune best epoch: 1 val_loss: 1.7390090251802226

--- Loading 50%har/transformer_gnp_large_shared_bottleneck ---

50%har/transformer_gnp_large_shared_bottleneck best epoch: 72 val_loss: -5.531349883959131

--- Loading har50%_lab1200%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har50%_lab1200%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune best epoch: 4 val_loss: 1.7610424022816948

--- Loading 50%har/transformer_gnp_large_shared_bottleneck ---

50%har/transformer_gnp_large_shared_bottleneck best epoch: 72 val_loss: -5.531349883959131

--- Loading har50%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har50%_lab1%_run0/ssl_classifier


--- Loading har50%_lab100%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har50%_lab100%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune best epoch: 2 val_loss: 0.10383275301932965


In [35]:
import pandas as pd

# One row per trainer key; "accuracy" is the `valid_acc` stored in the last history entry.
out = pd.Series(
    {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
).reset_index(name="accuracy")

# Peel the key apart from the outside in. Judging by the printed keys it looks like
# "har<sample>%_lab<labels>%_run<run>/<model>".
splitted = out["index"].str.split("/", expand=True)       # -> [meta, model]
splitted2 = splitted[0].str.split("_run", expand=True)    # -> [meta without run, run]
splitted3 = splitted2[0].str.split("_lab", expand=True)   # -> ["har<sample>%", "<labels>%"]

out["meta"] = splitted2[0]
out["models"] = splitted[1]
out["run"] = splitted2[1]
out["data sample"] = splitted3[0].str.split("har", expand=True)[1]
out["lab"] = splitted3[1]

# the raw key is fully decomposed by now
out = out.drop(columns=["index"])

# statistics of the accuracy over the runs of each (model, label %, sample %) setting
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune,1%,50%,3.0,0.868793,0.00153,0.867323,0.868001,0.86868,0.869528,0.870377
ssl_classifier_gnp_large_shared_bottleneck_finetune,10%,50%,3.0,0.944237,0.00153,0.942654,0.943502,0.94435,0.945029,0.945707
ssl_classifier_gnp_large_shared_bottleneck_finetune,100%,50%,3.0,0.963353,0.007907,0.95453,0.960129,0.965728,0.967764,0.9698
ssl_classifier_gnp_large_shared_bottleneck_finetune,1200%,50%,3.0,0.337066,0.020202,0.314218,0.329318,0.344418,0.34849,0.352562
ssl_classifier_gnp_large_shared_bottleneck_finetune,30%,50%,3.0,0.959394,0.001371,0.958602,0.958602,0.958602,0.95979,0.960977
ssl_classifier_gnp_large_shared_bottleneck_finetune,5%,50%,3.0,0.93779,0.007514,0.932474,0.933492,0.93451,0.940448,0.946386
ssl_classifier_gnp_large_shared_bottleneck_finetune,50%,50%,3.0,0.962787,0.003399,0.959281,0.961147,0.963013,0.96454,0.966067
ssl_classifier_gnp_large_shared_bottleneck_finetune,600%,50%,3.0,0.32157,0.01694,0.307431,0.312182,0.316932,0.328639,0.340346


In [37]:
# If the results are bad: try freezing again and smaller params.
# For every trainer, report the most recent epoch flagged as a new best validation accuracy.
for k, t in data_trainers.items():
    # iterate from the last epoch backwards; `e` is the offset from the end
    for e, h in enumerate(reversed(t.history)):
        if not h["valid_acc_best"]:
            continue
        print(k, "epoch:", len(t.history) - e,
              "val_loss:", h["valid_loss"],
              "val_acc:", h["valid_acc"])
        break

har50%_lab600%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 3 val_loss: 1.783490130688564 val_acc: 0.3169324737020699
har50%_lab1200%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 21 val_loss: 1.7878198685142845 val_acc: 0.3525619273837801
har50%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 11 val_loss: 1.0262672793553742 val_acc: 0.8673227010519172
har50%_lab5%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 9 val_loss: 0.25123960873940704 val_acc: 0.9324737020699015
har50%_lab10%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 15 val_loss: 0.3563840006180122 val_acc: 0.9443501866304717
har50%_lab30%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 32 val_loss: 0.2326741750093893 val_acc: 0.9586019681031558
har50%_lab50%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 17 val_loss: 0.1826831086999811 val_acc: 0.9630132337970818
har50%_lab100%_run0/ssl_classifier_

* 0.9304 best without n max elements
* 0.9277: jsd | no 0.1 scale | n_max_elements | 100 samples | 0.05 entropies
* 0.9857 : jsd | 0.1 scale | n_max_elements | 100 samples | 0.05 entropies | no freeze
* 0.9623 : jsd | no scale | n_max_elements | 100 samples | 0.01 entropies | no freeze | no pretrain | [0.01,0.5] | linear interpolator (1,5)


* 0.9671 : jsd | no scale ? | n_max_elements | 100 samples | 0.05 entropies | no freeze | no pretrain | [0.1,0.5]
* 0.9365 : jsd | no scale | n_max_elements | 100 samples | 0.05 entropies | no freeze | no pretrain | [0.01,0.5] | linear interpolator
* 0.9824 : jsd | no scale | n_max_elements | 100 samples | 0.01 entropies | no freeze | no pretrain | [0.01,0.5] | linear interpolator


* 0.9844 : jsd | no 0.1 scale | n_max_elements | 100 samples | 0.05 entropies | no freeze
* 0.9817 : jsd | no 0.1 scale | n_max_elements | 100 samples | 0.05 entropies | no freeze | cntxt [0.01,0.5]


* 0.9627 : jsd | no scale | n_max_elements | 100 samples | 0.01 entropies | no freeze | no pretrain | [0.01,0.5]
* 0.9572 : jsd | no scale | n_max_elements | 100 samples | 0.01 entropies | no freeze | no pretrain | [0.01,0.9] | linear interpolator


* 0.9321: jsd | no 0.1 scale | n_max_elements | 100 samples
* 0.9365: jsd | no 0.1 scale | n_max_elements | 100 samples | 0.1 entropies


* 0.9450 : jsd | 0.2 scale | n_max_elements | 100 samples | 0.05 entropies | no freeze | no pretrain | [0.01,0.5]
* 0.9315 : jsd | no scale | n_max_elements | 100 samples | 0.05 entropies | no freeze | no pretrain | [0.01,0.5]

In [28]:
# Load every pretrained transformer checkpoint and copy its weights into the
# SSL classifier registered under the matching name.
loaded_models = {}
for sampling_perc in sampling_percentages:
    for k, m in models.items():
        if "transformer" in k:
            # is_retrain=False with (None, None) datasets: only restore the checkpoint
            out = train_models_({"{}%har".format(int(sampling_perc*100)): (None, None)},
                                {k: m},
                                chckpnt_dirname=chckpnt_dirname,
                                is_retrain=False)

            # a single dataset/model pair was requested, so take the first returned trainer
            pretrained_model = next(iter(out.values())).module_

            # overwrite the classifier's parameters that also exist in the transformer
            ssl_key = k.replace("transformer", "ssl_classifier")
            model_dict = models[ssl_key].state_dict()
            model_dict.update(pretrained_model.state_dict())
            models[ssl_key].load_state_dict(model_dict)
        


--- Loading 5%har/transformer_gnp_large_shared_bottleneck ---

5%har/transformer_gnp_large_shared_bottleneck best epoch: 98 val_loss: -2.164531707763672

--- Loading 10%har/transformer_gnp_large_shared_bottleneck ---

10%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -2.98053588682008

--- Loading 30%har/transformer_gnp_large_shared_bottleneck ---

30%har/transformer_gnp_large_shared_bottleneck best epoch: 87 val_loss: -4.5797279249117215

--- Loading 50%har/transformer_gnp_large_shared_bottleneck ---

50%har/transformer_gnp_large_shared_bottleneck best epoch: 72 val_loss: -5.531349883959131

--- Loading 70%har/transformer_gnp_large_shared_bottleneck ---

70%har/transformer_gnp_large_shared_bottleneck best epoch: 98 val_loss: -7.103462460897501

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622


In [29]:
from torch.distributions import Normal, Categorical, kl_divergence

In [30]:
# Two example 2-class probability vectors. requires_grad=True so that their
# gradients can be inspected through `.grad` after a backward pass.
t1 = torch.tensor([0.2, 0.8], requires_grad=True)
t2 = torch.tensor([0.7, 0.3], requires_grad=True)
#torch.softmax(t2, -1)
#torch.softmax(t1, -1)

In [31]:
# Element-wise mean of the two probability vectors.
M = (t1 + t2) / 2

In [None]:
def jensen_shannon_div(p1, p2):
    """Jensen-Shannon divergence between two categorical probability tensors.

    Computes ``(KL(p1 || m) + KL(p2 || m)) / 2`` where ``m = (p1 + p2) / 2``.
    The last dimension holds the category probabilities (as for
    ``torch.distributions.Categorical``); the result has one value per
    leading (batch) index.
    """
    mixture_probs = (p1 + p2) / 2
    # 1 where the mixture has mass, 0 elsewhere. The mean can only be 0 where both
    # inputs are 0; the inputs are masked there to guard against numerical noise.
    support = (mixture_probs != 0).float()
    mixture = Categorical(probs=mixture_probs)
    kl_first = kl_divergence(Categorical(probs=p1 * support), mixture)
    kl_second = kl_divergence(Categorical(probs=p2 * support), mixture)
    return (kl_first + kl_second) / 2

def yann_div(t1, t2):
    """Minimum over all entries of ``KL(t1 || m) + KL(t2 || m)``, with ``m = (t1 + t2) / 2``.

    Unlike the Jensen-Shannon divergence the sum is not halved, and ``torch.min``
    reduces a batched result to its smallest element (a no-op for a single pair).
    """
    midpoint = Categorical((t1 + t2) / 2)
    kl_first = kl_divergence(Categorical(probs=t1), midpoint)
    kl_second = kl_divergence(Categorical(probs=t2), midpoint)
    return torch.min(kl_first + kl_second)

def csiszar_dist(t1, t2):
    """Average of the reverse KL terms ``KL(m || t1)`` and ``KL(m || t2)``, ``m = (t1 + t2) / 2``.

    The last dimension holds the category probabilities; one value is returned
    per leading (batch) index. The square root is left out on purpose.
    """
    midpoint_probs = (t1 + t2) / 2
    to_first = kl_divergence(Categorical(midpoint_probs), Categorical(probs=t1))
    to_second = kl_divergence(Categorical(midpoint_probs), Categorical(probs=t2))
    return (to_first + to_second) / 2  # **0.5

def total_var(t1, t2):
    """Total variation distance: half the summed absolute difference over the last dimension."""
    abs_gap = (t1 - t2).abs()
    return abs_gap.sum(-1) / 2

def bhattacharyya_dist(t1, t2):
    """Bhattacharyya distance: negative log of ``sum(sqrt(t1 * t2))`` over the last dimension."""
    overlap = (t1 * t2).sqrt().sum(-1)
    return -torch.log(overlap)

def hellinger_dist(t1, t2):
    """Hellinger distance: Euclidean norm of ``sqrt(t1) - sqrt(t2)`` over the last dimension, divided by sqrt(2)."""
    root_gap = t1.sqrt() - t2.sqrt()
    squared_norm = root_gap.pow(2).sum(-1)
    return squared_norm.sqrt() / (2**0.5)

In [None]:
import math
# ln(2) ~= 0.693 is the upper bound of the Jensen-Shannon divergence in nats;
# presumably displayed as a reference value for the comparison of the divergences.
math.log(2)

In [None]:
# Print every divergence for a few hand-picked pairs of categorical distributions.
# The loop variables are deliberately not named t1/t2: that would rebind the notebook-level
# tensors `t1`/`t2` to plain Python lists and break any cell that uses them afterwards.
probs_pairs = [([0., 1], [1, 0.]),
               ([0.5, 0.5], [0.4, 0.6]),
               ([0.5, 0.5], [0.5, 0.5]),
               ([0.4, 0.6], [0.3, 0.7]),
               ([1-1e-50, 1e-50], [1e-50, 1-1e-50]),  # rounds to [1, 0] / [0, 1] in floating point
               ([0.1, 0.1, 0.8], [0.2, 0.2, 0.6]),
               ([0.1, 0.1, 0.8], [0.6, 0.2, 0.2])]

# (printed tag, function) in the order in which the results are displayed
divergences = [("yd", yann_div),
               ("cd", csiszar_dist),
               ("tv", total_var),
               ("jsd", jensen_shannon_div),
               ("bd", bhattacharyya_dist),
               ("hd", hellinger_dist)]

for probs_a, probs_b in probs_pairs:
    print()
    print(probs_a, probs_b)
    tensor_a, tensor_b = torch.tensor(probs_a), torch.tensor(probs_b)
    for tag, divergence in divergences:
        print(tag, divergence(tensor_a, tensor_b).item())

In [None]:
# Element-wise square of t1; expects t1 to still be the tensor defined earlier.
t1.pow(2)

In [None]:
# NOTE(review): `k` has to be a scalar tensor computed from t1/t2 for this call to fill
# their `.grad`. No such assignment is visible in this part of the notebook (here `k` is
# only bound as a dict-key loop variable) — confirm where it is defined.
k.backward()

# gradient of `k` with respect to t1
t1.grad

In [None]:
# Gradient accumulated on t2 by the backward pass (None if no backward pass reached t2).
t2.grad