# Human Activity Recognition - SSL JOINT Consistency

Last Update : 31 July 2019

In [1]:
# Number of CPU threads; passed to torch.set_num_threads in the environment cell.
N_THREADS = 8
# Nota Bene : notebooks don't deallocate GPU memory
# When True, the environment cell sets CUDA_VISIBLE_DEVICES to "" before importing torch.
IS_FORCE_CPU = False # can also be set in the trainer

## Environment

In [2]:
cd ..

/conv


In [3]:
%autosave 600
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# CENTER PLOTS
from IPython.core.display import HTML
# Inject notebook-wide CSS: centre PNG outputs and hide the cell prompts.
# The `.prompt` rule needs its opening brace; without it the rule is malformed CSS
# and is discarded by the browser instead of hiding the prompts.
display(HTML(""" <style> .output_png {display: table-cell; text-align: center; margin:auto; }
.prompt {display:none;}  </style>"""))

import os
# Hide every GPU when CPU execution is forced; this runs before torch is imported below.
if IS_FORCE_CPU:
    os.environ['CUDA_VISIBLE_DEVICES'] = ""

import sys
# Make modules located in the "notebooks" directory importable (the working directory was changed with `cd ..`).
sys.path.append("notebooks")

import numpy as np
import matplotlib.pyplot as plt
from functools import partial
import pandas as pd
import h5py


import torch
# Cap the number of CPU threads torch uses at N_THREADS.
torch.set_num_threads(N_THREADS)

Autosaving every 600 seconds


# Dataset

In [4]:
from skssl.transformers.neuralproc.datasplit import CntxtTrgtGetter, GetRandomIndcs, get_all_indcs
from utils.data.tsdata import get_timeseries_dataset, SparseMultiTimeSeriesDataset

# Random subset of indices as context, all indices as targets.
# NOTE(review): 0.1 / 0.5 look like fractions of the available points - confirm against GetRandomIndcs.
# NOTE(review): the name suggests a test-time split, but it is not used in the cells visible here.
get_cntxt_trgt_test = CntxtTrgtGetter(contexts_getter=GetRandomIndcs(min_n_indcs=0.1, max_n_indcs=0.5),
                                     targets_getter=get_all_indcs,
                                     is_add_cntxts_to_trgts=False)  # don't add context points to targets

# All indices as both context and targets; used for the validation collate function in the training cells.
get_cntxt_trgt_feat = CntxtTrgtGetter(contexts_getter=get_all_indcs,
                                     targets_getter=get_all_indcs,
                                     is_add_cntxts_to_trgts=False)  # don't add context points to targets

# Random context and random target subsets; used for the training collate function in the training cells.
get_cntxt_trgt = CntxtTrgtGetter(contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.99),
                                 targets_getter=GetRandomIndcs(min_n_indcs=0.5, max_n_indcs=0.99),
                                 is_add_cntxts_to_trgts=False)  # don't add context points to targets

In [5]:
# "har" dataset requested with split="both"; later cells read Y_DIM, N_TARGETS and the
# number of steps per epoch from it.
data_both = get_timeseries_dataset("har")(split="both")

def cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=False):
    """Build a collate function that crops, stacks and context/target-splits a batch.

    Parameters
    ----------
    get_cntxt_trgt : callable
        Called as ``get_cntxt_trgt(X, y)`` on the collated tensors; must return the
        four values stored under "X", "y", "X_trgt" and "y_trgt".
    is_repeat_batch : bool, optional
        If True, "X", "y" and the targets are concatenated with themselves along
        dim 0 before the split, so every example appears twice in the batch.

    Returns
    -------
    callable
        ``mycollate(batch)``, where every batch item is indexable as
        ``(inputs_dict, target)``.
    """
    def mycollate(batch):
        # Shortest first-dimension length among all entries whose key contains "X".
        shortest = min(tensor.size(0)
                       for sample in batch
                       for key, tensor in sample[0].items()
                       if "X" in key)

        # Keep the first `shortest` steps of every entry (assumes the order is randomized).
        cropped = []
        for sample in batch:
            cropped_inputs = {key: tensor[:shortest, ...] for key, tensor in sample[0].items()}
            cropped.append((cropped_inputs, sample[1]))

        collated = torch.utils.data.dataloader.default_collate(cropped)
        inputs = collated[0]
        X, y = inputs["X"], inputs["y"]

        if is_repeat_batch:
            X, y = (torch.cat([t, t], dim=0) for t in (X, y))
            collated[1] = torch.cat([collated[1]] * 2, dim=0)  # targets

        split = get_cntxt_trgt(X, y)
        inputs["X"], inputs["y"], inputs["X_trgt"], inputs["y_trgt"] = split

        return collated
    return mycollate

In [6]:
X_DIM = 1  # 1D spatial input (although actually 2 but the first is for sparse channels)
Y_DIM = data_both.data.shape[-1] # multiple channels (size of the last data axis)
N_TARGETS = len(np.unique(data_both.targets))  # number of distinct target values

# Candidate grids for the experiments.
# NOTE(review): the loops visible below hard-code [1] and [0.01] instead of iterating these lists.
sampling_percentages = [0.05, 0.1, 0.3, 0.5, 0.7, 1]
# NOTE(review): presumably the integer entries are absolute label counts and the float
# entries are fractions - confirm against get_train_dev_test_ssl.
label_percentages = [N_TARGETS, N_TARGETS*2, 0.01, 0.05, 0.1, 0.3, 0.5, 1]

## Model

In [7]:
import torch.nn as nn
from skssl.transformers import GlobalNeuralProcess, NeuralProcessLoss, AttentiveNeuralProcess, NeuralProcessSSLLoss
from skssl.utils.helpers import rescale_range
from skssl.predefined import UnetCNN, CNN, MLP, SparseSetConv, SetConv, MlpRBF, GaussianRBF, BatchSparseSetConv
from skssl.transformers.neuralproc.datasplit import precomputed_cntxt_trgt_split
from utils.helpers import count_parameters

In [8]:
from copy import deepcopy

# Registry of model factories, keyed by model name.
models = {}

# Minimum std passed to the model (and reused as `min_sigma` of the SSL loss in later cells).
min_std = 5e-3

# 1D U-Net factory, plugged in below as `TmpSelfAttn`.
unet = partial(
    UnetCNN,
    Conv=torch.nn.Conv1d,
    Pool=torch.nn.MaxPool1d,
    upsample_mode="linear",
    n_layers=18,
    is_double_conv=True,
    is_depth_separable=True,
    Normalization=torch.nn.BatchNorm1d,
    is_chan_last=True,
    bottleneck=None,
    kernel_size=7,
    max_nchannels=256,
    is_force_same_bottleneck=True,
    _is_summary=True,
)

# Keyword arguments shared by both GlobalNeuralProcess variants.
# NOTE(review): `input_size=256+Y_DIM*4` presumably matches the size of the representation
# handed to the classifier - confirm against GlobalNeuralProcess.
kwargs = {
    "x_dim": X_DIM,
    "y_dim": Y_DIM,
    "min_std": min_std,
    "n_tmp_queries": 128,
    "r_dim": 64,
    "keys_to_tmp_attn": partial(SetConv, RadialBasisFunc=GaussianRBF),
    "TmpSelfAttn": unet,
    "tmp_to_queries_attn": partial(SetConv, RadialBasisFunc=GaussianRBF),
    "is_skip_tmp": False,
    "is_use_x": False,
    "get_cntxt_trgt": precomputed_cntxt_trgt_split,
    "is_encode_xy": False,
    "Classifier": partial(
        MLP,
        input_size=256 + Y_DIM * 4,
        output_size=N_TARGETS,
        dropout=0.,
        hidden_size=128,
        n_hidden_layers=3,
        is_res=True,
    ),
}

# Variant with a classification head.
models["ssl_classifier_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs)

# Same configuration without a classifier.
kwargs_bis = deepcopy(kwargs)
kwargs_bis["Classifier"] = None

models["transformer_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs_bis)

In [9]:
from utils.helpers import count_parameters
# Instantiate every registered factory once and report its parameter count.
for model_name, make_model in models.items():
    n_param = count_parameters(make_model())
    print(model_name, "- N Param:", n_param)

ssl_classifier_gnp_large_shared_bottleneck - N Param: 1078238
transformer_gnp_large_shared_bottleneck - N Param: 1006936


In [10]:
def load_pretrained_(models, sampling_percentages):
    """Instantiate both models in place and copy pretrained transformer weights into the classifier.

    Parameters
    ----------
    models : dict
        Model registry; its two "..._gnp_large_shared_bottleneck" entries are
        overwritten with freshly built ``GlobalNeuralProcess`` instances.
    sampling_percentages : iterable of numbers
        For each value, the run named "<int(value*100)>%har" is fetched through
        ``train_models_`` with ``is_retrain=False``.

    Notes
    -----
    Relies on the notebook-level ``kwargs``, ``train_models_`` and
    ``chckpnt_dirname_old``. Only entries whose key contains "transformer" are
    fetched; their state dict is merged into the matching "ssl_classifier"
    entry. NOTE(review): ``is_retrain=False`` presumably loads a stored
    checkpoint rather than training - confirm against ``train_models_``.
    """
    # Instantiate right away so that there are modules to load weights into.
    models["ssl_classifier_gnp_large_shared_bottleneck"] = GlobalNeuralProcess(**kwargs)

    transformer_kwargs = deepcopy(kwargs)
    transformer_kwargs["Classifier"] = None
    models["transformer_gnp_large_shared_bottleneck"] = GlobalNeuralProcess(**transformer_kwargs)

    # Fetch every transformer and transplant its weights.
    for sampling_perc in sampling_percentages:
        for name, model in models.items():
            if "transformer" in name:
                run_name = "{}%har".format(int(sampling_perc*100))
                out = train_models_({run_name: (None, None)},
                                    {name: model},
                                    chckpnt_dirname=chckpnt_dirname_old,
                                    seed=None,
                                    is_retrain=False)

                first_key = list(out.keys())[0]
                pretrained_model = out[first_key].module_

                # Start from the classifier's own state so that keys absent from the
                # transformer keep their current values.
                classifier = models[name.replace("transformer", "ssl_classifier")]
                merged_state = classifier.state_dict()
                merged_state.update(pretrained_model.state_dict())
                classifier.load_state_dict(merged_state)

# Hyperparameter Optimisation


In [11]:
from ntbks_helpers import train_models_
from skorch.dataset import CVSplit
from utils.data.ssldata import get_train_dev_test_ssl
import random

# Training budget and batch size shared by every experiment cell.
N_EPOCHS = 100 
BATCH_SIZE = 32
IS_RETRAIN = False # if false load precomputed
# Checkpoint directory read by load_pretrained_ for the pretrained transformer.
chckpnt_dirname_old="results/challenge/har/"
# Checkpoint directory passed to train_models_ by the fine-tuning cells.
chckpnt_dirname="results/challenge/har_new/"

from skssl.utils.helpers import HyperparameterInterpolator

# Number of full batches in the complete dataset.
n_steps_per_epoch = len(data_both)//BATCH_SIZE
# NOTE(review): presumably interpolates linearly from 1 to 10 over all training steps - confirm.
# Each experiment cell below rebinds this name with an end value of 50.
get_lambda_clf=HyperparameterInterpolator(1, 10, N_EPOCHS*n_steps_per_epoch, mode="linear")

# No Augment

In [12]:
from skorch.callbacks import Freezer, LRScheduler

# Experiment "no augment": is_augment=False, entropy and negative-consistency weights both 0.5.
data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.01]:
            # Rebuild the models and load the pretrained transformer weights into the classifier.
            load_pretrained_(models, [sampling_perc])

            # Notebook-level name on purpose: the `get_lambda_sup` lambda below looks up
            # `get_lambda_clf` when it is called.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # NOTE(review): `random` is not seeded in the visible cells, so this seed
            # differs between executions.
            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0,10000),
                is_augment=False,
            )
            print(len(data_train))

            # add test as unlabeled data (its targets are replaced by -1)
            unlabeled_targets = -1*np.ones_like(data_test.targets)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, unlabeled_targets], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            run_name = "har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run)
            classifiers = {k + "_finetune_noaug" :m for k,m in models.items() if "ssl_classifier" in k}

            criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: get_lambda_clf(True),
                n_max_elements=int(128*sampling_perc),
                # label fraction is lower because the test set was concatenated to the training data
                label_perc=(label_perc * data_train.n_train)/data_train.n_total,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: 0.5,
            )

            trained = train_models_(
                {run_name: (data_train, data_test)},
                classifiers,
                criterion=criterion,
                patience=15,
                chckpnt_dirname=chckpnt_dirname,
                max_epochs=N_EPOCHS,
                batch_size=BATCH_SIZE,
                is_retrain=IS_RETRAIN,
                is_monitor_acc=True,
                callbacks=[],
                iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
            )
            data_trainers.update(trained)



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
7352

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug best epoch: 1 val_loss: 0.5792485174988745

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
7352

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug ---

har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug best epoch: 4 val_loss: 0.534082076773629

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
7352

--- Loading har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_n

In [13]:
import pandas as pd

# "valid_acc" of the last history entry of every trainer, keyed by "<meta>/<model>".
last_valid_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(last_valid_acc).reset_index(name="accuracy")

# "<meta>/<model>" -> run metadata and model name
meta_and_model = out["index"].str.split("/", expand=True)
out["meta"] = meta_and_model[0]
out["models"] = meta_and_model[1]

# "<...>_run<i>" -> run index
meta_and_run = out["meta"].str.split("_run", expand=True)
out["meta"] = meta_and_run[0]
out["run"] = meta_and_run[1]

# "har<sample>_lab<lab>" -> data sampling and label percentages
sample_and_lab = out["meta"].str.split("_lab", expand=True)
out["data sample"] = sample_and_lab[0].str.split("har", expand=True)[1]
out["lab"] = sample_and_lab[1]

out = out.drop(columns=["index"])

# Accuracy statistics over the runs of each configuration.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_noaug,1%,100%,3.0,0.870264,0.003135,0.866644,0.869359,0.872073,0.872073,0.872073


# All

In [14]:
from skorch.callbacks import Freezer, LRScheduler


# Experiment "all": is_augment=True, entropy and negative-consistency weights both 0.5.
data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.01]:
            # Rebuild the models and load the pretrained transformer weights into the classifier.
            load_pretrained_(models, [sampling_perc])

            # Notebook-level name on purpose: the `get_lambda_sup` lambda below looks up
            # `get_lambda_clf` when it is called.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # NOTE(review): `random` is not seeded in the visible cells, so this seed
            # differs between executions.
            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0,10000),
                is_augment=True,
            )
            print(len(data_train))

            # add test as unlabeled data (its targets are replaced by -1)
            unlabeled_targets = -1*np.ones_like(data_test.targets)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, unlabeled_targets], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            run_name = "har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run)
            classifiers = {k + "_finetune" :m for k,m in models.items() if "ssl_classifier" in k}

            criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: get_lambda_clf(True),
                n_max_elements=int(128*sampling_perc),
                # label fraction is lower because the test set was concatenated to the training data
                label_perc=(label_perc * data_train.n_train)/data_train.n_total,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: 0.5,
            )

            trained = train_models_(
                {run_name: (data_train, data_test)},
                classifiers,
                criterion=criterion,
                patience=15,
                chckpnt_dirname=chckpnt_dirname,
                max_epochs=N_EPOCHS,
                batch_size=BATCH_SIZE,
                is_retrain=IS_RETRAIN,
                is_monitor_acc=True,
                callbacks=[],
                iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
            )
            data_trainers.update(trained)



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
14530

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune best epoch: 4 val_loss: 0.7066817718074813

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
14530

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune best epoch: 1 val_loss: 0.7923410251903502

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622
14530

--- Loading har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

har100%_lab1%_ru

In [15]:
import pandas as pd

# "valid_acc" of the last history entry of every trainer, keyed by "<meta>/<model>".
last_valid_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(last_valid_acc).reset_index(name="accuracy")

# "<meta>/<model>" -> run metadata and model name
meta_and_model = out["index"].str.split("/", expand=True)
out["meta"] = meta_and_model[0]
out["models"] = meta_and_model[1]

# "<...>_run<i>" -> run index
meta_and_run = out["meta"].str.split("_run", expand=True)
out["meta"] = meta_and_run[0]
out["run"] = meta_and_run[1]

# "har<sample>_lab<lab>" -> data sampling and label percentages
sample_and_lab = out["meta"].str.split("_lab", expand=True)
out["data sample"] = sample_and_lab[0].str.split("har", expand=True)[1]
out["lab"] = sample_and_lab[1]

out = out.drop(columns=["index"])

# Accuracy statistics over the runs of each configuration.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune,1%,100%,3.0,0.883497,0.008286,0.876824,0.87886,0.880896,0.886834,0.892772


## Without Neg Consistency

In [16]:
from skorch.callbacks import Freezer, LRScheduler

# Experiment "without negative consistency": entropy weight 1, negative-consistency weight 0.
data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.01]:
            # Rebuild the models and load the pretrained transformer weights into the classifier.
            load_pretrained_(models, [sampling_perc])

            # Notebook-level name on purpose: the `get_lambda_sup` lambda below looks up
            # `get_lambda_clf` when it is called.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # NOTE(review): `random` is not seeded in the visible cells, so this seed
            # differs between executions.
            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0,10000),
                is_augment=True,
            )

            # add test as unlabeled data (its targets are replaced by -1)
            unlabeled_targets = -1*np.ones_like(data_test.targets)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, unlabeled_targets], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            run_name = "har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run)
            classifiers = {k + "_finetune_nonegcons" :m for k,m in models.items() if "ssl_classifier" in k}

            criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: get_lambda_clf(True),
                n_max_elements=int(128*sampling_perc),
                # label fraction is lower because the test set was concatenated to the training data
                label_perc=(label_perc * data_train.n_train)/data_train.n_total,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 1,  # both do something similar
                get_lambda_neg_cons=lambda: 0.,
            )

            trained = train_models_(
                {run_name: (data_train, data_test)},
                classifiers,
                criterion=criterion,
                patience=15,
                chckpnt_dirname=chckpnt_dirname,
                max_epochs=N_EPOCHS,
                batch_size=BATCH_SIZE,
                is_retrain=IS_RETRAIN,
                is_monitor_acc=True,
                callbacks=[],
                iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
            )
            data_trainers.update(trained)



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons best epoch: 1 val_loss: 0.5842806758175149

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons ---

har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons best epoch: 1 val_loss: 0.6183924950055038

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune

In [17]:
import pandas as pd

# "valid_acc" of the last history entry of every trainer, keyed by "<meta>/<model>".
last_valid_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(last_valid_acc).reset_index(name="accuracy")

# "<meta>/<model>" -> run metadata and model name
meta_and_model = out["index"].str.split("/", expand=True)
out["meta"] = meta_and_model[0]
out["models"] = meta_and_model[1]

# "<...>_run<i>" -> run index
meta_and_run = out["meta"].str.split("_run", expand=True)
out["meta"] = meta_and_run[0]
out["run"] = meta_and_run[1]

# "har<sample>_lab<lab>" -> data sampling and label percentages
sample_and_lab = out["meta"].str.split("_lab", expand=True)
out["data sample"] = sample_and_lab[0].str.split("har", expand=True)[1]
out["lab"] = sample_and_lab[1]

out = out.drop(columns=["index"])

# Accuracy statistics over the runs of each configuration.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_nonegcons,1%,100%,3.0,0.884968,0.00445,0.880896,0.882592,0.884289,0.887004,0.889718


Show number of steps for convergence, because probably no unsup is fine but not improving.

# No Entropy

In [18]:
from skorch.callbacks import Freezer, LRScheduler

# Experiment "no entropy": entropy weight 0, negative-consistency weight 1.
data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.01]:
            # Rebuild the models and load the pretrained transformer weights into the classifier.
            load_pretrained_(models, [sampling_perc])

            # Notebook-level name on purpose: the `get_lambda_sup` lambda below looks up
            # `get_lambda_clf` when it is called.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # NOTE(review): `random` is not seeded in the visible cells, so this seed
            # differs between executions.
            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0,10000),
                is_augment=True,
            )

            # add test as unlabeled data (its targets are replaced by -1)
            unlabeled_targets = -1*np.ones_like(data_test.targets)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, unlabeled_targets], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            run_name = "har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run)
            classifiers = {k + "_finetune_noent" :m for k,m in models.items() if "ssl_classifier" in k}

            criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: get_lambda_clf(True),
                n_max_elements=int(128*sampling_perc),
                # label fraction is lower because the test set was concatenated to the training data
                label_perc=(label_perc * data_train.n_train)/data_train.n_total,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 0,  # both do something similar
                get_lambda_neg_cons=lambda: 1.,
            )

            trained = train_models_(
                {run_name: (data_train, data_test)},
                classifiers,
                criterion=criterion,
                patience=15,
                chckpnt_dirname=chckpnt_dirname,
                max_epochs=N_EPOCHS,
                batch_size=BATCH_SIZE,
                is_retrain=IS_RETRAIN,
                is_monitor_acc=True,
                callbacks=[],
                iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
            )
            data_trainers.update(trained)



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent best epoch: 1 val_loss: 0.6706024456477223

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent ---

har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent best epoch: 1 val_loss: 0.6930868627258914

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_noent ---

har1

In [19]:
import pandas as pd

# "valid_acc" of the last history entry of every trainer, keyed by "<meta>/<model>".
last_valid_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(last_valid_acc).reset_index(name="accuracy")

# "<meta>/<model>" -> run metadata and model name
meta_and_model = out["index"].str.split("/", expand=True)
out["meta"] = meta_and_model[0]
out["models"] = meta_and_model[1]

# "<...>_run<i>" -> run index
meta_and_run = out["meta"].str.split("_run", expand=True)
out["meta"] = meta_and_run[0]
out["run"] = meta_and_run[1]

# "har<sample>_lab<lab>" -> data sampling and label percentages
sample_and_lab = out["meta"].str.split("_lab", expand=True)
out["data sample"] = sample_and_lab[0].str.split("har", expand=True)[1]
out["lab"] = sample_and_lab[1]

out = out.drop(columns=["index"])

# Accuracy statistics over the runs of each configuration.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_noent,1%,100%,3.0,0.881235,0.002562,0.878521,0.880048,0.881574,0.882592,0.88361


## No Unsup

In [22]:
from skorch.callbacks import Freezer, LRScheduler

# Experiment "no unsup": unsupervised weight 0, entropy and negative-consistency weights both 0.5.
data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.01]:
            # Rebuild the models and load the pretrained transformer weights into the classifier.
            load_pretrained_(models, [sampling_perc])

            # Notebook-level name on purpose: the `get_lambda_sup` lambda below looks up
            # `get_lambda_clf` when it is called.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # NOTE(review): `random` is not seeded in the visible cells, so this seed
            # differs between executions.
            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                seed=random.randint(0,10000),
                dev_size=0,
                is_augment=True,
            )

            # add test as unlabeled data (its targets are replaced by -1)
            unlabeled_targets = -1*np.ones_like(data_test.targets)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, unlabeled_targets], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            run_name = "har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run)
            classifiers = {k + "_finetune_nounsup" :m for k,m in models.items() if "ssl_classifier" in k}

            criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: get_lambda_clf(True),
                n_max_elements=int(128*sampling_perc),
                # label fraction is lower because the test set was concatenated to the training data
                label_perc=(label_perc * data_train.n_train)/data_train.n_total,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 0,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: .5,
            )

            trained = train_models_(
                {run_name: (data_train, data_test)},
                classifiers,
                criterion=criterion,
                patience=15,
                chckpnt_dirname=chckpnt_dirname,
                max_epochs=N_EPOCHS,
                batch_size=BATCH_SIZE,
                is_retrain=IS_RETRAIN,
                is_monitor_acc=True,
                callbacks=[],
                iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
            )
            data_trainers.update(trained)



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup best epoch: 1 val_loss: 0.6362027577476903

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup ---

har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup best epoch: 1 val_loss: 0.6330289104488853

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup

In [23]:
import pandas as pd

# Validation accuracy stored in the last history entry of each trainer,
# keyed by "<dataset tag>/<model name>".
out = pd.Series(
    {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
).reset_index(name="accuracy")

# Break each key apart: "har<data sample>_lab<lab>_run<run>/<model name>".
key_parts = out["index"].str.split("/", expand=True)
meta_parts = key_parts[0].str.split("_run", expand=True)
sample_lab_parts = meta_parts[0].str.split("_lab", expand=True)

out["meta"] = meta_parts[0]
out["models"] = key_parts[1]
out["run"] = meta_parts[1]
out["data sample"] = sample_lab_parts[0].str.split("har", expand=True)[1]
out["lab"] = sample_lab_parts[1]
out = out.drop(columns=["index"])

# Accuracy statistics over the runs of every (model, lab, data sample) group.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_nounsup,1%,100%,3.0,0.871621,0.004632,0.866305,0.870037,0.87377,0.874279,0.874788


#  SSL Only

In [24]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.01]:
            # The pretrained-model loader is called again at the start of every run.
            load_pretrained_(models, [sampling_perc])

            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            # A fresh random seed is drawn for the data split of each run.
            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                seed=random.randint(0, 10000),
                dev_size=0,
                is_augment=True,
            )

            # Append the test examples to the training set as unlabeled data (target -1).
            unlabeled_test_targets = -1 * np.ones_like(data_test.targets)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate(
                [data_train.targets, unlabeled_test_targets], axis=0
            )
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            experiment_name = "har{}%_lab{}%_run{}".format(
                int(sampling_perc * 100), int(label_perc * 100), run
            )
            # Only the SSL classifiers are fine-tuned; the suffix tags this variant.
            finetune_models = {
                name + "_finetune_sslonly": model
                for name, model in models.items()
                if "ssl_classifier" in name
            }
            ssl_criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: get_lambda_clf(True),
                n_max_elements=int(128 * sampling_perc),
                # the label percentage is lower because the test set was concatenated
                label_perc=(label_perc * data_train.n_train) / data_train.n_total,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=True,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: 0.5,
            )

            data_trainers.update(
                train_models_(
                    {experiment_name: (data_train, data_test)},
                    finetune_models,
                    criterion=ssl_criterion,
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly best epoch: 1 val_loss: 0.6721114105882182

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly ---

har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly best epoch: 1 val_loss: 0.6721089704181528

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly

In [25]:
import pandas as pd

# Validation accuracy stored in the last history entry of each trainer,
# keyed by "<dataset tag>/<model name>".
out = pd.Series(
    {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
).reset_index(name="accuracy")

# Break each key apart: "har<data sample>_lab<lab>_run<run>/<model name>".
key_parts = out["index"].str.split("/", expand=True)
meta_parts = key_parts[0].str.split("_run", expand=True)
sample_lab_parts = meta_parts[0].str.split("_lab", expand=True)

out["meta"] = meta_parts[0]
out["models"] = key_parts[1]
out["run"] = meta_parts[1]
out["data sample"] = sample_lab_parts[0].str.split("har", expand=True)[1]
out["lab"] = sample_lab_parts[1]
out = out.drop(columns=["index"])

# Accuracy statistics over the runs of every (model, lab, data sample) group.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_sslonly,1%,100%,3.0,0.876598,0.004706,0.871395,0.874618,0.877842,0.879199,0.880556


# Sup Only 

In [13]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.01]:
            # The pretrained-model loader is called again at the start of every run.
            load_pretrained_(models, [sampling_perc])

            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            # A fresh random seed is drawn for the data split of each run.
            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                seed=random.randint(0, 10000),
                dev_size=0,
                is_augment=True,
            )

            # Keep only the labeled training examples (targets different from -1).
            # NOTE(review): data_train.indcs is left unfiltered here - confirm that is intended.
            is_labeled = (data_train.targets != -1).squeeze()
            data_train.data = data_train.data[is_labeled]
            data_train.targets = data_train.targets[is_labeled]

            experiment_name = "har{}%_lab{}%_run{}".format(
                int(sampling_perc * 100), int(label_perc * 100), run
            )
            # Only the SSL classifiers are fine-tuned; the suffix tags this variant.
            finetune_models = {
                name + "_finetune_suponly": model
                for name, model in models.items()
                if "ssl_classifier" in name
            }
            sup_criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: get_lambda_clf(True),
                n_max_elements=int(128 * sampling_perc),
                # NOTE(review): same scaling as the cells that concatenate the test set,
                # although nothing is concatenated here - confirm it is still wanted.
                label_perc=(label_perc * data_train.n_train) / data_train.n_total,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=True,
                get_lambda_unsup=lambda: 0,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: 0.5,
            )

            data_trainers.update(
                train_models_(
                    {experiment_name: (data_train, data_test)},
                    finetune_models,
                    criterion=sup_criterion,
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    seed=None,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly best epoch: 2 val_loss: 0.37116682323149275

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly ---

har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly best epoch: 2 val_loss: 0.6973583848750674

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_suponl

In [14]:
import pandas as pd

# Validation accuracy stored in the last history entry of each trainer,
# keyed by "<dataset tag>/<model name>".
out = pd.Series(
    {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
).reset_index(name="accuracy")

# Break each key apart: "har<data sample>_lab<lab>_run<run>/<model name>".
key_parts = out["index"].str.split("/", expand=True)
meta_parts = key_parts[0].str.split("_run", expand=True)
sample_lab_parts = meta_parts[0].str.split("_lab", expand=True)

out["meta"] = meta_parts[0]
out["models"] = key_parts[1]
out["run"] = meta_parts[1]
out["data sample"] = sample_lab_parts[0].str.split("har", expand=True)[1]
out["lab"] = sample_lab_parts[1]
out = out.drop(columns=["index"])

# Accuracy statistics over the runs of every (model, lab, data sample) group.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_suponly,1%,100%,3.0,0.904083,0.013515,0.892433,0.896675,0.900916,0.909908,0.918901


# Sup Only No Scaling

In [16]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.01]:
            # The pretrained-model loader is called again at the start of every run.
            load_pretrained_(models, [sampling_perc])

            # Not referenced by this cell's criterion, which uses a constant supervised weight.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            # A fresh random seed is drawn for the data split of each run.
            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                seed=random.randint(0, 10000),
                dev_size=0,
                is_augment=True,
            )

            # Keep only the labeled training examples (targets different from -1).
            # NOTE(review): data_train.indcs is left unfiltered here - confirm that is intended.
            is_labeled = (data_train.targets != -1).squeeze()
            data_train.data = data_train.data[is_labeled]
            data_train.targets = data_train.targets[is_labeled]

            experiment_name = "har{}%_lab{}%_run{}".format(
                int(sampling_perc * 100), int(label_perc * 100), run
            )
            # Only the SSL classifiers are fine-tuned; the suffix tags this variant.
            finetune_models = {
                name + "_finetune_sup_vanilla": model
                for name, model in models.items()
                if "ssl_classifier" in name
            }
            # No element cap and no label-percentage scaling in this variant.
            vanilla_criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: 1,
                n_max_elements=None,
                label_perc=None,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=True,
                get_lambda_unsup=lambda: 0,
            )

            data_trainers.update(
                train_models_(
                    {experiment_name: (data_train, data_test)},
                    finetune_models,
                    criterion=vanilla_criterion,
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=True,  # hard-coded here instead of IS_RETRAIN
                    seed=None,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Training har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla ---



HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

  epoch    train_loss    valid_acc    valid_loss    cp      dur
-------  ------------  -----------  ------------  ----  -------
      1        [36m0.1423[0m       [32m0.9019[0m        [35m0.4632[0m     +  27.0568


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      2        [36m0.0234[0m       0.8789        0.6497        25.7552


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      3        [36m0.0150[0m       [32m0.9050[0m        0.5428     +  26.0485


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      4        0.0204       0.8918        0.5845        26.6478


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      5        [36m0.0135[0m       0.8873        0.7393        25.8290


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      6        0.0190       0.9019        0.7017        26.0549


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      7        [36m0.0134[0m       0.8999        0.5504        26.9620


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      8        [36m0.0126[0m       0.8789        0.8902        25.2579


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      9        [36m0.0086[0m       0.8731        0.7300        25.8022


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     10        0.0153       0.8775        0.6866        27.1475


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     11        0.0087       0.8656        0.9808        26.2136


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     12        0.0223       0.8534        0.7029        26.2065


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     13        0.0141       0.8629        0.7914        26.4802


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     14        [36m0.0085[0m       0.8799        0.7899        26.3274


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     15        0.0179       0.8897        0.7920        25.8998


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     16        [36m0.0041[0m       0.8887        0.7500        26.9822


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     17        0.0186       0.8802        0.6499        26.4571


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

Stopping since valid_acc has not improved in the last 15 epochs.
Re-initializing optimizer.
har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla best epoch: 1 val_loss: 0.46322141261598004

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Training har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla ---



HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

  epoch    train_loss    valid_acc    valid_loss    cp      dur
-------  ------------  -----------  ------------  ----  -------
      1        [36m0.1631[0m       [32m0.9203[0m        [35m0.2856[0m     +  27.3947


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      2        [36m0.0199[0m       0.8941        0.5483        26.1763


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      3        0.0238       [32m0.9253[0m        0.4433     +  26.1705


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      4        0.0212       0.9104        0.5256        26.8947


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      5        0.0202       0.9138        0.5036        26.1654


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      6        0.0205       0.9030        0.5572        26.1184


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      7        [36m0.0094[0m       0.9077        0.4847        26.9226


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      8        0.0145       0.8880        0.7473        26.1455


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      9        0.0171       0.9026        0.6188        25.7778


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     10        0.0109       0.9050        0.5586        27.0420


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     11        0.0110       0.8999        0.6263        25.9539


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     12        [36m0.0043[0m       0.9050        0.8275        26.1571


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     13        0.0076       0.8836        0.8493        26.7187


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     14        0.0173       0.8890        0.6464        25.9694


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     15        0.0217       0.8941        0.7561        26.5339


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     16        0.0167       0.9026        0.7461        26.4009


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     17        0.0144       0.8846        0.7837        25.7361


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

Stopping since valid_acc has not improved in the last 15 epochs.
Re-initializing optimizer.
har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla best epoch: 1 val_loss: 0.28563037815035747

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Training har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla ---



HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

  epoch    train_loss    valid_acc    valid_loss    cp      dur
-------  ------------  -----------  ------------  ----  -------
      1        [36m0.1328[0m       [32m0.8965[0m        [35m0.4885[0m     +  27.1879


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      2        [36m0.0368[0m       0.8877        0.5944        25.6638


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      3        [36m0.0232[0m       0.8911        0.5975        26.4048


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      4        [36m0.0184[0m       0.8616        1.0288        27.0574


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      5        0.0228       0.8775        0.6836        26.3406


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      6        [36m0.0159[0m       0.8734        0.7550        26.1929


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      7        0.0178       0.8694        0.6896        27.2681


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      8        [36m0.0158[0m       0.8853        0.5319        26.1816


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

      9        [36m0.0084[0m       0.8907        0.6725        26.1332


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     10        0.0169       0.8765        0.6900        25.6314


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     11        0.0109       0.8941        0.4913        25.9695


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     12        0.0151       0.8758        0.6700        26.0417


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     13        0.0265       0.8639        0.8402        26.9045


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     14        [36m0.0056[0m       0.8717        0.8625        26.2144


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

     15        0.0204       0.8775        0.5983        25.7190


HBox(children=(IntProgress(value=0, max=251), HTML(value='')))

Stopping since valid_acc has not improved in the last 15 epochs.
Re-initializing optimizer.
har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla best epoch: 1 val_loss: 0.4885016896986027


In [17]:
import pandas as pd

# Validation accuracy stored in the last history entry of each trainer,
# keyed by "<dataset tag>/<model name>".
out = pd.Series(
    {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
).reset_index(name="accuracy")

# Break each key apart: "har<data sample>_lab<lab>_run<run>/<model name>".
key_parts = out["index"].str.split("/", expand=True)
meta_parts = key_parts[0].str.split("_run", expand=True)
sample_lab_parts = meta_parts[0].str.split("_lab", expand=True)

out["meta"] = meta_parts[0]
out["models"] = key_parts[1]
out["run"] = meta_parts[1]
out["data sample"] = sample_lab_parts[0].str.split("har", expand=True)[1]
out["lab"] = sample_lab_parts[1]
out = out.drop(columns=["index"])

# Accuracy statistics over the runs of every (model, lab, data sample) group.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_sup_vanilla,1%,100%,3.0,0.908947,0.014823,0.896505,0.900747,0.904988,0.915168,0.925348


# No Lambda CLF

In [28]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.01]:
            # The pretrained-model loader is called again at the start of every run.
            load_pretrained_(models, [sampling_perc])

            # Not referenced by this cell's criterion, which uses a constant supervised weight.
            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            # A fresh random seed is drawn for the data split of each run.
            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0, 10000),
                is_augment=True,
            )

            # Append the test examples to the training set as unlabeled data (target -1).
            unlabeled_test_targets = -1 * np.ones_like(data_test.targets)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate(
                [data_train.targets, unlabeled_test_targets], axis=0
            )
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            experiment_name = "har{}%_lab{}%_run{}".format(
                int(sampling_perc * 100), int(label_perc * 100), run
            )
            # Only the SSL classifiers are fine-tuned; the suffix tags this variant.
            finetune_models = {
                name + "_finetune_nolambda": model
                for name, model in models.items()
                if "ssl_classifier" in name
            }
            nolambda_criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: 1,
                n_max_elements=int(128 * sampling_perc),
                # the label percentage is lower because the test set was concatenated
                label_perc=(label_perc * data_train.n_train) / data_train.n_total,
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: 0.5,
            )

            data_trainers.update(
                train_models_(
                    {experiment_name: (data_train, data_test)},
                    finetune_models,
                    criterion=nolambda_criterion,
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda best epoch: 4 val_loss: 0.7258332047496442

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda ---

har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda best epoch: 2 val_loss: 0.8610166228339354

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck_finetune_nol

In [29]:
import pandas as pd

# Validation accuracy stored in the last history entry of each trainer,
# keyed by "<dataset tag>/<model name>".
out = pd.Series(
    {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
).reset_index(name="accuracy")

# Break each key apart: "har<data sample>_lab<lab>_run<run>/<model name>".
key_parts = out["index"].str.split("/", expand=True)
meta_parts = key_parts[0].str.split("_run", expand=True)
sample_lab_parts = meta_parts[0].str.split("_lab", expand=True)

out["meta"] = meta_parts[0]
out["models"] = key_parts[1]
out["run"] = meta_parts[1]
out["data sample"] = sample_lab_parts[0].str.split("har", expand=True)[1]
out["lab"] = sample_lab_parts[1]
out = out.drop(columns=["index"])

# Accuracy statistics over the runs of every (model, lab, data sample) group.
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_nolambda,1%,100%,3.0,0.88542,0.0118,0.872073,0.880896,0.889718,0.892094,0.894469


## Without Label Scaling

In [30]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.01]:
            # The pretrained-model loader is called again at the start of every run.
            load_pretrained_(models, [sampling_perc])

            get_lambda_clf = HyperparameterInterpolator(
                1, 50, N_EPOCHS * n_steps_per_epoch, mode="linear"
            )

            # A fresh random seed is drawn for the data split of each run.
            data_train, _, data_test = get_train_dev_test_ssl(
                "har",
                n_labels=label_perc,
                data_perc=sampling_perc,
                dev_size=0,
                seed=random.randint(0, 10000),
                is_augment=True,
            )

            # Append the test examples to the training set as unlabeled data (target -1).
            unlabeled_test_targets = -1 * np.ones_like(data_test.targets)
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate(
                [data_train.targets, unlabeled_test_targets], axis=0
            )
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            experiment_name = "har{}%_lab{}%_run{}".format(
                int(sampling_perc * 100), int(label_perc * 100), run
            )
            # Only the SSL classifiers are fine-tuned; the suffix tags this variant.
            finetune_models = {
                name + "_finetune_nolabscale": model
                for name, model in models.items()
                if "ssl_classifier" in name
            }
            nolabscale_criterion = partial(
                NeuralProcessSSLLoss,
                get_lambda_sup=lambda: get_lambda_clf(True),
                n_max_elements=int(128 * sampling_perc),
                label_perc=None,  # this variant passes no label percentage to the loss
                min_sigma=min_std,
                is_unsup_forall=False,
                is_ssl_only=False,
                get_lambda_unsup=lambda: 1,
                get_lambda_ent=lambda: 0.5,  # both do something similar
                get_lambda_neg_cons=lambda: 0.5,
            )

            data_trainers.update(
                train_models_(
                    {experiment_name: (data_train, data_test)},
                    finetune_models,
                    criterion=nolabscale_criterion,
                    patience=15,
                    chckpnt_dirname=chckpnt_dirname,
                    max_epochs=N_EPOCHS,
                    batch_size=BATCH_SIZE,
                    is_retrain=IS_RETRAIN,
                    is_monitor_acc=True,
                    callbacks=[],
                    iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),
                    iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                )
            )



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale best epoch: 1 val_loss: 1.3277767340450073

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale ---

har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale best epoch: 2 val_loss: 1.1983000453059698

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck_fine

In [31]:
import pandas as pd

# Collect the last recorded validation accuracy of every trainer.
# Keys are expected to look like "har<data>%_lab<lab>%_run<i>/<model name>".
final_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(final_acc).reset_index(name="accuracy")

# "<meta>/<model name>" -> experiment meta string and model name
meta_and_model = out["index"].str.split("/", expand=True)
out["meta"] = meta_and_model[0]
out["models"] = meta_and_model[1]

# "<meta>_run<i>" -> strip the run suffix from meta and keep the run id
meta_and_run = out["meta"].str.split("_run", expand=True)
out["meta"] = meta_and_run[0]
out["run"] = meta_and_run[1]

# "har<data>%_lab<lab>%" -> data sampling percentage and label percentage
sample_and_lab = out["meta"].str.split("_lab", expand=True)
out["data sample"] = sample_and_lab[0].str.split("har", expand=True)[1]
out["lab"] = sample_and_lab[1]

# the raw key is fully decomposed by now
out = out.drop(columns=["index"])

# summary statistics over runs for each (model, label %, data %) configuration
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_nolabscale,1%,100%,3.0,0.893677,0.012967,0.881574,0.886834,0.892094,0.899729,0.907363


## No Element Scaling

In [32]:
from skorch.callbacks import Freezer, LRScheduler

# "No Element Scaling" experiment: train the SSL classifiers with n_max_elements=None.
# Results are accumulated in `data_trainers`, keyed by "<dataset meta>/<model name>"
# (see the "--- Loading ... ---" log lines printed by the run).
data_trainers = {}

# 3 repetitions of a single configuration (sampling_perc=1, label_perc=0.01)
for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.01]:

            # NOTE(review): presumably (re)loads the pretrained transformer into `models` in place
            # before each run, so every run starts from the same pretrained weights — confirm
            load_pretrained_(models, [sampling_perc])

            # NOTE(review): looks like a linear schedule from 1 to 50 over the total number of
            # training steps — confirm against HyperparameterInterpolator
            get_lambda_clf=HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # dev_size=0 and the second returned split is discarded.
            # The seed is drawn at random for every run, so the split differs between runs
            # (not reproducible unless `random` is seeded elsewhere).
            data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                              n_labels=label_perc, 
                                                              data_perc=sampling_perc, 
                                                              dev_size=0,
                                                              seed=random.randint(0,10000),
                                                              is_augment=True)

            # add test as unlabeled data: test inputs are appended to the training set and
            # their targets are replaced by -1
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            # Dataset key encodes data %, label % and run id, e.g. "har100%_lab1%_run0".
            # Only models whose name contains "ssl_classifier" are trained; they are registered
            # under a "_finetune_noelemscale" suffix.
            data_trainers.update(train_models_({"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run): 
                                                (data_train, data_test)}, 
                                  {k + "_finetune_noelemscale" :m for k,m in models.items() if "ssl_classifier" in k}, 
                                  criterion=partial(NeuralProcessSSLLoss, 
                                                    # supervised weight follows the schedule defined above
                                                    get_lambda_sup=lambda: get_lambda_clf(True),
                                                    # None in this section's "no element scaling" variant
                                                    n_max_elements=None,
                                                    label_perc=(label_perc * data_train.n_train)/data_train.n_total, # label perc is lower because the test set was concatenated to train
                                                    min_sigma=min_std,
                                                    is_unsup_forall=False,
                                                    is_ssl_only=False,
                                                    # the remaining loss weights are constants
                                                    get_lambda_unsup=lambda: 1,
                                                     get_lambda_ent=lambda: 0.5,  # both do something similar
                                                     get_lambda_neg_cons=lambda: 0.5,
                                                    ),
                                    patience=15,
                                  chckpnt_dirname=chckpnt_dirname,
                                  max_epochs=N_EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  is_retrain=IS_RETRAIN,
                                    is_monitor_acc=True,
                                  callbacks=[],
                                  # training batches: random context/target split, with is_repeat_batch=True
                                  iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),  
                                  # validation batches: all indices used both as context and as targets
                                  iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                              ))



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale best epoch: 1 val_loss: 0.5697207075480812

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale ---

har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale best epoch: 2 val_loss: 0.7624972491981142

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck_

In [33]:
import pandas as pd

# Collect the last recorded validation accuracy of every trainer.
# Keys are expected to look like "har<data>%_lab<lab>%_run<i>/<model name>".
final_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(final_acc).reset_index(name="accuracy")

# "<meta>/<model name>" -> experiment meta string and model name
meta_and_model = out["index"].str.split("/", expand=True)
out["meta"] = meta_and_model[0]
out["models"] = meta_and_model[1]

# "<meta>_run<i>" -> strip the run suffix from meta and keep the run id
meta_and_run = out["meta"].str.split("_run", expand=True)
out["meta"] = meta_and_run[0]
out["run"] = meta_and_run[1]

# "har<data>%_lab<lab>%" -> data sampling percentage and label percentage
sample_and_lab = out["meta"].str.split("_lab", expand=True)
out["data sample"] = sample_and_lab[0].str.split("har", expand=True)[1]
out["lab"] = sample_and_lab[1]

# the raw key is fully decomposed by now
out = out.drop(columns=["index"])

# summary statistics over runs for each (model, label %, data %) configuration
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck_finetune_noelemscale,1%,100%,3.0,0.882366,0.003628,0.878181,0.881235,0.884289,0.884459,0.884628


## No Finetuning

In [34]:
from skorch.callbacks import Freezer, LRScheduler

# "No Finetuning" experiment: train the SSL classifier without calling load_pretrained_
# in this cell. Results are accumulated in `data_trainers`, keyed by
# "<dataset meta>/<model name>" (see the "--- Loading ... ---" log lines printed by the run).
data_trainers = {}
# Re-register the classifier as a fresh GlobalNeuralProcess partial.
# NOTE(review): presumably this discards any pretrained weights attached to the previous entry — confirm
models["ssl_classifier_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs)

# 3 repetitions of a single configuration (sampling_perc=1, label_perc=0.01)
for run in range(3):
    for sampling_perc in [1]:
        for label_perc in [0.01]:

            # NOTE(review): looks like a linear schedule from 1 to 50 over the total number of
            # training steps — confirm against HyperparameterInterpolator
            get_lambda_clf=HyperparameterInterpolator(1, 50, N_EPOCHS*n_steps_per_epoch, mode="linear")

            # dev_size=0 and the second returned split is discarded.
            # The seed is drawn at random for every run, so the split differs between runs
            # (not reproducible unless `random` is seeded elsewhere).
            data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                              n_labels=label_perc, 
                                                              data_perc=sampling_perc, 
                                                              dev_size=0,
                                                              seed=random.randint(0,10000),
                                                              is_augment=True)

            # add test as unlabeled data: test inputs are appended to the training set and
            # their targets are replaced by -1
            data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
            data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
            data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

            # Dataset key encodes data %, label % and run id, e.g. "har100%_lab1%_run0".
            # Only models whose name contains "ssl_classifier" are trained; the `+ ""` adds no
            # suffix, so model names are kept unchanged.
            data_trainers.update(train_models_({"har{}%_lab{}%_run{}".format(int(sampling_perc*100), int(label_perc*100), run): 
                                                (data_train, data_test)}, 
                                  {k + "" :m for k,m in models.items() if "ssl_classifier" in k}, 
                                  criterion=partial(NeuralProcessSSLLoss, 
                                                    # supervised weight follows the schedule defined above
                                                    get_lambda_sup=lambda: get_lambda_clf(True),
                                                    # 128 scaled by the data sampling fraction
                                                    n_max_elements=int(128*sampling_perc),
                                                    label_perc=(label_perc * data_train.n_train)/data_train.n_total, # label perc is lower because the test set was concatenated to train
                                                    min_sigma=min_std,
                                                    is_unsup_forall=False,
                                                    is_ssl_only=False,
                                                    # the remaining loss weights are constants
                                                    get_lambda_unsup=lambda: 1,
                                                     get_lambda_ent=lambda: 0.5,  # both do something similar
                                                     get_lambda_neg_cons=lambda: 0.5,
                                                    ),
                                    patience=15,
                                  chckpnt_dirname=chckpnt_dirname,
                                  max_epochs=N_EPOCHS,
                                  batch_size=BATCH_SIZE,
                                  is_retrain=IS_RETRAIN,
                                    is_monitor_acc=True,
                                  callbacks=[],
                                  # training batches: random context/target split, with is_repeat_batch=True
                                  iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=True),  
                                  # validation batches: all indices used both as context and as targets
                                  iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                              ))



--- Loading har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck ---

har100%_lab1%_run0/ssl_classifier_gnp_large_shared_bottleneck best epoch: 1 val_loss: 0.8911615639651959

--- Loading har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck ---

har100%_lab1%_run1/ssl_classifier_gnp_large_shared_bottleneck best epoch: 2 val_loss: 0.5388232102667948

--- Loading har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck ---

har100%_lab1%_run2/ssl_classifier_gnp_large_shared_bottleneck best epoch: 1 val_loss: 0.8988694104252315


In [35]:
import pandas as pd

# Collect the last recorded validation accuracy of every trainer.
# Keys are expected to look like "har<data>%_lab<lab>%_run<i>/<model name>".
final_acc = {name: trainer.history[-1]["valid_acc"] for name, trainer in data_trainers.items()}
out = pd.Series(final_acc).reset_index(name="accuracy")

# "<meta>/<model name>" -> experiment meta string and model name
meta_and_model = out["index"].str.split("/", expand=True)
out["meta"] = meta_and_model[0]
out["models"] = meta_and_model[1]

# "<meta>_run<i>" -> strip the run suffix from meta and keep the run id
meta_and_run = out["meta"].str.split("_run", expand=True)
out["meta"] = meta_and_run[0]
out["run"] = meta_and_run[1]

# "har<data>%_lab<lab>%" -> data sampling percentage and label percentage
sample_and_lab = out["meta"].str.split("_lab", expand=True)
out["data sample"] = sample_and_lab[0].str.split("har", expand=True)[1]
out["lab"] = sample_and_lab[1]

# the raw key is fully decomposed by now
out = out.drop(columns=["index"])

# summary statistics over runs for each (model, label %, data %) configuration
out.groupby(["models", "lab", "data sample"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
models,lab,data sample,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ssl_classifier_gnp_large_shared_bottleneck,1%,100%,3.0,0.862007,0.017089,0.850017,0.852223,0.854428,0.868001,0.881574
