# Human Activity Recognition - SSL JOINT Consistency

Last Update : 24 July 2019

In [1]:
# Number of CPU threads handed to torch.set_num_threads in the environment cell.
N_THREADS = 8
# Nota bene: notebooks don't deallocate GPU memory.
# When True, CUDA_VISIBLE_DEVICES is blanked in the environment cell below.
IS_FORCE_CPU = False # can also be set in the trainer

## Environment

In [2]:
# Move up one directory (the recorded output shows the resulting directory is /conv).
# NOTE(review): not idempotent -- re-running this cell moves up one more level.
cd ..

/conv


In [3]:
%autosave 600
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# CENTER PLOTS
# Inject CSS into the notebook page: centre PNG outputs and hide the cell prompts.
# The `.prompt` rule previously lacked its opening brace, which made it invalid CSS.
from IPython.core.display import HTML
display(HTML(""" <style> .output_png {display: table-cell; text-align: center; margin:auto; }
.prompt {display:none;}  </style>"""))

import os
# Blank out the visible CUDA devices when CPU execution is requested.
# This is done before torch is imported further down in this cell.
if IS_FORCE_CPU:
    os.environ['CUDA_VISIBLE_DEVICES'] = ""

import sys
# Add the notebooks/ directory to the import path
# (presumably where ntbks_helpers lives -- TODO confirm).
sys.path.append("notebooks")

import numpy as np
import matplotlib.pyplot as plt
from functools import partial
import pandas as pd
import h5py


import torch
# Apply the thread count chosen in the configuration cell.
torch.set_num_threads(N_THREADS)

Autosaving every 600 seconds


# Dataset

In [4]:
from skssl.transformers.neuralproc.datasplit import CntxtTrgtGetter, GetRandomIndcs, get_all_indcs
from utils.data.tsdata import get_timeseries_dataset, SparseMultiTimeSeriesDataset

# Three context/target splitters. The min/max values are presumably fractions of the
# number of points per series -- confirm against GetRandomIndcs.

# Random subset (0.1 to 0.5) as context, every index as target.
get_cntxt_trgt_test = CntxtTrgtGetter(
    contexts_getter=GetRandomIndcs(min_n_indcs=0.1, max_n_indcs=0.5),
    targets_getter=get_all_indcs,
    is_add_cntxts_to_trgts=False,  # do not add the context points to the targets
)

# Every index as both context and target (used below for the validation iterator).
get_cntxt_trgt_feat = CntxtTrgtGetter(
    contexts_getter=get_all_indcs,
    targets_getter=get_all_indcs,
    is_add_cntxts_to_trgts=False,  # do not add the context points to the targets
)

# Random subset (0.01 to 0.5) as context, random subset (0.5 to 0.99) as target.
get_cntxt_trgt = CntxtTrgtGetter(
    contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
    targets_getter=GetRandomIndcs(min_n_indcs=0.5, max_n_indcs=0.99),
    is_add_cntxts_to_trgts=False,  # do not add the context points to the targets
)

In [5]:
# HAR dataset requested with split="both" (presumably train and test together --
# TODO confirm in utils.data.tsdata). Used below for Y_DIM, N_TARGETS and the step count.
data_both = get_timeseries_dataset("har")(split="both")

def cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=False):
    """Build a collate function that also splits each batch into context and target sets.

    Parameters
    ----------
    get_cntxt_trgt : callable
        Called as ``get_cntxt_trgt(X, y)`` on the collated tensors. Must return four
        values, stored under the keys "X", "y", "X_trgt" and "y_trgt" of the collated
        input dictionary (in that order).

    is_repeat_batch : bool, optional
        If True, the collated "X", "y" and the labels are concatenated with themselves
        along dimension 0 before the context/target split, doubling the batch.

    Returns
    -------
    callable
        A function mapping a list of ``(dict_of_tensors, label)`` samples to the
        collated ``[dict_of_tensors, labels]`` pair.
    """
    def collate(samples):
        # Shortest first dimension among the entries whose key contains "X".
        lengths = []
        for sample in samples:
            for key, value in sample[0].items():
                if "X" in key:
                    lengths.append(value.size(0))
        shortest = min(lengths)

        # Keep only the first `shortest` elements of every entry so the samples can
        # be stacked (assumes the elements are already in random order).
        cropped = []
        for sample in samples:
            features = {key: value[:shortest, ...] for key, value in sample[0].items()}
            cropped.append((features, sample[1]))

        collated = torch.utils.data.dataloader.default_collate(cropped)
        inputs = collated[0]
        X, y = inputs["X"], inputs["y"]

        if is_repeat_batch:
            # Duplicate the inputs and the labels along the batch dimension.
            X = torch.cat([X, X], dim=0)
            y = torch.cat([y, y], dim=0)
            collated[1] = torch.cat([collated[1], collated[1]], dim=0)

        inputs["X"], inputs["y"], inputs["X_trgt"], inputs["y_trgt"] = get_cntxt_trgt(X, y)

        return collated

    return collate

In [6]:
X_DIM = 1  # 1D spatial input (although actually 2 but the first is for sparse channels)
# Size of the last axis of the dataset array.
Y_DIM = data_both.data.shape[-1] # multiple channels
# Number of distinct values among the dataset targets.
N_TARGETS = len(np.unique(data_both.targets))

# Values passed as `data_perc` when sweeping the sampling percentage.
sampling_percentages = [0.05, 0.1, 0.3, 0.5, 0.7, 1]
# Values passed as `n_labels` when sweeping the amount of labels.
# NOTE(review): the first two entries are integers (N_TARGETS, 2*N_TARGETS), presumably
# absolute label counts rather than fractions -- confirm against get_train_dev_test_ssl.
label_percentages = [N_TARGETS, N_TARGETS*2, 0.01, 0.05, 0.1, 0.3, 0.5, 1]

## Model

In [7]:
import torch.nn as nn
from skssl.transformers import GlobalNeuralProcess, NeuralProcessLoss, AttentiveNeuralProcess
from skssl.utils.helpers import rescale_range
from skssl.predefined import UnetCNN, CNN, MLP, SparseSetConv, SetConv, MlpRBF, GaussianRBF, BatchSparseSetConv
from skssl.transformers.neuralproc.datasplit import precomputed_cntxt_trgt_split
from utils.helpers import count_parameters

In [8]:
from copy import deepcopy

# Registry of model factories; each value builds a GlobalNeuralProcess when called.
models = {}

# 1D U-Net CNN factory, used below as the `TmpSelfAttn` component.
unet = partial(
    UnetCNN,
    Conv=torch.nn.Conv1d,
    Pool=torch.nn.MaxPool1d,
    upsample_mode="linear",
    n_layers=18,
    is_double_conv=True,
    is_depth_separable=True,
    Normalization=torch.nn.BatchNorm1d,
    is_chan_last=True,
    bottleneck=None,
    kernel_size=7,
    max_nchannels=256,
    is_force_same_bottleneck=True,
    _is_summary=True,
)

# Keyword arguments shared by both variants (also read by load_pretrained_).
kwargs = {
    "x_dim": X_DIM,
    "y_dim": Y_DIM,
    "min_std": 5e-3,
    "n_tmp_queries": 128,
    "r_dim": 64,
    "keys_to_tmp_attn": partial(SetConv, RadialBasisFunc=GaussianRBF),
    "TmpSelfAttn": unet,
    "tmp_to_queries_attn": partial(SetConv, RadialBasisFunc=GaussianRBF),
    "is_skip_tmp": False,
    "is_use_x": False,
    "get_cntxt_trgt": precomputed_cntxt_trgt_split,
    "is_encode_xy": False,
    # NOTE(review): the 256 in input_size presumably matches max_nchannels above -- confirm.
    "Classifier": partial(
        MLP,
        input_size=256 + Y_DIM * 4,
        output_size=N_TARGETS,
        dropout=0.5,
        hidden_size=128,
        n_hidden_layers=3,
        is_res=True,
    ),
}

# Variant with the classification head.
models["ssl_classifier_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs)

# Same configuration with the classifier removed.
kwargs_bis = deepcopy(kwargs)
kwargs_bis.update(Classifier=None)

models["transformer_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs_bis)

In [9]:
from utils.helpers import count_parameters
# Instantiate each registered model once and print its parameter count.
for model_name, make_model in models.items():
    n_params = count_parameters(make_model())
    print(model_name, "- N Param:", n_params)

ssl_classifier_gnp_large_shared_bottleneck - N Param: 1078238
transformer_gnp_large_shared_bottleneck - N Param: 1006936


In [10]:
def load_pretrained_(models, sampling_percentages):
    """Instantiate the models in place and copy pretrained transformer weights into the classifiers.

    The two entries "ssl_classifier_gnp_large_shared_bottleneck" and
    "transformer_gnp_large_shared_bottleneck" of ``models`` are overwritten with freshly
    built ``GlobalNeuralProcess`` instances. For every sampling percentage, each
    transformer is loaded through ``train_models_(..., is_retrain=False)`` and its
    state dict is copied into the ``ssl_classifier`` model of the matching name.

    Relies on the notebook globals ``kwargs``, ``train_models_`` and
    ``chckpnt_dirname`` being defined when it is called.

    Parameters
    ----------
    models : dict
        Model registry; modified in place.

    sampling_percentages : iterable of float
        Each value selects the "<int(100 * p)>%har" run to load. The weights are
        written to the same classifier every time, so when several values are given
        the last one wins.

    Returns
    -------
    None
    """
    # The modules must already be instantiated to be able to load weights into them.
    models["ssl_classifier_gnp_large_shared_bottleneck"] = GlobalNeuralProcess(**kwargs)

    kwargs_bis = deepcopy(kwargs)
    kwargs_bis["Classifier"] = None

    models["transformer_gnp_large_shared_bottleneck"] = GlobalNeuralProcess(**kwargs_bis)

    # Load every transformer and transfer its weights.
    for sampling_perc in sampling_percentages:
        for k, m in models.items():
            if "transformer" not in k:
                continue

            out = train_models_({"{}%har".format(int(sampling_perc*100)): (None, None)},
                                {k: m},
                                chckpnt_dirname=chckpnt_dirname,
                                is_retrain=False)

            # A single run and a single model were requested: take the first trainer.
            pretrained_model = out[list(out.keys())[0]].module_

            # Overwrite only the entries the transformer provides; the remaining
            # classifier entries keep their freshly initialised values.
            classifier = models[k.replace("transformer", "ssl_classifier")]
            model_dict = classifier.state_dict()
            model_dict.update(pretrained_model.state_dict())
            classifier.load_state_dict(model_dict)

# Hyperparameter Optimisation


In [11]:
from ntbks_helpers import train_models_
from skorch.dataset import CVSplit
from utils.data.ssldata import get_train_dev_test_ssl

# Passed as `max_epochs` to every train_models_ call below.
N_EPOCHS = 100 
# Passed as `batch_size` to every train_models_ call below.
BATCH_SIZE = 32
IS_RETRAIN = False # if false load precomputed
# Checkpoint directory passed to train_models_.
chckpnt_dirname="results/challenge/har/"

from skssl.utils.helpers import HyperparameterInterpolator

# Number of full batches in one pass over `data_both`.
n_steps_per_epoch = len(data_both)//BATCH_SIZE
# NOTE(review): presumably a linear schedule from 1e-5 to 10 starting after 10 epochs --
# confirm against HyperparameterInterpolator. Not referenced by the cells visible here.
get_lambda_clf=HyperparameterInterpolator(1e-5, 10, N_EPOCHS*n_steps_per_epoch, 
                              start_step=n_steps_per_epoch*10, mode="linear")

## All

In [12]:
from skorch.callbacks import Freezer, LRScheduler

# Trainers produced by this section, as returned by train_models_.
data_trainers = {}

for sampling_perc in [1]:
    for label_perc in [0.1]:
        data_train, _, data_test = get_train_dev_test_ssl(
            "har",
            n_labels=label_perc,
            data_perc=sampling_perc,
            dev_size=0,
        )

        # Add the test examples to the training set as unlabeled data (target -1).
        unlabeled_targets = -1 * np.ones_like(data_test.targets)
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, unlabeled_targets], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

        run_name = "{}%har_{}%lab".format(int(sampling_perc * 100), int(label_perc * 100))
        ssl_models = {name: model for name, model in models.items() if "ssl_classifier" in name}

        # The label percentage is rescaled (it gets lower) because the test set was
        # concatenated to the training data.
        criterion = partial(
            NeuralProcessLoss,
            ssl_loss="both",
            distance="jsd",
            n_max_elements=int(128 * sampling_perc),
            label_perc=(label_perc * data_train.n_train) / data_train.n_total,
        )

        # Training: random context subset, all indices as targets, batch repeated twice.
        train_collate = cntxt_trgt_collate(
            get_cntxt_trgt=CntxtTrgtGetter(
                contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                targets_getter=get_all_indcs,
                is_add_cntxts_to_trgts=False,
            ),
            is_repeat_batch=True,
        )
        # Validation: all indices as both context and target.
        valid_collate = cntxt_trgt_collate(get_cntxt_trgt_feat)

        trained = train_models_(
            {run_name: (data_train, data_test)},
            ssl_models,
            criterion=criterion,
            patience=15,
            chckpnt_dirname=chckpnt_dirname,
            max_epochs=N_EPOCHS,
            batch_size=BATCH_SIZE,
            is_retrain=IS_RETRAIN,
            callbacks=[],
            iterator_train__collate_fn=train_collate,
            iterator_valid__collate_fn=valid_collate,
        )
        data_trainers.update(trained)



--- Loading 100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck ---

100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck best epoch: 21 val_loss: 21.35151945585957


In [13]:
# For each trainer, report the latest epoch flagged as having the best validation loss.
for run_name, trainer in data_trainers.items():
    n_epochs = len(trainer.history)
    # Walk the history backwards; `back` counts epochs from the end.
    best = next(
        ((n_epochs - back, record)
         for back, record in enumerate(trainer.history[::-1])
         if record["valid_loss_best"]),
        None,
    )
    if best is not None:
        epoch, record = best
        print(run_name, "epoch:", epoch,
              "val_loss:", record["valid_loss"],
              "val_acc:", record["valid_acc"])
            


100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck epoch: 21 val_loss: 21.35151945585957 val_acc: 0.9419748897183576


## Without Neg Consistency

In [14]:
from skorch.callbacks import Freezer, LRScheduler

# Trainers produced by this section, as returned by train_models_.
data_trainers = {}

for sampling_perc in [1]:
    for label_perc in [0.1]:
        # Re-instantiate the models and load the pretrained transformer weights.
        load_pretrained_(models, [sampling_perc])

        data_train, _, data_test = get_train_dev_test_ssl(
            "har",
            n_labels=label_perc,
            data_perc=sampling_perc,
            dev_size=0,
        )

        # Add the test examples to the training set as unlabeled data (target -1).
        unlabeled_targets = -1 * np.ones_like(data_test.targets)
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, unlabeled_targets], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

        run_name = "{}%har_{}%lab".format(int(sampling_perc * 100), int(label_perc * 100))
        ssl_models = {
            name + "_finetune_no_negcons": model
            for name, model in models.items()
            if "ssl_classifier" in name
        }

        # Same loss as the "All" section but with is_neg_consistency switched off.
        # The label percentage is rescaled because the test set was concatenated.
        criterion = partial(
            NeuralProcessLoss,
            ssl_loss="both",
            distance="jsd",
            is_neg_consistency=False,
            n_max_elements=int(128 * sampling_perc),
            label_perc=(label_perc * data_train.n_train) / data_train.n_total,
        )

        # Training: random context subset, all indices as targets, batch repeated twice.
        train_collate = cntxt_trgt_collate(
            get_cntxt_trgt=CntxtTrgtGetter(
                contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                targets_getter=get_all_indcs,
                is_add_cntxts_to_trgts=False,
            ),
            is_repeat_batch=True,
        )
        # Validation: all indices as both context and target.
        valid_collate = cntxt_trgt_collate(get_cntxt_trgt_feat)

        trained = train_models_(
            {run_name: (data_train, data_test)},
            ssl_models,
            criterion=criterion,
            patience=15,
            chckpnt_dirname=chckpnt_dirname,
            max_epochs=N_EPOCHS,
            batch_size=BATCH_SIZE,
            is_retrain=IS_RETRAIN,
            callbacks=[],
            iterator_train__collate_fn=train_collate,
            iterator_valid__collate_fn=valid_collate,
        )
        data_trainers.update(trained)



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading 100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_no_negcons ---

100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_no_negcons best epoch: 34 val_loss: 14.35759756789193


In [15]:
# For each trainer, report the latest epoch flagged as having the best validation loss.
for run_name, trainer in data_trainers.items():
    n_epochs = len(trainer.history)
    # Walk the history backwards; `back` counts epochs from the end.
    best = next(
        ((n_epochs - back, record)
         for back, record in enumerate(trainer.history[::-1])
         if record["valid_loss_best"]),
        None,
    )
    if best is not None:
        epoch, record = best
        print(run_name, "epoch:", epoch,
              "val_loss:", record["valid_loss"],
              "val_acc:", record["valid_acc"])

100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_no_negcons epoch: 34 val_loss: 14.35759756789193 val_acc: 0.9677638276213099


## Supervised

In [16]:
from skorch.callbacks import Freezer, LRScheduler

# Trainers produced by this section, as returned by train_models_.
data_trainers = {}

for sampling_perc in [1]:
    for label_perc in [0.1]:
        # Re-instantiate the models and load the pretrained transformer weights.
        load_pretrained_(models, [sampling_perc])

        data_train, _, data_test = get_train_dev_test_ssl(
            "har",
            n_labels=label_perc,
            data_perc=sampling_perc,
            dev_size=0,
        )

        # Add the test examples to the training set as unlabeled data (target -1).
        unlabeled_targets = -1 * np.ones_like(data_test.targets)
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, unlabeled_targets], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

        run_name = "{}%har_{}%lab".format(int(sampling_perc * 100), int(label_perc * 100))
        ssl_models = {
            name + "_finetune_sup": model
            for name, model in models.items()
            if "ssl_classifier" in name
        }

        # Loss restricted to ssl_loss="supervised".
        # The label percentage is rescaled because the test set was concatenated.
        criterion = partial(
            NeuralProcessLoss,
            ssl_loss="supervised",
            distance="jsd",
            n_max_elements=int(128 * sampling_perc),
            label_perc=(label_perc * data_train.n_train) / data_train.n_total,
        )

        # Training: random context subset, all indices as targets, batch repeated twice.
        train_collate = cntxt_trgt_collate(
            get_cntxt_trgt=CntxtTrgtGetter(
                contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                targets_getter=get_all_indcs,
                is_add_cntxts_to_trgts=False,
            ),
            is_repeat_batch=True,
        )
        # Validation: all indices as both context and target.
        valid_collate = cntxt_trgt_collate(get_cntxt_trgt_feat)

        # Disabled alternative kept for reference:
        #   callbacks=[Freezer(lambda x: not x.startswith('classifier'))]
        trained = train_models_(
            {run_name: (data_train, data_test)},
            ssl_models,
            criterion=criterion,
            patience=15,
            chckpnt_dirname=chckpnt_dirname,
            max_epochs=N_EPOCHS,
            batch_size=BATCH_SIZE,
            is_retrain=IS_RETRAIN,
            callbacks=[],
            iterator_train__collate_fn=train_collate,
            iterator_valid__collate_fn=valid_collate,
        )
        data_trainers.update(trained)



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading 100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup ---

100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup best epoch: 47 val_loss: 13.080543520655274


In [17]:
# For each trainer, report the latest epoch flagged as having the best validation loss.
for run_name, trainer in data_trainers.items():
    n_epochs = len(trainer.history)
    # Walk the history backwards; `back` counts epochs from the end.
    best = next(
        ((n_epochs - back, record)
         for back, record in enumerate(trainer.history[::-1])
         if record["valid_loss_best"]),
        None,
    )
    if best is not None:
        epoch, record = best
        print(run_name, "epoch:", epoch,
              "val_loss:", record["valid_loss"],
              "val_acc:", record["valid_acc"])

100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 47 val_loss: 13.080543520655274 val_acc: 0.9738717339667459


## Without N Max Elements

In [18]:
from skorch.callbacks import Freezer, LRScheduler

# Trainers produced by this section, as returned by train_models_.
data_trainers = {}

for sampling_perc in [1]:
    for label_perc in [0.1]:
        # Re-instantiate the models and load the pretrained transformer weights.
        load_pretrained_(models, [sampling_perc])

        data_train, _, data_test = get_train_dev_test_ssl(
            "har",
            n_labels=label_perc,
            data_perc=sampling_perc,
            dev_size=0,
        )

        # Add the test examples to the training set as unlabeled data (target -1).
        unlabeled_targets = -1 * np.ones_like(data_test.targets)
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, unlabeled_targets], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

        run_name = "{}%har_{}%lab".format(int(sampling_perc * 100), int(label_perc * 100))
        ssl_models = {
            name + "_finetune_no_cntxt_scaling": model
            for name, model in models.items()
            if "ssl_classifier" in name
        }

        # Same loss as the "All" section but with n_max_elements disabled (None).
        # The label percentage is rescaled because the test set was concatenated.
        criterion = partial(
            NeuralProcessLoss,
            ssl_loss="both",
            distance="jsd",
            n_max_elements=None,
            label_perc=(label_perc * data_train.n_train) / data_train.n_total,
        )

        # Training: random context subset, all indices as targets, batch repeated twice.
        train_collate = cntxt_trgt_collate(
            get_cntxt_trgt=CntxtTrgtGetter(
                contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                targets_getter=get_all_indcs,
                is_add_cntxts_to_trgts=False,
            ),
            is_repeat_batch=True,
        )
        # Validation: all indices as both context and target.
        valid_collate = cntxt_trgt_collate(get_cntxt_trgt_feat)

        # Disabled alternative kept for reference:
        #   callbacks=[Freezer(lambda x: not x.startswith('classifier'))]
        trained = train_models_(
            {run_name: (data_train, data_test)},
            ssl_models,
            criterion=criterion,
            patience=15,
            chckpnt_dirname=chckpnt_dirname,
            max_epochs=N_EPOCHS,
            batch_size=BATCH_SIZE,
            is_retrain=IS_RETRAIN,
            callbacks=[],
            iterator_train__collate_fn=train_collate,
            iterator_valid__collate_fn=valid_collate,
        )
        data_trainers.update(trained)



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading 100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_no_cntxt_scaling ---

100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_no_cntxt_scaling best epoch: 10 val_loss: 20.53134829414712


In [19]:
# For each trainer, report the latest epoch flagged as having the best validation loss.
for run_name, trainer in data_trainers.items():
    n_epochs = len(trainer.history)
    # Walk the history backwards; `back` counts epochs from the end.
    best = next(
        ((n_epochs - back, record)
         for back, record in enumerate(trainer.history[::-1])
         if record["valid_loss_best"]),
        None,
    )
    if best is not None:
        epoch, record = best
        print(run_name, "epoch:", epoch,
              "val_loss:", record["valid_loss"],
              "val_acc:", record["valid_acc"])

100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_no_cntxt_scaling epoch: 10 val_loss: 20.53134829414712 val_acc: 0.9317950458092976


## Flat Param

In [20]:
from skorch.callbacks import Freezer, LRScheduler

# Trainers produced by this section, as returned by train_models_.
data_trainers = {}

# NOTE(review): the model names get a "_finetune_flat" suffix, yet this cell does not
# call load_pretrained_ first -- `models` is used in whatever state the previously
# executed cells left it. Confirm this is intended.
for sampling_perc in [1]:
    for label_perc in [0.1]:
        data_train, _, data_test = get_train_dev_test_ssl(
            "har",
            n_labels=label_perc,
            data_perc=sampling_perc,
            dev_size=0,
        )

        # Add the test examples to the training set as unlabeled data (target -1).
        unlabeled_targets = -1 * np.ones_like(data_test.targets)
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, unlabeled_targets], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

        run_name = "{}%har_{}%lab".format(int(sampling_perc * 100), int(label_perc * 100))
        ssl_models = {
            name + "_finetune_flat": model
            for name, model in models.items()
            if "ssl_classifier" in name
        }

        # "Flat" weighting: every lambda getter returns the constant 1.
        # The label percentage is rescaled (it gets lower) because the test set was
        # concatenated to the training data.
        criterion = partial(
            NeuralProcessLoss,
            ssl_loss="both",
            distance="jsd",
            is_neg_consistency=True,
            n_max_elements=int(128 * sampling_perc),
            get_lambda_unsup=lambda: 1,
            get_lambda_sup=lambda: 1,
            get_lambda_neg_cons=lambda: 1,
            label_perc=(label_perc * data_train.n_train) / data_train.n_total,
        )

        # Training: random context subset, all indices as targets, batch repeated twice.
        train_collate = cntxt_trgt_collate(
            get_cntxt_trgt=CntxtTrgtGetter(
                contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                targets_getter=get_all_indcs,
                is_add_cntxts_to_trgts=False,
            ),
            is_repeat_batch=True,
        )
        # Validation: all indices as both context and target.
        valid_collate = cntxt_trgt_collate(get_cntxt_trgt_feat)

        trained = train_models_(
            {run_name: (data_train, data_test)},
            ssl_models,
            criterion=criterion,
            patience=15,
            chckpnt_dirname=chckpnt_dirname,
            max_epochs=N_EPOCHS,
            batch_size=BATCH_SIZE,
            is_retrain=IS_RETRAIN,
            callbacks=[],
            iterator_train__collate_fn=train_collate,
            iterator_valid__collate_fn=valid_collate,
        )
        data_trainers.update(trained)



--- Loading 100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_flat ---

100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_flat best epoch: 9 val_loss: 22.130947111742675


In [21]:
# For each trainer, report the latest epoch flagged as having the best validation loss.
for run_name, trainer in data_trainers.items():
    n_epochs = len(trainer.history)
    # Walk the history backwards; `back` counts epochs from the end.
    best = next(
        ((n_epochs - back, record)
         for back, record in enumerate(trainer.history[::-1])
         if record["valid_loss_best"]),
        None,
    )
    if best is not None:
        epoch, record = best
        print(run_name, "epoch:", epoch,
              "val_loss:", record["valid_loss"],
              "val_acc:", record["valid_acc"])

100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_flat epoch: 9 val_loss: 22.130947111742675 val_acc: 0.9423142178486597


## Finetuning
Note that no `get_lambda_clf` schedule is passed to the loss in this section.

In [22]:
from skorch.callbacks import Freezer, LRScheduler

# Trainers produced by this section, as returned by train_models_.
data_trainers = {}

for sampling_perc in [1]:
    for label_perc in [0.1]:
        # Re-instantiate the models and load the pretrained transformer weights.
        load_pretrained_(models, [sampling_perc])

        data_train, _, data_test = get_train_dev_test_ssl(
            "har",
            n_labels=label_perc,
            data_perc=sampling_perc,
            dev_size=0,
        )

        # Add the test examples to the training set as unlabeled data (target -1).
        unlabeled_targets = -1 * np.ones_like(data_test.targets)
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, unlabeled_targets], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

        run_name = "{}%har_{}%lab".format(int(sampling_perc * 100), int(label_perc * 100))
        ssl_models = {
            name + "_finetune": model
            for name, model in models.items()
            if "ssl_classifier" in name
        }

        # No `distance` is passed here, so the loss uses its own default for it.
        # The label percentage is rescaled because the test set was concatenated.
        criterion = partial(
            NeuralProcessLoss,
            ssl_loss="both",
            n_max_elements=int(128 * sampling_perc),
            label_perc=(label_perc * data_train.n_train) / data_train.n_total,
        )

        # Training: random context subset, all indices as targets, batch repeated twice.
        train_collate = cntxt_trgt_collate(
            get_cntxt_trgt=CntxtTrgtGetter(
                contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                targets_getter=get_all_indcs,
                is_add_cntxts_to_trgts=False,
            ),
            is_repeat_batch=True,
        )
        # Validation: all indices as both context and target.
        valid_collate = cntxt_trgt_collate(get_cntxt_trgt_feat)

        # Disabled alternative kept for reference:
        #   callbacks=[Freezer(lambda x: not x.startswith('classifier'))]
        trained = train_models_(
            {run_name: (data_train, data_test)},
            ssl_models,
            criterion=criterion,
            patience=15,
            chckpnt_dirname=chckpnt_dirname,
            max_epochs=N_EPOCHS,
            batch_size=BATCH_SIZE,
            is_retrain=IS_RETRAIN,
            callbacks=[],
            iterator_train__collate_fn=train_collate,
            iterator_valid__collate_fn=valid_collate,
        )
        data_trainers.update(trained)



--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622

--- Loading 100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune ---

100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune best epoch: 40 val_loss: 15.338547741877333


In [23]:
#0.9644 with both supervised and unsupervised with finetuning

# For each trainer, report the latest epoch flagged as having the best validation loss.
for run_name, trainer in data_trainers.items():
    n_epochs = len(trainer.history)
    # Walk the history backwards; `back` counts epochs from the end.
    best = next(
        ((n_epochs - back, record)
         for back, record in enumerate(trainer.history[::-1])
         if record["valid_loss_best"]),
        None,
    )
    if best is not None:
        epoch, record = best
        print(run_name, "epoch:", epoch,
              "val_loss:", record["valid_loss"],
              "val_acc:", record["valid_acc"])

100%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune epoch: 40 val_loss: 15.338547741877333 val_acc: 0.9704784526637258


# Evaluation Sampling Percentage

In [24]:
from skorch.callbacks import Freezer, LRScheduler

# Trainers produced by this section, as returned by train_models_.
data_trainers = {}

# Sweep over the sampling percentages with label_perc fixed to 1.
for sampling_perc in sampling_percentages:
    for label_perc in [1]:
        # Re-instantiate the models and load the transformer pretrained at this
        # sampling percentage.
        load_pretrained_(models, [sampling_perc])

        data_train, _, data_test = get_train_dev_test_ssl(
            "har",
            n_labels=label_perc,
            data_perc=sampling_perc,
            dev_size=0,
        )

        # Add the test examples to the training set as unlabeled data (target -1).
        unlabeled_targets = -1 * np.ones_like(data_test.targets)
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, unlabeled_targets], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

        run_name = "{}%har_{}%lab".format(int(sampling_perc * 100), int(label_perc * 100))
        ssl_models = {
            name + "_finetune_sup": model
            for name, model in models.items()
            if "ssl_classifier" in name
        }

        # Loss restricted to ssl_loss="supervised"; no `distance` is passed here.
        # The label percentage is rescaled because the test set was concatenated.
        criterion = partial(
            NeuralProcessLoss,
            ssl_loss="supervised",
            n_max_elements=int(128 * sampling_perc),
            label_perc=(label_perc * data_train.n_train) / data_train.n_total,
        )

        # Training: random context subset, all indices as targets, batch repeated twice.
        train_collate = cntxt_trgt_collate(
            get_cntxt_trgt=CntxtTrgtGetter(
                contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                targets_getter=get_all_indcs,
                is_add_cntxts_to_trgts=False,
            ),
            is_repeat_batch=True,
        )
        # Validation: all indices as both context and target.
        valid_collate = cntxt_trgt_collate(get_cntxt_trgt_feat)

        # Disabled alternative kept for reference:
        #   callbacks=[Freezer(lambda x: not x.startswith('classifier'))]
        trained = train_models_(
            {run_name: (data_train, data_test)},
            ssl_models,
            criterion=criterion,
            patience=15,
            chckpnt_dirname=chckpnt_dirname,
            max_epochs=N_EPOCHS,
            batch_size=BATCH_SIZE,
            is_retrain=IS_RETRAIN,
            callbacks=[],
            iterator_train__collate_fn=train_collate,
            iterator_valid__collate_fn=valid_collate,
        )
        data_trainers.update(trained)



--- Loading 5%har/transformer_gnp_large_shared_bottleneck ---

5%har/transformer_gnp_large_shared_bottleneck best epoch: 98 val_loss: -2.164531707763672

--- Loading 5%har_100%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup ---

5%har_100%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup best epoch: 30 val_loss: 74.09746773622382

--- Loading 10%har/transformer_gnp_large_shared_bottleneck ---

10%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -2.98053588682008

--- Loading 10%har_100%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup ---

10%har_100%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup best epoch: 22 val_loss: 59.960932497659456

--- Loading 30%har/transformer_gnp_large_shared_bottleneck ---

30%har/transformer_gnp_large_shared_bottleneck best epoch: 87 val_loss: -4.5797279249117215

--- Loading 30%har_100%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup ---

30%har_100%lab/ssl_classifier_gnp_large_

In [25]:
# For each trainer, report the latest epoch flagged as having the best validation loss.
for run_name, trainer in data_trainers.items():
    n_epochs = len(trainer.history)
    # Walk the history backwards; `back` counts epochs from the end.
    best = next(
        ((n_epochs - back, record)
         for back, record in enumerate(trainer.history[::-1])
         if record["valid_loss_best"]),
        None,
    )
    if best is not None:
        epoch, record = best
        print(run_name, "epoch:", epoch,
              "val_loss:", record["valid_loss"],
              "val_acc:", record["valid_acc"])

5%har_100%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 30 val_loss: 74.09746773622382 val_acc: 0.7404139803189684
10%har_100%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 22 val_loss: 59.960932497659456 val_acc: 0.8282999660671869
30%har_100%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 49 val_loss: 15.669058653148992 val_acc: 0.9501187648456056
50%har_100%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 58 val_loss: 14.07143764282268 val_acc: 0.9609772650152698
70%har_100%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 42 val_loss: 11.97455449099455 val_acc: 0.9691211401425178
100%har_100%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 13 val_loss: 11.608399493758219 val_acc: 0.9711571089243298


# Evaluation Label Percentage

In [26]:
from skorch.callbacks import Freezer, LRScheduler

# Maps "<sampling>%har_<labels>%lab/<model name>" style keys to the trainers
# returned by `train_models_` for each label percentage.
data_trainers = {}

for sampling_perc in [0.5]:
    for label_perc in label_percentages:
        # The fully labelled run (label_perc == 1) was already computed in the
        # previous section, so it must never be retrained here.
        is_retrain = IS_RETRAIN
        if label_perc == 1:
            is_retrain = False

        # NOTE(review): presumably reloads the pretrained weights for this
        # sampling percentage into `models` (helper defined earlier) - confirm.
        load_pretrained_(models, [sampling_perc])

        data_train, _, data_test = get_train_dev_test_ssl("har",
                                                          n_labels=label_perc,
                                                          data_perc=sampling_perc,
                                                          dev_size=0)

        # Add the test examples to the training set as unlabeled data
        # (their targets are replaced by -1).
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)

        run_name = "{}%har_{}%lab".format(int(sampling_perc*100), int(label_perc*100))

        # Training batches: random contexts, all points as targets, batch repeated.
        train_collate = cntxt_trgt_collate(
            get_cntxt_trgt=CntxtTrgtGetter(contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                                           targets_getter=get_all_indcs,
                                           is_add_cntxts_to_trgts=False),
            is_repeat_batch=True)

        data_trainers.update(train_models_(
            {run_name: (data_train, data_test)},
            # Only the SSL classifiers are fine-tuned; suffix keeps checkpoints separate.
            {k+"_finetune_sup": m for k, m in models.items() if "ssl_classifier" in k},
            criterion=partial(NeuralProcessLoss,
                              ssl_loss="both",
                              n_max_elements=int(128*sampling_perc),
                              label_perc=(label_perc * data_train.n_train)/data_train.n_total),
            patience=15,
            chckpnt_dirname=chckpnt_dirname,
            max_epochs=N_EPOCHS,
            batch_size=BATCH_SIZE,
            # Fix: use the per-run flag computed above instead of the global
            # IS_RETRAIN, otherwise the label_perc == 1 override is ignored.
            is_retrain=is_retrain,
            callbacks=[],
            #callbacks=[Freezer(lambda x: not x.startswith('classifier'))],
            iterator_train__collate_fn=train_collate,
            iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
        ))



--- Loading 50%har/transformer_gnp_large_shared_bottleneck ---

50%har/transformer_gnp_large_shared_bottleneck best epoch: 72 val_loss: -5.531349883959131

--- Loading 50%har_600%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup ---

50%har_600%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup best epoch: 59 val_loss: 132.5539856855045

--- Loading 50%har/transformer_gnp_large_shared_bottleneck ---

50%har/transformer_gnp_large_shared_bottleneck best epoch: 72 val_loss: -5.531349883959131

--- Loading 50%har_1200%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup ---

50%har_1200%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup best epoch: 43 val_loss: 121.13525029809587

--- Loading 50%har/transformer_gnp_large_shared_bottleneck ---

50%har/transformer_gnp_large_shared_bottleneck best epoch: 72 val_loss: -5.531349883959131

--- Loading 50%har_1%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup ---

50%har_1%lab/ssl_classifier_gnp_larg

In [27]:
# If these results are bad, try freezing again and using smaller params.
# For every trained model, walk its history backwards and report the most
# recent epoch that was flagged as the best validation loss so far.
for k, t in data_trainers.items():
    n_epochs = len(t.history)
    for e, h in enumerate(t.history[::-1]):
        if not h["valid_loss_best"]:
            continue
        # `e` counts from the end of the history, so convert to a 1-based epoch.
        print(k, "epoch:", n_epochs - e,
              "val_loss:", h["valid_loss"],
              "val_acc:", h["valid_acc"])
        break

50%har_600%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 59 val_loss: 132.5539856855045 val_acc: 0.4842212419409569
50%har_1200%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 43 val_loss: 121.13525029809587 val_acc: 0.496776382762131
50%har_1%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 18 val_loss: 57.958797629582584 val_acc: 0.848320325755005
50%har_5%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 6 val_loss: 41.30282708107515 val_acc: 0.9053274516457415
50%har_10%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 13 val_loss: 19.129878084579484 val_acc: 0.9389209365456397
50%har_30%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 18 val_loss: 19.8407337821699 val_acc: 0.9453681710213777
50%har_50%lab/ssl_classifier_gnp_large_shared_bottleneck_finetune_sup epoch: 44 val_loss: 15.392488726203952 val_acc: 0.9548693586698337
50%har_100%lab/ssl_classifier_gnp_large_shared_

* 0.9304 best without n max elements
* 0.9277: jsd | no 0.1 scale | n_max_elements | 100 samples | 0.05 entropies
* 0.9857 : jsd | 0.1 scale | n_max_elements | 100 samples | 0.05 entropies | no freeze
* 0.9623 : jsd | no scale | n_max_elements | 100 samples | 0.01 entropies | no freeze | no pretrain | [0.01,0.5] | linear interpolator (1,5)


* 0.9671 : jsd | no scale ? | n_max_elements | 100 samples | 0.05 entropies | no freeze | no pretrain | [0.1,0.5]
* 0.9365 : jsd | no scale | n_max_elements | 100 samples | 0.05 entropies | no freeze | no pretrain | [0.01,0.5] | linear interpolator
* 0.9824 : jsd | no scale | n_max_elements | 100 samples | 0.01 entropies | no freeze | no pretrain | [0.01,0.5] | linear interpolator


* 0.9844 : jsd | no 0.1 scale | n_max_elements | 100 samples | 0.05 entropies | no freeze
* 0.9817 : jsd | no 0.1 scale | n_max_elements | 100 samples | 0.05 entropies | no freeze | cntxt [0.01,0.5]


* 0.9627 : jsd | no scale | n_max_elements | 100 samples | 0.01 entropies | no freeze | no pretrain | [0.01,0.5]
* 0.9572 : jsd | no scale | n_max_elements | 100 samples | 0.01 entropies | no freeze | no pretrain | [0.01,0.9] | linear interpolator


* 0.9321: jsd | no 0.1 scale | n_max_elements | 100 samples
* 0.9365: jsd | no 0.1 scale | n_max_elements | 100 samples | 0.1 entropies


* 0.9450 : jsd | 0.2 scale | n_max_elements | 100 samples | 0.05 entropies | no freeze | no pretrain | [0.01,0.5]
* 0.9315 : jsd | no scale | n_max_elements | 100 samples | 0.05 entropies | no freeze | no pretrain | [0.01,0.5]

In [28]:
# Reload every pretrained transformer checkpoint (no retraining) and copy its
# weights into the SSL classifier that shares its name.
# NOTE(review): the target classifier key does not depend on `sampling_perc`,
# so each iteration overwrites the weights loaded by the previous one.
loaded_models = {}
for sampling_perc in sampling_percentages:
    for k, m in models.items():
        if "transformer" in k:
            out = train_models_({"{}%har".format(int(sampling_perc*100)): (None, None)},
                                {k: m},
                                chckpnt_dirname=chckpnt_dirname,
                                is_retrain=False)

            # Only one dataset/model pair was requested, so take the first trainer.
            pretrained_model = next(iter(out.values())).module_

            # Overlay the pretrained parameters on the classifier's own state.
            ssl_key = k.replace("transformer", "ssl_classifier")
            model_dict = models[ssl_key].state_dict()
            model_dict.update(pretrained_model.state_dict())
            models[ssl_key].load_state_dict(model_dict)
        


--- Loading 5%har/transformer_gnp_large_shared_bottleneck ---

5%har/transformer_gnp_large_shared_bottleneck best epoch: 98 val_loss: -2.164531707763672

--- Loading 10%har/transformer_gnp_large_shared_bottleneck ---

10%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -2.98053588682008

--- Loading 30%har/transformer_gnp_large_shared_bottleneck ---

30%har/transformer_gnp_large_shared_bottleneck best epoch: 87 val_loss: -4.5797279249117215

--- Loading 50%har/transformer_gnp_large_shared_bottleneck ---

50%har/transformer_gnp_large_shared_bottleneck best epoch: 72 val_loss: -5.531349883959131

--- Loading 70%har/transformer_gnp_large_shared_bottleneck ---

70%har/transformer_gnp_large_shared_bottleneck best epoch: 98 val_loss: -7.103462460897501

--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622


In [29]:
from torch.distributions import Normal, Categorical, kl_divergence

In [30]:
# Two toy 2-class probability vectors (each sums to 1). `requires_grad=True`
# makes them gradient-tracking tensors so `.grad` can be inspected later.
t1 = torch.tensor([0.2, 0.8], requires_grad=True)
t2 = torch.tensor([0.7, 0.3], requires_grad=True)
# Disabled: softmax-normalised versions of the two vectors.
#torch.softmax(t2, -1)
#torch.softmax(t1, -1)
#torch.softmax(t1, -1)

In [31]:
# Element-wise mean of the two vectors t1 and t2.
M = (t1 + t2) / 2

In [None]:
def jensen_shannon_div(p1, p2):
    """Jensen-Shannon divergence between two categorical distributions.

    Parameters
    ----------
    p1, p2 : torch.Tensor
        Probability vectors over the last dimension.

    Returns
    -------
    torch.Tensor
        Mean of KL(p1 || m) and KL(p2 || m), where m is the average of
        `p1` and `p2`.
    """
    mixture_probs = (p1 + p2) / 2
    # The average can only be exactly 0 where both inputs are 0; numerical
    # issues may break this, so force the inputs to 0 wherever the average is 0.
    support = (mixture_probs != 0).float()
    mixture = Categorical(probs=mixture_probs)
    kl_first = kl_divergence(Categorical(probs=p1 * support), mixture)
    kl_second = kl_divergence(Categorical(probs=p2 * support), mixture)
    return (kl_first + kl_second) / 2

def yann_div(t1, t2):
    """Sum of the KL divergences from `t1` and `t2` to their average.

    Parameters
    ----------
    t1, t2 : torch.Tensor
        Probability vectors over the last dimension.

    Returns
    -------
    torch.Tensor
        KL(t1 || m) + KL(t2 || m) with m = (t1 + t2) / 2, reduced with
        `torch.min` over all elements of the result.
    """
    midpoint = Categorical((t1 + t2) / 2)
    kl_total = (kl_divergence(Categorical(probs=t1), midpoint)
                + kl_divergence(Categorical(probs=t2), midpoint))
    return torch.min(kl_total)

def csiszar_dist(t1, t2):
    """Mean KL divergence from the average distribution to each input.

    Parameters
    ----------
    t1, t2 : torch.Tensor
        Probability vectors over the last dimension.

    Returns
    -------
    torch.Tensor
        (KL(m || t1) + KL(m || t2)) / 2 with m = (t1 + t2) / 2. Unlike the
        Jensen-Shannon form, the average is the *first* KL argument here.
    """
    midpoint_probs = (t1 + t2) / 2
    kl_to_first = kl_divergence(Categorical(midpoint_probs), Categorical(probs=t1))
    kl_to_second = kl_divergence(Categorical(midpoint_probs), Categorical(probs=t2))
    return (kl_to_first + kl_to_second) / 2  #**0.5

def total_var(t1, t2):
    """Total variation distance: half the L1 distance over the last dimension."""
    l1_distance = torch.sum(torch.abs(t1 - t2), dim=-1)
    return l1_distance / 2

def bhattacharyya_dist(t1, t2):
    """Bhattacharyya distance: negative log of sum(sqrt(t1 * t2)) over the last dimension."""
    overlap = torch.sqrt(t1 * t2).sum(dim=-1)
    return -torch.log(overlap)

def hellinger_dist(t1, t2):
    """Hellinger distance: L2 norm of sqrt(t1) - sqrt(t2) over the last dimension, divided by sqrt(2)."""
    root_gap = torch.sqrt(t1) - torch.sqrt(t2)
    l2_norm = torch.sqrt(torch.sum(root_gap ** 2, dim=-1))
    return l2_norm / (2**0.5)

In [None]:
import math
# Natural logarithm of 2, displayed as the cell output.
# NOTE(review): presumably a reference value for the comparisons below (the
# Jensen-Shannon divergence in nats is bounded above by ln 2) - confirm.
math.log(2)

In [None]:
# Print every divergence/distance for a handful of distribution pairs:
# disjoint, close, identical, nearly disjoint and 3-class examples.
# The loop variables are deliberately NOT named `t1`/`t2`: at notebook top
# level they would rebind the gradient-tracking tensors `t1`/`t2` defined
# earlier to plain lists and break the cells that later use `t1.pow`/`t1.grad`.
for probs_a, probs_b in [([0., 1], [1, 0.]),
                         ([0.5, 0.5], [0.4, 0.6]),
                         ([0.5, 0.5], [0.5, 0.5]),
                         ([0.4, 0.6], [0.3, 0.7]),
                         ([1-1e-50, 1e-50], [1e-50, 1-1e-50]),
                         ([0.1, 0.1, 0.8], [0.2, 0.2, 0.6]),
                         ([0.1, 0.1, 0.8], [0.6, 0.2, 0.2])]:
    # Build each tensor once and reuse it for all measures.
    tensor_a, tensor_b = torch.tensor(probs_a), torch.tensor(probs_b)
    print()
    print(probs_a, probs_b)
    for label, divergence in [("yd", yann_div),
                              ("cd", csiszar_dist),
                              ("tv", total_var),
                              ("jsd", jensen_shannon_div),
                              ("bd", bhattacharyya_dist),
                              ("hd", hellinger_dist)]:
        print(label, divergence(tensor_a, tensor_b).item())

In [None]:
# Element-wise square of t1, displayed as the cell output.
# NOTE(review): requires `t1` to still be a torch tensor when this cell runs.
t1.pow(2)

In [None]:
# NOTE(review): in the visible cells `k` is only ever bound as a dictionary key
# by earlier `for k, ...` loops, never as a tensor. Presumably this was meant to
# back-propagate a scalar computed from t1/t2 in a since-removed cell - confirm
# and define that tensor explicitly before this line.
k.backward()

# Gradient accumulated on t1 by the backward pass above.
t1.grad

In [None]:
# Gradient accumulated on t2 (None if no backward pass has reached t2 yet).
t2.grad