# Human Activity Recognition - SSL JOINT Consistency

Last Update : 24 July 2019

In [1]:
N_THREADS = 8
# Nota Bene : notebooks don't deallocate GPU memory
IS_FORCE_CPU = False # can also be set in the trainer

## Environment

In [2]:
cd ..

/master


In [3]:
%autosave 600
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# CENTER PLOTS
from IPython.core.display import HTML
display(HTML(""" <style> .output_png {display: table-cell; text-align: center; margin:auto; }
.prompt display:none;}  </style>"""))

import os
if IS_FORCE_CPU:
    os.environ['CUDA_VISIBLE_DEVICES'] = ""

import sys
sys.path.append("notebooks")

import numpy as np
import matplotlib.pyplot as plt
from functools import partial
import pandas as pd
import h5py


import torch
torch.set_num_threads(N_THREADS)

Autosaving every 600 seconds


# Dataset

In [4]:
from skssl.transformers.neuralproc.datasplit import CntxtTrgtGetter, GetRandomIndcs, get_all_indcs
from utils.data.tsdata import get_timeseries_dataset, SparseMultiTimeSeriesDataset

get_cntxt_trgt_test = CntxtTrgtGetter(contexts_getter=GetRandomIndcs(min_n_indcs=0.1, max_n_indcs=0.5),
                                     targets_getter=get_all_indcs,
                                     is_add_cntxts_to_trgts=False)  # don't context points to tagrtes

get_cntxt_trgt_feat = CntxtTrgtGetter(contexts_getter=get_all_indcs,
                                     targets_getter=get_all_indcs,
                                     is_add_cntxts_to_trgts=False)  # don't context points to tagrtes

get_cntxt_trgt = CntxtTrgtGetter(contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                                 targets_getter=GetRandomIndcs(min_n_indcs=0.5, max_n_indcs=0.99),
                                 is_add_cntxts_to_trgts=False)  # don't context points to tagrtes

In [5]:
data_both = get_timeseries_dataset("har")(split="both")

def cntxt_trgt_collate(get_cntxt_trgt, is_repeat_batch=False):
    def mycollate(batch):
        min_length = min([v.size(0) for b in batch for k,v in b[0].items() if "X" in k])
        # chose first min_legth of each (assumes that randomized)
        
        batch = [({k:v[:min_length, ...] for k,v in b[0].items()}, b[1]) for b in batch]        
        collated = torch.utils.data.dataloader.default_collate(batch)
        
        X = collated[0]["X"]
        y = collated[0]["y"]
        
        if is_repeat_batch:
            
            X = torch.cat([X,X], dim=0)
            y = torch.cat([y,y], dim=0)
            collated[1] = torch.cat([collated[1], collated[1]], dim=0) # targets
        
        collated[0]["X"], collated[0]["y"], collated[0]["X_trgt"], collated[0]["y_trgt"] = get_cntxt_trgt(X, y)
        
        return collated
    return mycollate

In [6]:
X_DIM = 1  # 1D spatial input (although actually 2 but the first is for sparse channels)
Y_DIM = data_both.data.shape[-1] # multiple channels
N_TARGETS = len(np.unique(data_both.targets))

sampling_percentages = [0.05, 0.1, 0.3, 0.5, 0.7, 1]
label_percentages = [N_TARGETS, N_TARGETS*2, 0.01, 0.05, 0.1, 0.3, 0.5, 1]

## Model

In [7]:
import torch.nn as nn
from skssl.transformers import GlobalNeuralProcess, NeuralProcessLoss, AttentiveNeuralProcess
from skssl.utils.helpers import rescale_range
from skssl.predefined import UnetCNN, CNN, MLP, SparseSetConv, SetConv, MlpRBF, GaussianRBF, BatchSparseSetConv
from skssl.transformers.neuralproc.datasplit import precomputed_cntxt_trgt_split
from utils.helpers import count_parameters

In [8]:
from copy import deepcopy

models = {}

unet = partial(UnetCNN,
               Conv=torch.nn.Conv1d,
               Pool=torch.nn.MaxPool1d,
               upsample_mode="linear",
               n_layers=18,
               is_double_conv=True,
               is_depth_separable=True,
               Normalization=torch.nn.BatchNorm1d,
               is_chan_last=True,
               bottleneck=None,
               kernel_size=7,
               max_nchannels=256,
              is_force_same_bottleneck=True,
               _is_summary=True,
              )

kwargs = dict(x_dim=X_DIM, 
              y_dim=Y_DIM,
              min_std=5e-3,
                n_tmp_queries=128,
                r_dim=64,
              keys_to_tmp_attn=partial(SetConv, RadialBasisFunc=GaussianRBF),
              TmpSelfAttn=unet,
              tmp_to_queries_attn=partial(SetConv, RadialBasisFunc=GaussianRBF),
              is_skip_tmp=False,
              is_use_x=False,
              get_cntxt_trgt=precomputed_cntxt_trgt_split,
              is_encode_xy=False,
             Classifier=partial(MLP, input_size=256+Y_DIM*4, output_size=N_TARGETS, 
                                dropout=0.5, hidden_size=128, n_hidden_layers=3, is_res=True))

models["ssl_classifier_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs)

kwargs_bis = deepcopy(kwargs)
kwargs_bis["Classifier"] = None

models["transformer_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs_bis)

In [9]:
from utils.helpers import count_parameters
for k,v in models.items():
    print(k, "- N Param:", count_parameters(v()))

ssl_classifier_gnp_large_shared_bottleneck - N Param: 1078238
transformer_gnp_large_shared_bottleneck - N Param: 1006936


# Hyperparameter Optimisation


In [10]:
from ntbks_helpers import train_models_
from skorch.dataset import CVSplit
from utils.data.ssldata import get_train_dev_test_ssl

N_EPOCHS = 100 
BATCH_SIZE = 32
IS_RETRAIN = True # if false load precomputed
chckpnt_dirname="results/challenge/har/"

from skssl.utils.helpers import HyperparameterInterpolator

n_steps_per_epoch = len(data_both)//BATCH_SIZE
get_lambda_clf=HyperparameterInterpolator(1e-5, 10, N_EPOCHS*n_steps_per_epoch, 
                              start_step=n_steps_per_epoch*10, mode="linear")

## All

In [None]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

for sampling_perc in [0.5]:
    for label_perc in [1]:
        data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                          n_labels=label_perc, 
                                                          data_perc=sampling_perc, 
                                                          dev_size=0)
        
        # add test as unlabeled data
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)
        
        data_trainers.update(train_models_({"{}%har_{}%lab".format(int(sampling_perc*100), int(label_perc*100)): 
                                            (data_train, data_test)}, 
                              {k :m for k,m in models.items() if "ssl_classifier" in k}, 
                              criterion=partial(NeuralProcessLoss, 
                                                ssl_loss="both",
                                                distance="jsd",
                                                get_lambda_clf=lambda: get_lambda_clf(True),
                                                n_max_elements=int(128*sampling_perc),
                                                ),
                                patience=15,
                              chckpnt_dirname=chckpnt_dirname,
                              max_epochs=N_EPOCHS,
                              batch_size=BATCH_SIZE,
                              is_retrain=IS_RETRAIN,
                              callbacks=[],
                              #callbacks=[Freezer(lambda x: not x.startswith('classifier'))],
                              iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt = CntxtTrgtGetter(contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                                                                                                     targets_getter=get_all_indcs,
                                                                                                     is_add_cntxts_to_trgts=False), 
                                                                            is_repeat_batch=True),  
                              iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                          ))



--- Training 50%har_100%lab/ssl_classifier_gnp_large_shared_bottleneck ---



HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

  epoch    train_loss    valid_acc    valid_loss    cp      dur
-------  ------------  -----------  ------------  ----  -------
      1        [36m7.9444[0m       [32m0.6736[0m        [35m0.6922[0m     +  21.1911


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

      2        [36m4.1732[0m       [32m0.6858[0m        [35m0.6379[0m     +  21.1447


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

      3        [36m2.5341[0m       [32m0.7262[0m        [35m0.5816[0m     +  21.0974


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

      4        [36m2.0458[0m       0.6776        0.6560        21.3549


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

      5        [36m1.6721[0m       [32m0.7408[0m        0.6184        21.2895


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

      6        [36m1.3085[0m       0.6756        0.9155        20.7325


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

      7        [36m1.2530[0m       [32m0.7842[0m        0.6010        20.2866


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

      8        [36m0.8664[0m       0.7621        0.6917        21.3025


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

      9        [36m0.6747[0m       0.7455        0.7645        21.3654


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     10        [36m0.4369[0m       [32m0.8785[0m        [35m0.4268[0m     +  21.3988


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     11        [36m0.3739[0m       0.8185        0.4724        21.5946


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     12        [36m0.0097[0m       0.7472        0.6584        21.5670


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     13       [36m-0.0173[0m       0.8653        [35m0.3569[0m     +  21.4773


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     14       [36m-0.0842[0m       [32m0.8975[0m        [35m0.3147[0m     +  21.4850


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     15       [36m-0.2122[0m       0.8860        [35m0.2988[0m     +  21.4199


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     16       [36m-0.4549[0m       0.8188        0.4709        21.5196


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     17       -0.3409       [32m0.9043[0m        [35m0.2766[0m     +  21.4568


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     18       [36m-0.8202[0m       [32m0.9128[0m        [35m0.2273[0m     +  21.4954


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     19       [36m-1.1089[0m       0.8741        0.3188        21.4360


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     20       [36m-1.2847[0m       [32m0.9206[0m        [35m0.2208[0m     +  21.6017


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     21       [36m-1.3895[0m       [32m0.9260[0m        [35m0.2002[0m     +  20.5840


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     22       [36m-1.4829[0m       0.8914        0.3282        21.5256


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     23       [36m-1.6173[0m       0.9203        0.2246        21.7182


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     24       -1.6163       [32m0.9460[0m        [35m0.1531[0m     +  21.4484


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     25       [36m-1.7375[0m       0.9406        0.1668        21.5160


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     26       [36m-1.8691[0m       0.9345        0.1657        21.4642


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     27       [36m-2.0489[0m       0.9291        0.1668        21.5087


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     28       [36m-2.0812[0m       0.9247        0.2318        21.4466


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     29       [36m-2.1203[0m       0.9342        0.2202        21.5094


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

     30       [36m-2.2372[0m       0.9433        0.1544        21.8175


HBox(children=(IntProgress(value=0, max=346), HTML(value='')))

In [None]:
for k,t in data_trainers.items():
    for e, h in enumerate(t.history[::-1]):
        if h["valid_loss_best"]:
            print(k, "epoch:", len(t.history)-e, 
                  "val_loss:", h["valid_loss"], 
                  "val_acc:", h["valid_acc"])
            break

## Without Entropies

In [None]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

for sampling_perc in [0.5]:
    for label_perc in [1]:
        data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                          n_labels=label_perc, 
                                                          data_perc=sampling_perc, 
                                                          dev_size=0)
        
        # add test as unlabeled data
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)
        
        data_trainers.update(train_models_({"{}%har_{}%lab".format(int(sampling_perc*100), int(label_perc*100)): 
                                            (data_train, data_test)}, 
                              {k+"_no_entropy" :m for k,m in models.items() if "ssl_classifier" in k}, 
                              criterion=partial(NeuralProcessLoss, 
                                                ssl_loss="both",
                                                distance="jsd",
                                                is_entropies=False,
                                                get_lambda_clf=lambda: get_lambda_clf(True),
                                                n_max_elements=int(128*sampling_perc),
                                                ),
                                patience=15,
                              chckpnt_dirname=chckpnt_dirname,
                              max_epochs=N_EPOCHS,
                              batch_size=BATCH_SIZE,
                              is_retrain=IS_RETRAIN,
                              callbacks=[],
                              iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt = CntxtTrgtGetter(contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                                                                                                     targets_getter=get_all_indcs,
                                                                                                     is_add_cntxts_to_trgts=False), 
                                                                            is_repeat_batch=True),  
                              iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                          ))


In [None]:
for k,t in data_trainers.items(): 
    k += "_no_entropy"
    for e, h in enumerate(t.history[::-1]):
        if h["valid_loss_best"]:
            print(k, "epoch:", len(t.history)-e, 
                  "val_loss:", h["valid_loss"], 
                  "val_acc:", h["valid_acc"])
            break

## Without Consistency

In [None]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

for sampling_perc in [0.5]:
    for label_perc in [1]:
        data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                          n_labels=label_perc, 
                                                          data_perc=sampling_perc, 
                                                          dev_size=0)
        
        # add test as unlabeled data
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)
        
        data_trainers.update(train_models_({"{}%har_{}%lab".format(int(sampling_perc*100), int(label_perc*100)): 
                                            (data_train, data_test)}, 
                              {k+"_no_consist" :m for k,m in models.items() if "ssl_classifier" in k}, 
                              criterion=partial(NeuralProcessLoss, 
                                                ssl_loss="both",
                                                distance="jsd",
                                                get_lambda_clf=lambda: get_lambda_clf(True),
                                                n_max_elements=int(128*sampling_perc),
                                                is_consistency=False,
                                                ),
                                patience=15,
                              chckpnt_dirname=chckpnt_dirname,
                              max_epochs=N_EPOCHS,
                              batch_size=BATCH_SIZE,
                              is_retrain=IS_RETRAIN,
                              callbacks=[],
                              #callbacks=[Freezer(lambda x: not x.startswith('classifier'))],
                              iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt = CntxtTrgtGetter(contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                                                                                                     targets_getter=get_all_indcs,
                                                                                                     is_add_cntxts_to_trgts=False), 
                                                                            is_repeat_batch=True),  
                              iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                          ))


In [None]:
for k,t in data_trainers.items(): 
    k += "_no_consist"
    for e, h in enumerate(t.history[::-1]):
        if h["valid_loss_best"]:
            print(k, "epoch:", len(t.history)-e, 
                  "val_loss:", h["valid_loss"], 
                  "val_acc:", h["valid_acc"])
            break

## Without N Max Elements

In [None]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

for sampling_perc in [0.5]:
    for label_perc in [1]:
        data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                          n_labels=label_perc, 
                                                          data_perc=sampling_perc, 
                                                          dev_size=0)
        
        # add test as unlabeled data
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)
        
        data_trainers.update(train_models_({"{}%har_{}%lab".format(int(sampling_perc*100), int(label_perc*100)): 
                                            (data_train, data_test)}, 
                              {k+"_no_cntxt_scaling" :m for k,m in models.items() if "ssl_classifier" in k}, 
                              criterion=partial(NeuralProcessLoss, 
                                                ssl_loss="both",
                                                distance="jsd",
                                                get_lambda_clf=lambda: get_lambda_clf(True),
                                                n_max_elements=None,
                                                ),
                                patience=15,
                              chckpnt_dirname=chckpnt_dirname,
                              max_epochs=N_EPOCHS,
                              batch_size=BATCH_SIZE,
                              is_retrain=IS_RETRAIN,
                              callbacks=[],
                              #callbacks=[Freezer(lambda x: not x.startswith('classifier'))],
                              iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt = CntxtTrgtGetter(contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                                                                                                     targets_getter=get_all_indcs,
                                                                                                     is_add_cntxts_to_trgts=False), 
                                                                            is_repeat_batch=True),  
                              iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                          ))


In [None]:
for k,t in data_trainers.items(): 
    k += "_no_cntxt_scaling"
    for e, h in enumerate(t.history[::-1]):
        if h["valid_loss_best"]:
            print(k, "epoch:", len(t.history)-e, 
                  "val_loss:", h["valid_loss"], 
                  "val_acc:", h["valid_acc"])
            break

## Without Lambda Classifier

In [None]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

for sampling_perc in [0.5]:
    for label_perc in [1]:
        data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                          n_labels=label_perc, 
                                                          data_perc=sampling_perc, 
                                                          dev_size=0)
        
        # add test as unlabeled data
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)
        
        data_trainers.update(train_models_({"{}%har_{}%lab".format(int(sampling_perc*100), int(label_perc*100)): 
                                            (data_train, data_test)}, 
                              {k+"_no_lambda_clf" :m for k,m in models.items() if "ssl_classifier" in k}, 
                              criterion=partial(NeuralProcessLoss, 
                                                ssl_loss="both",
                                                distance="jsd",
                                                n_max_elements=int(128*sampling_perc),
                                                ),
                                patience=15,
                              chckpnt_dirname=chckpnt_dirname,
                              max_epochs=N_EPOCHS,
                              batch_size=BATCH_SIZE,
                              is_retrain=IS_RETRAIN,
                              callbacks=[],
                              #callbacks=[Freezer(lambda x: not x.startswith('classifier'))],
                              iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt = CntxtTrgtGetter(contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                                                                                                     targets_getter=get_all_indcs,
                                                                                                     is_add_cntxts_to_trgts=False), 
                                                                            is_repeat_batch=True),  
                              iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                          ))


In [None]:
for k,t in data_trainers.items(): 
    k += "_no_lambda_clf"
    for e, h in enumerate(t.history[::-1]):
        if h["valid_loss_best"]:
            print(k, "epoch:", len(t.history)-e, 
                  "val_loss:", h["valid_loss"], 
                  "val_acc:", h["valid_acc"])
            break

## Finetuning
Note that no get_lambda_clf

In [None]:
# ALREADY INITALIZE TO BE ABLE TO LOAD
models["ssl_classifier_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs)()

kwargs_bis = deepcopy(kwargs)
kwargs_bis["Classifier"] = None

models["transformer_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs_bis)()

In [None]:
# load all transformers
loaded_models = {}
for sampling_perc in sampling_percentages:
    for k, m in models.items():
        if "transformer" not in k:
            continue
            
        out = train_models_({"{}%har".format(int(sampling_perc*100)): 
                                            (None, None)}, 
                              {k :m },
                               chckpnt_dirname=chckpnt_dirname,
                               is_retrain=False)
        
        pretrained_model = out[list(out.keys())[0]].module_
        model_dict = models[k.replace("transformer", "ssl_classifier")].state_dict()
        model_dict.update(pretrained_model.state_dict())
        models[k.replace("transformer", "ssl_classifier")].load_state_dict(model_dict)

In [None]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

for sampling_perc in [0.5]:
    for label_perc in [1]:
        data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                          n_labels=label_perc, 
                                                          data_perc=sampling_perc, 
                                                          dev_size=0)
        
        # add test as unlabeled data
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)
        
        data_trainers.update(train_models_({"{}%har_{}%lab".format(int(sampling_perc*100), int(label_perc*100)): 
                                            (data_train, data_test)}, 
                              {k+"_finetune" :m for k,m in models.items() if "ssl_classifier" in k}, 
                              criterion=partial(NeuralProcessLoss, 
                                                ssl_loss="both",
                                                distance="jsd",
                                                n_max_elements=int(128*sampling_perc),
                                                ),
                                patience=15,
                              chckpnt_dirname=chckpnt_dirname,
                              max_epochs=N_EPOCHS,
                              batch_size=BATCH_SIZE,
                              is_retrain=IS_RETRAIN,
                              callbacks=[],
                              #callbacks=[Freezer(lambda x: not x.startswith('classifier'))],
                              iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt = CntxtTrgtGetter(contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                                                                                                     targets_getter=get_all_indcs,
                                                                                                     is_add_cntxts_to_trgts=False), 
                                                                            is_repeat_batch=True),  
                              iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                          ))


In [None]:
for k,t in data_trainers.items(): 
    k += "_finetune"
    for e, h in enumerate(t.history[::-1]):
        if h["valid_loss_best"]:
            print(k, "epoch:", len(t.history)-e, 
                  "val_loss:", h["valid_loss"], 
                  "val_acc:", h["valid_acc"])
            break

# Evaluation Sampling Percentage

In [None]:
# GO BACK TO NO FINETUNING
models["ssl_classifier_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs)

kwargs_bis = deepcopy(kwargs)
kwargs_bis["Classifier"] = None

models["transformer_gnp_large_shared_bottleneck"] = partial(GlobalNeuralProcess, **kwargs_bis)

In [None]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

for sampling_perc in sampling_percentages:
    for label_perc in [1]:
        is_retrain = False if sampling_perc == 0.5 and label_perc == 1 else IS_RETRAIN # already computed before
        
        data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                          n_labels=label_perc, 
                                                          data_perc=sampling_perc, 
                                                          dev_size=0)
        
        # add test as unlabeled data
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)
        
        data_trainers.update(train_models_({"{}%har_{}%lab".format(int(sampling_perc*100), int(label_perc*100)): 
                                            (data_train, data_test)}, 
                              {k :m for k,m in models.items() if "ssl_classifier" in k}, 
                              criterion=partial(NeuralProcessLoss, 
                                                ssl_loss="both",
                                                distance="jsd",
                                                get_lambda_clf=lambda: get_lambda_clf(True),
                                                n_max_elements=int(128*sampling_perc),
                                                ),
                                patience=15,
                              chckpnt_dirname=chckpnt_dirname,
                              max_epochs=N_EPOCHS,
                              batch_size=BATCH_SIZE,
                              is_retrain=is_retrain,
                              callbacks=[],
                              #callbacks=[Freezer(lambda x: not x.startswith('classifier'))],
                              iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt = CntxtTrgtGetter(contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                                                                                                     targets_getter=get_all_indcs,
                                                                                                     is_add_cntxts_to_trgts=False), 
                                                                            is_repeat_batch=True),  
                              iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                          ))


In [None]:
for k,t in data_trainers.items(): 
    for e, h in enumerate(t.history[::-1]):
        if h["valid_loss_best"]:
            print(k, "epoch:", len(t.history)-e, 
                  "val_loss:", h["valid_loss"], 
                  "val_acc:", h["valid_acc"])
            break

# Evaluation Label Percentage

In [None]:
from skorch.callbacks import Freezer, LRScheduler

data_trainers = {}

for sampling_perc in [0.5]:
    for label_perc in label_percentages:
        is_retrain = False if sampling_perc == 0.5 and label_perc == 1 else IS_RETRAIN # already computed before
        
        data_train, _, data_test = get_train_dev_test_ssl("har", 
                                                          n_labels=label_perc, 
                                                          data_perc=sampling_perc, 
                                                          dev_size=0)
        
        # add test as unlabeled data
        data_train.data = np.concatenate([data_train.data, data_test.data], axis=0)
        data_train.targets = np.concatenate([data_train.targets, -1*np.ones_like(data_test.targets)], axis=0)
        data_train.indcs = np.concatenate([data_train.indcs, data_test.indcs], axis=0)
        
        data_trainers.update(train_models_({"{}%har_{}%lab".format(int(sampling_perc*100), int(label_perc*100)): 
                                            (data_train, data_test)}, 
                              {k :m for k,m in models.items() if "ssl_classifier" in k}, 
                              criterion=partial(NeuralProcessLoss, 
                                                ssl_loss="both",
                                                distance="jsd",
                                                get_lambda_clf=lambda: get_lambda_clf(True),
                                                n_max_elements=int(128*sampling_perc),
                                                ),
                                patience=15,
                              chckpnt_dirname=chckpnt_dirname,
                              max_epochs=N_EPOCHS,
                              batch_size=BATCH_SIZE,
                              is_retrain=is_retrain,
                              callbacks=[],
                              #callbacks=[Freezer(lambda x: not x.startswith('classifier'))],
                              iterator_train__collate_fn=cntxt_trgt_collate(get_cntxt_trgt = CntxtTrgtGetter(contexts_getter=GetRandomIndcs(min_n_indcs=0.01, max_n_indcs=0.5),
                                                                                                     targets_getter=get_all_indcs,
                                                                                                     is_add_cntxts_to_trgts=False), 
                                                                            is_repeat_batch=True),  
                              iterator_valid__collate_fn=cntxt_trgt_collate(get_cntxt_trgt_feat),
                                          ))


In [None]:
# if bad has to try freezing again and smaller params
for k,t in data_trainers.items(): 
    for e, h in enumerate(t.history[::-1]):
        if h["valid_loss_best"]:
            print(k, "epoch:", len(t.history)-e, 
                  "val_loss:", h["valid_loss"], 
                  "val_acc:", h["valid_acc"])
            break

* 0.9304 best without n max elements
* 0.9277: jsd | no 0.1 scale | n_max_elements | 100 sampels | 0.05 entropies
* 0.9857 : jsd | 0.1 scale | n_max_elements | 100 sampels | 0.05 entropies | no freeze
* 0.9623 : jsd | no scale | n_max_elements | 100 sampels | 0.01 entropies | no freeze | no pretrain | [0.01,0.5] | linear interpolator (1,5)


* 0.9671 : jsd | no scale ? | n_max_elements | 100 sampels | 0.05 entropies | no freeze | no pretrain | [0.1,0.5]
* 0.9365 : jsd | no scale | n_max_elements | 100 sampels | 0.05 entropies | no freeze | no pretrain | [0.01,0.5] | linear interpolator
* 0.9824 : jsd | no scale | n_max_elements | 100 sampels | 0.01 entropies | no freeze | no pretrain | [0.01,0.5] | linear interpolator


* 0.9844 : jsd | no 0.1 scale | n_max_elements | 100 sampels | 0.05 entropies | no freeze
* 0.9817 : jsd | no 0.1 scale | n_max_elements | 100 sampels | 0.05 entropies | no freeze | cntxt [0.01,0.5]


* 0.9627 : jsd | no scale | n_max_elements | 100 sampels | 0.01 entropies | no freeze | no pretrain | [0.01,0.5]
* 0.9572 : jsd | no scale | n_max_elements | 100 sampels | 0.01 entropies | no freeze | no pretrain | [0.01,0.9] | linear interpolator


* 0.9321: jsd | no 0.1 scale | n_max_elements | 100 sampels
* 0.9365: jsd | no 0.1 scale | n_max_elements | 100 sampels | 0.1 entropies


* 0.9450 : jsd | 0.2 scale | n_max_elements | 100 sampels | 0.05 entropies | no freeze | no pretrain | [0.01,0.5]
* 0.9315 : jsd | no scale | n_max_elements | 100 sampels | 0.05 entropies | no freeze | no pretrain | [0.01,0.5]

In [14]:
# load all transformers
loaded_models = {}
for sampling_perc in sampling_percentages:
    for k, m in models.items():
        if "transformer" not in k:
            continue
            
        out = train_models_({"{}%har".format(int(sampling_perc*100)): 
                                            (None, None)}, 
                              {k :m },
                               chckpnt_dirname=chckpnt_dirname,
                               is_retrain=False)
        
        pretrained_model = out[list(out.keys())[0]].module_
        model_dict = models[k.replace("transformer", "ssl_classifier")].state_dict()
        model_dict.update(pretrained_model.state_dict())
        models[k.replace("transformer", "ssl_classifier")].load_state_dict(model_dict)
        


--- Loading 100%har/transformer_gnp_large_shared_bottleneck ---

100%har/transformer_gnp_large_shared_bottleneck best epoch: 86 val_loss: -8.16725208180622


In [15]:
from torch.distributions import Normal, Categorical, kl_divergence

In [16]:
t1 = torch.tensor([0.2, 0.8], requires_grad=True)
t2 = torch.tensor([0.7, 0.3], requires_grad=True)
#torch.softmax(t2, -1)
#torch.softmax(t1, -1)

In [17]:
M = (t1 + t2) / 2

In [18]:
def jensen_shannon_div(p1, p2):
    p_avg = (p1 + p2) / 2
    mask = (p_avg != 0).float()
    # set to 0 p when M is 0 (because mean can only be 0 is vectors weree, but
    # this is not the case due to numerical issues)
    M = Categorical(probs=p_avg)
    return ((kl_divergence(Categorical(probs=p1 * mask), M) +
             kl_divergence(Categorical(probs=p2 * mask), M)) / 2)

def yann_div(t1, t2):
    M = (t1 + t2) / 2
    return torch.min(kl_divergence(Categorical(probs=t1), Categorical(M)) + 
               kl_divergence(Categorical(probs=t2), Categorical(M)))

def csiszar_dist(t1, t2):
    M = (t1 + t2) / 2
    return ((kl_divergence(Categorical(M), Categorical(probs=t1)
                ) + kl_divergence(Categorical(M), Categorical(probs=t2)))/2)#**0.5

def total_var(t1, t2):
    return (t1 - t2).abs().sum(-1) / 2

def bhattacharyya_dist(t1, t2):
    return -torch.log((t1 * t2).sqrt().sum(-1))

def hellinger_dist(t1, t2):
    return (t1.sqrt() - t2.sqrt()).pow(2).sum(-1).sqrt() / (2**0.5)

In [19]:
import math
math.log(2)

0.6931471805599453

In [20]:
for t1,t2 in [([0., 1], [1, 0.]), 
              ([0.5, 0.5], [0.4, 0.6]), 
              ([0.5, 0.5], [0.5, 0.5]), 
              ([0.4, 0.6], [0.3, 0.7]), 
              ([1-1e-50, 1e-50], [1e-50, 1-1e-50]), 
              ([0.1, 0.1, 0.8], [0.2, 0.2, 0.6]), 
              ([0.1, 0.1, 0.8], [0.6, 0.2, 0.2])]:
    print()
    print(t1, t2)
    print("yd", yann_div(torch.tensor(t1), torch.tensor(t2)).item())
    print("cd", csiszar_dist(torch.tensor(t1), torch.tensor(t2)).item())
    print("tv", total_var(torch.tensor(t1), torch.tensor(t2)).item())
    print("jsd", jensen_shannon_div(torch.tensor(t1), torch.tensor(t2)).item())
    print("bd", bhattacharyya_dist(torch.tensor(t1), torch.tensor(t2)).item())
    print("hd", hellinger_dist(torch.tensor(t1), torch.tensor(t2)).item())


[0.0, 1] [1, 0.0]
yd 1.3862941265106201
cd inf
tv 1.0
jsd 0.6931470632553101
bd inf
hd 1.0

[0.5, 0.5] [0.4, 0.6]
yd 0.010118838399648666
cd 0.005077211186289787
tv 0.10000000894069672
jsd 0.005059419199824333
bd 0.005076696630567312
hd 0.07116072624921799

[0.5, 0.5] [0.5, 0.5]
yd 0.0
cd 0.0
tv 0.0
jsd 0.0
bd -0.0
hd 0.0

[0.4, 0.6] [0.3, 0.7]
yd 0.011017337441444397
cd 0.005537144839763641
tv 0.09999997913837433
jsd 0.0055086687207221985
bd 0.005531022325158119
hd 0.07426819950342178

[1.0, 1e-50] [1e-50, 1.0]
yd 1.3862941265106201
cd inf
tv 1.0
jsd 0.6931470632553101
bd inf
hd 1.0

[0.1, 0.1, 0.8] [0.2, 0.2, 0.6]
yd 0.048314452171325684
cd 0.024884231388568878
tv 0.19999998807907104
jsd 0.024157226085662842
bd 0.024637971073389053
hd 0.15600308775901794

[0.1, 0.1, 0.8] [0.6, 0.2, 0.2]
yd 0.407856285572052
cd 0.24531465768814087
tv 0.6000000238418579
jsd 0.203928142786026
bd 0.2403273582458496
hd 0.4622008800506592


In [21]:
t1.pow(2)

AttributeError: 'list' object has no attribute 'pow'

In [None]:
k.backward()

t1.grad

In [None]:
t2.grad