In [2]:
import imp
import importlib
import json
import os
from collections import OrderedDict

import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text
from tqdm import tqdm_notebook as tqdm

import models
import utils

# Plan

Models:
 - Regexp
 - USE (https://arxiv.org/pdf/1803.11175.pdf) (Small / Large)
 - ConveRT (https://arxiv.org/pdf/2003.04807.pdf, https://arxiv.org/pdf/1911.03688.pdf)
 - USE+ConveRT (https://arxiv.org/pdf/2003.04807.pdf)
 - Conversational BERT (?)

Setups:
 - Few-shot setup (10-30 training examples per intent)
 - Full dataset
 
Modes:
 - **Multilabel** vs **Multiclass**
 - **No training (similarity-based)** vs **MLP**
 
Datasets:
 - Alexa Prize Intent Dataset
 - SNIPS Dataset (NLU Benchmark 2017)

In [3]:
assert tf.__version__ == "1.14.0", (
    f"found tf version {tf.__version__}, but need 1.14.0")
# assert tf.test.is_gpu_available(), (
#     "GPU not available. please use a GPU runtime")

# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [4]:
use_url = "https://tfhub.dev/google/universal-sentence-encoder/2"
use_large_url = "https://tfhub.dev/google/universal-sentence-encoder-large/2"
convert_url = "http://models.poly-ai.com/convert/v1/model.tar.gz"

---

# Creating datasets

In [5]:
SEED = 42
FEW_SHOT_NUM = 10

In [6]:
np.random.seed(SEED)

In [7]:
# Placeholder registry: each corpus holds two training variants
# (few-shot and full) plus one validation and one test split.
# All slots start as None and are assigned in later cells.
dataset = {
    corpus: {
        "train": {"few_shot": None, "full": None},
        "valid": None,
        "test": None,
    }
    for corpus in ("alexa_prize", "snips")
}

## Alexa Prize

In [8]:
# Load the Alexa Prize intent definitions. The context manager closes the
# file handle, which the previous bare open() call left dangling.
with open("data/alexaprize/intent_phrases.json") as phrases_file:
    data = json.load(phrases_file)

In [9]:
# Run utils.generate_phrases over the 'phrases' list of every intent and
# collect the results keyed by intent name.
generated_data = {
    intent: utils.generate_phrases(spec['phrases'])
    for intent, spec in data['intent_phrases'].items()
}

100%|██████████| 8/8 [00:28<00:00,  3.58s/it]
100%|██████████| 1/1 [04:09<00:00, 249.33s/it]
100%|██████████| 51/51 [01:10<00:00,  1.07it/s]
100%|██████████| 3/3 [00:10<00:00,  2.87s/it]
100%|██████████| 8/8 [00:10<00:00,  1.71s/it]
100%|██████████| 4/4 [00:09<00:00,  2.13s/it]
100%|██████████| 5/5 [00:13<00:00,  2.72s/it]
100%|██████████| 18/18 [00:32<00:00,  2.26s/it]
100%|██████████| 10/10 [00:20<00:00,  2.70s/it]
100%|██████████| 23/23 [01:18<00:00,  5.17s/it]
100%|██████████| 6/6 [00:20<00:00,  3.12s/it]
100%|██████████| 146/146 [02:14<00:00,  1.07s/it]
100%|██████████| 6/6 [00:09<00:00,  1.73s/it]
100%|██████████| 9/9 [00:14<00:00,  1.35s/it]
100%|██████████| 8/8 [00:10<00:00,  1.19s/it]
100%|██████████| 2/2 [00:03<00:00,  1.90s/it]
100%|██████████| 6/6 [00:15<00:00,  2.60s/it]
100%|██████████| 9/9 [00:09<00:00,  1.15s/it]
100%|██████████| 17/17 [01:48<00:00,  6.53s/it]
100%|██████████| 3/3 [00:07<00:00,  2.45s/it]
100%|██████████| 4/4 [00:03<00:00,  1.11it/s]


In [10]:
generated_data['random'] = utils.generate_phrases(data['random_phrases']['phrases'])

100%|██████████| 855/855 [10:46<00:00,  1.10it/s]


In [11]:
generated_data

{'topic_switching': ['enough talking  about it',
  "that's enough talking  about the weather",
  "that's enough talking  about it",
  "that's enough about the weather",
  "that's enough about this thing",
  "that's enough talking  about this thing",
  'enough talking  about music',
  'enough about me',
  'enough talking  about this thing',
  "that's enough about this topic",
  "that's enough talking  about movies",
  "that's enough about that",
  'enough about politics',
  'enough talking  about the weather',
  'enough talking  about movies',
  "that's enough about this",
  'enough about this thing',
  "that's enough about music",
  "that's enough talking  about me",
  'enough talking  about this',
  "that's enough about me",
  "that's enough talking  about politics",
  "that's enough about you",
  'enough talking  about that',
  "that's enough talking  about that",
  'enough about music',
  'enough about that',
  'enough about the weather',
  'enough talking  about politics',
  "that'

In [12]:
# Pick up edits made to utils.py without restarting the kernel.
# importlib.reload replaces imp.reload: the imp module is deprecated and
# no longer exists in Python 3.12+.
importlib.reload(utils)

<module 'utils' from '/home/daniil/Diploma/utils.py'>

In [13]:
# Split the generated Alexa Prize phrases into train / test / valid / few-shot train.
# NOTE(review): the exact meaning of train_size and train_num is defined in
# utils.train_test_split, which is not shown here — confirm against utils.py.
# First split: train vs. held-out remainder (train_size=0.6).
train, test = utils.train_test_split(generated_data, train_size=0.6)
# Second split: the held-out remainder is divided again (train_size=0.5);
# the first returned part is kept as test, the second becomes valid.
test, valid = utils.train_test_split(test, train_size=0.5)
# Few-shot training set drawn from train; presumably FEW_SHOT_NUM examples
# per intent — TODO confirm. The leftover part is discarded.
train_few_shot, _ = utils.train_test_split(train, train_num=FEW_SHOT_NUM)

In [14]:
# Let's print the stats of datasets (number of samples per intent)
print("Train data:\n")
utils.print_dataset_stat(train)
print('\n'+"-"*20+'\n')
print("Test data:\n")
utils.print_dataset_stat(test)
print('\n'+"-"*20+'\n')
print("Validate data:\n")
utils.print_dataset_stat(valid)
print('\n'+"-"*20+'\n')
print("Train few shot data:\n")
utils.print_dataset_stat(train_few_shot)

Train data:

topic_switching:1228
lets_chat_about:6309
exit:551
tell_me_a_story:433
repeat:184
yes:84
no:51
dont_understand:195
stupid:1186
cant_do:18504
tell_me_more:207
weather_forecast_intent:5323
what_is_your_name:23
where_are_you_from:88
what_can_you_do:13
choose_topic:6
who_made_you:42
what_is_your_job:15
opinion_request:42610
doing_well:50
what_time:5
random:65743

--------------------

Test data:

topic_switching:568
lets_chat_about:2886
exit:250
tell_me_a_story:197
repeat:84
yes:40
no:23
dont_understand:89
stupid:540
cant_do:8454
tell_me_more:92
weather_forecast_intent:2441
what_is_your_name:10
where_are_you_from:42
what_can_you_do:7
choose_topic:3
who_made_you:18
what_is_your_job:6
opinion_request:19439
doing_well:23
what_time:2
random:30015

--------------------

Validate data:

topic_switching:680
lets_chat_about:3499
exit:303
tell_me_a_story:239
repeat:102
yes:50
no:30
dont_understand:109
stupid:637
cant_do:10247
tell_me_more:105
weather_forecast_intent:2974
what_is_your_n

In [15]:
dataset["alexa_prize"]['train']['full'] = train
dataset["alexa_prize"]['train']['few_shot'] = train_few_shot
dataset["alexa_prize"]['test'] = test
dataset["alexa_prize"]['valid'] = valid

---

## SNIPS

In [16]:
!cat data/nlu-benchmark/2017-06-custom-intent-engines/README.md

# Natural Language Understanding benchmark

This file contains the results of the benchmark we ran on June 1st 2017 to compare natural language understanding services offering custom solutions (Wit, Luis, Api, and Snips) for seven intents. This benchmark and its results are described in [this paper](https://arxiv.org/abs/1805.10190) and [this blog post](https://medium.com/@alicecoucke/benchmarking-natural-language-understanding-systems-google-facebook-microsoft-and-snips-2b8ddcf9fb19).

**Any publication based on these datasets must include a full citation to the following paper in which the results were published by Snips:** 

["Snips Voice Platform: an embedded Spoken Language Understanding system 
for private-by-design voice interfaces"](https://arxiv.org/abs/1805.10190)


## Method
We focused on seven `intents`:
* SearchCreativeWork (e.g. *Find me the I, Robot television show*),
* GetWeather (e.g. *Is it windy in Boston, MA right now?*),
* BookRestaurant (e.g. *I want

In [17]:
def prepare_intent_data(data):
    """Turn SNIPS examples into plain utterance strings for intent classification.

    Args:
        data: iterable of examples; each example is indexed with ``'data'``
            to get a sequence of chunks, and each chunk with ``'text'``.

    Returns:
        A list with one string per example, built by concatenating the
        chunk texts without any separator. Iteration is wrapped in ``tqdm``
        so progress is displayed.
    """
    return [
        "".join(chunk['text'] for chunk in sample['data'])
        for sample in tqdm(data)
    ]

In [18]:
# Build the SNIPS train/test dictionaries (intent name -> list of utterances)
# from the per-intent sub-directories of the benchmark.
directory = 'data/nlu-benchmark/2017-06-custom-intent-engines/'
train = {}
test = {}
# os.listdir returns entries in arbitrary order; sorting keeps the intent
# order stable between runs and machines (presumably this also matters for
# the seeded random splits below — confirm in utils.train_test_split).
for item in sorted(os.listdir(directory)):
    intent_dir = os.path.join(directory, item)
    if not os.path.isdir(intent_dir):
        # Skip plain files in the benchmark directory.
        continue
    # Context managers close the JSON files instead of leaking the handles.
    train_path = os.path.join(intent_dir, 'train_' + item + '_full.json')
    with open(train_path, encoding='cp1251') as train_file:
        train_data = json.load(train_file)
    # The benchmark's "validate" file is used as the test set here.
    test_path = os.path.join(intent_dir, 'validate_' + item + '.json')
    with open(test_path, encoding='cp1251') as test_file:
        test_data = json.load(test_file)
    # Each JSON file keeps its examples under a key equal to the intent name.
    train[item] = prepare_intent_data(train_data[item])
    test[item] = prepare_intent_data(test_data[item])
# The first part returned for train_num=100 becomes the validation set; the
# rest stays as the full training set.
valid, train = utils.train_test_split(train, train_num=100)
# Few-shot training set drawn from the remaining training data.
train_few_shot, _ = utils.train_test_split(train, train_num=FEW_SHOT_NUM)

HBox(children=(IntProgress(value=0, max=1959), HTML(value='')))




HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1956), HTML(value='')))




HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1942), HTML(value='')))




HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1973), HTML(value='')))




HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1954), HTML(value='')))




HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=2000), HTML(value='')))




HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=2000), HTML(value='')))




HBox(children=(IntProgress(value=0), HTML(value='')))




In [19]:
#snips train data
# Let's print the stats of datasets (number of samples per intent)
print("Train data:\n")
utils.print_dataset_stat(train)
print('\n'+"-"*20+'\n')
print("Test data:\n")
utils.print_dataset_stat(test)
print('\n'+"-"*20+'\n')
print("Validate data:\n")
utils.print_dataset_stat(valid)
print('\n'+"-"*20+'\n')
print("Train few shot data:\n")
utils.print_dataset_stat(train_few_shot)

Train data:

SearchScreeningEvent:1864
RateBook:1861
AddToPlaylist:1842
BookRestaurant:1875
SearchCreativeWork:1855
GetWeather:1903
PlayMusic:1903

--------------------

Test data:

SearchScreeningEvent:100
RateBook:100
AddToPlaylist:100
BookRestaurant:100
SearchCreativeWork:100
GetWeather:100
PlayMusic:100

--------------------

Validate data:

SearchScreeningEvent:100
RateBook:100
AddToPlaylist:100
BookRestaurant:100
SearchCreativeWork:100
GetWeather:100
PlayMusic:100

--------------------

Train few shot data:

SearchScreeningEvent:10
RateBook:10
AddToPlaylist:10
BookRestaurant:10
SearchCreativeWork:10
GetWeather:10
PlayMusic:10


In [20]:
dataset["snips"]['train']['full'] = train
dataset["snips"]['train']['few_shot'] = train_few_shot
dataset["snips"]['test'] = test
dataset["snips"]['valid'] = valid

In [21]:
# Persist both datasets to disk. Closing the file through the context
# manager guarantees the JSON is fully flushed before it is read back.
with open("data/full_dataset.json", 'w') as dataset_file:
    json.dump(dataset, dataset_file)

---

In [5]:
# Reload the prepared datasets from disk; the context manager closes the
# file handle once parsing is done.
with open("data/full_dataset.json") as dataset_file:
    dataset = json.load(dataset_file)

----

In [6]:
# Skeleton of the results table:
#   dataset -> training setting -> encoder -> evaluation mode.
# "mc" / "ml" in the mode names stand for multiclass / multilabel.
# Every leaf is initialised to None; each encoder gets its own leaf dict.
metrics = {
    corpus: {
        setting: {
            encoder_name: {
                mode: None
                for mode in ("similarity mc", "similarity ml", "mlp mc", "mlp ml")
            }
            for encoder_name in ("use_small", "use_large", "convert", "use+convert")
        }
        for setting in ("few_shot", "full")
    }
    for corpus in ("alexa_prize", "snips")
}

# Model testing: Alexa prize

In [7]:
test = OrderedDict(dataset['alexa_prize']['test'])
valid = OrderedDict(dataset['alexa_prize']['valid'])

## Few-shot setting

In [8]:
train = OrderedDict(dataset['alexa_prize']['train']['few_shot'])

### USE

In [9]:
# Similarity-based and MLP
# Multilabel and Multiclass

encoder = hub.Module(use_url)

In [11]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Multilabel similarity: 0.691394931702157


In [12]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Multiclass similarity: 0.736267611031903


In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### USE Large

In [13]:
# Similarity-based and MLP
# Multilabel and Multiclass

encoder = hub.Module(use_large_url)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### ConveRT

In [None]:
# Similarity-based and MLP
# Multilabel and Multiclass
encoder = hub.Module(convert_url)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### USE + ConveRT

In [None]:
# Similarity-based and MLP
# Multilabel and Multiclass
use_encoder = hub.Module(use_url)
convert_encoder = hub.Module(convert_url)
encoder = utils.concatenate_encoders(use_encoder, convert_encoder)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### Conversational BERT

In [None]:
# TBD

## Full setting

In [None]:
train = OrderedDict(dataset['alexa_prize']['train']['full'])

### USE

In [None]:
# Similarity-based and MLP
# Multilabel and Multiclass
encoder = hub.Module(use_url)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### USE Large

In [None]:
# Similarity-based and MLP
# Multilabel and Multiclass
encoder = hub.Module(use_large_url)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### ConveRT

In [None]:
# Similarity-based and MLP
# Multilabel and Multiclass
encoder = hub.Module(convert_url)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### USE + ConveRT

In [None]:
# Similarity-based and MLP
# Multilabel and Multiclass
use_encoder = hub.Module(use_url)
convert_encoder = hub.Module(convert_url)
encoder = utils.concatenate_encoders(use_encoder, convert_encoder)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### Conversational BERT

In [None]:
# TBD

---

# Model testing: SNIPS

In [None]:
test = OrderedDict(dataset['snips']['test'])
valid = OrderedDict(dataset['snips']['valid'])

## Few-shot setting

In [None]:
train = OrderedDict(dataset['snips']['train']['few_shot'])

### USE

In [None]:
# Similarity-based and MLP
encoder = hub.Module(use_url)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### USE Large

In [None]:
# Similarity-based and MLP
encoder = hub.Module(use_large_url)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### ConveRT

In [None]:
# Similarity-based and MLP
# Multilabel and Multiclass
encoder = hub.Module(convert_url)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### USE + ConveRT

In [None]:
# Similarity-based and MLP
# Multilabel and Multiclass
use_encoder = hub.Module(use_url)
convert_encoder = hub.Module(convert_url)
encoder = utils.concatenate_encoders(use_encoder, convert_encoder)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### Conversational BERT

In [None]:
# TBD

## Full setting

In [None]:
train = OrderedDict(dataset['snips']['train']['full'])

### USE

In [None]:
# Similarity-based and MLP
encoder = hub.Module(use_url)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### USE Large

In [None]:
# Similarity-based and MLP
encoder = hub.Module(use_large_url)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### ConveRT

In [None]:
# Similarity-based and MLP
encoder = hub.Module(convert_url)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### USE + ConveRT

In [None]:
# Similarity-based and MLP
use_encoder = hub.Module(use_url)
convert_encoder = hub.Module(convert_url)
encoder = utils.concatenate_encoders(use_encoder, convert_encoder)

In [None]:
model = models.Similarity(encoder, multilabel=True)
print(f"Multilabel similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.Similarity(encoder, multilabel=False)
print(f"Multiclass similarity: {utils.train_and_eval_model(model, train, valid, test, mode='sim')}")

In [None]:
model = models.MLP(encoder, multilabel=True)
print(f"Multilabel MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

In [None]:
model = models.MLP(encoder, multilabel=False)
print(f"Multiclass MLP: {utils.train_and_eval_model(model, train, valid, test, mode='mlp')}")

### Conversational BERT

In [None]:
# TBD

----

# Final results

In [None]:
# Tables
# One table for each dataset and each setting


# Tables format 
#(MC - multiclass; ML - multilabel)
#
# Models | Similarity MC | Similarity ML | MLP MC | MLP ML
# USE_small | ...
# USE_large | ...
# ...
#
#
#

# Few-shot setting: 2 tables
# Full setting: 2 tables 
# All-in-all: 4 tables