In [0]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/ec/e7/0a1babead1b79afabb654fbec0a052e0d833ba4205a6dfd98b1aeda9c82e/transformers-2.2.0-py3-none-any.whl (360kB)
[K     |█                               | 10kB 33.8MB/s eta 0:00:01[K     |█▉                              | 20kB 6.1MB/s eta 0:00:01[K     |██▊                             | 30kB 8.6MB/s eta 0:00:01[K     |███▋                            | 40kB 5.7MB/s eta 0:00:01[K     |████▌                           | 51kB 7.0MB/s eta 0:00:01[K     |█████▌                          | 61kB 8.2MB/s eta 0:00:01[K     |██████▍                         | 71kB 9.4MB/s eta 0:00:01[K     |███████▎                        | 81kB 10.5MB/s eta 0:00:01[K     |████████▏                       | 92kB 11.7MB/s eta 0:00:01[K     |█████████                       | 102kB 9.2MB/s eta 0:00:01[K     |██████████                      | 112kB 9.2MB/s eta 0:00:01[K     |███████████                     | 122kB 9.

In [0]:
!pip install -U sentence-transformers

Collecting sentence-transformers
[?25l  Downloading https://files.pythonhosted.org/packages/85/17/9edba42c29fda04f2eb8597bb4de380f0f43d65e317969070c04510d93eb/sentence-transformers-0.2.3.tar.gz (45kB)
[K     |████████████████████████████████| 51kB 4.1MB/s 
[?25hCollecting pytorch-transformers==1.1.0
[?25l  Downloading https://files.pythonhosted.org/packages/50/89/ad0d6bb932d0a51793eaabcf1617a36ff530dc9ab9e38f765a35dc293306/pytorch_transformers-1.1.0-py3-none-any.whl (158kB)
[K     |████████████████████████████████| 163kB 21.1MB/s 
Building wheels for collected packages: sentence-transformers
  Building wheel for sentence-transformers (setup.py) ... [?25l[?25hdone
  Created wheel for sentence-transformers: filename=sentence_transformers-0.2.3-cp36-none-any.whl size=57413 sha256=17cb18ca192fafc7dd6dcff567850a7a9f7598747e683a27cc7711d9e0d95e99
  Stored in directory: /root/.cache/pip/wheels/20/89/88/95d8a3d4034bb77f52a6f66efdbfc623240221183dc0001c31
Successfully built sentence-trans

In [0]:
from torch.utils.data import DataLoader
import math
from sentence_transformers import SentenceTransformer,  SentencesDataset, LoggingHandler, losses, models
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
from sentence_transformers.readers import STSDataReader
import logging
from datetime import datetime


# Logging: INFO-level records with a timestamp prefix, routed through the
# LoggingHandler imported from sentence_transformers.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    handlers=[LoggingHandler()],
)

# Training configuration
train_batch_size = 16
num_epochs = 4
# Every run writes to its own timestamped folder under output/
model_save_path = 'output/training_stsbenchmark_bert-'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Reader for the STS benchmark files stored in the 'stsbenchmark' folder
sts_reader = STSDataReader('stsbenchmark', normalize_scores=True)

# Token-level encoder: bert-base-uncased maps tokens to embeddings
word_embedding_model = models.BERT('bert-base-uncased')

# Sentence vector = mean over the token embeddings (CLS and max pooling are switched off)
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),
    pooling_mode_cls_token=False,
    pooling_mode_max_tokens=False,
    pooling_mode_mean_tokens=True,
)

# Full sentence encoder: token embeddings followed by pooling
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])


2019-11-28 08:19:18 - loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /root/.cache/torch/pytorch_transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.bf3b9ea126d8c0001ee8a1e8b92229871d06d36d8808208cc2449280da87785c
2019-11-28 08:19:18 - Model config {
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "torchscript": false,
  "type_vocab_size": 2,
  "vocab_size": 30522
}

2019-11-28 08:19:19 - loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /root/.cache/torch/pytorch_tran

In [0]:
import urllib.request
import zipfile
import os
# Download each dataset archive from the server, unpack it into folder_path,
# and delete the archive afterwards.
folder_path = "./"
print('Beginning download of datasets')

datasets = ['stsbenchmark.zip']
server = "https://public.ukp.informatik.tu-darmstadt.de/reimers/sentence-transformers/datasets/"

for dataset in datasets:
    print("Download", dataset)
    url = server+dataset
    dataset_path = os.path.join(folder_path, dataset)
    try:
        urllib.request.urlretrieve(url, dataset_path)

        print("Extract", dataset)
        with zipfile.ZipFile(dataset_path, "r") as zip_ref:
            zip_ref.extractall(folder_path)
    finally:
        # Remove the archive even when the download or extraction failed, so a
        # partial or corrupt zip is not left behind for the next run.
        if os.path.exists(dataset_path):
            os.remove(dataset_path)

print("All datasets downloaded and extracted")


Beginning download of datasets
Download AllNLI.zip
Extract AllNLI.zip
Download stsbenchmark.zip
Extract stsbenchmark.zip
Download wikipedia-sections-triplets.zip
Extract wikipedia-sections-triplets.zip
All datasets downloaded and extracted


In [0]:

# Fine-tune the sentence encoder on the STS benchmark train split, evaluate on the
# dev split during training, then reload the saved model and score it on the test split.

# Wrap the train split in a shuffled DataLoader and pair it with a cosine-similarity loss
logging.info("Read STSbenchmark train dataset")
train_data = SentencesDataset(sts_reader.get_examples('sts-train.csv'), model)
train_dataloader = DataLoader(train_data, shuffle=True, batch_size=train_batch_size)
train_loss = losses.CosineSimilarityLoss(model=model)


# Dev split (not shuffled) feeds the evaluator that is handed to model.fit below
logging.info("Read STSbenchmark dev dataset")
dev_data = SentencesDataset(examples=sts_reader.get_examples('sts-dev.csv'), model=model)
dev_dataloader = DataLoader(dev_data, shuffle=False, batch_size=train_batch_size)
evaluator = EmbeddingSimilarityEvaluator(dev_dataloader)


# Configure the training. The dev evaluator IS used: it is passed to model.fit below.
# len(train_data)*num_epochs/train_batch_size approximates the total number of training steps.
warmup_steps = math.ceil(len(train_data)*num_epochs/train_batch_size*0.1) # warm up over 10% of the training steps
logging.info("Warmup-steps: {}".format(warmup_steps))


# Train the model
# NOTE(review): the recorded run has 360 iterations per epoch, so evaluation_steps=1000
# presumably never triggers mid-epoch; the log shows a dev evaluation and a model save
# after every epoch.
model.fit(train_objectives=[(train_dataloader, train_loss)],
          evaluator=evaluator,
          epochs=num_epochs,
          evaluation_steps=1000,
          warmup_steps=warmup_steps,
          output_path=model_save_path)


##############################################################################
#
# Load the stored model and evaluate its performance on STS benchmark dataset
#
##############################################################################

# `model` and `evaluator` are rebound here: from this point on they refer to the
# model reloaded from model_save_path and to the test-split evaluator.
model = SentenceTransformer(model_save_path)
test_data = SentencesDataset(examples=sts_reader.get_examples("sts-test.csv"), model=model)
test_dataloader = DataLoader(test_data, shuffle=False, batch_size=train_batch_size)
evaluator = EmbeddingSimilarityEvaluator(test_dataloader)
# Last expression of the cell, so its return value is displayed as the cell output
model.evaluate(evaluator)

Convert dataset:   2%|▏         | 98/5749 [00:00<00:06, 844.45it/s]

2019-11-28 08:19:27 - Read STSbenchmark train dataset


Convert dataset: 100%|██████████| 5749/5749 [00:02<00:00, 2636.34it/s]
Convert dataset:  26%|██▌       | 391/1500 [00:00<00:00, 3899.20it/s]

2019-11-28 08:19:29 - Num sentences: 5749
2019-11-28 08:19:29 - Sentences 0 longer than max_seqence_length: 0
2019-11-28 08:19:29 - Sentences 1 longer than max_seqence_length: 0
2019-11-28 08:19:29 - Read STSbenchmark dev dataset


Convert dataset: 100%|██████████| 1500/1500 [00:00<00:00, 2396.09it/s]
Epoch:   0%|          | 0/4 [00:00<?, ?it/s]
Iteration:   0%|          | 0/360 [00:00<?, ?it/s][A

2019-11-28 08:19:30 - Num sentences: 1500
2019-11-28 08:19:30 - Sentences 0 longer than max_seqence_length: 0
2019-11-28 08:19:30 - Sentences 1 longer than max_seqence_length: 0
2019-11-28 08:19:30 - Warmup-steps: 144



Iteration:   0%|          | 1/360 [00:08<52:08,  8.71s/it][A
Iteration:   1%|          | 2/360 [00:19<56:29,  9.47s/it][A
Iteration:   1%|          | 3/360 [00:29<56:41,  9.53s/it][A
Iteration:   1%|          | 4/360 [00:38<54:42,  9.22s/it][A
Iteration:   1%|▏         | 5/360 [00:45<51:01,  8.62s/it][A
Iteration:   2%|▏         | 6/360 [00:55<54:08,  9.18s/it][A
Iteration:   2%|▏         | 7/360 [01:02<49:28,  8.41s/it][A
Iteration:   2%|▏         | 8/360 [01:12<52:17,  8.91s/it][A
Iteration:   2%|▎         | 9/360 [01:18<47:40,  8.15s/it][A
Iteration:   3%|▎         | 10/360 [01:27<48:30,  8.31s/it][A
Iteration:   3%|▎         | 11/360 [01:35<48:02,  8.26s/it][A
Iteration:   3%|▎         | 12/360 [01:45<51:22,  8.86s/it][A
Iteration:   4%|▎         | 13/360 [01:55<52:33,  9.09s/it][A
Iteration:   4%|▍         | 14/360 [02:03<50:42,  8.79s/it][A
Iteration:   4%|▍         | 15/360 [02:11<48:45,  8.48s/it][A
Iteration:   4%|▍         | 16/360 [02:20<48:56,  8.54s/it][A


2019-11-28 09:09:11 - Evaluation the model on  dataset after epoch 0:



Convert Evaluating:   1%|          | 1/94 [00:00<01:16,  1.22it/s][A
Convert Evaluating:   2%|▏         | 2/94 [00:01<01:13,  1.25it/s][A
Convert Evaluating:   3%|▎         | 3/94 [00:02<01:16,  1.20it/s][A
Convert Evaluating:   4%|▍         | 4/94 [00:03<01:14,  1.21it/s][A
Convert Evaluating:   5%|▌         | 5/94 [00:04<01:15,  1.17it/s][A
Convert Evaluating:   6%|▋         | 6/94 [00:05<01:21,  1.08it/s][A
Convert Evaluating:   7%|▋         | 7/94 [00:06<01:18,  1.10it/s][A
Convert Evaluating:   9%|▊         | 8/94 [00:07<01:16,  1.13it/s][A
Convert Evaluating:  10%|▉         | 9/94 [00:07<01:14,  1.14it/s][A
Convert Evaluating:  11%|█         | 10/94 [00:08<01:09,  1.21it/s][A
Convert Evaluating:  12%|█▏        | 11/94 [00:09<01:07,  1.22it/s][A
Convert Evaluating:  13%|█▎        | 12/94 [00:10<01:11,  1.15it/s][A
Convert Evaluating:  14%|█▍        | 13/94 [00:11<01:09,  1.17it/s][A
Convert Evaluating:  15%|█▍        | 14/94 [00:12<01:10,  1.13it/s][A
Convert Evalua

2019-11-28 09:11:25 - Cosine-Similarity :	Pearson: 0.8651	Spearman: 0.8670
2019-11-28 09:11:25 - Manhattan-Distance:	Pearson: 0.8333	Spearman: 0.8393
2019-11-28 09:11:25 - Euclidean-Distance:	Pearson: 0.8341	Spearman: 0.8399
2019-11-28 09:11:25 - Dot-Product-Similarity:	Pearson: 0.7937	Spearman: 0.7984
2019-11-28 09:11:25 - Save model to output/training_stsbenchmark_bert-2019-11-28_08-19-18


Epoch:  25%|██▌       | 1/4 [51:56<2:35:48, 3116.10s/it]

Epoch:  25%|██▌       | 1/4 [51:56<2:35:48, 3116.10s/it]
Iteration:   0%|          | 0/360 [00:00<?, ?it/s][A

2019-11-28 09:11:26 - Restart data_iterator



Iteration:   0%|          | 1/360 [00:06<37:49,  6.32s/it][A
Iteration:   1%|          | 2/360 [00:16<44:19,  7.43s/it][A
Iteration:   1%|          | 3/360 [00:24<45:55,  7.72s/it][A
Iteration:   1%|          | 4/360 [00:32<45:42,  7.70s/it][A
Iteration:   1%|▏         | 5/360 [00:40<47:03,  7.95s/it][A
Iteration:   2%|▏         | 6/360 [00:49<48:10,  8.17s/it][A
Iteration:   2%|▏         | 7/360 [00:57<48:17,  8.21s/it][A
Iteration:   2%|▏         | 8/360 [01:06<48:30,  8.27s/it][A
Iteration:   2%|▎         | 9/360 [01:14<47:50,  8.18s/it][A
Iteration:   3%|▎         | 10/360 [01:21<45:43,  7.84s/it][A
Iteration:   3%|▎         | 11/360 [01:30<47:35,  8.18s/it][A
Iteration:   3%|▎         | 12/360 [01:37<45:40,  7.88s/it][A
Iteration:   4%|▎         | 13/360 [01:44<43:13,  7.48s/it][A
Iteration:   4%|▍         | 14/360 [01:51<42:16,  7.33s/it][A
Iteration:   4%|▍         | 15/360 [01:58<41:57,  7.30s/it][A
Iteration:   4%|▍         | 16/360 [02:05<41:07,  7.17s/it][A


2019-11-28 10:04:12 - Evaluation the model on  dataset after epoch 1:



Convert Evaluating:   1%|          | 1/94 [00:00<01:27,  1.06it/s][A
Convert Evaluating:   2%|▏         | 2/94 [00:01<01:24,  1.09it/s][A
Convert Evaluating:   3%|▎         | 3/94 [00:02<01:28,  1.03it/s][A
Convert Evaluating:   4%|▍         | 4/94 [00:03<01:27,  1.03it/s][A
Convert Evaluating:   5%|▌         | 5/94 [00:04<01:29,  1.00s/it][A
Convert Evaluating:   6%|▋         | 6/94 [00:06<01:35,  1.09s/it][A
Convert Evaluating:   7%|▋         | 7/94 [00:07<01:34,  1.08s/it][A
Convert Evaluating:   9%|▊         | 8/94 [00:08<01:31,  1.06s/it][A
Convert Evaluating:  10%|▉         | 9/94 [00:09<01:29,  1.05s/it][A
Convert Evaluating:  11%|█         | 10/94 [00:10<01:22,  1.02it/s][A
Convert Evaluating:  12%|█▏        | 11/94 [00:11<01:21,  1.02it/s][A
Convert Evaluating:  13%|█▎        | 12/94 [00:12<01:25,  1.04s/it][A
Convert Evaluating:  14%|█▍        | 13/94 [00:13<01:23,  1.02s/it][A
Convert Evaluating:  15%|█▍        | 14/94 [00:14<01:22,  1.04s/it][A
Convert Evalua

2019-11-28 10:06:52 - Cosine-Similarity :	Pearson: 0.8702	Spearman: 0.8705
2019-11-28 10:06:52 - Manhattan-Distance:	Pearson: 0.8398	Spearman: 0.8431
2019-11-28 10:06:52 - Euclidean-Distance:	Pearson: 0.8402	Spearman: 0.8432
2019-11-28 10:06:52 - Dot-Product-Similarity:	Pearson: 0.8089	Spearman: 0.8139
2019-11-28 10:06:52 - Save model to output/training_stsbenchmark_bert-2019-11-28_08-19-18


Epoch:  50%|█████     | 2/4 [1:47:22<1:45:58, 3179.25s/it]

Epoch:  50%|█████     | 2/4 [1:47:22<1:45:58, 3179.25s/it]
Iteration:   0%|          | 0/360 [00:00<?, ?it/s][A

2019-11-28 10:06:53 - Restart data_iterator



Iteration:   0%|          | 1/360 [00:10<1:04:57, 10.86s/it][A
Iteration:   1%|          | 2/360 [00:19<1:00:59, 10.22s/it][A
Iteration:   1%|          | 3/360 [00:28<57:36,  9.68s/it]  [A
Iteration:   1%|          | 4/360 [00:37<57:17,  9.66s/it][A
Iteration:   1%|▏         | 5/360 [00:45<54:19,  9.18s/it][A
Iteration:   2%|▏         | 6/360 [00:55<54:48,  9.29s/it][A
Iteration:   2%|▏         | 7/360 [01:04<54:02,  9.18s/it][A
Iteration:   2%|▏         | 8/360 [01:12<52:26,  8.94s/it][A
Iteration:   2%|▎         | 9/360 [01:22<53:47,  9.20s/it][A
Iteration:   3%|▎         | 10/360 [01:31<52:57,  9.08s/it][A
Iteration:   3%|▎         | 11/360 [01:38<49:30,  8.51s/it][A
Iteration:   3%|▎         | 12/360 [01:48<51:32,  8.89s/it][A
Iteration:   4%|▎         | 13/360 [01:55<48:38,  8.41s/it][A
Iteration:   4%|▍         | 14/360 [02:06<53:32,  9.28s/it][A
Iteration:   4%|▍         | 15/360 [02:15<52:47,  9.18s/it][A
Iteration:   4%|▍         | 16/360 [02:22<49:24,  8.62s/i

2019-11-28 11:02:08 - Evaluation the model on  dataset after epoch 2:



Convert Evaluating:   1%|          | 1/94 [00:00<01:28,  1.06it/s][A
Convert Evaluating:   2%|▏         | 2/94 [00:01<01:25,  1.08it/s][A
Convert Evaluating:   3%|▎         | 3/94 [00:02<01:29,  1.02it/s][A
Convert Evaluating:   4%|▍         | 4/94 [00:03<01:27,  1.02it/s][A
Convert Evaluating:   5%|▌         | 5/94 [00:04<01:29,  1.01s/it][A
Convert Evaluating:   6%|▋         | 6/94 [00:06<01:35,  1.09s/it][A
Convert Evaluating:   7%|▋         | 7/94 [00:07<01:33,  1.08s/it][A
Convert Evaluating:   9%|▊         | 8/94 [00:08<01:31,  1.06s/it][A
Convert Evaluating:  10%|▉         | 9/94 [00:09<01:29,  1.05s/it][A
Convert Evaluating:  11%|█         | 10/94 [00:10<01:22,  1.02it/s][A
Convert Evaluating:  12%|█▏        | 11/94 [00:11<01:21,  1.02it/s][A
Convert Evaluating:  13%|█▎        | 12/94 [00:12<01:25,  1.05s/it][A
Convert Evaluating:  14%|█▍        | 13/94 [00:13<01:23,  1.04s/it][A
Convert Evaluating:  15%|█▍        | 14/94 [00:14<01:23,  1.04s/it][A
Convert Evalua

2019-11-28 11:04:47 - Cosine-Similarity :	Pearson: 0.8727	Spearman: 0.8732
2019-11-28 11:04:47 - Manhattan-Distance:	Pearson: 0.8432	Spearman: 0.8467
2019-11-28 11:04:47 - Euclidean-Distance:	Pearson: 0.8436	Spearman: 0.8471
2019-11-28 11:04:47 - Dot-Product-Similarity:	Pearson: 0.8052	Spearman: 0.8100
2019-11-28 11:04:47 - Save model to output/training_stsbenchmark_bert-2019-11-28_08-19-18


Epoch:  75%|███████▌  | 3/4 [2:45:18<54:28, 3268.07s/it]  
Iteration:   0%|          | 0/360 [00:00<?, ?it/s][A

Epoch:  75%|███████▌  | 3/4 [2:45:18<54:28, 3268.07s/it]

2019-11-28 11:04:48 - Restart data_iterator



Iteration:   0%|          | 1/360 [00:11<1:06:27, 11.11s/it][A
Iteration:   1%|          | 2/360 [00:21<1:04:36, 10.83s/it][A
Iteration:   1%|          | 3/360 [00:30<1:02:18, 10.47s/it][A
Iteration:   1%|          | 4/360 [00:38<56:14,  9.48s/it]  [A
Iteration:   1%|▏         | 5/360 [00:46<54:41,  9.24s/it][A
Iteration:   2%|▏         | 6/360 [00:57<57:08,  9.69s/it][A
Iteration:   2%|▏         | 7/360 [01:09<1:00:44, 10.32s/it][A
Iteration:   2%|▏         | 8/360 [01:19<1:00:19, 10.28s/it][A
Iteration:   2%|▎         | 9/360 [01:29<59:34, 10.18s/it]  [A
Iteration:   3%|▎         | 10/360 [01:38<57:08,  9.79s/it][A
Iteration:   3%|▎         | 11/360 [01:47<55:27,  9.54s/it][A
Iteration:   3%|▎         | 12/360 [01:54<52:04,  8.98s/it][A
Iteration:   4%|▎         | 13/360 [02:03<51:39,  8.93s/it][A
Iteration:   4%|▍         | 14/360 [02:14<54:21,  9.43s/it][A
Iteration:   4%|▍         | 15/360 [02:25<56:31,  9.83s/it][A
Iteration:   4%|▍         | 16/360 [02:32<51:20, 

2019-11-28 11:55:56 - Evaluation the model on  dataset after epoch 3:



Convert Evaluating:   1%|          | 1/94 [00:00<01:18,  1.19it/s][A
Convert Evaluating:   2%|▏         | 2/94 [00:01<01:16,  1.20it/s][A
Convert Evaluating:   3%|▎         | 3/94 [00:02<01:19,  1.14it/s][A
Convert Evaluating:   4%|▍         | 4/94 [00:03<01:18,  1.15it/s][A
Convert Evaluating:   5%|▌         | 5/94 [00:04<01:21,  1.10it/s][A
Convert Evaluating:   6%|▋         | 6/94 [00:05<01:25,  1.03it/s][A
Convert Evaluating:   7%|▋         | 7/94 [00:06<01:23,  1.04it/s][A
Convert Evaluating:   9%|▊         | 8/94 [00:07<01:21,  1.06it/s][A
Convert Evaluating:  10%|▉         | 9/94 [00:08<01:18,  1.08it/s][A
Convert Evaluating:  11%|█         | 10/94 [00:09<01:12,  1.16it/s][A
Convert Evaluating:  12%|█▏        | 11/94 [00:09<01:12,  1.15it/s][A
Convert Evaluating:  13%|█▎        | 12/94 [00:11<01:16,  1.07it/s][A
Convert Evaluating:  14%|█▍        | 13/94 [00:11<01:14,  1.09it/s][A
Convert Evaluating:  15%|█▍        | 14/94 [00:12<01:13,  1.08it/s][A
Convert Evalua

2019-11-28 11:58:18 - Cosine-Similarity :	Pearson: 0.8730	Spearman: 0.8735
2019-11-28 11:58:18 - Manhattan-Distance:	Pearson: 0.8450	Spearman: 0.8482
2019-11-28 11:58:18 - Euclidean-Distance:	Pearson: 0.8454	Spearman: 0.8485
2019-11-28 11:58:18 - Dot-Product-Similarity:	Pearson: 0.8080	Spearman: 0.8119
2019-11-28 11:58:18 - Save model to output/training_stsbenchmark_bert-2019-11-28_08-19-18


Epoch: 100%|██████████| 4/4 [3:38:48<00:00, 3250.84s/it]


2019-11-28 11:58:19 - Load pretrained SentenceTransformer: output/training_stsbenchmark_bert-2019-11-28_08-19-18
2019-11-28 11:58:19 - Load SentenceTransformer from folder: output/training_stsbenchmark_bert-2019-11-28_08-19-18
2019-11-28 11:58:19 - loading configuration file output/training_stsbenchmark_bert-2019-11-28_08-19-18/0_BERT/config.json
2019-11-28 11:58:19 - Model config {
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "torchscript": false,
  "type_vocab_size": 2,
  "vocab_size": 30522
}

2019-11-28 11:58:19 - loading weights file output/training_stsbenchmark_bert-2019-11-28_08-19-18/0_BERT/pytorch_model.bin


Convert dataset:  28%|██▊       | 392/1379 [00:00<00:00, 3914.37it/s]

2019-11-28 11:58:20 - Model name 'output/training_stsbenchmark_bert-2019-11-28_08-19-18/0_BERT' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc). Assuming 'output/training_stsbenchmark_bert-2019-11-28_08-19-18/0_BERT' is a path or url to a directory containing tokenizer files.
2019-11-28 11:58:20 - loading file output/training_stsbenchmark_bert-2019-11-28_08-19-18/0_BERT/vocab.txt
2019-11-28 11:58:20 - loading file output/training_stsbenchmark_bert-2019-11-28_08-19-18/0_BERT/added_tokens.json
2019-11-28 11:58:20 - loading file output/training_stsbenchmark_bert-2019-11-28_08-19-18/0_BERT/special_tokens_map.js

Convert dataset: 100%|██████████| 1379/1379 [00:00<00:00, 2910.46it/s]
Convert Evaluating:   0%|          | 0/87 [00:00<?, ?it/s]

2019-11-28 11:58:21 - Num sentences: 1379
2019-11-28 11:58:21 - Sentences 0 longer than max_seqence_length: 0
2019-11-28 11:58:21 - Sentences 1 longer than max_seqence_length: 0
2019-11-28 11:58:21 - Evaluation the model on  dataset:


Convert Evaluating: 100%|██████████| 87/87 [01:58<00:00,  1.13it/s]

2019-11-28 12:00:19 - Cosine-Similarity :	Pearson: 0.8507	Spearman: 0.8497
2019-11-28 12:00:19 - Manhattan-Distance:	Pearson: 0.8381	Spearman: 0.8384
2019-11-28 12:00:19 - Euclidean-Distance:	Pearson: 0.8390	Spearman: 0.8392
2019-11-28 12:00:19 - Dot-Product-Similarity:	Pearson: 0.7777	Spearman: 0.7690





0.8497360050809678

In [0]:
# Recursively pack the output/ directory (the saved training run) into output.zip
!zip -r output.zip output

  adding: output/ (stored 0%)
  adding: output/training_stsbenchmark_bert-2019-11-28_08-19-18/ (stored 0%)
  adding: output/training_stsbenchmark_bert-2019-11-28_08-19-18/config.json (stored 0%)
  adding: output/training_stsbenchmark_bert-2019-11-28_08-19-18/1_Pooling/ (stored 0%)
  adding: output/training_stsbenchmark_bert-2019-11-28_08-19-18/1_Pooling/config.json (deflated 47%)
  adding: output/training_stsbenchmark_bert-2019-11-28_08-19-18/similarity_evaluation_results.csv (deflated 50%)
  adding: output/training_stsbenchmark_bert-2019-11-28_08-19-18/modules.json (deflated 51%)
  adding: output/training_stsbenchmark_bert-2019-11-28_08-19-18/0_BERT/ (stored 0%)
  adding: output/training_stsbenchmark_bert-2019-11-28_08-19-18/0_BERT/config.json (deflated 51%)
  adding: output/training_stsbenchmark_bert-2019-11-28_08-19-18/0_BERT/vocab.txt (deflated 53%)
  adding: output/training_stsbenchmark_bert-2019-11-28_08-19-18/0_BERT/pytorch_model.bin (deflated 7%)
  adding: output/training_stsbe

In [0]:
from google.colab import drive

# Mount Google Drive at /content/gdrive (the recorded run asked for an authorization code)
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
# Top-level "My Drive" folder inside the Drive mount point
root_dir = "/content/gdrive/My Drive/"

In [0]:
!cp output.zip '/content/gdrive/My Drive/bert_finetuned_stsb.zip'