In [15]:
import torch
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM, BertForSequenceClassification


In [4]:

# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenized input
text = "Who was Jim Henson ? Jim Henson was a puppeteer"
tokenized_text = tokenizer.tokenize(text)

# Mask a token that we will try to predict back with `BertForMaskedLM`
masked_index = 6
tokenized_text[masked_index] = '[MASK]'
assert tokenized_text == ['who', 'was', 'jim', 'henson', '?', 'jim', '[MASK]', 'was', 'a', 'puppet', '##eer']

# Convert token to vocabulary indices
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
# Define sentence A and B indices associated to 1st and 2nd sentences (see paper)
segments_ids = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]

# Convert inputs to PyTorch tensors
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])

11/19/2018 16:49:10 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/wyang/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084


In [7]:
# Load pre-trained model (weights)
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
model.eval()

# Predict all tokens
predictions = model(tokens_tensor, segments_tensors)

# confirm we were able to predict 'henson'
predicted_index = torch.argmax(predictions[0, masked_index]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])
assert predicted_token[0] == 'henson'

11/19/2018 16:50:26 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /home/wyang/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
11/19/2018 16:50:26 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /home/wyang/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /tmp/tmp7yg7rtxv
11/19/2018 16:50:29 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
 

In [9]:
predictions.shape

torch.Size([1, 11, 30522])

In [11]:
tokens_tensor

tensor([[ 2040,  2001,  3958, 27227,  1029,  3958,   103,  2001,  1037, 13997,
         11510]])

In [20]:
segments_tensors

tensor([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]])

In [21]:
# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')



11/19/2018 17:12:28 - INFO - pytorch_pretrained_bert.file_utils -   https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt not found in cache, downloading to /tmp/tmpwmisy2so
100%|██████████| 109540/109540 [00:00<00:00, 1847449.49B/s]
11/19/2018 17:12:28 - INFO - pytorch_pretrained_bert.file_utils -   copying /tmp/tmpwmisy2so to cache at /home/wyang/.pytorch_pretrained_bert/8a0c070123c1f794c42a29c6904beb7c1b8715741e235bee04aca2c7636fc83f.9b42061518a39ca00b8b52059fd2bede8daa613f8a8671500e518a8c29de8c00
11/19/2018 17:12:28 - INFO - pytorch_pretrained_bert.file_utils -   creating metadata file for /home/wyang/.pytorch_pretrained_bert/8a0c070123c1f794c42a29c6904beb7c1b8715741e235bee04aca2c7636fc83f.9b42061518a39ca00b8b52059fd2bede8daa613f8a8671500e518a8c29de8c00
11/19/2018 17:12:28 - INFO - pytorch_pretrained_bert.file_utils -   removing temp file /tmp/tmpwmisy2so
11/19/2018 17:12:28 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://

In [22]:
# Tokenized input
text = "谁是周杰伦?周杰伦是一个歌手"
tokenized_text = tokenizer.tokenize(text)

# Mask a token that we will try to predict back with `BertForMaskedLM`
masked_index = 6
tokenized_text[masked_index] = '[MASK]'

# Convert token to vocabulary indices
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

In [23]:
tokenized_text

['谁', '是', '周', '杰', '伦', '?', '[MASK]', '杰', '伦', '是', '一', '个', '歌', '手']

In [24]:
indexed_tokens

[6443, 3221, 1453, 3345, 840, 136, 103, 3345, 840, 3221, 671, 702, 3625, 2797]

In [25]:
segments_tensors = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]

tensor([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]])

In [28]:
segments_ids = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]

# Convert inputs to PyTorch tensors
tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])

In [40]:
# Load pre-trained model (weights)
model = BertModel.from_pretrained('bert-base-chinese')


11/19/2018 17:50:00 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz from cache at /home/wyang/.pytorch_pretrained_bert/42d4a64dda3243ffeca7ec268d5544122e67d9d06b971608796b483925716512.02ac7d664cff08d793eb00d6aac1d04368a1322435e5fe0a27c70b0b3a85327f
11/19/2018 17:50:00 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /home/wyang/.pytorch_pretrained_bert/42d4a64dda3243ffeca7ec268d5544122e67d9d06b971608796b483925716512.02ac7d664cff08d793eb00d6aac1d04368a1322435e5fe0a27c70b0b3a85327f to temp dir /tmp/tmph9slzw23
11/19/2018 17:50:04 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 1

In [41]:
predictions = model(tokens_tensor, segments_tensors, output_all_encoded_layers=True)


In [49]:
len(predictions[0])

12

In [32]:
# Predict all tokens
predictions = model_masked(tokens_tensor, segments_tensors)
predictions


tensor([[[-1.1313e+01, -1.1907e+01, -1.1674e+01,  ..., -6.4066e+00,
          -7.2315e+00, -1.2502e+01],
         [-1.0289e+01, -1.0009e+01, -1.0193e+01,  ..., -5.0743e+00,
          -2.6988e+00, -6.5099e+00],
         [-1.1042e+01, -1.0886e+01, -1.1356e+01,  ..., -6.3718e+00,
          -3.7927e+00, -7.1015e+00],
         ...,
         [-1.5990e+01, -1.6219e+01, -1.6190e+01,  ..., -1.1053e+01,
          -6.2380e+00, -1.1631e+01],
         [-1.6091e+01, -1.6767e+01, -1.6467e+01,  ..., -1.0600e+01,
          -6.5341e+00, -1.1487e+01],
         [-1.4194e+01, -1.5757e+01, -1.5586e+01,  ..., -1.1100e+01,
          -6.5375e+00, -1.1307e+01]]], grad_fn=<ThAddBackward>)

In [33]:
# confirm we were able to predict 'henson'
predicted_index = torch.argmax(predictions[0, masked_index]).item()
predicted_index

1453

In [37]:
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])


In [38]:
predicted_token

['周']

In [31]:
# Load pre-trained model (weights)
model_masked = BertForMaskedLM.from_pretrained('bert-base-chinese')


11/19/2018 17:18:58 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz from cache at /home/wyang/.pytorch_pretrained_bert/42d4a64dda3243ffeca7ec268d5544122e67d9d06b971608796b483925716512.02ac7d664cff08d793eb00d6aac1d04368a1322435e5fe0a27c70b0b3a85327f
11/19/2018 17:18:58 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /home/wyang/.pytorch_pretrained_bert/42d4a64dda3243ffeca7ec268d5544122e67d9d06b971608796b483925716512.02ac7d664cff08d793eb00d6aac1d04368a1322435e5fe0a27c70b0b3a85327f to temp dir /tmp/tmpegmsroa6
11/19/2018 17:19:02 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 1

In [None]:
# Load pre-trained model (weights)
model = BertForSequenceClassification.from_pretrained('bert-base-chinese')


In [17]:
tokens_tensor

tensor([[ 2040,  2001,  3958, 27227,  1029,  3958,   103,  2001,  1037, 13997,
         11510]])

In [50]:
# model.eval()

# # Predict all tokens
# predictions = model(tokens_tensor, segments_tensors)

In [None]:
# confirm we were able to predict 'henson'
predicted_index = torch.argmax(predictions[0, masked_index]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])
assert predicted_token[0] == 'henson'

# run BertForSequenceClassification on annotation-test

In [1]:
import numpy as np

import torch

from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM, BertForSequenceClassification

# Load pre-trained model (weights)
model = BertForSequenceClassification.from_pretrained('bert-base-chinese')
# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
model.to("cuda")

11/19/2018 19:01:05 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz from cache at /home/wyang/.pytorch_pretrained_bert/42d4a64dda3243ffeca7ec268d5544122e67d9d06b971608796b483925716512.02ac7d664cff08d793eb00d6aac1d04368a1322435e5fe0a27c70b0b3a85327f
11/19/2018 19:01:05 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /home/wyang/.pytorch_pretrained_bert/42d4a64dda3243ffeca7ec268d5544122e67d9d06b971608796b483925716512.02ac7d664cff08d793eb00d6aac1d04368a1322435e5fe0a27c70b0b3a85327f to temp dir /tmp/tmpc88gmikz
11/19/2018 19:01:09 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 1

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1)
            )
          )
          (intermediate): BertInterme

In [10]:
# f = open("/data/wyang/ShortTextSemanticSimilarity/data/corpora/annotation/annotation_test.csv")
# acc = 0
# total = 0
# for l in f:
#     label, a, b = l.replace("\n", "").split("\t")
#     a_index = tokenize_index(a, tokenizer)
#     b_index = tokenize_index(b, tokenizer)
#     combine_index = a_index + b_index
#     segments_ids = [0] * len(a_index) + [1] * len(b_index)
#     # Convert inputs to PyTorch tensors
#     tokens_tensor = torch.tensor([combine_index])
#     segments_tensors = torch.tensor([segments_ids])
#     # Predict all tokens
#     predictions = model(tokens_tensor, segments_tensors)
#     predicted_index = torch.argmax(predictions).item()
#     if predicted_index == int(label):
#         acc += 1
#     total += 1


In [None]:
def get_p1(prediction_score_list, labels):
    f = open("/data/wyang/ShortTextSemanticSimilarity/data/corpora/annotation/annotation_test.csv")
    a2score_label = {}
    for line, p, s in zip(f, prediction_score_list, labels):
        label, a, b = line.replace("\n", "").split("\t")
        if a not in a2prediction_label:
            a2score_label[a] = []
        a2score_label[a].append((p, s))
    
    acc = 0
    for a in a2score_label:
        a2score_label[a] = sorted(a2score_label[a], key=lambda x: x[1], reverse=True)
        if a2score_label[a][0][0] > 0:
            acc += 1
            
    p1 = acc / len(a2score_label)
    
    return p1
        
        

In [30]:
def eval(model):
    model.eval()
    f = open("/data/wyang/ShortTextSemanticSimilarity/data/corpora/annotation/annotation_test.csv")
    prediction_score_list, prediction_index_list = [], []
    labels = []
    test_batch, testid_batch = [], []
    batch_size = 16
    device = "cuda"
    for l in f:
        label, a, b = l.replace("\n", "").split("\t")
        labels.append(int(label))
        a_index = tokenize_index(a, tokenizer)
        b_index = tokenize_index(b, tokenizer)
        combine_index = a_index + b_index
        segments_ids = [0] * len(a_index) + [1] * len(b_index)
        test_batch.append(torch.tensor(combine_index))
        testid_batch.append(torch.tensor(segments_ids))
        if len(test_batch) >= batch_size:
            # Convert inputs to PyTorch tensors
            tokens_tensor = torch.nn.utils.rnn.pad_sequence(test_batch, batch_first=True, padding_value=0).to(device)
            segments_tensors = torch.nn.utils.rnn.pad_sequence(testid_batch, batch_first=True, padding_value=0).to(device)
            # Predict all tokens
            predictions = model(tokens_tensor, segments_tensors)
            predicted_index = list(torch.argmax(predictions, dim=1).cpu().numpy())
            prediction_index_list += predicted_index
            predicted_score = list(predictions[:, 1].cpu().detach().numpy())
            prediction_score_list.extend(predicted_score)
            
    if len(test_batch) > 0:
        # Convert inputs to PyTorch tensors
        tokens_tensor = torch.nn.utils.rnn.pad_sequence(test_batch, batch_first=True, padding_value=0).to(device)
        segments_tensors = torch.nn.utils.rnn.pad_sequence(testid_batch, batch_first=True, padding_value=0).to(device)
        # Predict all tokens
        predictions = model(tokens_tensor, segments_tensors)
        predicted_index = list(torch.argmax(predictions, dim=1).cpu().numpy())
        prediction_index_list += predicted_index
        predicted_score = list(predictions[:, 1].cpu().detach().numpy())
        prediction_score_list.extend(predicted_score)
    
    return prediction_score_list, prediction_index_list, labels

In [31]:
prediction_score_list, prediction_index_list, labels = eval(model)

RuntimeError: CUDA error: out of memory

In [14]:
import numpy as np
sum(np.array(prediction_list) == np.array(labels))

6646

In [15]:
np.array(prediction_list).shape

(7678,)

# fine tune BertForSequenceClassification on annotation-train


In [1]:
def tokenize_index(text, tokenizer):
    tokenized_text = tokenizer.tokenize(text)
    # Convert token to vocabulary indices
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
    return indexed_tokens

In [2]:
from tqdm import tqdm
import random 

import torch

from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM, BertForSequenceClassification
from pytorch_pretrained_bert.optimization import BertAdam


# Load pre-trained model (weights)
model = BertForSequenceClassification.from_pretrained('bert-base-chinese')
# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
model.to("cuda")

11/20/2018 21:14:48 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese.tar.gz from cache at /home/wyang/.pytorch_pretrained_bert/42d4a64dda3243ffeca7ec268d5544122e67d9d06b971608796b483925716512.02ac7d664cff08d793eb00d6aac1d04368a1322435e5fe0a27c70b0b3a85327f
11/20/2018 21:14:48 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /home/wyang/.pytorch_pretrained_bert/42d4a64dda3243ffeca7ec268d5544122e67d9d06b971608796b483925716512.02ac7d664cff08d793eb00d6aac1d04368a1322435e5fe0a27c70b0b3a85327f to temp dir /tmp/tmpb2xsjj6b
11/20/2018 21:14:52 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 1

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1)
            )
          )
          (intermediate): BertInterme

In [3]:
f = open("/data/wyang/ShortTextSemanticSimilarity/data/corpora/annotation/annotation_train.csv")
predictions = []
labels = []
test_batch, testid_batch, mask_batch, label_batch = [], [], [], []
batch_size = 16
train_data_set = []
device = "cuda"
for l in f:
    label, a, b = l.replace("\n", "").split("\t")
    labels.append(int(label))
    a_index = tokenize_index(a, tokenizer)
    b_index = tokenize_index(b, tokenizer)
    combine_index = a_index + b_index
    segments_ids = [0] * len(a_index) + [1] * len(b_index)
    test_batch.append(torch.tensor(combine_index))
    testid_batch.append(torch.tensor(segments_ids))
    mask_batch.append(torch.ones(len(combine_index)))
    label_batch.append(int(label))
    if len(test_batch) >= batch_size:
        # Convert inputs to PyTorch tensors
        tokens_tensor = torch.nn.utils.rnn.pad_sequence(test_batch, batch_first=True, padding_value=0).to(device)
        segments_tensor = torch.nn.utils.rnn.pad_sequence(testid_batch, batch_first=True, padding_value=0).to(device)
        mask_tensor = torch.nn.utils.rnn.pad_sequence(mask_batch, batch_first=True, padding_value=0).to(device)
        label_tensor = torch.tensor(label_batch, device=device)
        train_data_set.append((tokens_tensor, segments_tensor, mask_tensor, label_tensor))
        test_batch, testid_batch, mask_batch, label_batch = [], [], [], []

if len(test_batch) != 0:
    tokens_tensor = torch.nn.utils.rnn.pad_sequence(test_batch, batch_first=True, padding_value=0).to(device)
    segments_tensor = torch.nn.utils.rnn.pad_sequence(testid_batch, batch_first=True, padding_value=0).to(device)
    mask_tensor = torch.nn.utils.rnn.pad_sequence(mask_batch, batch_first=True, padding_value=0).to(device)
    label_tensor = torch.tensor(label_batch, device=device)
    train_data_set.append((tokens_tensor, segments_tensor, mask_tensor, label_tensor))
    test_batch, testid_batch, mask_batch, label_batch = [], [], [], []


In [4]:
learning_rate = 5e-5
warmup_proportion = 0.1 
"Proportion of training to perform linear learning rate warmup for. "
"E.g., 0.1 = 10%% of training."
num_train_epochs = 10

param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'gamma', 'beta']
num_train_steps = len(train_data_set) * num_train_epochs
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if n not in no_decay], 'weight_decay_rate': 0.01},
    {'params': [p for n, p in param_optimizer if n in no_decay], 'weight_decay_rate': 0.0}
    ]
    
optimizer = BertAdam(optimizer_grouped_parameters,
                lr=learning_rate,
                warmup=warmup_proportion,
                t_total=num_train_steps)

In [None]:
model.train()
global_step = 0
for epoch in range(1, num_train_epochs+1):
    tr_loss = 0
    random.shuffle(train_data_set)
    for step, batch in enumerate(tqdm(train_data_set)):
        tokens_tensor, segments_tensor, mask_tensor, label_tensor = batch
        # Predict all tokens
        loss, logits = model(tokens_tensor, segments_tensor, mask_tensor, label_tensor)
        loss.backward()
        tr_loss += loss.item()
        optimizer.step()
        model.zero_grad()
        global_step += 1

 62%|██████▏   | 2392/3852 [09:26<04:01,  6.05it/s] 

In [8]:
pytorch_dump_path = "model/annotation_finetuned.pt"
# Save pytorch-model
print("Save PyTorch model to {}".format(pytorch_dump_path))
torch.save(model.state_dict(), pytorch_dump_path)

Save PyTorch model to model/annotation_finetuned.pt
