### 1 Install requirements

In [1]:
!pip install datasets -q
!pip install sentencepiece -q
!pip install transformers -q

[K     |████████████████████████████████| 451 kB 4.1 MB/s 
[K     |████████████████████████████████| 115 kB 67.7 MB/s 
[K     |████████████████████████████████| 212 kB 77.6 MB/s 
[K     |████████████████████████████████| 182 kB 80.6 MB/s 
[K     |████████████████████████████████| 127 kB 58.5 MB/s 
[K     |████████████████████████████████| 1.3 MB 4.1 MB/s 
[K     |████████████████████████████████| 5.5 MB 4.2 MB/s 
[K     |████████████████████████████████| 7.6 MB 58.2 MB/s 
[?25h

### 2 Import libraries

In [2]:
from datasets import load_dataset, load_metric
from transformers import BertTokenizer, TFBertModel, BertGenerationEncoder, BertGenerationDecoder, EncoderDecoderModel
import numpy as np
import tensorflow as tf
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer
import pandas as pd
import math
import torch
from csv import writer

### 3 Mount Google Drive for saving and loading files

In [3]:
# Colab-only step: expose Google Drive under /content/drive so the notebook
# can load and save files there.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


### 4 Configure Language Pair and Paths

In [4]:
# Run configuration.
# orig / target: source and destination language codes of the translation pair.
orig, target = "en", "es"

# Decoding settings; these decide which sub-folder is used further down.
# A value of None means the setting is not used for this run.
max_length = 100
min_length = ngram_size = beam = None

In [5]:
# Checkpoint sub-folder (with leading slash, so it can be appended directly
# to a directory path) for every "<orig>-<target>" language pair.
pair_checkpoint = {
    'en-zh': '/checkpoint-9000',
    'en-es': '/checkpoint-10000',
    'es-en': '/checkpoint-10000',
    'es-zh': '/checkpoint-8500',
    'zh-es': '/checkpoint-8500',
    'zh-en': '/checkpoint-9000',
}

In [15]:
# Resolve the Drive folders for the current run.
#   dir_path  : folder of the fine-tuned model for this language pair.
#   file_path : sub-folder of dir_path selected by the decoding settings:
#       max_length unset            -> baseline
#       max_length only             -> max_length_<N>[/ngram_<k>]
#       max_length and min_length   -> max_length_<N>/min_length_<m>[/ngram_<k>]
#       ... and beam                -> max_length_<N>/min_length_<m>/beam_<b>
# The folder name is built from max_length instead of a hard-coded 100, so
# changing the setting selects the matching folder (identical for 100).
# NOTE(review): beam is ignored while min_length is unset, and ngram_size is
# ignored once beam is set — confirm this matches the folder layout on Drive.
dir_path = f'drive/MyDrive/MIDS/W266/Final_Project/bert2bert-finetuned/{orig}_{target}'
if not max_length:
  file_path = f'{dir_path}/baseline'
else:
  file_path = f'{dir_path}/max_length_{max_length}'
  if min_length:
    file_path = f'{file_path}/min_length_{min_length}'
  # beam only applies together with min_length and takes precedence over ngram_size.
  if min_length and beam:
    file_path = f'{file_path}/beam_{beam}'
  elif ngram_size:
    file_path = f'{file_path}/ngram_{ngram_size}'


In [16]:
# Display the resolved file_path to check which sub-folder was selected.
file_path

'drive/MyDrive/MIDS/W266/Final_Project/bert2bert-finetuned/en_es/max_length_100'

In [19]:
# Destination of the serialized model: placed directly in the language-pair
# folder (dir_path), not in the settings-specific file_path sub-folder.
model_file = '/'.join([dir_path, f'bert2bert_uncased_{orig}_{target}'])

In [20]:
# Display the target path the model will be written to.
model_file

'drive/MyDrive/MIDS/W266/Final_Project/bert2bert-finetuned/en_es/bert2bert_uncased_en_es'

### 5 Load and Save Model

In [10]:
# Load the fine-tuned encoder-decoder model from the checkpoint folder
# registered in pair_checkpoint for the current language pair.
bert2bert_saved = EncoderDecoderModel.from_pretrained(
    file_path + pair_checkpoint['-'.join((orig, target))]
)

In [21]:
# Serialize the loaded model to model_file.
# NOTE: torch.save pickles the whole model object (not only its weights), so
# reading it back with torch.load needs the same model classes importable.
torch.save(obj=bert2bert_saved, f=model_file)