In [1]:
# Remove Colab's bundled sample_data directory from the working directory.
!rm -rf sample_data/
# NOTE(review): none of these installs is version-pinned. The install log recorded
# with this cell resolved OpenNMT-tf 2.19.0 — consider pinning to reproduce results.
!pip install OpenNMT-tf
!pip install gdown
!pip install sacremoses
import opennmt
import os
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
import numpy as np
import sacrebleu
import pyonmttok
from opennmt.utils import checkpoint as checkpoint_util
from pyonmttok import SentencePieceTokenizer
from google.colab import drive
# Mount Google Drive at /content/gdrive; the later !cp cells read from and write
# to paths under '/content/gdrive/My Drive/'.
drive.mount('/content/gdrive',force_remount=True)

Collecting OpenNMT-tf
[?25l  Downloading https://files.pythonhosted.org/packages/83/30/525a1b22667dfb387bd91ac880fea3f6f534997a332b876f982c527e28fd/OpenNMT_tf-2.19.0-py3-none-any.whl (154kB)
[K     |████████████████████████████████| 163kB 3.8MB/s 
[?25hCollecting pyyaml<5.5,>=5.3
[?25l  Downloading https://files.pythonhosted.org/packages/7a/a5/393c087efdc78091afa2af9f1378762f9821c9c1d7a22c5753fb5ac5f97a/PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636kB)
[K     |████████████████████████████████| 645kB 4.8MB/s 
[?25hCollecting rouge<2,>=1.0
  Downloading https://files.pythonhosted.org/packages/43/cc/e18e33be20971ff73a056ebdb023476b5a545e744e3fc22acd8c758f1e0d/rouge-1.0.0-py3-none-any.whl
Collecting sacrebleu<1.6,>=1.5.0
[?25l  Downloading https://files.pythonhosted.org/packages/7e/57/0c7ca4e31a126189dab99c19951910bd081dea5bbd25f24b77107750eae7/sacrebleu-1.5.1-py3-none-any.whl (54kB)
[K     |████████████████████████████████| 61kB 4.7MB/s 
[?25hCollecting tensorflow-addons<0.14

In [2]:
def split_data(data, train_size, val_size, test_size):
  """Split a sequence into contiguous train, validation and test slices.

  Args:
    data: Sliceable sequence (must support ``len`` and slicing).
    train_size: Fraction of ``data`` assigned to the training slice.
    val_size: Fraction of ``data`` assigned to the validation slice.
    test_size: Fraction of ``data`` assigned to the test slice. The test slice
      is everything after the first two, so rounding leftovers land here.

  Returns:
    Tuple ``(train_data, val_data, test_data)`` in the original order.

  Raises:
    ValueError: If the three fractions do not add up to 1 (within float
      tolerance). ``ValueError`` is an ``Exception``, so callers catching the
      previously raised bare ``Exception`` keep working.
  """
  import math  # local import keeps this helper self-contained

  # Compare with a tolerance: 0.7 + 0.2 + 0.1 evaluates to 0.9999999999999999,
  # so an exact `!= 1.0` test rejects perfectly valid splits.
  if not math.isclose(train_size + val_size + test_size, 1.0, abs_tol=1e-9):
    raise ValueError("Train, validation, and test sizes must add up to 1.")

  total = len(data)
  train_mark = int(total * train_size)
  val_mark = train_mark + int(total * val_size)

  return data[:train_mark], data[train_mark:val_mark], data[val_mark:]

def save_data(data, data_folder_name, filename):
  """Write the non-blank entries of `data` to ``data_folder_name/filename``.

  Entries are written exactly as given (no newline is appended); entries that
  are empty or whitespace-only are skipped. The file is opened in "w" mode, so
  an existing file is overwritten.
  """
  target_path = os.path.join(data_folder_name, filename)
  with open(target_path, mode="w") as out_file:
    out_file.writelines(entry for entry in data if entry.strip())

def count_weights(model):
  """Print total, trainable and non-trainable parameter counts of `model`.

  Reads ``model.trainable_weights`` and ``model.non_trainable_weights`` and
  sizes every entry with ``K.count_params``. Nothing is returned.
  """
  def _param_total(weights):
    # Sum of the per-variable parameter counts for one weight collection.
    return np.sum([K.count_params(weight) for weight in weights])

  n_trainable = _param_total(model.trainable_weights)
  n_frozen = _param_total(model.non_trainable_weights)
  n_total = n_trainable + n_frozen

  print('Total params: {:,}'.format(n_total))
  print('Trainable params: {:,}'.format(n_trainable))
  print('Non-trainable params: {:,}'.format(n_frozen))

def display_weights(model):
  """Print the first two weight arrays of every encoder layer of `model`.

  Iterates ``model.encoder.layers``. For each layer a header with the layer
  name is printed, then the first array returned by ``layer.get_weights()``
  (labelled "weights") and the second one (labelled "biases"). A layer with
  no arrays prints ``weights:  []``; a layer with exactly one array prints
  that array and ``biases:  []`` instead of raising ``IndexError``.

  Args:
    model: Object exposing ``encoder.layers``; each layer must provide
      ``name`` and ``get_weights()``.
  """
  for layer in model.encoder.layers:
    print(f"===== LAYER: {layer.name} =====")
    # Fetch once per layer instead of once per access.
    layer_weights = layer.get_weights()

    if len(layer_weights) == 0:
      print("weights: ", [])
      continue

    print("weights:")
    print(layer_weights[0])
    if len(layer_weights) > 1:
      print("biases:")
      print(layer_weights[1])
    else:
      # Only one array is available, so there is no second entry to show.
      print("biases: ", [])

def compute_scores(runner, features_filename, labels_filename, pred_filename, include_ppl=False, include_ter=False):
  """Run inference, detokenize predictions and labels, and score them.

  Steps: ``runner.infer`` writes predictions to `pred_filename`; predictions
  and labels are detokenized with ``detokenize_data``; BLEU (and optionally
  TER) is computed with sacrebleu on the detokenized lines.

  Filename contract (imposed by ``detokenize_data``):
    * `pred_filename` and `labels_filename` must end in ``.tok``, because
      ``detokenize_data`` reads ``<stem>.tok`` and writes ``<stem>.txt``.
    * the stem of `labels_filename` is also used as the SentencePiece model
      name, i.e. ``sentencepiece_models/<stem>.model`` must exist.

  Args:
    runner: Object providing ``infer(features, predictions)`` and
      ``evaluate(features_file=..., labels_file=...)``.
    features_filename: Tokenized source file passed to the runner.
    labels_filename: Tokenized reference file.
    pred_filename: Where the runner writes tokenized predictions.
    include_ppl: When True, the result starts from whatever
      ``runner.evaluate`` returns (presumably a metrics dict that includes
      perplexity — confirm against the OpenNMT-tf Runner docs).
    include_ter: When True, also add a ``'ter'`` entry.

  Returns:
    Dict with at least ``'bleu'``; ``'ter'`` and the evaluate() metrics are
    added on request.
  """
  runner.infer(features_filename, pred_filename)

  # splitext() removes only the final extension. Cutting at the first "." (as
  # str.index did) yields an empty or truncated stem for names such as
  # "./pred.tok" or "/data/run.1/pred.tok".
  base_pred_name = os.path.splitext(pred_filename)[0]
  base_model_name = os.path.splitext(labels_filename)[0]

  detokenized_pred_filename = detokenize_data(base_pred_name, base_model_name)
  detokenized_labels_filename = detokenize_data(base_model_name, base_model_name)

  with open(detokenized_pred_filename) as f:
    preds = f.readlines()

  with open(detokenized_labels_filename) as f:
    truth = f.readlines()

  scores = dict()
  if include_ppl:
    scores = runner.evaluate(
        features_file=features_filename,
        labels_file=labels_filename)

  bleu = sacrebleu.corpus_bleu(preds, [truth])
  scores.update({'bleu': bleu.score})
  if include_ter:
    ter = sacrebleu.corpus_ter(preds, [truth])
    scores.update({'ter': ter.score})

  return scores

def tokenize_data(save_folder_name, basename):
  """Tokenize the train, test and val subsets of `basename`, in that order.

  Each subset is handled by ``tokenize_sub_data`` with the same folder and
  basename.
  """
  for set_type in ("train", "test", "val"):
    tokenize_sub_data(save_folder_name, basename, set_type)

def tokenize_sub_data(save_folder_name, basename, set_type):
  """Tokenize one raw subset file with the SentencePiece model of `basename`.

  Reads ``<save_folder_name>_raw/<basename>_<set_type>.raw`` and writes
  ``<save_folder_name>/<basename>_<set_type>.tok``: one line of
  space-joined tokens per non-blank input line. The model and vocabulary are
  loaded from ``sentencepiece_models/<basename>.model`` / ``.vocab``.
  """
  sp_tokenizer = SentencePieceTokenizer(
      model_path=os.path.join("sentencepiece_models", f"{basename}.model"),
      vocabulary_path=os.path.join("sentencepiece_models", f"{basename}.vocab"),
  )

  raw_path = os.path.join(f"{save_folder_name}_raw", f"{basename}_{set_type}.raw")
  tok_path = os.path.join(save_folder_name, f"{basename}_{set_type}.tok")

  with open(raw_path) as raw_file, open(tok_path, mode="w") as tok_file:
    for raw_line in raw_file.readlines():
      if not raw_line.strip():
        continue  # blank lines are dropped from the tokenized output
      # tokenize() returns a tuple; element 0 holds the token list.
      tokens = sp_tokenizer.tokenize(raw_line)[0]
      tok_file.write(" ".join(tokens) + "\n")


def detokenize_data(tokenized_basename, model_basename):
  """Detokenize ``<tokenized_basename>.tok`` into ``<tokenized_basename>.txt``.

  Every input line is stripped, split on single spaces and passed to the
  SentencePiece detokenizer loaded from
  ``sentencepiece_models/<model_basename>.model`` / ``.vocab``.

  Returns:
    The path of the written ``.txt`` file.
  """
  sp_tokenizer = SentencePieceTokenizer(
      model_path=os.path.join("sentencepiece_models", model_basename + ".model"),
      vocabulary_path=os.path.join("sentencepiece_models", f"{model_basename}.vocab"),
  )

  output_path = f"{tokenized_basename}.txt"
  with open(f"{tokenized_basename}.tok") as tok_file, open(output_path, mode="w") as txt_file:
    for tok_line in tok_file.readlines():
      pieces = tok_line.strip().split(" ")
      txt_file.write(sp_tokenizer.detokenize(pieces) + "\n")

  return output_path

In [3]:
# Copy archives from the mounted Drive into /content. Only the source-target
# dataset, the SentencePiece models and the source-target model are active; the
# commented lines are the pivot datasets and the other models.
#!cp '/content/gdrive/My Drive/capstone-data-final/src_pvt_data.zip' '/content/'
#!cp '/content/gdrive/My Drive/capstone-data-final/pvt_tgt_data.zip' '/content/'
!cp '/content/gdrive/My Drive/capstone-data-final/src_tgt_data.zip' '/content/'
!cp '/content/gdrive/My Drive/capstone-data-final/sentencepiece_models.zip' '/content/'  
!cp '/content/gdrive/My Drive/capstone-models/src_tgt_model.zip' '/content/'  
#!cp '/content/gdrive/My Drive/capstone-models/pvt_tgt_model.zip' '/content/'  
#!cp '/content/gdrive/My Drive/capstone-models/src_pvt_model.zip' '/content/'  
#!cp '/content/gdrive/My Drive/capstone-models/baseline_model.zip' '/content/'  

# NOTE(review): the triple-quoted block below is a string literal, not a comment.
# None of its commands run, and as the cell's last expression it is echoed back
# as the cell output.
"""
!cp '/content/gdrive/My Drive/capstone-data-final/es_it.zip' '/content/'
!cp '/content/gdrive/My Drive/capstone-data-final/es_ca.zip' '/content/'
!cp '/content/gdrive/My Drive/capstone-data-final/ca_it.zip' '/content/'

!cp '/content/gdrive/My Drive/capstone-models/src_pvt_model.zip' '/content/'  
!cp '/content/gdrive/My Drive/capstone-models/pvt_tgt_model.zip' '/content/'  
!cp '/content/gdrive/My Drive/capstone-models/src_tgt_model.zip' '/content/'  
!cp '/content/gdrive/My Drive/capstone-models/baseline_model.zip' '/content/'  
"""

"\n!cp '/content/gdrive/My Drive/capstone-data-final/es_it.zip' '/content/'\n!cp '/content/gdrive/My Drive/capstone-data-final/es_ca.zip' '/content/'\n!cp '/content/gdrive/My Drive/capstone-data-final/ca_it.zip' '/content/'\n\n!cp '/content/gdrive/My Drive/capstone-models/src_pvt_model.zip' '/content/'  \n!cp '/content/gdrive/My Drive/capstone-models/pvt_tgt_model.zip' '/content/'  \n!cp '/content/gdrive/My Drive/capstone-models/src_tgt_model.zip' '/content/'  \n!cp '/content/gdrive/My Drive/capstone-models/baseline_model.zip' '/content/'  \n"

In [4]:
# Extract the archives copied in the previous cell into the working directory.
# The commented lines match the archives that cell leaves commented out.
!unzip src_tgt_data.zip
#!unzip pvt_tgt_data.zip
#!unzip src_pvt_data.zip
!unzip sentencepiece_models.zip
#!unzip src_pvt_model.zip
#!unzip pvt_tgt_model.zip
!unzip src_tgt_model.zip

# NOTE(review): the triple-quoted block below is a string literal, not a comment.
# None of its commands run, and it is echoed back as the cell output.
"""
!unzip es_ca.zip
!unzip es_it.zip
!unzip ca_it.zip

!unzip src_pvt_model.zip
!unzip pvt_tgt_model.zip
!unzip src_tgt_model.zip
!unzip baseline_model.zip

!mkdir sentencepiece_models
"""

Archive:  src_tgt_data.zip
   creating: src_tgt_data/
  inflating: src_tgt_data/src_tgt_train.tok  
   creating: src_tgt_data/.ipynb_checkpoints/
  inflating: src_tgt_data/tgt_src_val.tok  
  inflating: src_tgt_data/tgt_src_train.tok  
  inflating: src_tgt_data/tgt_src_test.tok  
  inflating: src_tgt_data/src_tgt_val.tok  
  inflating: src_tgt_data/src_tgt_test.tok  
Archive:  sentencepiece_models.zip
   creating: sentencepiece_models/
  inflating: sentencepiece_models/src_tgt.model  
  inflating: sentencepiece_models/tgt.vocab  
  inflating: sentencepiece_models/tgt_src.vocab  
  inflating: sentencepiece_models/pvt_tgt.vocab  
  inflating: sentencepiece_models/src.vocab  
  inflating: sentencepiece_models/pvt_tgt.model  
  inflating: sentencepiece_models/pvt_src.vocab  
  inflating: sentencepiece_models/src.model  
  inflating: sentencepiece_models/tgt.model  
  inflating: sentencepiece_models/pvt_src.model  
  inflating: sentencepiece_models/src_tgt.vocab  
  inflating: sentencepiece

'\n!unzip es_ca.zip\n!unzip es_it.zip\n!unzip ca_it.zip\n\n!unzip src_pvt_model.zip\n!unzip pvt_tgt_model.zip\n!unzip src_tgt_model.zip\n!unzip baseline_model.zip\n\n!mkdir sentencepiece_models\n'

In [None]:
# Build vocab (uses SentencePiece)
# source = catalan (ca)
# pivot  = spanish (es)
# target = italian (it)
#
# Every command passes `--sentencepiece model_type=bpe`, `--size 32000` and a
# `--save_vocab` prefix under /content/sentencepiece_models, followed by one
# training file.
# Presumably each run produces <prefix>.model and <prefix>.vocab, the files the
# tokenizer helpers load — confirm against the onmt-build-vocab documentation.
# NOTE(review): the inputs are *.raw files, while the src_tgt_data archive
# extracted earlier lists only *.tok files — confirm where the raw text lives.

!onmt-build-vocab --sentencepiece model_type=bpe --size 32000 --save_vocab /content/sentencepiece_models/src /content/src_pvt_data/src_train.raw
!onmt-build-vocab --sentencepiece model_type=bpe --size 32000 --save_vocab /content/sentencepiece_models/pvt_src /content/src_pvt_data/pvt_src_train.raw

!onmt-build-vocab --sentencepiece model_type=bpe --size 32000 --save_vocab /content/sentencepiece_models/tgt /content/pvt_tgt_data/tgt_train.raw
!onmt-build-vocab --sentencepiece model_type=bpe --size 32000 --save_vocab /content/sentencepiece_models/pvt_tgt /content/pvt_tgt_data/pvt_tgt_train.raw

!onmt-build-vocab --sentencepiece model_type=bpe --size 32000 --save_vocab /content/sentencepiece_models/src_tgt /content/src_tgt_data/src_tgt_train.raw
!onmt-build-vocab --sentencepiece model_type=bpe --size 32000 --save_vocab /content/sentencepiece_models/tgt_src /content/src_tgt_data/tgt_src_train.raw

In [None]:
!mv /content/sentencepiece_models/src.vocab /content/src_pvt_data/src_vocab.txt
!mv /content/sentencepiece_models/pvt_src.vocab /content/src_pvt_data/pvt_src_vocab.txt

!mv /content/sentencepiece_models/tgt.vocab /content/pvt_tgt_data/tgt_vocab.txt
!mv /content/sentencepiece_models/pvt_tgt.vocab /content/pvt_tgt_data/pvt_tgt_vocab.txt

!mv /content/sentencepiece_models/src_tgt.vocab /content/src_tgt_data/src_tgt_vocab.txt
!mv /content/sentencepiece_models/tgt_src.vocab /content/src_tgt_data/tgt_src_vocab.txt

In [5]:
def _make_config(model_dir, data_dir, features_prefix, labels_prefix):
  """Build the run configuration dict for one translation direction.

  All four models share the same train/eval settings and differ only in the
  model directory, the dataset directory and the file-name prefixes, so the
  dict is generated instead of copy-pasted.

  Args:
    model_dir: Directory for checkpoints (directory path, may end in "/").
    data_dir: Directory holding the tokenized data, WITHOUT a trailing "/".
    features_prefix: Stem of the source-side files; also the stem of the
      source vocabulary under /content/sentencepiece_models.
    labels_prefix: Same, for the target side.

  Returns:
    A fresh, independent dict (no nested object is shared between calls).
  """
  vocab_dir = "/content/sentencepiece_models"
  return {
      "model_dir": model_dir,
      "data": {
          # File paths must not end in "/": a trailing slash makes the OS
          # treat the name as a directory, so the file cannot be opened.
          "train_features_file": f"{data_dir}/{features_prefix}_train.tok",
          "train_labels_file": f"{data_dir}/{labels_prefix}_train.tok",
          "eval_features_file": f"{data_dir}/{features_prefix}_val.tok",
          "eval_labels_file": f"{data_dir}/{labels_prefix}_val.tok",
          "source_vocabulary": f"{vocab_dir}/{features_prefix}.vocab",
          "target_vocabulary": f"{vocab_dir}/{labels_prefix}.vocab",
      },
      "train": {
          "max_step": 25000,
          "save_checkpoints_steps": 500,
          "keep_checkpoint_max": 2,
      },
      "eval": {
          "save_eval_predictions": True,
          # NOTE(review): 50000 exceeds train.max_step (25000) — confirm that
          # skipping periodic evaluation during training is intended.
          "steps": 50000,
          "max_exports_to_keep": 2,
          "early_stopping": {
              "metric": "loss",
              "min_improvement": 0.1,
              "steps": 100,
          },
      },
  }


# source -> pivot
config_src_pvt = _make_config(
    "/content/src_pvt_model/", "/content/src_pvt_data", "src", "pvt_src")

# pivot -> target
config_pvt_tgt = _make_config(
    "/content/pvt_tgt_model/", "/content/pvt_tgt_data", "pvt_tgt", "tgt")

# source -> target, initialised from the two pivot models
config_src_tgt = _make_config(
    "/content/src_tgt_model/", "/content/src_tgt_data", "src_tgt", "tgt_src")

# source -> target trained from scratch: same data, separate model directory
config_baseline = _make_config(
    "/content/baseline_model/", "/content/src_tgt_data", "src_tgt", "tgt_src")

In [6]:
# Noam learning-rate schedule built with the arguments below. model_dim=512
# matches the Dense(512) attention projections in the TransformerBase summary
# printed further down.
learning_rate = opennmt.schedules.NoamDecay(scale=2.0, model_dim=512, warmup_steps=8000)
# One LazyAdam instance (TensorFlow Addons) driven by that schedule. The same
# object is handed to create_variables() and Checkpoint.from_config() for every
# model in the cells that follow.
optimizer = tfa.optimizers.LazyAdam(learning_rate)

In [None]:
# Training source-pivot model
# Build a TransformerBase and wrap it in a Runner configured by config_src_pvt.
src_pvt_model = opennmt.models.TransformerBase()
src_pvt_runner = opennmt.Runner(src_pvt_model, config_src_pvt, auto_config=True)
# _finalize_config is a private Runner method (leading underscore). The dict it
# returns is read later for its 'data', 'params' and 'model_dir' entries.
sp_config = src_pvt_runner._finalize_config(training=True)

# The training call is left disabled in this saved state of the notebook.
#src_pvt_runner.train(num_devices=1, with_eval=True)

In [None]:
# Archive the source-pivot model directory and copy the zip to the mounted Drive.
!zip -r src_pvt_model.zip src_pvt_model/
!cp src_pvt_model.zip '/content/gdrive/My Drive/capstone-models/'

In [None]:
# Training pivot-target model
# Build a TransformerBase and wrap it in a Runner configured by config_pvt_tgt.
pvt_tgt_model = opennmt.models.TransformerBase()
pvt_tgt_runner = opennmt.Runner(pvt_tgt_model, config_pvt_tgt, auto_config=True)
# _finalize_config is a private Runner method (leading underscore). The dict it
# returns is read later for its 'data', 'params' and 'model_dir' entries.
pt_config = pvt_tgt_runner._finalize_config(training=True)

# The training call is left disabled in this saved state of the notebook.
#pvt_tgt_runner.train(num_devices=1, with_eval=True)

In [None]:
# Archive the pivot-target model directory and copy the zip to the mounted Drive.
!zip -r pvt_tgt_model.zip pvt_tgt_model/
!cp pvt_tgt_model.zip '/content/gdrive/My Drive/capstone-models/'

In [9]:
# Write the first 5000 lines of the source-target validation features and labels
# to working files (src_tgt.tok / tgt_src.tok) in the current directory.
!head -n 5000 src_tgt_data/src_tgt_val.tok > src_tgt.tok
!head -n 5000 src_tgt_data/tgt_src_val.tok > tgt_src.tok

In [19]:
def specific_detokenize(model_basename, tokenized_basename):
  """Detokenize ``<tokenized_basename>.tok`` into ``<tokenized_basename>.txt``.

  Thin wrapper around ``detokenize_data`` so the two helpers share a single
  implementation and cannot drift apart. Only the argument order differs: the
  model name comes first here.

  Args:
    model_basename: Stem of the SentencePiece files under
      ``sentencepiece_models/`` (``<stem>.model`` / ``<stem>.vocab``).
    tokenized_basename: Stem of the ``.tok`` file to read; the ``.txt`` output
      is written next to it.

  Returns:
    The path of the written ``.txt`` file.
  """
  return detokenize_data(tokenized_basename, model_basename)

def specific_tokenize(input_file, basename):
  """Tokenize `input_file` with the SentencePiece model named `basename`.

  Writes ``<basename>.tok`` in the current directory: one line of space-joined
  tokens per non-blank input line. The model and vocabulary are loaded from
  ``sentencepiece_models/<basename>.model`` / ``.vocab``.
  """
  sp_tokenizer = SentencePieceTokenizer(
      model_path=os.path.join("sentencepiece_models", f"{basename}.model"),
      vocabulary_path=os.path.join("sentencepiece_models", f"{basename}.vocab"),
  )

  source_path = f"{input_file}"
  output_path = f"{basename}.tok"

  with open(source_path) as raw_file, open(output_path, mode="w") as tok_file:
    for raw_line in raw_file.readlines():
      if not raw_line.strip():
        continue  # blank lines are dropped from the tokenized output
      # tokenize() returns a tuple; element 0 holds the token list.
      tokens = sp_tokenizer.tokenize(raw_line)[0]
      tok_file.write(" ".join(tokens) + "\n")

In [26]:
#specific_tokenize("src_tgt_TRUTH.txt","src")
#src_pvt_runner.infer("src.tok", "pvt_src.tok")
#specific_detokenize("pvt_src", "pvt_src")

#specific_tokenize("pvt_src.txt","pvt_tgt")
#pvt_tgt_runner.infer("pvt_tgt.tok", "tgt.tok")
#specific_detokenize("tgt", "tgt")

'tgt.txt'

In [28]:
# Score an existing detokenized prediction file against its reference file.
pred_filename = "tgt.txt"
truth_filename = "tgt_src_TRUTH.txt"

with open(pred_filename) as f:
  preds = f.readlines()
with open(truth_filename) as f:
  truth = f.readlines()

# sacrebleu expects a list of reference streams, hence the extra list level.
bleu = sacrebleu.corpus_bleu(preds, [truth])
ter = sacrebleu.corpus_ter(preds, [truth])

scores = {'bleu': bleu.score, 'ter': ter.score}

In [29]:
# Show the BLEU/TER dictionary built in the previous cell.
print(scores)

{'bleu': 21.133085989899232, 'ter': 0.7513867399244266}


In [None]:
# Use the first 5000 lines of the pivot-target validation files as working copies.
!head -n 5000 pvt_tgt_data/pvt_tgt_val.tok > pvt_tgt.tok
!head -n 5000 pvt_tgt_data/tgt_val.tok > tgt.tok

# The labels file is named tgt.tok because compute_scores() derives the
# SentencePiece model name ("tgt") from the stem of the labels filename.
scores = compute_scores(pvt_tgt_runner, "pvt_tgt.tok", "tgt.tok", "pred.tok")
print(f"============ Baseline Pivot-Target NMT Evaluation ============\n {scores}")

In [None]:
# Restore both models weights
# Each model is initialized from the 'data' and 'params' entries of its finalized
# config, then create_variables() is called with the shared optimizer.
src_pvt_model.initialize(data_config=sp_config['data'], params=sp_config['params'])
src_pvt_model.create_variables(optimizer=optimizer)

pvt_tgt_model.initialize(data_config=pt_config['data'], params=pt_config['params'])
pvt_tgt_model.create_variables(optimizer=optimizer)

# Restore the source-pivot model from its model_dir.
# weights_only=True presumably skips the optimizer state — confirm against the
# OpenNMT-tf checkpoint utilities.
checkpoint_path = sp_config['model_dir']
checkpoint = checkpoint_util.Checkpoint.from_config(sp_config, src_pvt_model, optimizer=optimizer)
checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)

# Same for the pivot-target model; `checkpoint_path` and `checkpoint` are reused,
# so after this cell they refer to the pivot-target checkpoint only.
checkpoint_path = pt_config['model_dir']
checkpoint = checkpoint_util.Checkpoint.from_config(pt_config, pvt_tgt_model, optimizer=optimizer)
checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)

# Print the parameter counts of both restored models.
count_weights(src_pvt_model)
count_weights(pvt_tgt_model)

In [None]:
# Transfer weights to src_tgt_model
# Fresh TransformerBase and runner for the source->target direction.
src_tgt_model = opennmt.models.TransformerBase()
src_tgt_runner = opennmt.Runner(src_tgt_model, config_src_tgt, auto_config=True)
st_config = src_tgt_runner._finalize_config(training=True)

src_tgt_model.initialize(data_config=st_config['data'], params=st_config['params'])
src_tgt_model.create_variables(optimizer=optimizer)

# Rebind the encoder to the source->pivot model's encoder and the decoder to the
# pivot->target model's decoder. The attributes now point at the same objects;
# no copy is made.
# NOTE(review): only .encoder and .decoder are replaced. The model summary lists
# the WordEmbedder inputters separately, so the embeddings are not touched
# here — confirm that is intended.
src_tgt_model.encoder = src_pvt_model.encoder
src_tgt_model.decoder = pvt_tgt_model.decoder

# Save a checkpoint of the assembled model through st_config (presumably into
# its model_dir — confirm).
new_checkpoint = checkpoint_util.Checkpoint.from_config(st_config, src_tgt_model, optimizer=optimizer)
new_checkpoint.save()

In [7]:
# Training source-target model (using pretrained models)
# Recreates src_tgt_model / src_tgt_runner / st_config, the same names the
# weight-transfer cell defines.
# NOTE(review): nothing in this cell loads the transferred weights explicitly.
# Presumably the runner picks up the checkpoint stored in the config's
# model_dir — confirm.
src_tgt_model = opennmt.models.TransformerBase()
src_tgt_runner = opennmt.Runner(src_tgt_model, config_src_tgt, auto_config=True)
st_config = src_tgt_runner._finalize_config(training=True)
# Training and archiving are left disabled in this saved state of the notebook.
#src_tgt_runner.train(num_devices=1, with_eval=True)

#!zip -r src_tgt_model.zip src_tgt_model/
#!cp src_tgt_model.zip '/content/gdrive/My Drive/capstone-models/'

INFO:tensorflow:Using OpenNMT-tf version 2.19.0
INFO:tensorflow:Using model:
(model): TransformerBase(
  (examples_inputter): SequenceToSequenceInputter(
    (features_inputter): WordEmbedder()
    (labels_inputter): WordEmbedder()
    (inputters): ListWrapper(
      (0): WordEmbedder()
      (1): WordEmbedder()
    )
  )
  (encoder): SelfAttentionEncoder(
    (position_encoder): SinusoidalPositionEncoder(
      (reducer): SumReducer()
    )
    (layer_norm): LayerNorm()
    (layers): ListWrapper(
      (0): SelfAttentionEncoderLayer(
        (self_attention): TransformerLayerWrapper(
          (layer): MultiHeadAttention(
            (linear_queries): Dense(512)
            (linear_keys): Dense(512)
            (linear_values): Dense(512)
            (linear_output): Dense(512)
          )
          (input_layer_norm): LayerNorm()
        )
        (ffn): TransformerLayerWrapper(
          (layer): FeedForwardNetwork(
            (inner): Dense(2048)
            (outer): Dense(512)
  

In [None]:
# Archive the source-target model directory and copy the zip to the mounted Drive.
!zip -r src_tgt_model.zip src_tgt_model/
!cp src_tgt_model.zip '/content/gdrive/My Drive/capstone-models/'

In [None]:
!head -n 5000 src_tgt_data/src_tgt_val.tok > src_tgt.tok
!head -n 5000 src_tgt_data/tgt_src_val.tok > tgt_src.tok

scores = compute_scores(src_tgt_runner, "src_tgt.tok", "tgt_src.tok", "pred.tok", True, True)
print(f"============ Baseline Source-Target NMT Evaluation ============\n {scores}")

In [None]:
# Training source-target model (using no models)
# This is the from-scratch baseline: a fresh TransformerBase driven by
# config_baseline.
baseline_model = opennmt.models.TransformerBase()
baseline_runner = opennmt.Runner(baseline_model, config_baseline, auto_config=True)

# Unlike the other runner cells, the training call here is active.
baseline_runner.train(num_devices=1, with_eval=True)

In [None]:
# Archive the baseline model directory and copy the zip to the mounted Drive.
!zip -r baseline_model.zip baseline_model/
!cp baseline_model.zip '/content/gdrive/My Drive/capstone-models/'

In [None]:
# Use the first 5000 lines of the source-target validation files as working copies.
!head -n 5000 src_tgt_data/src_tgt_val.tok > src_tgt.tok
!head -n 5000 src_tgt_data/tgt_src_val.tok > tgt_src.tok

# Score the from-scratch baseline; the two positional True values are
# include_ppl and include_ter.
scores = compute_scores(baseline_runner, "src_tgt.tok", "tgt_src.tok", "pred.tok", True, True)
print(f"============ Baseline Source-Target NMT Evaluation ============\n {scores}")

In [None]:
#!zip -r src_pvt_model.zip src_pvt_model/
#!zip -r pvt_tgt_model.zip pvt_tgt_model/
#!zip -r src_tgt_model.zip src_tgt_model/
#!zip -r baseline_model.zip baseline_model/

#!cp '/content/gdrive/My Drive/capstone-models/src_pvt_model.zip' .
#!cp pvt_tgt_model.zip '/content/gdrive/My Drive/capstone-models/'
#!cp src_tgt_model.zip '/content/gdrive/My Drive/capstone-models/'
#!cp baseline_model.zip '/content/gdrive/My Drive/capstone-models/'

In [None]:
# Archive the three dataset directories.
!zip -r src_pvt_data.zip src_pvt_data
!zip -r pvt_tgt_data.zip pvt_tgt_data
!zip -r src_tgt_data.zip src_tgt_data

# Copy the archives to the mounted Drive folder that the download cell near the
# top of the notebook reads from.
!cp src_pvt_data.zip '/content/gdrive/My Drive/capstone-data-final/'
!cp pvt_tgt_data.zip '/content/gdrive/My Drive/capstone-data-final/'
!cp src_tgt_data.zip '/content/gdrive/My Drive/capstone-data-final/'

In [None]:
# Compute scores on the held-out test split for both source-target models.
import shutil  # only needed by this cell

# compute_scores() strips the extension from the prediction and labels names,
# then detokenize_data() reads "<stem>.tok" and loads
# "sentencepiece_models/<labels stem>.model". Therefore:
#   * the prediction files must end in ".tok";
#   * the labels file must be named after its SentencePiece model ("tgt_src").
test_features_filename = "/content/src_tgt_data/src_tgt_test.tok"
test_labels_filename = "tgt_src.tok"
shutil.copyfile("/content/src_tgt_data/tgt_src_test.tok", test_labels_filename)

baseline_scores = compute_scores(
    runner=baseline_runner,
    features_filename=test_features_filename,
    labels_filename=test_labels_filename,
    pred_filename="/content/baseline_pred.tok")

pivot_based_tl_scores = compute_scores(
    runner=src_tgt_runner,
    features_filename=test_features_filename,
    labels_filename=test_labels_filename,
    pred_filename="/content/src_to_tgt_pred.tok")

print(f"============ Baseline Source-Target NMT Evaluation ============\n {baseline_scores}")
print(f"============ Pretrain Source-Target NMT Evaluation ============\n {pivot_based_tl_scores}")

In [None]:
# Delete every model artifact from the working directory.
# The trailing * makes each pattern match the zip archive as well as the
# directory (e.g. src_tgt_model/ and src_tgt_model.zip).
!rm -rf src_pvt_model*
!rm -rf pvt_tgt_model*
!rm -rf src_tgt_model*
!rm -rf baseline_model*

# Old Stuff

In [None]:
# Legacy cell: archive the "*.pt" model directories, then download the zips
# through the browser.
!zip -r src_pvt_model.pt.zip src_pvt_model.pt/
!zip -r pvt_tgt_model.pt.zip pvt_tgt_model.pt/
!zip -r src_tgt_model.pt.zip src_tgt_model.pt/

# NOTE(review): this import sits mid-notebook; it is only needed by the
# files.download() calls below.
from google.colab import files
files.download('src_pvt_model.pt.zip') 
files.download('pvt_tgt_model.pt.zip') 
files.download('src_tgt_model.pt.zip') 

In [None]:
# Export models
# Each runner exports to its own "<name>_saved" target, given as a relative path.
# NOTE(review): this needs all four runners alive in the kernel; they are created
# in four separate cells above.
src_pvt_runner.export("src_pvt_saved")
pvt_tgt_runner.export("pvt_tgt_saved")
src_tgt_runner.export("src_tgt_saved")
baseline_runner.export("baseline_saved")