<a href="https://colab.research.google.com/github/ansonmiu0214/C490CW/blob/master/QEV_BiRNN_wholeDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Coursework: Quality Estimation Vectors


In [1]:
# Imports
import os
import json
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import sklearn
import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

device = torch.device('cuda' if torch.cuda.is_available() else 'gpu')
print(f'DEVICE={device}')
  torch.manual_seed(0)
  torch.cuda.manual_seed(0)
  np.random.seed(0)
# PyTorch version
print(torch.__version__)

# Disable warnings :)
import warnings
warnings.filterwarnings('ignore')

IndentationError: ignored

In [5]:
# Google Drive authorisation
from google.colab import drive
drive.mount('/content/gdrive')

def in_gdrive(path):
    return f'/content/gdrive/My Drive/Colab Notebooks/{path}'

# !ls /content/gdrive/My\ Drive

KeyboardInterrupt: ignored

## Importing Data

In [0]:
import os

if not os.path.exists('enzh_data.zip'):
    !wget -O enzh_data.zip https://competitions.codalab.org/my/datasets/download/03e23bd7-8084-4542-997b-6a1ca6dd8a5f
    !unzip enzh_data.zip

TRAIN_EN = 'train.enzh.src'
TRAIN_ZH = 'train.enzh.mt'
TRAIN_SCORES = 'train.enzh.scores'
VAL_EN = 'dev.enzh.src'
VAL_ZH = 'dev.enzh.mt'
VAL_SCORES = 'dev.enzh.scores'
TEST_EN = 'test.enzh.src'
TEST_ZH = 'test.enzh.mt'

## Preprocessing

### English

1. Tokenise with spaCy language model
2. Remove stop words and punctuation
3. Normalise - lemmas

In [6]:
# Downloading spacy models for English

!spacy download en_core_web_md
!spacy link en_core_web_md en300 --force

[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_md')
[38;5;2m✔ Linking successful[0m
/usr/local/lib/python3.6/dist-packages/en_core_web_md -->
/usr/local/lib/python3.6/dist-packages/spacy/data/en300
You can now load the model via spacy.load('en300')


In [7]:
# Downloading stop words for English

from nltk import download
from nltk.corpus import stopwords

download('stopwords')
stop_words_en = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [0]:
# Get tokenizer

import spacy

nlp_en = spacy.load('en300')

In [0]:
def preprocess_en(sentence=None, *, keep_stopwords=False):
    def wrapper(sentence):
        text = sentence.lower()
        processed = [token.lemma_ for token in nlp_en.tokenizer(text)]
        processed = [token for token in processed if token.isalpha()]
        if not keep_stopwords:
            processed = [token for token in processed if token not in stop_words_en]
        return processed

    return wrapper if sentence is None else wrapper(sentence)

In [10]:
def outlier_remove(en_data, zh_data, scores):
    

SyntaxError: ignored

### Chinese

1. Tokenise with jieba
2. Remove stop words and punctuation

In [0]:
# Download stop words
FILE_STOP_WORDS_ZH = './chinese_stop_words.txt'

if not os.path.exists(FILE_STOP_WORDS_ZH):
    !wget -c https://github.com/Tony607/Chinese_sentiment_analysis/blob/master/data/chinese_stop_words.txt

with open(FILE_STOP_WORDS_ZH, 'r', encoding='utf-8') as f:
    stop_words_zh = [line.rstrip() for line in f]

In [0]:
import jieba

def preprocess_zh(sentence=None, *, keep_stopwords=False):
    def wrapper(sentence):
        tokens = jieba.cut(sentence, cut_all=True)
        processed = [token for token in tokens if token.isalnum()]
        if not keep_stopwords:
            processed = [token for token in processed if token not in stop_words_zh]
        return processed

    return wrapper if sentence is None else wrapper(sentence)

## Language Vocabulary

In [0]:
class Language(object):

    SOS_TOKEN = '<SOS>'
    EOS_TOKEN = '<EOS>'
    UNK_TOKEN = '<UNK>'

    def __init__(self, name):
        self.name = name
        self.word2idx = {}
        self.word2count = {}
        self.idx2word = {0: self.SOS_TOKEN,
                         1: self.EOS_TOKEN,
                         2: self.UNK_TOKEN}
    
    def __len__(self):
        return len(self.idx2word)

    def add_sentence(self, sentence):
        for token in sentence:
            self.add_word(token)

    def add_word(self, word):
        if word not in self.word2idx:
            idx = len(self)
            self.word2idx[word] = idx
            self.idx2word[idx] = word
        
        count = self.word2count.get(word, 0)
        self.word2count[word] = count + 1

    def sent_to_idxs(self, sent):
        return [self.word2idx.get(word, 2) for word in sent]
    
    def __repr__(self):
        return f'Language(name={self.name}) with {len(self)} words'

## Loading Data

In [0]:
# Read from file

with open(TRAIN_EN) as f:
    train_en = f.readlines()
with open(TRAIN_ZH) as f:
    train_zh = f.readlines()
with open(TRAIN_SCORES) as f:
    train_scores = f.readlines()
with open(VAL_EN) as f:
    val_en = f.readlines()
with open(VAL_ZH) as f:
    val_zh = f.readlines()
with open(VAL_SCORES) as f:
    val_scores = f.readlines()
with open(TEST_EN) as f:
    test_en = f.readlines()
with open(TEST_ZH) as f:
    test_zh = f.readlines()

In [16]:
# English data

preprocess_english = preprocess_en(keep_stopwords=False)

train_en_sents = [preprocess_english(sent) for sent in train_en]
val_en_sents = [preprocess_english(sent) for sent in val_en]
test_en_sents = [preprocess_english(sent) for sent in test_en]

EN = Language('EN')
for sent in train_en_sents:
    EN.add_sentence(sent)

print(EN)

print()
print('Sample sentence')
sample_sent_en = train_en_sents[42]
print(sample_sent_en)
print(EN.sent_to_idxs(sample_sent_en))

train_en_idxs = [EN.sent_to_idxs(sent) for sent in train_en_sents]
val_en_idxs = [EN.sent_to_idxs(sent) for sent in val_en_sents]
test_en_idxs = [EN.sent_to_idxs(sent) for sent in test_en_sents]

Language(name=EN) with 19142 words

Sample sentence
['artilleryman', 'record', 'wound', 'die']
[293, 294, 295, 296]


In [17]:
# Chinese data

preprocess_chinese = preprocess_zh(keep_stopwords=False)

train_zh_sents = [preprocess_chinese(sent) for sent in train_zh]
val_zh_sents = [preprocess_chinese(sent) for sent in val_zh]
test_zh_sents = [preprocess_chinese(sent) for sent in test_zh]

ZH = Language('ZH')
for sent in train_zh_sents:
    ZH.add_sentence(sent)

print(ZH)

print()
print('Sample sentence')
sample_sent_zh = train_zh_sents[42]
print(sample_sent_zh)
print(ZH.sent_to_idxs(sample_sent_zh))

train_zh_idxs = [ZH.sent_to_idxs(sent) for sent in train_zh_sents]
val_zh_idxs = [ZH.sent_to_idxs(sent) for sent in val_zh_sents]
test_zh_idxs = [ZH.sent_to_idxs(sent) for sent in test_zh_sents]

Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.610 seconds.
Prefix dict has been built successfully.


Language(name=ZH) with 23851 words

Sample sentence
['据', '记录', '所有', '6', '名', '炮兵', '都', '受伤', '了']
[483, 484, 485, 267, 486, 487, 488, 489, 17]


In [0]:
# Process scores

def prepare_score(score):
    return float(score)

train_scores = [prepare_score(score) for score in train_scores]
val_scores = [prepare_score(score) for score in val_scores]

In [0]:
# Datasets

train_en_tensors = [torch.LongTensor(sent_idxs) for sent_idxs in train_en_idxs]
train_zh_tensors = [torch.LongTensor(sent_idxs) for sent_idxs in train_zh_idxs]

train_pairs = list(zip(train_en_tensors, train_zh_tensors))
train_set = list(zip(train_pairs, train_scores))

val_en_tensors = [torch.LongTensor(sent_idxs) for sent_idxs in val_en_idxs]
val_zh_tensors = [torch.LongTensor(sent_idxs) for sent_idxs in val_zh_idxs]

val_pairs = list(zip(val_en_tensors, val_zh_tensors))
val_set = list(zip(val_pairs, val_scores))

# val_pairs = list(zip(val_en_idxs, val_zh_idxs))
# test_pairs = list(zip(test_en_idxs, test_zh_idxs))

## Models

In [0]:
# Utilities

from scipy.stats.stats import pearsonr

def unzip(args):
    return zip(*args)

def RMSELoss(pred, target):
    return torch.sqrt(torch.mean((pred - target) ** 2))

### Bi-Direction NN

In [0]:
def get_context_vec(hidden_outputs, zh_emb):
    def activation(zh_emb, hidden):
        return zh_emb.squeeze().dot(hidden.squeeze())

    e_s = torch.Tensor([
        activation(zh_emb, hid)
        for hid in hidden_outputs
    ])

    a_s = F.softmax(e_s)

    ctx = torch.zeros(1, 100, device=device)
    for a, h in zip(a_s, hidden_outputs):
        ctx += a * h
    return ctx

In [0]:
from collections import deque

class BiRNN(nn.Module):
    def __init__(self, *, vocab_size, emb_dim=100, num_layers=1):
        super().__init__()
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim
        self.num_layers = num_layers

        self.embedding = nn.Embedding(self.vocab_size, self.emb_dim)

        self.forward_gru = torch.nn.GRU(
            input_size=self.emb_dim, hidden_size=self.emb_dim, num_layers=self.num_layers,batch_first=False, bidirectional=True)
        self.backward_gru = torch.nn.GRU(
            input_size=self.emb_dim, hidden_size=self.emb_dim, num_layers=self.num_layers, batch_first=False, bidirectional=False)
        
        self.backward_gru.weight_ih_l0 = self.forward_gru.weight_ih_l0_reverse
        self.backward_gru.weight_hh_l0 = self.forward_gru.weight_hh_l0_reverse
        self.backward_gru.bias_ih_l0 = self.forward_gru.bias_ih_l0_reverse
        self.backward_gru.bias_hh_l0 = self.forward_gru.bias_hh_l0_reverse

    def forward(self, tensor):
        """Return hidden states of backward RNN."""
        sent_len = len(tensor)
        emb = self.embedding(tensor).view(sent_len, 1, -1)

        # self.forward_gru.flatten_parameters()
        bi_output, bi_hidden = self.forward_gru(emb)
        rev_emb = emb[np.arange(sent_len - 1, -1, -1), :, :]

        # self.backward_gru.flatten_parameters()
        rev_output, rev_hidden = self.backward_gru(rev_emb)
        return rev_output

class BiRNNAttention(nn.Module):
    def __init__(self, *, vocab_size, emb_dim=100, num_layers=1):
        super().__init__()
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim
        self.num_layers = num_layers

        self.embedding = nn.Embedding(self.vocab_size, self.emb_dim)

        self.forward_gru = torch.nn.GRU(
            input_size=self.emb_dim, hidden_size=self.emb_dim, num_layers=self.num_layers,batch_first=False, bidirectional=True)
        self.backward_gru = torch.nn.GRU(
            input_size=self.emb_dim, hidden_size=self.emb_dim, num_layers=self.num_layers, batch_first=False, bidirectional=False)
        
        self.backward_gru.weight_ih_l0 = self.forward_gru.weight_ih_l0_reverse
        self.backward_gru.weight_hh_l0 = self.forward_gru.weight_hh_l0_reverse
        self.backward_gru.bias_ih_l0 = self.forward_gru.bias_ih_l0_reverse
        self.backward_gru.bias_hh_l0 = self.forward_gru.bias_hh_l0_reverse

    def forward(self, tensor, prev_hiddens):
        """Return hidden states of backward RNN."""
        sent_len = len(tensor)

        embs = self.embedding(tensor).view(sent_len, 1, -1)

        ctxs = torch.stack([get_context_vec(prev_hiddens, zh_emb)
                            for zh_emb in embs])

        # self.forward_gru.flatten_parameters()
        bi_output, bi_hidden = self.forward_gru(ctxs)

        rev_ctxs = ctxs[np.arange(sent_len - 1, -1, -1), :, :]

        # self.backward_gru.flatten_parameters()
        rev_output, rev_hidden = self.backward_gru(rev_ctxs)
        return rev_output

class QualVecRNN(nn.Module):
    def __init__(self, *, emb_dim=100, num_layers=1):
        super().__init__()
        self.emb_dim = emb_dim
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size=self.emb_dim, hidden_size=1, num_layers=self.num_layers)
    
    def forward(self, tensor):
        # self.lstm.flatten_parameters()
        output, hidden = self.lstm(tensor)
        return hidden[0]

In [0]:
def train(num_epochs):
    for eidx in range(num_epochs):
        print(f'Epoch {eidx + 1}')

        for comp in components:
            comp.zero_grad()

        loss = 0


        print(f'Training: {len(train_set)}', end=' ')
        for idx, ((en_tensor, zh_tensor), score) in enumerate(train_set):
            # Forward pass to get 
            en_hiddens = en_rnn(en_tensor.to(device))
            qual_vecs = zh_rnn(zh_tensor.to(device), en_hiddens)  
            pred_score = regressor(qual_vecs).squeeze()

            loss += loss_fn(pred_score, score)
            if idx % 500 == 0:
                print('.', end='')
        
        print()
        loss /= len(train_set)
        print(f'Loss={loss}')

        val_pred_scores = []

        val_loss = 0
        for idx, ((en_tensor, zh_tensor), score) in enumerate(val_set):
            en_hiddens = en_rnn(en_tensor.to(device))
            qual_vecs = zh_rnn(zh_tensor.to(device), en_hiddens)  
            pred_score = regressor(qual_vecs).squeeze()
            val_pred_scores.append(pred_score.data.cpu().numpy())

            val_loss += loss_fn(pred_score, score)
            if idx % 500 == 0:
                print('.', end='')

        val_loss /= len(val_set)
        print(f'validation loss = {val_loss:.5f}\t', end='')

        pearson_score, _ = pearsonr(np.array(val_pred_scores), np.array(val_scores))
        print(f'validation pearson = {pearson_score:.5f}\t')

        # Backpropagation
        loss.backward()

        for opt in opts:
            opt.step()


In [24]:
en_rnn = BiRNN(vocab_size=len(EN))
en_rnn.to(device)

zh_rnn = BiRNNAttention(vocab_size=len(ZH))
zh_rnn.to(device)

regressor = QualVecRNN()
regressor.to(device)

LR = 0.003

en_opt = torch.optim.Adam(en_rnn.parameters(), lr=LR)
zh_opt = torch.optim.Adam(zh_rnn.parameters(), lr=LR)
regr_opt = torch.optim.Adam(regressor.parameters(), lr=LR)

loss_fn = RMSELoss

print(en_rnn)
print(zh_rnn)
print(regressor)

components = (en_rnn, zh_rnn, regressor)
opts = (en_opt, zh_opt, regr_opt)


BiRNN(
  (embedding): Embedding(19142, 100)
  (forward_gru): GRU(100, 100, bidirectional=True)
  (backward_gru): GRU(100, 100)
)
BiRNNAttention(
  (embedding): Embedding(23851, 100)
  (forward_gru): GRU(100, 100, bidirectional=True)
  (backward_gru): GRU(100, 100)
)
QualVecRNN(
  (lstm): LSTM(100, 1)
)


In [0]:
train(40) #max noticed: 0.14/0.16

Epoch 1
Training: 7000 ..............
Loss=0.8162736892700195
..validation loss = 0.78711	validation pearson = -0.04118	
Epoch 2
Training: 7000 ..............
Loss=0.7002050876617432
..validation loss = 0.69896	validation pearson = 0.01286	
Epoch 3
Training: 7000 ..............
Loss=0.7010735273361206
..validation loss = 0.70184	validation pearson = 0.07286	
Epoch 4
Training: 7000 ..............
Loss=0.6951229572296143
..validation loss = 0.69867	validation pearson = 0.09685	
Epoch 5
Training: 7000 ..............
Loss=0.6840256452560425
..validation loss = 0.69313	validation pearson = 0.09686	
Epoch 6
Training: 7000 ..............
Loss=0.6857578158378601
..validation loss = 0.69874	validation pearson = 0.08600	
Epoch 7
Training: 7000 ..............
Loss=0.6785259246826172
..validation loss = 0.69680	validation pearson = 0.09386	
Epoch 8
Training: 7000 ..............
Loss=0.6694183945655823
..validation loss = 0.69169	validation pearson = 0.10776	
Epoch 9
Training: 7000 .

### FFNN with trained embeddings

In [0]:
class FFNN(nn.Module):
    
    def __init__(self, *, en_vocab_size, zh_vocab_size, emb_dim):
        super().__init__()
        self.en_vocab_size = en_vocab_size
        self.zh_vocab_size = zh_vocab_size
        self.emb_dim = emb_dim

        self.en_embedding = nn.Embedding(self.en_vocab_size, self.emb_dim)
        self.zh_embedding = nn.Embedding(self.zh_vocab_size, self.emb_dim)

        self.en_hidden = nn.Linear(self.emb_dim, 1)
        self.zh_hidden = nn.Linear(self.emb_dim, 1)

        self.out = nn.Linear(2, 1)
    
    def forward(self, en_tensors, zh_tensors):
        en_emb = self.en_embedding(en_tensors)
        zh_emb = self.zh_embedding(zh_tensors)

        en_hid = F.relu(self.en_hidden(en_emb))
        zh_hid = F.relu(self.zh_hidden(en_emb))

        hid_concat = torch.stack((en_hid, zh_hid), axis=1).squeeze()
        score = self.out(hid_concat)
        return score.mean()

In [39]:
ffnn = FFNN(en_vocab_size=len(EN), zh_vocab_size=len(ZH), emb_dim=200)
ffnn.to(device)
print(ffnn)

ffnn_opt = torch.optim.Adam(ffnn.parameters(), lr=0.003)
loss_fn = RMSELoss

FFNN(
  (en_embedding): Embedding(19142, 200)
  (zh_embedding): Embedding(23851, 200)
  (en_hidden): Linear(in_features=200, out_features=1, bias=True)
  (zh_hidden): Linear(in_features=200, out_features=1, bias=True)
  (out): Linear(in_features=2, out_features=1, bias=True)
)


In [40]:
NUM_EPOCHS = 20 #0.06 best around E30

train_losses = []
val_losses = []
val_pearson = []

for eidx in range(NUM_EPOCHS):
    print(f'Epoch {eidx + 1}: \t', end=' ')
    ffnn.zero_grad()
    
    loss = 0
    for (en_tensor, zh_tensor), score in train_set:
        pred = ffnn(en_tensor.to(device), zh_tensor.to(device))
        loss += loss_fn(pred, score)

    loss /= len(train_set)
    train_losses.append(loss)
    
    print(f'train loss = {loss:.5f}\t', end='')

    # Validation loss
    val_loss = 0
    for (en_tensor, zh_tensor), score in val_set:
        pred = ffnn(en_tensor.to(device), zh_tensor.to(device))
        val_loss += loss_fn(pred, score)
    val_loss /= len(val_set)
    print(f'validation loss = {val_loss:.5f}\t', end='')
    val_losses.append(val_loss)

    # Validation score
    val_preds, val_targets = unzip([(ffnn(en_tensor.to(device), zh_tensor.to(device)).detach().cpu().numpy(), score)
                              for (en_tensor, zh_tensor), score in val_set])
    
    val_preds = np.array(val_preds)
    val_targets = np.array(val_targets)

    pearson_score, _ = pearsonr(val_preds, val_targets)
    val_pearson.append(pearson_score)
    print(f'validation pearson = {pearson_score:.5f}\t')

    # Backpropagation
    loss.backward()
    ffnn_opt.step()

Epoch 1: 	 train loss = 0.78949	validation loss = 0.79252	validation pearson = 0.06212	
Epoch 2: 	 train loss = 0.77863	validation loss = 0.78406	validation pearson = 0.06094	
Epoch 3: 	 train loss = 0.76857	validation loss = 0.77620	validation pearson = 0.05827	
Epoch 4: 	 train loss = 0.75928	validation loss = 0.76868	validation pearson = 0.05525	
Epoch 5: 	 train loss = 0.75061	validation loss = 0.76155	validation pearson = 0.05286	
Epoch 6: 	 train loss = 0.74243	validation loss = 0.75489	validation pearson = 0.05114	
Epoch 7: 	 train loss = 0.73475	validation loss = 0.74872	validation pearson = 0.04932	
Epoch 8: 	 train loss = 0.72747	validation loss = 0.74284	validation pearson = 0.04736	
Epoch 9: 	 train loss = 0.72060	validation loss = 0.73736	validation pearson = 0.04458	
Epoch 10: 	 train loss = 0.71403	validation loss = 0.73226	validation pearson = 0.04218	
Epoch 11: 	 train loss = 0.70770	validation loss = 0.72748	validation pearson = 0.04059	
Epoch 12: 	 train loss = 0.701

### RNN Chain

In [0]:
class RNNChain(nn.Module):

    def __init__(self, *, en_vocab_size, zh_vocab_size, emb_dim):
        super().__init__()
        self.en_vocab_size = en_vocab_size
        self.zh_vocab_size = zh_vocab_size
        self.emb_dim = emb_dim

        self.en_embedding = nn.Embedding(self.en_vocab_size, self.emb_dim)
        self.zh_embedding = nn.Embedding(self.zh_vocab_size, self.emb_dim)

        self.en_rnn = nn.GRU(self.emb_dim, self.emb_dim, bidirectional=True)
        self.zh_rnn = nn.GRU(self.emb_dim, self.emb_dim, bidirectional=True)

        self.hidden = nn.Linear(self.emb_dim, 50)
        self.out = nn.Linear(50, 1)

    def forward(self, en_tensor, zh_tensor):
        en_emb = self.en_embedding(en_tensor)
        zh_emb = self.zh_embedding(zh_tensor)

        en_hidden = torch.zeros(2, 1, self.emb_dim, device=device)

        for word_idx in en_emb:
            word_idx = word_idx.view(1, 1, -1)
            _, en_hidden = self.en_rnn(word_idx, en_hidden)
    
        zh_hidden = en_hidden
        for word_idx in zh_emb:
            word_idx = word_idx.view(1, 1, -1)
            _, zh_hidden = self.zh_rnn(word_idx, zh_hidden)

        score = self.out(F.relu(self.hidden(zh_hidden[-1])))
        return score

In [43]:
USE_PREV = True

rnn = RNNChain(en_vocab_size=len(EN), zh_vocab_size=len(ZH), emb_dim=100)
rnn.to(device)

rnn_opt = torch.optim.Adam(rnn.parameters(), lr=0.003)
loss_fn = RMSELoss

NUM_EPOCHS = 100

state = {
    'curr_epoch': 1,
    'train_losses': [],
    'val_losses': [],
    'val_pearson': [],
}

if USE_PREV and os.path.exists(in_gdrive('rnn.pt')):
    print('Loading from Google Drive...', end=' ')
    rnn.load_state_dict(torch.load(in_gdrive('rnn.pt')))

    with open(in_gdrive('rnn.json'), 'r') as f:
        state = json.load(f)
    print('done!')


while state['curr_epoch'] <= NUM_EPOCHS:
    print(f'Epoch {state["curr_epoch"]}:')
    rnn.zero_grad()
    
    loss = 0
    print(f'Training {len(train_set)}: ', end='')
    for idx, ((en_tensor, zh_tensor), score) in enumerate(train_set):
        pred = rnn(en_tensor.to(device), zh_tensor.to(device)).squeeze()
        curr_loss = loss_fn(pred, score) 
        loss += curr_loss
        if idx % 500 == 0:
            print('.', end='')
    print()

    loss /= len(train_set)
    state['train_losses'].append(loss.detach().cpu().numpy().tolist())
    
    print(f'==>train loss = {loss:.5f}')

    # Validation loss
    val_loss = 0
    print(f'Validating loss {len(val_set)}: ', end='')
    for idx, ((en_tensor, zh_tensor), score) in enumerate(val_set):
        pred = rnn(en_tensor.to(device), zh_tensor.to(device))
        val_loss += loss_fn(pred, score)

        if idx % 100 == 0:
            print('.', end='')
    print()    

    val_loss /= len(val_set)
    print(f'==>validation loss = {val_loss:.5f}')
    state['val_losses'].append(val_loss.detach().cpu().numpy().tolist())

    # Validation score
    val_preds, val_targets = unzip([(rnn(en_tensor.to(device), zh_tensor.to(device)).squeeze().detach().cpu().numpy(), score)
                              for (en_tensor, zh_tensor), score in val_set])
    val_preds = np.array(val_preds)
    val_targets = np.array(val_targets)

    pearson_score, _ = pearsonr(val_preds, val_targets)
    state['val_pearson'].append(pearson_score)
    print(f'==>validation pearson = {pearson_score:.5f}')

    # Backpropagation
    print('Backpropagation...', end=' ')
    loss.backward()
    rnn_opt.step()
    print('done!')

    # Save
    print('Saving to Google Drive...', end=' ')
    torch.save(rnn.state_dict(), in_gdrive('rnn.pt'))

    state['curr_epoch'] += 1
    with open(in_gdrive('rnn.json'), 'w') as f:
        json.dump(state, f)

    print('done!\n')
    

Epoch 1:
Training 7000: ..............
==>train loss = 0.69984
Validating loss 1000: ..........
==>validation loss = 0.69330
==>validation pearson = 0.04330
Backpropagation... done!
Saving to Google Drive... 

FileNotFoundError: ignored

In [0]:
class RNNChain(nn.Module):

    def __init__(self, *, vocab_size, emb_dim):
        super().__init__()
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim

        self.embedding = nn.Embedding(self.vocab_size, self.emb_dim)
        self.rnn = nn.GRU(self.emb_dim, self.emb_dim, bidirectional=True)

    def forward(self, x, hidden):
        emb = self.embedding(x)
        output = emb.view(1, 1, -1)
        output, hidden = self.rnn(output, hidden)

        return output, hidden

    def init_hidden(self):
        return torch.zeros(2, 1, self.emb_dim, device=device)

class RegressorLayer(nn.Module):

    def __init__(self, *, emb_dim):
        super().__init__()
        self.emb_dim = emb_dim

        self.hidden = nn.Linear(self.emb_dim, 50)
        self.out = nn.Linear(50, 1)

    def forward(self, x):
        return self.out(F.relu(self.hidden(x)))

In [0]:
en_model = RNNChain(vocab_size=len(EN), emb_dim=200)
zh_model = RNNChain(vocab_size=len(ZH), emb_dim=200)
regressor = RegressorLayer(emb_dim=200)

en_model.to(device)
zh_model.to(device)
regressor.to(device)

#print(en_model)
#print(zh_model)

LR = 0.003

en_opt = torch.optim.Adam(en_model.parameters(), lr=LR)
zh_opt = torch.optim.Adam(zh_model.parameters(), lr=LR)
regressor_opt = torch.optim.Adam(regressor.parameters(), lr=LR)

def RMSELoss(pred, target):
    return torch.sqrt(torch.mean((pred - target) ** 2))

loss_fn = RMSELoss

In [0]:
def train(en_tensor, zh_tensor, score):
    en_model.zero_grad()
    zh_model.zero_grad()

    en_hidden = en_model.init_hidden()

    for word_idx in en_tensor:
        hids, en_hidden = en_model(word_idx, en_hidden)
        #print('Hids', hids.shape)
    
    # print('EN final hidden state', en_hidden)

    zh_hidden = en_hidden
    for word_idx in zh_tensor:
        _, zh_hidden = zh_model(word_idx, zh_hidden)
    
    # print('ZH final hidden state', zh_hidden)

    pred_score = regressor(zh_hidden).squeeze()

    loss = loss_fn(pred_score, score)
    
    # print('Loss', loss)    

    loss.backward()

    regressor_opt.step()
    zh_opt.step()
    en_opt.step()

    return loss.data

In [52]:

for eidx in range(100):
    loss = 0
    for (en_tensor, zh_tensor), score in train_set[:100]:
        loss += train(en_tensor.to(device), zh_tensor.to(device), score)
    loss /= 100
    print(loss)

tensor(0.8550, device='cuda:0')
tensor(2.4649, device='cuda:0')
tensor(0.9812, device='cuda:0')
tensor(6.9700, device='cuda:0')
tensor(1.0203, device='cuda:0')
tensor(1.1656, device='cuda:0')
tensor(1.9127, device='cuda:0')
tensor(0.8538, device='cuda:0')
tensor(0.7714, device='cuda:0')
tensor(0.8021, device='cuda:0')
tensor(0.7792, device='cuda:0')
tensor(0.7273, device='cuda:0')
tensor(0.8187, device='cuda:0')


KeyboardInterrupt: ignored