### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import re
import time
import random
import pickle
import json
from pprint import pprint
from typing import List
from array import array
import preprocessor as p
from itertools import cycle
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.ensemble import GradientBoostingClassifier
from functions import *
#from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity

import tensorflow as tf
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics import F1Score
from torch.autograd import Function
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from keras_preprocessing.sequence import pad_sequences

from transformers import BertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig, BertPreTrainedModel, BertModel
from transformers import get_linear_schedule_with_warmup

import nltk
import warnings

nltk.download('wordnet')
warnings.filterwarnings('ignore')

from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler

# seed_val = 42
# torch.manual_seed(42)
# random.seed(seed_val)
# np.random.seed(seed_val)

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/bharathia/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [47]:
torch.__version__

'1.13.0.dev20220922'

In [2]:
def seed_everything(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    np.random.seed(seed)  # Numpy module.
    random.seed(seed)  # Python random module.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.use_deterministic_algorithms(True)
    
seed_everything(12)

In [None]:
# https://github.com/pytorch/pytorch/issues/84288
# https://github.com/pytorch/pytorch/issues/7068
# https://github.com/facebookresearch/maskrcnn-benchmark/issues/376
# https://discuss.pytorch.org/t/nondeterminism-even-when-setting-all-seeds-0-workers-and-cudnn-deterministic/26080

In [17]:
torch.manual_seed(999)
print (torch.randn(3, device='mps'))
torch.manual_seed(999)
print (torch.randn(3, device='mps'))

tensor([-0.3380, -0.2398,  0.3864], device='mps:0')
tensor([-0.3380, -0.2398,  0.3864], device='mps:0')


### Loading the combined english training data

In [3]:
english_data = pd.read_csv('processed_datasets/combined_english_tweets.csv')
english_training_data = english_data[['processed_tweet', 'stance']]
english_train_data, english_test_data = train_test_split(english_training_data, 0.1)
english_train_data = english_train_data.dropna().reset_index(drop=True)
english_test_data = english_test_data.dropna().reset_index(drop=True)
english_train_data = english_train_data.sample(frac=1, random_state=42)

In [24]:
english_train_data['stance'].value_counts()

positive    2031
negative    1044
neutral      967
Name: stance, dtype: int64

### Loading translation augmented data

In [3]:
english_training_data = pd.read_csv('processed_datasets/backtranslated_train_data.csv')
english_train_data, english_test_data = train_test_split(english_training_data, 0.1)
english_train_data = english_train_data.dropna().reset_index(drop=True)
english_test_data = english_test_data.dropna().reset_index(drop=True)
english_train_data = english_train_data.sample(frac=1, random_state=42)

In [16]:
english_train_data.head()

Unnamed: 0.1,Unnamed: 0,processed_tweet,stance
1071,1181,This article by the great gets right into who ...,negative
7820,8717,LUTTE CONTRE LA TYRANNY!!! Le vaccin contre le...,negative
13614,15131,I ricercatori con l'Università del Texas e Pfi...,positive
291,320,Relieved to report that mum has just had her s...,positive
9292,10356,Ich hatte gerade die erste Dosis meines Pfizer...,positive


In [50]:
english_train_data.head()

Unnamed: 0,processed_tweet,stance
4003,"Under the leadership of , we have developed a ...",positive
149,Current(ish; new data always pouring in) effic...,positive
2025,Just think before you refuse to have the new C...,positive
2505,Champagne and questions greet first data showi...,neutral
3203,Canada made $1 billion upfront payments to Pfi...,neutral


In [5]:
english_train_data['stance'].value_counts()

positive    8208
negative    4078
neutral     3884
Name: stance, dtype: int64

### Data resampling - Oversampling minority classes

In [4]:
target_variable = 'stance'
minority_classes = ['negative', 'neutral']

majority_df = english_train_data[english_train_data[target_variable].isin(minority_classes)==False]
minority_df = english_train_data[english_train_data[target_variable].isin(minority_classes)]

le = LabelEncoder()
minority_df[target_variable] = le.fit_transform(minority_df[target_variable])
#print(minority_df.head())

oversampler = RandomOverSampler(sampling_strategy = {0:2031, 1:2031}, random_state = 42)
X_resampled, y_resampled = oversampler.fit_resample(minority_df.drop(target_variable, axis=1), minority_df[target_variable])
y_resampled = le.inverse_transform(y_resampled)

print(y_resampled)

resampled_df = pd.DataFrame({'processed_tweet':X_resampled['processed_tweet'], 'stance': y_resampled})
# X_resampled_df = pd.DataFrame(X_resampled, columns=minority_df.drop(target_variable, axis=1).columns)
# y_resampled_df = pd.DataFrame(y_resampled, columns=[target_variable])
english_train_data = pd.concat([majority_df, resampled_df])

# Print value counts of target variable to check if upsampling worked
print(english_train_data[target_variable].value_counts())


['neutral' 'neutral' 'negative' ... 'neutral' 'neutral' 'neutral']
positive    2031
neutral     2031
negative    2031
Name: stance, dtype: int64


In [5]:
english_train_data.head()

Unnamed: 0,processed_tweet,stance
4003,"Under the leadership of , we have developed a ...",positive
149,Current(ish; new data always pouring in) effic...,positive
2025,Just think before you refuse to have the new C...,positive
952,Just had my vaccine this morning and it was As...,positive
1041,"Unlike most drugs, vaccines are not static. Wi...",positive


In [28]:
english_train_data[english_train_data['stance'].isna()]

Unnamed: 0,processed_tweet,stance


In [9]:
english_train_balanced = english_train_data.groupby('stance')
english_train_balanced = english_train_balanced.apply(lambda x: x.sample(english_train_balanced.size().min()).reset_index(drop=True))

In [6]:
french_translations = pickle.load(open('processed_datasets/french_translations.pkl', 'rb'))
german_translations = pickle.load(open('processed_datasets/german_translations.pkl', 'rb'))
italian_translations = pickle.load(open('processed_datasets/italian_translations.pkl', 'rb'))

### Loading the unlabeled data for adversarial training

In [15]:
french_pro_ids = []
french_anti_ids = []
french_nlbl_ids = []

german_pro_ids = []
german_anti_ids = []
german_nlbl_ids = []

italian_pro_ids = []
italian_anti_ids = []
italian_nlbl_ids = []



with open('datasets/VaccinEU/french/french_anti_ids.txt', 'r') as file:
    lines = [line.strip() for line in file]
    french_anti_ids = lines[0:1500]
    
with open('datasets/VaccinEU/french/french_pro_ids.txt', 'r') as file:
    lines = [line.strip() for line in file]
    french_pro_ids = lines[0:1500]
    
with open('datasets/VaccinEU/french/french_ids.txt', 'r') as file:
    lines = [line.strip() for line in file]
    french_nlbl_ids = random.sample(lines, 4000)

french_ids = french_pro_ids + french_anti_ids + french_nlbl_ids

with open('datasets/VaccinEU/german/german_anti_ids.txt', 'r') as file:
    lines = [line.strip() for line in file]
    german_anti_ids = lines[0:1000]
    
with open('datasets/VaccinEU/german/german_pro_ids.txt', 'r') as file:
    lines = [line.strip() for line in file]
    german_pro_ids = lines[0:1000]
    
with open('datasets/VaccinEU/german/german_ids.txt', 'r') as file:
    lines = [line.strip() for line in file]
    german_nlbl_ids = random.sample(lines, 4000)

german_ids = german_pro_ids + german_anti_ids + german_nlbl_ids

with open('datasets/VaccinEU/italian/italian_anti_ids.txt', 'r') as file:
    lines = [line.strip() for line in file]
    italian_anti_ids = lines[0:1000]
    
with open('datasets/VaccinEU/italian/italian_pro_ids.txt', 'r') as file:
    lines = [line.strip() for line in file]
    italian_pro_ids = lines[0:1000]
    
with open('datasets/VaccinEU/italian/italian_ids.txt', 'r') as file:
    lines = [line.strip() for line in file]
    italian_nlbl_ids = random.sample(lines, 4000)

italian_ids = italian_pro_ids + italian_anti_ids + italian_nlbl_ids

df_french = pd.DataFrame(french_ids)
df_french.to_csv('processed_datasets/vaccinEU/french_adv_ids.csv', header=False, index=None)

df_german = pd.DataFrame(german_ids)
df_german.to_csv('processed_datasets/vaccinEU/german_adv_ids.csv', header=False, index=None)

df_italian = pd.DataFrame(italian_ids)
df_italian.to_csv('processed_datasets/vaccinEU/italian_adv_ids.csv', header=False, index=None)

In [4]:
french_adv_tweets = pd.read_csv('processed_datasets/vaccinEU/french_adv_ids_hydrated.csv')['text']
german_adv_tweets = pd.read_csv('processed_datasets/vaccinEU/german_adv_ids_hydrated.csv')['text']
italian_adv_tweets = pd.read_csv('processed_datasets/vaccinEU/italian_adv_ids_hydrated.csv')['text']

In [5]:
french_adv_tweets = french_adv_tweets.apply(process_tweet).tolist()
german_adv_tweets = german_adv_tweets.apply(process_tweet).tolist()
italian_adv_tweets = italian_adv_tweets.apply(process_tweet).tolist()

### Data Preparation

In [4]:
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print(f'Found GPU at: {device_name}')
    
if torch.has_mps:    
    device = torch.device("mps")
else:
    print('using the CPU')
    device = torch.device("cpu")

Metal device set to: Apple M1
Found GPU at: /device:GPU:0


2023-03-27 12:20:01.212948: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-27 12:20:01.213489: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [7]:
device = torch.device("cpu")
print(device)

cpu


In [8]:
MAX_LEN = 128 # max sequences length
batch_size = 32

In [9]:
def preprocessing_translated_data(translated_data: List[str]) -> List[int]:
    sentences = np.array(translated_data)

    tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased', do_lower_case=True)
    
    encoded_sentences = []
    for sent in sentences:
        encoded_sent = tokenizer.encode(
                            sent,
                            add_special_tokens = True,
                            truncation=True,
                            max_length = MAX_LEN
                    )
        
        encoded_sentences.append(encoded_sent)
    encoded_sentences = pad_sequences(encoded_sentences, maxlen=MAX_LEN, dtype="long", 
                            value=0, truncating="post", padding="post")
    
    return encoded_sentences

def _init_fn():
     np.random.seed(42)

In [10]:
train_encoded_sentences, train_labels = preprocessing(english_train_data)
train_attention_masks = attention_masks(train_encoded_sentences)

test_encoded_sentences, test_labels = preprocessing(english_test_data)
test_attention_masks = attention_masks(test_encoded_sentences)

train_inputs = torch.tensor(train_encoded_sentences)
train_labels = torch.tensor(train_labels)
train_masks = torch.tensor(train_attention_masks)

validation_inputs = torch.tensor(test_encoded_sentences)
validation_labels = torch.tensor(test_labels)
validation_masks = torch.tensor(test_attention_masks)

# data loader for training
train_data = TensorDataset(train_inputs, train_masks, train_labels)
#train_sampler = SequentialSampler(train_data)
train_dataloader = DataLoader(train_data, batch_size=batch_size)

# data loader for validation
validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
#validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(
    validation_data, batch_size=len(validation_data))


In [11]:
# data loader for translated text
translated_data = italian_adv_tweets
train_translated_sentences = preprocessing_translated_data(translated_data)
train_translated_attention_masks = attention_masks(train_translated_sentences)

train_translated_inputs = torch.tensor(train_translated_sentences)
train_translated_masks = torch.tensor(train_translated_attention_masks)

train_translated_data = TensorDataset(train_translated_inputs, train_translated_masks)
#train_translated_sampler = SequentialSampler(train_translated_data)
train_translated_dataloader = DataLoader(
    train_translated_data, batch_size=batch_size)

In [14]:
len(italian_adv_tweets)

5131

In [12]:
vaccin_eu_french = pd.read_csv('processed_datasets/VaccinEU/french_tweets.csv')
vaccin_eu_german = pd.read_csv('processed_datasets/VaccinEU/german_tweets.csv')
vaccin_eu_italian = pd.read_csv('processed_datasets/VaccinEU/italian_tweets.csv')

french_encoded_sentences, french_labels = preprocessing(vaccin_eu_french)
french_attention_masks = attention_masks(french_encoded_sentences)

french_inputs = torch.tensor(french_encoded_sentences)
french_labels = torch.tensor(french_labels)
french_masks = torch.tensor(french_attention_masks)

french_data = TensorDataset(french_inputs, french_masks, french_labels)
french_sampler = SequentialSampler(french_data)
french_dataloader = DataLoader(french_data, sampler=french_sampler, batch_size=len(french_data))


german_encoded_sentences, german_labels = preprocessing(vaccin_eu_german)
german_attention_masks = attention_masks(german_encoded_sentences)

german_inputs = torch.tensor(german_encoded_sentences)
german_labels = torch.tensor(german_labels)
german_masks = torch.tensor(german_attention_masks)

german_data = TensorDataset(german_inputs, german_masks, german_labels)
german_sampler = SequentialSampler(german_data)
german_dataloader = DataLoader(german_data, sampler=german_sampler, batch_size=len(german_data))


italian_encoded_sentences, italian_labels = preprocessing(vaccin_eu_italian)
italian_attention_masks = attention_masks(italian_encoded_sentences)

italian_inputs = torch.tensor(italian_encoded_sentences)
italian_labels = torch.tensor(italian_labels)
italian_masks = torch.tensor(italian_attention_masks)

italian_data = TensorDataset(italian_inputs, italian_masks, italian_labels)
italian_sampler = SequentialSampler(italian_data)
italian_dataloader = DataLoader(italian_data, sampler=italian_sampler, batch_size=len(italian_data))

https://www.kaggle.com/code/ludovicocuoghi/detecting-bullying-tweets-pytorch-lstm-bert#PyTorch-LSTM-modeling

### Model definition

In [13]:
num_labels = 3
hidden_size = 768
intermediate_size = 800

class BertEncoder(nn.Module):
    def __init__(self):
        super(BertEncoder, self).__init__()
        self.encoder = BertModel.from_pretrained('bert-base-multilingual-uncased')

    def forward(self, x, mask=None):
        outputs = self.encoder(x, attention_mask=mask)
        feat = outputs[0][:, 0, :]
#         feat = outputs[0]
        return feat
    
class LSTMClassifier(nn.Module):
    def __init__(self, lstm_hidden_size, num_classes, dropout=0.1):
        super(LSTMClassifier, self).__init__()
        self.lstm = nn.LSTM(input_size=768, hidden_size=lstm_hidden_size, dropout = dropout, batch_first=True)
        self.dropout = nn.Dropout(p=dropout)
        self.fc = nn.Linear(lstm_hidden_size, num_classes)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.fc(x[:, -1, :])
        return x
    
    
class BiLSTMAttentionClassifier(nn.Module):
    def __init__(self, lstm_hidden_size, num_classes, dropout=0.1):
        super(BiLSTMAttentionClassifier, self).__init__()
        self.lstm_hidden_size = lstm_hidden_size
        self.num_classes = num_classes

        self.bilstm = nn.LSTM(input_size=768, hidden_size=lstm_hidden_size, bidirectional=True, batch_first=True)
        self.dropout = nn.Dropout(p=dropout)
        self.fc = nn.Linear(2*lstm_hidden_size, num_classes)

        self.attention = nn.Linear(2*lstm_hidden_size, 1)

    def forward(self, x):
        lstm_output, _ = self.bilstm(x)

        # Apply attention mechanism
        attention_scores = self.attention(lstm_output).squeeze(-1)
        attention_weights = torch.softmax(attention_scores, dim=1).unsqueeze(-1)
        attention_output = torch.sum(lstm_output * attention_weights, dim=1)

        # Apply dropout and pass through the fully connected layer
        output = self.dropout(attention_output)
        logits = self.fc(output)

        return logits
    
    
class BertClassifier(nn.Module):
    def __init__(self, dropout=0.1):
        super(BertClassifier, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, x):
        x = self.dropout(x)
        out = self.classifier(x)
        return out

    def init_bert_weights(self, module):
        """ Initialize the weights.
        """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            module.weight.data.normal_(mean=0.0, std=0.02)
        if isinstance(module, nn.Linear) and module.bias is not None:
            module.bias.data.zero_()
            
class Discriminator(nn.Module):
    """Discriminator model for source language"""

    def __init__(self):
        super(Discriminator, self).__init__()
        self.layer = nn.Sequential(
            nn.Linear(hidden_size, intermediate_size),
            nn.LeakyReLU(),
            nn.Linear(intermediate_size, intermediate_size),
            nn.LeakyReLU(),
            nn.Linear(intermediate_size, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Forward propagation"""
        out = self.layer(x)
        return out

In [14]:
alpha = 1.0
beta = 1.0
batch_size = 32
pre_epochs = 3
adapt_epochs = 2
pre_log_step = 5
adapt_log_step = 5
clip_value = 0.01
c_learning_rate = 5e-5
d_learning_rate = 1e-5
temperature = 20
lambd = 0.9
contrastive_temp = 0.3

src_encoder = BertEncoder()
src_classifier = BertClassifier()
discriminator = Discriminator()


src_encoder = src_encoder.to(device)
src_classifier = src_classifier.to(device)
lstm_classifier = LSTMClassifier(lstm_hidden_size=128, num_classes=3)
bi_lstm_classifier = BiLSTMAttentionClassifier(lstm_hidden_size = 128, num_classes=3)
discriminator = discriminator.to(device)
gbc = GradientBoostingClassifier()

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### Training

In [15]:
def compute_accuracy(preds, labels):
    p = np.argmax(preds, axis=1).flatten()
    l = labels.flatten()
    #p = preds.flatten()
#     print("True labels:", l)
#     print("Predictions:", p)
    return np.sum(p==l)/len(l)

def compute_f1(preds, labels):
    p = np.argmax(preds, axis=1).tolist()
    #p = preds.tolist()
    l = labels.tolist()
    f1_macro = f1_score(l, p, average='macro')
    f1_per_class = f1_score(l, p, average=None)
    return f1_macro, f1_per_class

def compute_precision_recall(preds, labels):
    p = np.argmax(preds, axis=1).tolist()
    #p = preds.tolist()
    l = labels.tolist()
    precision_per_class = precision_score(l, p, average=None)
    recall_per_class = recall_score(l, p, average=None)
    return precision_per_class, recall_per_class

In [16]:
def check_errors(encoder, classifier, data_loader):
    encoder.eval()
    classifier.eval()

    # init loss and accuracy
    loss = 0
    acc = 0

    # set loss function
    #criterion = nn.CrossEntropyLoss(weight = torch.FloatTensor([0.85,0.75,0.5]).to(device))
    criterion = nn.CrossEntropyLoss()

    # evaluate network
    for (inputs, mask, labels) in data_loader:
        inputs = inputs.to(device)
        mask = mask.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            feat = encoder(inputs, mask)
            preds = classifier(feat)
        loss += criterion(preds, labels).item()
        preds = preds.detach().cpu().numpy()
        labels = labels.to('cpu').numpy()
        
    preds = np.argmax(preds, axis=1).flatten()
    preds = preds.tolist()
    labels = labels.flatten()
    labels = labels.tolist()
    
    incorrect_indices = [i for i in range(len(labels)) if labels[i] != preds[i]]
    
    incorrect_preds = [preds[i] for i in incorrect_indices]
    
    correct_labels = [labels[i] for i in incorrect_indices]
    
    return incorrect_indices, correct_labels, incorrect_preds
    

In [16]:
def evaluate_validation(encoder, classifier, data_loader):
    """Evaluation for target language encoder by source classifier on target language dataset"""
    encoder.eval()
    classifier.eval()


    loss = 0
    acc = 0

    # set loss function
    #criterion = nn.CrossEntropyLoss(weight = torch.FloatTensor([0.85,0.75,0.5]).to(device))
    criterion = nn.CrossEntropyLoss()

    # evaluate network
    for (inputs, mask, labels) in data_loader:
        inputs = inputs.to(device)
        mask = mask.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            feat = encoder(inputs, mask)
            preds = classifier(feat)
#             feat = feat.detach().cpu().numpy()
        #preds = gbc_classifier.predict_proba(feat)
        loss += criterion(preds, labels).item()
        preds = preds.detach().cpu().numpy()
        #preds = gbc_classifier.predict(preds)
        labels = labels.to('cpu').numpy()
        
        batch_acc = compute_accuracy(preds, labels)
        batch_f1_macro, batch_f1_per_class = compute_f1(preds, labels)
        batch_precision, batch_recall = compute_precision_recall(preds, labels)

#         pred_cls = preds.data.max(1)[1]
#         acc += pred_cls.eq(labels.data).cpu().sum().item()
    print("Validation loss: ", loss)
    print(f"Accuracy: {batch_acc}")
    print(f"F1 score (Macro): {batch_f1_macro}")
    print(f"F1 score (Per class): {batch_f1_per_class}")
    print(f"Precision score (Per class): {batch_precision}")
    print(f"Recall score (Per class): {batch_recall}")


In [17]:
def compute_contrastive_loss(temp, embedding, label):
    """calculate the contrastive loss
    """
    # cosine similarity between embeddings
    cosine_sim = cosine_similarity(embedding, embedding)
    # remove diagonal elements from matrix
    dis = cosine_sim[~np.eye(cosine_sim.shape[0], dtype=bool)].reshape(cosine_sim.shape[0], -1)
    # apply temprature to elements
    dis = dis / temp
    cosine_sim = cosine_sim / temp
    # apply exp to elements
    dis = np.exp(dis)
    cosine_sim = np.exp(cosine_sim)

    # calculate row sum
    row_sum = []
    for i in range(len(embedding)):
        row_sum.append(sum(dis[i]))
    # calculate outer sum
    contrastive_loss = 0
    for i in range(len(embedding)):
        n_i = label.tolist().count(label[i]) - 1
        inner_sum = 0
        # calculate inner sum
        for j in range(len(embedding)):
            if label[i] == label[j] and i != j:
                inner_sum = inner_sum + np.log(cosine_sim[i][j] / row_sum[i])
        if n_i != 0:
            contrastive_loss += (inner_sum / (-n_i))
        else:
            contrastive_loss += 0
    return contrastive_loss


In [20]:
pre_epochs = 10

In [18]:
def evaluate_test(src_enc, src_class):
    tgt_enc = BertEncoder().to(device)
    tgt_enc.load_state_dict(src_enc.state_dict())
    max_grad_norm = 1.0
    
    print("French Test: \n")
    evaluate_validation(tgt_enc, src_class, french_dataloader)
    print("German Test:")
    evaluate_validation(tgt_enc, src_class, german_dataloader)
    print("Italian Test: \n")
    evaluate_validation(tgt_enc, src_class, italian_dataloader)
    

In [19]:
def pretrain(data_loader):
    """Train classifier for source language."""
    
    global src_encoder
#     global bi_lstm_classifier
    global src_classifier

    # setup criterion and optimizer
#     optimizer = torch.optim.AdamW(list(src_encoder.parameters()) + list(bi_lstm_classifier.parameters()),
#                            lr=c_learning_rate)
#     optimizer = torch.optim.AdamW(list(encoder.parameters()),lr=c_learning_rate)
    optimizer = torch.optim.AdamW(list(src_encoder.parameters()) + list(src_classifier.parameters()),
                              lr=c_learning_rate)
    class_weights = [1.39,1.29,0.66]
    class_weights = torch.FloatTensor(class_weights).to(device)
    CELoss = nn.CrossEntropyLoss(weight = class_weights)
#     CELoss = nn.CrossEntropyLoss()
    
    # set train state for Dropout and BN layers
    src_encoder.train()
#     bi_lstm_classifier.train()
    src_classifier.train()

    for epoch in range(pre_epochs):
        print(f"Epoch: {epoch}/{pre_epochs}")
        for step, (inputs, mask, labels) in enumerate(data_loader):
            inputs = inputs.to(device)
            mask = mask.to(device)
            labels = labels.to(device)

            # zero gradients for optimizer
            optimizer.zero_grad()

            # compute loss for discriminator
            feat = src_encoder(inputs, mask)
            preds = src_classifier(feat)
            CE_loss = CELoss(preds, labels)
#             contrastive_loss = compute_contrastive_loss(contrastive_temp, 
#                                                         feat.cpu().detach().numpy(), labels)
#             cls_loss = (lambd * contrastive_loss) + (1-lambd)*(CE_loss)
            cls_loss = CE_loss
            

            # optimize source classifier
            cls_loss.backward()
            optimizer.step()

            # print step info
            if (step) % pre_log_step == 0:
                print("Epoch [%.2d/%.2d] Step [%.3d/%.3d]: cls_loss=%.4f"
                      % (epoch,
                         pre_epochs,
                         step,
                         len(data_loader),
                         cls_loss.item()))
        print(f'At the end of Epoch: {epoch}')
            
        
        evaluate_validation(src_encoder, src_classifier, validation_dataloader)
        evaluate_test(src_encoder, src_classifier)
        torch.save(src_encoder.state_dict(), f"model/pretrain/src_encoder_12_11_bt_class_weights_epoch_{epoch}.pth")
        torch.save(src_classifier.state_dict(), f"model/pretrain/src_classifier_12_11_bt_class_weights_epoch_{epoch}.pth")

    return src_encoder, src_classifier

In [None]:
# Train CE only model only for 7 epochs

In [25]:
src_encoder, src_classifier = pretrain(train_dataloader)

Epoch: 0/10
Epoch [00/10] Step [000/506]: cls_loss=1.1000
Epoch [00/10] Step [005/506]: cls_loss=1.1221
Epoch [00/10] Step [010/506]: cls_loss=1.1192
Epoch [00/10] Step [015/506]: cls_loss=1.0528
Epoch [00/10] Step [020/506]: cls_loss=1.0721
Epoch [00/10] Step [025/506]: cls_loss=1.0746
Epoch [00/10] Step [030/506]: cls_loss=1.0581
Epoch [00/10] Step [035/506]: cls_loss=0.9930
Epoch [00/10] Step [040/506]: cls_loss=0.9892
Epoch [00/10] Step [045/506]: cls_loss=0.7411
Epoch [00/10] Step [050/506]: cls_loss=1.2604
Epoch [00/10] Step [055/506]: cls_loss=0.7803
Epoch [00/10] Step [060/506]: cls_loss=0.9303
Epoch [00/10] Step [065/506]: cls_loss=0.6414
Epoch [00/10] Step [070/506]: cls_loss=0.7542
Epoch [00/10] Step [075/506]: cls_loss=1.0886
Epoch [00/10] Step [080/506]: cls_loss=1.0352
Epoch [00/10] Step [085/506]: cls_loss=0.5419
Epoch [00/10] Step [090/506]: cls_loss=0.9129
Epoch [00/10] Step [095/506]: cls_loss=0.8157
Epoch [00/10] Step [100/506]: cls_loss=0.6255
Epoch [00/10] Step [10

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


French Test: 

Validation loss:  0.9932263493537903
Accuracy: 0.5594237695078031
F1 score (Macro): 0.4954965034965035
F1 score (Per class): [0.32867133 0.476      0.68181818]
Precision score (Per class): [0.31125828 0.53846154 0.65075922]
Recall score (Per class): [0.34814815 0.4265233  0.71599045]
German Test:
Validation loss:  0.8138483166694641
Accuracy: 0.6347087378640777
F1 score (Macro): 0.5428684621296759
F1 score (Per class): [0.34814815 0.52770449 0.75275275]
Precision score (Per class): [0.29012346 0.47619048 0.83185841]
Recall score (Per class): [0.43518519 0.59171598 0.68738574]
Italian Test: 

Validation loss:  1.12908136844635
Accuracy: 0.46045503791982667
F1 score (Macro): 0.44014364657914856
F1 score (Per class): [0.34173669 0.49859155 0.4801027 ]
Precision score (Per class): [0.2961165  0.70238095 0.40215054]
Recall score (Per class): [0.40397351 0.38646288 0.5955414 ]
Epoch: 1/10
Epoch [01/10] Step [000/506]: cls_loss=0.6154
Epoch [01/10] Step [005/506]: cls_loss=0.60

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


French Test: 

Validation loss:  1.4250339269638062
Accuracy: 0.5414165666266506
F1 score (Macro): 0.4892522921876184
F1 score (Per class): [0.3375     0.46987952 0.66037736]
Precision score (Per class): [0.29189189 0.53424658 0.65268065]
Recall score (Per class): [0.4        0.41935484 0.66825776]
German Test:
Validation loss:  1.1408612728118896
Accuracy: 0.5764563106796117
F1 score (Macro): 0.5068439074549844
F1 score (Per class): [0.34615385 0.4822335  0.69214437]
Precision score (Per class): [0.26470588 0.42222222 0.82531646]
Recall score (Per class): [0.5        0.56213018 0.59597806]
Italian Test: 

Validation loss:  1.5772686004638672
Accuracy: 0.46912242686890576
F1 score (Macro): 0.4519779113938438
F1 score (Per class): [0.36461126 0.49860724 0.49271523]
Precision score (Per class): [0.30630631 0.68846154 0.42176871]
Recall score (Per class): [0.45033113 0.39082969 0.59235669]
Epoch: 2/10
Epoch [02/10] Step [000/506]: cls_loss=0.3034
Epoch [02/10] Step [005/506]: cls_loss=0.3

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


French Test: 

Validation loss:  1.6949741840362549
Accuracy: 0.5150060024009604
F1 score (Macro): 0.47015485637882115
F1 score (Per class): [0.2829582  0.52881356 0.59869281]
Precision score (Per class): [0.25       0.50160772 0.66184971]
Recall score (Per class): [0.32592593 0.55913978 0.54653938]
German Test:
Validation loss:  1.2251203060150146
Accuracy: 0.5691747572815534
F1 score (Macro): 0.5021985918868412
F1 score (Per class): [0.35664336 0.47482014 0.67513228]
Precision score (Per class): [0.28651685 0.39919355 0.80150754]
Recall score (Per class): [0.47222222 0.58579882 0.58318099]
Italian Test: 

Validation loss:  1.7107231616973877
Accuracy: 0.5102925243770314
F1 score (Macro): 0.47691657592180836
F1 score (Per class): [0.37790698 0.61751152 0.43533123]
Precision score (Per class): [0.33678756 0.65365854 0.43125   ]
Recall score (Per class): [0.43046358 0.58515284 0.43949045]
Epoch: 3/10
Epoch [03/10] Step [000/506]: cls_loss=0.1806
Epoch [03/10] Step [005/506]: cls_loss=0.

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


French Test: 

Validation loss:  1.7718008756637573
Accuracy: 0.5366146458583433
F1 score (Macro): 0.4821961622791135
F1 score (Per class): [0.30872483 0.49710983 0.64075383]
Precision score (Per class): [0.28220859 0.5375     0.63255814]
Recall score (Per class): [0.34074074 0.46236559 0.64916468]
German Test:
Validation loss:  1.2511718273162842
Accuracy: 0.6104368932038835
F1 score (Macro): 0.5161901151742578
F1 score (Per class): [0.31944444 0.4957265  0.73339941]
Precision score (Per class): [0.25555556 0.47802198 0.8008658 ]
Recall score (Per class): [0.42592593 0.5147929  0.67641682]
Italian Test: 

Validation loss:  2.0755879878997803
Accuracy: 0.48537378114842905
F1 score (Macro): 0.4548150075702762
F1 score (Per class): [0.33116883 0.52400549 0.5092707 ]
Precision score (Per class): [0.32484076 0.70479705 0.41616162]
Recall score (Per class): [0.33774834 0.41703057 0.65605096]
Epoch: 4/10
Epoch [04/10] Step [000/506]: cls_loss=0.1482
Epoch [04/10] Step [005/506]: cls_loss=0.0

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


French Test: 

Validation loss:  1.8419431447982788
Accuracy: 0.5294117647058824
F1 score (Macro): 0.4959863088299669
F1 score (Per class): [0.33742331 0.56230032 0.58823529]
Precision score (Per class): [0.28795812 0.50720461 0.71186441]
Recall score (Per class): [0.40740741 0.63082437 0.50119332]
German Test:
Validation loss:  1.4090386629104614
Accuracy: 0.5934466019417476
F1 score (Macro): 0.5104546800975859
F1 score (Per class): [0.33201581 0.49187935 0.70746888]
Precision score (Per class): [0.28965517 0.40458015 0.8177458 ]
Recall score (Per class): [0.38888889 0.62721893 0.62340037]
Italian Test: 

Validation loss:  1.7900627851486206
Accuracy: 0.5222101841820151
F1 score (Macro): 0.4761883359197682
F1 score (Per class): [0.34650456 0.64088398 0.44117647]
Precision score (Per class): [0.32022472 0.64876957 0.45302013]
Recall score (Per class): [0.37748344 0.63318777 0.42993631]
Epoch: 5/10
Epoch [05/10] Step [000/506]: cls_loss=0.1399
Epoch [05/10] Step [005/506]: cls_loss=0.07

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


French Test: 

Validation loss:  2.495729446411133
Accuracy: 0.48619447779111646
F1 score (Macro): 0.4566990796878401
F1 score (Per class): [0.3125     0.57746479 0.48013245]
Precision score (Per class): [0.25345622 0.47563805 0.78378378]
Recall score (Per class): [0.40740741 0.73476703 0.34606205]
German Test:
Validation loss:  2.2633652687072754
Accuracy: 0.5024271844660194
F1 score (Macro): 0.46075182949387755
F1 score (Per class): [0.3081761  0.48627451 0.58780488]
Precision score (Per class): [0.23333333 0.36363636 0.88278388]
Recall score (Per class): [0.4537037  0.73372781 0.44058501]
Italian Test: 

Validation loss:  2.3215184211730957
Accuracy: 0.5037919826652221
F1 score (Macro): 0.4611306805642205
F1 score (Per class): [0.40198511 0.63577586 0.34563107]
Precision score (Per class): [0.32142857 0.62765957 0.44278607]
Recall score (Per class): [0.53642384 0.6441048  0.28343949]
Epoch: 6/10
Epoch [06/10] Step [000/506]: cls_loss=0.0223
Epoch [06/10] Step [005/506]: cls_loss=0.0

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


French Test: 

Validation loss:  3.070358991622925
Accuracy: 0.4837935174069628
F1 score (Macro): 0.47532232108350975
F1 score (Per class): [0.35341365 0.51072125 0.56183206]
Precision score (Per class): [0.24242424 0.55982906 0.77966102]
Recall score (Per class): [0.65185185 0.46953405 0.43914081]
German Test:
Validation loss:  2.619374990463257
Accuracy: 0.508495145631068
F1 score (Macro): 0.47449455606514723
F1 score (Per class): [0.31910112 0.48863636 0.61574618]
Precision score (Per class): [0.21068249 0.46994536 0.86184211]
Recall score (Per class): [0.65740741 0.50887574 0.47897623]
Italian Test: 

Validation loss:  3.4660730361938477
Accuracy: 0.419284940411701
F1 score (Macro): 0.41581883691343874
F1 score (Per class): [0.38596491 0.46906475 0.39242685]
Precision score (Per class): [0.26252983 0.68776371 0.42696629]
Recall score (Per class): [0.72847682 0.3558952  0.36305732]
Epoch: 7/10
Epoch [07/10] Step [000/506]: cls_loss=0.0052
Epoch [07/10] Step [005/506]: cls_loss=0.053

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


French Test: 

Validation loss:  2.591560125350952
Accuracy: 0.4957983193277311
F1 score (Macro): 0.46678525539646687
F1 score (Per class): [0.32957111 0.45217391 0.61861075]
Precision score (Per class): [0.23701299 0.57458564 0.68604651]
Recall score (Per class): [0.54074074 0.37275986 0.56324582]
German Test:
Validation loss:  1.94314444065094
Accuracy: 0.5728155339805825
F1 score (Macro): 0.49224644830863484
F1 score (Per class): [0.33160622 0.43598616 0.70914697]
Precision score (Per class): [0.23021583 0.525      0.80985915]
Recall score (Per class): [0.59259259 0.37278107 0.63071298]
Italian Test: 

Validation loss:  2.9077720642089844
Accuracy: 0.4442036836403034
F1 score (Macro): 0.4362126058078477
F1 score (Per class): [0.39112903 0.52835408 0.3891547 ]
Precision score (Per class): [0.28115942 0.72075472 0.38977636]
Recall score (Per class): [0.64238411 0.41703057 0.38853503]
Epoch: 8/10
Epoch [08/10] Step [000/506]: cls_loss=0.0309
Epoch [08/10] Step [005/506]: cls_loss=0.161

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


French Test: 

Validation loss:  2.586707830429077
Accuracy: 0.5090036014405762
F1 score (Macro): 0.48977902713854443
F1 score (Per class): [0.35714286 0.5271028  0.58509142]
Precision score (Per class): [0.26315789 0.55078125 0.71232877]
Recall score (Per class): [0.55555556 0.50537634 0.49642005]
German Test:
Validation loss:  1.897445559501648
Accuracy: 0.5825242718446602
F1 score (Macro): 0.4880255861926262
F1 score (Per class): [0.31944444 0.42818428 0.71644803]
Precision score (Per class): [0.25555556 0.395      0.79954955]
Recall score (Per class): [0.42592593 0.46745562 0.64899452]
Italian Test: 

Validation loss:  3.004633665084839
Accuracy: 0.4452871072589382
F1 score (Macro): 0.431499701848539
F1 score (Per class): [0.375      0.53488372 0.38461538]
Precision score (Per class): [0.28282828 0.65506329 0.38709677]
Recall score (Per class): [0.55629139 0.45196507 0.38216561]
Epoch: 9/10
Epoch [09/10] Step [000/506]: cls_loss=0.0205
Epoch [09/10] Step [005/506]: cls_loss=0.0297


Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


French Test: 

Validation loss:  2.974646806716919
Accuracy: 0.4417767106842737
F1 score (Macro): 0.43821037528627027
F1 score (Per class): [0.33399602 0.51492537 0.46570973]
Precision score (Per class): [0.22826087 0.53696498 0.70192308]
Recall score (Per class): [0.62222222 0.49462366 0.34844869]
German Test:
Validation loss:  2.328171968460083
Accuracy: 0.5315533980582524
F1 score (Macro): 0.49159179737695347
F1 score (Per class): [0.3254717  0.50857143 0.64073227]
Precision score (Per class): [0.21835443 0.49171271 0.85626911]
Recall score (Per class): [0.63888889 0.52662722 0.511883  ]
Italian Test: 

Validation loss:  2.801257848739624
Accuracy: 0.447453954496208
F1 score (Macro): 0.4306486992835978
F1 score (Per class): [0.37924152 0.55683269 0.35587189]
Precision score (Per class): [0.27142857 0.67076923 0.40322581]
Recall score (Per class): [0.62913907 0.47598253 0.31847134]


In [22]:
src_encoder, bi_lstm_classifier = pretrain(train_dataloader)

Epoch: 0/10
Epoch [00/10] Step [000/127]: cls_loss=1.0818
Epoch [00/10] Step [005/127]: cls_loss=1.0061
Epoch [00/10] Step [010/127]: cls_loss=0.8443
Epoch [00/10] Step [015/127]: cls_loss=1.0951
Epoch [00/10] Step [020/127]: cls_loss=0.9052
Epoch [00/10] Step [025/127]: cls_loss=0.7875
Epoch [00/10] Step [030/127]: cls_loss=0.8519
Epoch [00/10] Step [035/127]: cls_loss=0.9050
Epoch [00/10] Step [040/127]: cls_loss=0.8261
Epoch [00/10] Step [045/127]: cls_loss=0.9965
Epoch [00/10] Step [050/127]: cls_loss=0.9563
Epoch [00/10] Step [055/127]: cls_loss=0.6649
Epoch [00/10] Step [060/127]: cls_loss=0.8563
Epoch [00/10] Step [065/127]: cls_loss=0.8909
Epoch [00/10] Step [070/127]: cls_loss=0.6137
Epoch [00/10] Step [075/127]: cls_loss=0.8152
Epoch [00/10] Step [080/127]: cls_loss=0.5606
Epoch [00/10] Step [085/127]: cls_loss=0.6402
Epoch [00/10] Step [090/127]: cls_loss=0.7340
Epoch [00/10] Step [095/127]: cls_loss=0.7308
Epoch [00/10] Step [100/127]: cls_loss=0.5439
Epoch [00/10] Step [10

Epoch [05/10] Step [070/127]: cls_loss=0.0204
Epoch [05/10] Step [075/127]: cls_loss=0.0360
Epoch [05/10] Step [080/127]: cls_loss=0.1259
Epoch [05/10] Step [085/127]: cls_loss=0.0680
Epoch [05/10] Step [090/127]: cls_loss=0.5151
Epoch [05/10] Step [095/127]: cls_loss=0.0298
Epoch [05/10] Step [100/127]: cls_loss=0.0233
Epoch [05/10] Step [105/127]: cls_loss=0.1870
Epoch [05/10] Step [110/127]: cls_loss=0.0199
Epoch [05/10] Step [115/127]: cls_loss=0.1852
Epoch [05/10] Step [120/127]: cls_loss=0.0169
Epoch [05/10] Step [125/127]: cls_loss=0.0328
At the end of Epoch: 5
Validation loss:  0.8925796747207642
Accuracy: 0.7572383073496659
F1 score (Macro): 0.7182298040012317
F1 score (Per class): [0.71698113 0.60638298 0.8313253 ]
Precision score (Per class): [0.66086957 0.7125     0.81496063]
Recall score (Per class): [0.78350515 0.52777778 0.84836066]
Epoch: 6/10
Epoch [06/10] Step [000/127]: cls_loss=0.0363
Epoch [06/10] Step [005/127]: cls_loss=0.0284
Epoch [06/10] Step [010/127]: cls_lo

In [23]:
torch.save(src_encoder.state_dict(), 'model/src_encoder_12_11_bilstm.pth')
torch.save(bi_lstm_classifier.state_dict(), 'model/bi_lstm_classifier_12_11_bilstm_attention.pth')

In [23]:
torch.save(src_encoder.state_dict(), 'model/src_encoder_12_11_class_weights.pth')
torch.save(src_classifier.state_dict(), 'model/src_classifier_12_11_class_weights.pth')

In [22]:
torch.save(src_encoder.state_dict(), 'model/src_encoder_class_weights_oversampling.pth')
torch.save(src_classifier.state_dict(), 'model/src_classifier_class_weights_oversampling.pth')

In [18]:
torch.save(src_encoder.state_dict(), 'model/src_encoder_12_12_4.pth')
torch.save(src_classifier.state_dict(), 'model/src_classifier_12_12_4.pth')

In [32]:
torch.save(src_encoder.state_dict(), 'model/src_encoder.pth')
torch.save(src_classifier.state_dict(), 'model/src_classifier.pth')

In [20]:
src_encoder.load_state_dict(torch.load("model/pretrain/src_encoder_12_11_bt_class_weights_epoch_0.pth"))
src_classifier.load_state_dict(torch.load("model/pretrain/src_classifier_12_11_bt_class_weights_epoch_0.pth"))

<All keys matched successfully>

In [21]:
tgt_encoder = BertEncoder().to(device)
tgt_encoder.load_state_dict(src_encoder.state_dict())
max_grad_norm = 1.0

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [15]:
#src_classifier_11
for name, param in src_classifier.state_dict().items():
    if name == 'classifier.weight':
        print(param)

tensor([[ 0.0251,  0.0127,  0.0212,  ..., -0.0266, -0.0276,  0.0049],
        [-0.0043,  0.0267,  0.0187,  ...,  0.0135,  0.0216, -0.0220],
        [-0.0293, -0.0226,  0.0256,  ...,  0.0311, -0.0212, -0.0123]])


In [15]:
#src_classifier_11_2
for name, param in src_classifier.state_dict().items():
    if name == 'classifier.weight':
        print(param)

tensor([[ 0.0251,  0.0127,  0.0212,  ..., -0.0266, -0.0276,  0.0049],
        [-0.0043,  0.0267,  0.0187,  ...,  0.0135,  0.0216, -0.0220],
        [-0.0293, -0.0226,  0.0256,  ...,  0.0311, -0.0212, -0.0123]])


In [30]:
french_iindices, french_clabels, french_ipreds = check_errors(tgt_encoder, src_classifier, french_dataloader)

In [55]:
french_incorrect_examples = vaccin_eu_french.iloc[french_iindices]
french_incorrect_examples.reset_index(inplace=True)

labels_inv_encoding = {
    0: "negative",
    1: "neutral",
    2: "positive"
}

french_ipreds = pd.Series(french_ipreds)
french_clables = pd.Series(french_clabels)
french_incorrect_examples['preds'] = french_ipreds
french_incorrect_examples['labels'] = french_clabels
french_incorrect_examples['preds'] = french_incorrect_examples['preds'].map(labels_inv_encoding)
french_incorrect_examples['labels'] = french_incorrect_examples['labels'].map(labels_inv_encoding)
french_incorrect_examples.tail()

Unnamed: 0.1,index,Unnamed: 0,tweet,stance,processed_tweet,preds,labels
376,819,984,"Donc à Nice, on ""joue"" avec le #COVID19 en ess...",neutral,"Donc Nice, on ""joue"" avec le COVID19 en essaya...",positive,neutral
377,824,990,"Mauricette, veux-tu m'épouser ?\n- T'es vaccin...",positive,"Mauricette, veux-tu m'pouser ? - T'es vaccin ?...",negative,positive
378,825,991,Le seul pays qui se foire sur les vaccinations...,positive,Le seul pays qui se foire sur les vaccinations...,negative,positive
379,827,994,N'hésitez pas à vous faire vacciner : https://...,positive,N'hsitez pas vous faire vacciner,negative,positive
380,829,996,@ViteMaDose_off puisque astrazeneca n’est pas ...,positive,puisque astrazeneca nest pas disponible aux mo...,negative,positive


In [56]:
french_incorrect_examples.to_csv('processed_datasets/french_incorrect_preds.csv')

In [24]:
print("French Test: \n")
evaluate_validation(tgt_encoder, src_classifier, french_dataloader)
print("German Test:")
evaluate_validation(tgt_encoder, src_classifier, german_dataloader)
print("Italian Test: \n")
evaluate_validation(tgt_encoder, src_classifier, italian_dataloader)

French Test: 

Validation loss:  1.966426968574524
Accuracy: 0.4489795918367347
F1 score (Macro): 0.42400971292489986
F1 score (Per class): [0.32793522 0.37681159 0.56728232]
Precision score (Per class): [0.22562674 0.57777778 0.63421829]
Recall score (Per class): [0.6        0.27956989 0.51312649]
German Test:
Validation loss:  1.4950343370437622
Accuracy: 0.5327669902912622
F1 score (Macro): 0.4788164594568169
F1 score (Per class): [0.30392157 0.48101266 0.65151515]
Precision score (Per class): [0.20666667 0.5170068  0.79840849]
Recall score (Per class): [0.57407407 0.44970414 0.55027422]
Italian Test: 

Validation loss:  2.1447253227233887
Accuracy: 0.4247020585048754
F1 score (Macro): 0.42398383226545033
F1 score (Per class): [0.41338583 0.4338118  0.42475387]
Precision score (Per class): [0.29411765 0.80473373 0.38035264]
Recall score (Per class): [0.69536424 0.29694323 0.48089172]


In [20]:
# contrastive loss on backtranslated data
print("French Test: \n")
evaluate_validation(tgt_encoder, src_classifier, french_dataloader)
print("German Test:")
evaluate_validation(tgt_encoder, src_classifier, german_dataloader)
print("Italian Test: \n")
evaluate_validation(tgt_encoder, src_classifier, italian_dataloader)

French Test: 

Validation loss:  2.673867702484131
Accuracy: 0.56062424969988
F1 score (Macro): 0.5059018539262339
F1 score (Per class): [0.31939163 0.55782313 0.6404908 ]
Precision score (Per class): [0.328125   0.53074434 0.65909091]
Recall score (Per class): [0.31111111 0.58781362 0.62291169]
German Test:
Validation loss:  1.8969080448150635
Accuracy: 0.6468446601941747
F1 score (Macro): 0.5025464720522431
F1 score (Per class): [0.25301205 0.48309179 0.77153558]
Precision score (Per class): [0.36206897 0.40816327 0.79078695]
Recall score (Per class): [0.19444444 0.59171598 0.75319927]
Italian Test: 

Validation loss:  2.790982723236084
Accuracy: 0.5222101841820151
F1 score (Macro): 0.4756129016654886
F1 score (Per class): [0.32472325 0.60839161 0.49372385]
Precision score (Per class): [0.36666667 0.6525     0.43920596]
Recall score (Per class): [0.29139073 0.569869   0.56369427]


In [15]:
print("French Test: \n")
evaluate_validation(tgt_encoder, src_classifier, french_dataloader)
print("German Test:")
evaluate_validation(tgt_encoder, src_classifier, german_dataloader)
print("Italian Test: \n")
evaluate_validation(tgt_encoder, src_classifier, italian_dataloader)

French Test: 

Validation loss:  1.5251106023788452
Accuracy: 0.5414165666266506
F1 score (Macro): 0.4365032698567224
F1 score (Per class): [0.272      0.36040609 0.67710372]
Precision score (Per class): [0.29565217 0.6173913  0.57379768]
Recall score (Per class): [0.25185185 0.25448029 0.82577566]
German Test:
Validation loss:  0.9986060261726379
Accuracy: 0.6929611650485437
F1 score (Macro): 0.49419930366650383
F1 score (Per class): [0.25988701 0.40625    0.81646091]
Precision score (Per class): [0.33333333 0.59770115 0.74251497]
Recall score (Per class): [0.21296296 0.30769231 0.90676417]
Italian Test: 

Validation loss:  2.0272860527038574
Accuracy: 0.39978331527627303
F1 score (Macro): 0.3516716068985614
F1 score (Per class): [0.27540984 0.27106227 0.50854271]
Precision score (Per class): [0.27272727 0.84090909 0.37151248]
Recall score (Per class): [0.2781457  0.16157205 0.80573248]


In [22]:
adapt_epochs = 10

In [23]:
def adapt(src_encoder, discriminator,
          src_classifier, src_data_loader, tgt_data_train_loader):
    """Train encoder for target language."""
    
    global tgt_encoder
    
    src_encoder.eval()
#     bi_lstm_classifier.eval()
    src_classifier.eval()
    tgt_encoder.train()
    discriminator.train()

    # setup criterion and optimizer
#     BCELoss = nn.BCELoss()
    BCELoss = nn.BCEWithLogitsLoss()
    KLDivLoss = nn.KLDivLoss(reduction='batchmean')
    optimizer_G = torch.optim.AdamW(tgt_encoder.parameters(), lr=d_learning_rate)
    optimizer_D = torch.optim.AdamW(discriminator.parameters(), lr=d_learning_rate)
    len_data_loader = min(len(src_data_loader), len(tgt_data_train_loader))

    for epoch in range(adapt_epochs):
        print(f"Epoch: {epoch}/{adapt_epochs}")
        # zip source and target data pair
        data_zip = enumerate(zip(src_data_loader, tgt_data_train_loader))
        for step, ((inputs_src, src_mask, _), (inputs_tgt, tgt_mask)) in data_zip:
            inputs_src = inputs_src.to(device)
            src_mask = src_mask.to(device)

            inputs_tgt = inputs_tgt.to(device)
            tgt_mask = tgt_mask.to(device)

            # zero gradients for optimizer
            optimizer_D.zero_grad()

            # extract and concat features
            with torch.no_grad():
                feat_src = src_encoder(inputs_src, src_mask)
#                 feat_src_cls = feat_src[:, 0, :]
            feat_src_tgt = tgt_encoder(inputs_src, src_mask)
#             feat_src_tgt_cls = feat_src_tgt[:, 0, :]
            feat_tgt = tgt_encoder(inputs_tgt, tgt_mask)
#             feat_tgt_cls = feat_tgt[:, 0, :]
#             feat_concat = torch.cat((feat_src_tgt_cls, feat_tgt_cls), 0)
            feat_concat = torch.cat((feat_src_tgt, feat_tgt), 0)

            # predict on discriminator
            pred_concat = discriminator(feat_concat.detach())

            label_src = torch.ones(feat_src_tgt.size(0)).to(device).unsqueeze(1)
            label_tgt = torch.zeros(feat_tgt.size(0)).to(device).unsqueeze(1)
            label_concat = torch.cat((label_src, label_tgt), 0)

            # compute loss for discriminator
            dis_loss = BCELoss(pred_concat, label_concat)
            dis_loss.backward()

            for p in discriminator.parameters():
                p.data.clamp_(-clip_value, clip_value)
            # optimize discriminator
            optimizer_D.step()

            pred_cls = torch.squeeze(pred_concat.max(1)[1])
            acc = (pred_cls == label_concat).float().mean()

            # zero gradients for optimizer
            optimizer_G.zero_grad()
            T = temperature

            # predict on discriminator
#             pred_tgt = discriminator(feat_tgt_cls)
            pred_tgt = discriminator(feat_tgt)

            # logits for KL-divergence
            with torch.no_grad():
                src_prob = F.softmax(src_classifier(feat_src) / T, dim=-1)
            tgt_prob = F.log_softmax(src_classifier(feat_src_tgt) / T, dim=-1)
            kd_loss = KLDivLoss(tgt_prob, src_prob.detach()) * T * T

            # compute loss for target encoder
            try:
                gen_loss = BCELoss(pred_tgt.to(device), torch.ones(pred_tgt.size()).to(device))
            except:
                print(label_src.shape, "\n")
                print(feat_src_tgt.shape, "\n")
                print(feat_tgt.shape, "\n")
            loss_tgt = alpha * gen_loss + beta * kd_loss
            #loss_tgt = gen_loss
            loss_tgt.backward()
            torch.nn.utils.clip_grad_norm_(tgt_encoder.parameters(), max_grad_norm)
            # optimize target encoder
            optimizer_G.step()

            if (step) % adapt_log_step == 0:
                print("Epoch [%.2d/%.2d] Step [%.3d/%.3d]: "
                      "acc=%.4f g_loss=%.4f d_loss=%.4f kd_loss=%.4f"
                      % (epoch,
                         adapt_epochs,
                         step,
                         len_data_loader,
                         acc.item(),
                         gen_loss.item(),
                         dis_loss.item(),
                         kd_loss.item()))
#         print("French Test: \n")
#         evaluate_validation(tgt_encoder, src_classifier, french_dataloader)
#         print("German Test:")
#         evaluate_validation(tgt_encoder, src_classifier, german_dataloader)
        print("Italian Test: \n")
        evaluate_validation(tgt_encoder, src_classifier, italian_dataloader)
        torch.save(tgt_encoder.state_dict(), f"model/adapt/italian/tgt_encoder_12_11_bt_class_weights_epoch_{epoch}.pth")
        

    return tgt_encoder

In [24]:
# bilstm italian
tgt_encoder = adapt(src_encoder, discriminator,
                    src_classifier, train_dataloader, train_translated_dataloader)

Epoch: 0/10
Epoch [00/10] Step [000/161]: acc=0.5000 g_loss=0.4752 d_loss=0.7234 kd_loss=0.0269
Epoch [00/10] Step [005/161]: acc=0.5000 g_loss=0.4757 d_loss=0.7236 kd_loss=0.0180
Epoch [00/10] Step [010/161]: acc=0.5000 g_loss=0.4760 d_loss=0.7235 kd_loss=0.0462
Epoch [00/10] Step [015/161]: acc=0.5000 g_loss=0.4763 d_loss=0.7232 kd_loss=0.0384
Epoch [00/10] Step [020/161]: acc=0.5000 g_loss=0.4767 d_loss=0.7231 kd_loss=0.0223
Epoch [00/10] Step [025/161]: acc=0.5000 g_loss=0.4770 d_loss=0.7231 kd_loss=0.0172
Epoch [00/10] Step [030/161]: acc=0.5000 g_loss=0.4772 d_loss=0.7230 kd_loss=0.0331
Epoch [00/10] Step [035/161]: acc=0.5000 g_loss=0.4776 d_loss=0.7230 kd_loss=0.0157
Epoch [00/10] Step [040/161]: acc=0.5000 g_loss=0.4780 d_loss=0.7227 kd_loss=0.0270
Epoch [00/10] Step [045/161]: acc=0.5000 g_loss=0.4782 d_loss=0.7225 kd_loss=0.0345
Epoch [00/10] Step [050/161]: acc=0.5000 g_loss=0.4784 d_loss=0.7225 kd_loss=0.0393
Epoch [00/10] Step [055/161]: acc=0.5000 g_loss=0.4785 d_loss=0.

Epoch [02/10] Step [120/161]: acc=0.5000 g_loss=0.4887 d_loss=0.7200 kd_loss=0.0321
Epoch [02/10] Step [125/161]: acc=0.5000 g_loss=0.4888 d_loss=0.7197 kd_loss=0.0162
Epoch [02/10] Step [130/161]: acc=0.5000 g_loss=0.4892 d_loss=0.7198 kd_loss=0.0062
Epoch [02/10] Step [135/161]: acc=0.5000 g_loss=0.4886 d_loss=0.7202 kd_loss=0.0188
Epoch [02/10] Step [140/161]: acc=0.5000 g_loss=0.4900 d_loss=0.7187 kd_loss=0.0203
Epoch [02/10] Step [145/161]: acc=0.5000 g_loss=0.4909 d_loss=0.7187 kd_loss=0.0134
Epoch [02/10] Step [150/161]: acc=0.5000 g_loss=0.4911 d_loss=0.7189 kd_loss=0.0117
Epoch [02/10] Step [155/161]: acc=0.5000 g_loss=0.4912 d_loss=0.7184 kd_loss=0.0159
Epoch [02/10] Step [160/161]: acc=0.2558 g_loss=0.4855 d_loss=0.6095 kd_loss=0.0141
Italian Test: 

Validation loss:  1.157983422279358
Accuracy: 0.437703141928494
F1 score (Macro): 0.42793598572841135
F1 score (Per class): [0.35665914 0.4719764  0.45517241]
Precision score (Per class): [0.27054795 0.72727273 0.40145985]
Recal

Epoch [05/10] Step [060/161]: acc=0.5000 g_loss=0.4835 d_loss=0.7180 kd_loss=0.0198
Epoch [05/10] Step [065/161]: acc=0.5000 g_loss=0.4794 d_loss=0.7212 kd_loss=0.0203
Epoch [05/10] Step [070/161]: acc=0.5000 g_loss=0.4807 d_loss=0.7228 kd_loss=0.0180
Epoch [05/10] Step [075/161]: acc=0.5000 g_loss=0.4875 d_loss=0.7148 kd_loss=0.0076
Epoch [05/10] Step [080/161]: acc=0.5000 g_loss=0.4861 d_loss=0.7187 kd_loss=0.0186
Epoch [05/10] Step [085/161]: acc=0.5000 g_loss=0.4899 d_loss=0.7194 kd_loss=0.0074
Epoch [05/10] Step [090/161]: acc=0.5000 g_loss=0.4934 d_loss=0.7170 kd_loss=0.0082
Epoch [05/10] Step [095/161]: acc=0.5000 g_loss=0.4950 d_loss=0.7166 kd_loss=0.0133
Epoch [05/10] Step [100/161]: acc=0.5000 g_loss=0.4998 d_loss=0.7147 kd_loss=0.0118
Epoch [05/10] Step [105/161]: acc=0.5000 g_loss=0.5029 d_loss=0.7136 kd_loss=0.0136
Epoch [05/10] Step [110/161]: acc=0.5000 g_loss=0.5048 d_loss=0.7146 kd_loss=0.0213
Epoch [05/10] Step [115/161]: acc=0.5000 g_loss=0.5042 d_loss=0.7155 kd_loss

Epoch: 8/10
Epoch [08/10] Step [000/161]: acc=0.5000 g_loss=0.4909 d_loss=0.7211 kd_loss=0.0101
Epoch [08/10] Step [005/161]: acc=0.5000 g_loss=0.4910 d_loss=0.7194 kd_loss=0.0147
Epoch [08/10] Step [010/161]: acc=0.5000 g_loss=0.4890 d_loss=0.7185 kd_loss=0.0080
Epoch [08/10] Step [015/161]: acc=0.5000 g_loss=0.4887 d_loss=0.7221 kd_loss=0.0100
Epoch [08/10] Step [020/161]: acc=0.5000 g_loss=0.4929 d_loss=0.7183 kd_loss=0.0134
Epoch [08/10] Step [025/161]: acc=0.5000 g_loss=0.5016 d_loss=0.7120 kd_loss=0.0150
Epoch [08/10] Step [030/161]: acc=0.5000 g_loss=0.5008 d_loss=0.7128 kd_loss=0.0200
Epoch [08/10] Step [035/161]: acc=0.5000 g_loss=0.5025 d_loss=0.7163 kd_loss=0.0082
Epoch [08/10] Step [040/161]: acc=0.5000 g_loss=0.4962 d_loss=0.7204 kd_loss=0.0122
Epoch [08/10] Step [045/161]: acc=0.5000 g_loss=0.4896 d_loss=0.7224 kd_loss=0.0199
Epoch [08/10] Step [050/161]: acc=0.5000 g_loss=0.4956 d_loss=0.7157 kd_loss=0.0219
Epoch [08/10] Step [055/161]: acc=0.5000 g_loss=0.4959 d_loss=0.

In [24]:
tgt_encoder = adapt(src_encoder, discriminator,
                    src_classifier, train_dataloader, train_translated_dataloader)

Epoch: 0/10
Epoch [00/10] Step [000/127]: acc=0.5000 g_loss=0.5137 d_loss=0.7265 kd_loss=0.1420
Epoch [00/10] Step [005/127]: acc=0.5000 g_loss=0.5138 d_loss=0.7220 kd_loss=0.0734
Epoch [00/10] Step [010/127]: acc=0.5000 g_loss=0.5151 d_loss=0.7201 kd_loss=0.0795
Epoch [00/10] Step [015/127]: acc=0.5000 g_loss=0.5157 d_loss=0.7167 kd_loss=0.1060
Epoch [00/10] Step [020/127]: acc=0.5000 g_loss=0.5165 d_loss=0.7133 kd_loss=0.0770
Epoch [00/10] Step [025/127]: acc=0.5000 g_loss=0.5100 d_loss=0.7138 kd_loss=0.0945
Epoch [00/10] Step [030/127]: acc=0.5000 g_loss=0.5087 d_loss=0.7130 kd_loss=0.0648
Epoch [00/10] Step [035/127]: acc=0.5000 g_loss=0.5100 d_loss=0.7122 kd_loss=0.1691
Epoch [00/10] Step [040/127]: acc=0.5000 g_loss=0.5082 d_loss=0.7145 kd_loss=0.0583
Epoch [00/10] Step [045/127]: acc=0.5000 g_loss=0.5012 d_loss=0.7211 kd_loss=0.1341
Epoch [00/10] Step [050/127]: acc=0.5000 g_loss=0.5015 d_loss=0.7185 kd_loss=0.1420
Epoch [00/10] Step [055/127]: acc=0.5000 g_loss=0.5052 d_loss=0.

Epoch [03/10] Step [045/127]: acc=0.5000 g_loss=0.5589 d_loss=0.7069 kd_loss=0.0593
Epoch [03/10] Step [050/127]: acc=0.5000 g_loss=0.5552 d_loss=0.7102 kd_loss=0.0627
Epoch [03/10] Step [055/127]: acc=0.5000 g_loss=0.5406 d_loss=0.7076 kd_loss=0.0339
Epoch [03/10] Step [060/127]: acc=0.5000 g_loss=0.5281 d_loss=0.7271 kd_loss=0.0445
Epoch [03/10] Step [065/127]: acc=0.5000 g_loss=0.5320 d_loss=0.7396 kd_loss=0.0255
Epoch [03/10] Step [070/127]: acc=0.5000 g_loss=0.5142 d_loss=0.7431 kd_loss=0.0498
Epoch [03/10] Step [075/127]: acc=0.5000 g_loss=0.5080 d_loss=0.7475 kd_loss=0.0196
Epoch [03/10] Step [080/127]: acc=0.5000 g_loss=0.5132 d_loss=0.7497 kd_loss=0.0263
Epoch [03/10] Step [085/127]: acc=0.5000 g_loss=0.5153 d_loss=0.7408 kd_loss=0.0494
Epoch [03/10] Step [090/127]: acc=0.5000 g_loss=0.5186 d_loss=0.7322 kd_loss=0.0404
Epoch [03/10] Step [095/127]: acc=0.5000 g_loss=0.5164 d_loss=0.7390 kd_loss=0.0192
Epoch [03/10] Step [100/127]: acc=0.5000 g_loss=0.5156 d_loss=0.7325 kd_loss

Epoch [06/10] Step [090/127]: acc=0.5000 g_loss=0.5081 d_loss=0.7148 kd_loss=0.0333
Epoch [06/10] Step [095/127]: acc=0.5000 g_loss=0.5080 d_loss=0.7149 kd_loss=0.0187
Epoch [06/10] Step [100/127]: acc=0.5000 g_loss=0.5083 d_loss=0.7151 kd_loss=0.0194
Epoch [06/10] Step [105/127]: acc=0.5000 g_loss=0.5181 d_loss=0.7097 kd_loss=0.0207
Epoch [06/10] Step [110/127]: acc=0.5000 g_loss=0.5106 d_loss=0.7184 kd_loss=0.0275
Epoch [06/10] Step [115/127]: acc=0.5000 g_loss=0.5102 d_loss=0.7134 kd_loss=0.0160
Epoch [06/10] Step [120/127]: acc=0.5000 g_loss=0.5038 d_loss=0.7174 kd_loss=0.0222
Epoch [06/10] Step [125/127]: acc=0.5000 g_loss=0.5135 d_loss=0.7167 kd_loss=0.0631
French Test: 

Validation loss:  1.4308415651321411
Accuracy: 0.5546218487394958
F1 score (Macro): 0.48432030425102873
F1 score (Per class): [0.34074074 0.4469526  0.66526758]
Precision score (Per class): [0.34074074 0.60365854 0.59363296]
Recall score (Per class): [0.34074074 0.35483871 0.75656325]
Epoch: 7/10
Epoch [07/10] S

In [23]:
# bilstm german
tgt_encoder = adapt(src_encoder, discriminator,
                    bi_lstm_classifier, train_dataloader, train_translated_dataloader)

Epoch: 0/20
Epoch [00/20] Step [000/127]: acc=0.5000 g_loss=0.4759 d_loss=0.7195 kd_loss=0.0972
Epoch [00/20] Step [005/127]: acc=0.5000 g_loss=0.4762 d_loss=0.7231 kd_loss=0.0451
Epoch [00/20] Step [010/127]: acc=0.5000 g_loss=0.4765 d_loss=0.7230 kd_loss=0.2035
Epoch [00/20] Step [015/127]: acc=0.5000 g_loss=0.4771 d_loss=0.7224 kd_loss=0.0621
Epoch [00/20] Step [020/127]: acc=0.5000 g_loss=0.4773 d_loss=0.7224 kd_loss=0.1038
Epoch [00/20] Step [025/127]: acc=0.5000 g_loss=0.4773 d_loss=0.7223 kd_loss=0.1079
Epoch [00/20] Step [030/127]: acc=0.5000 g_loss=0.4779 d_loss=0.7220 kd_loss=0.0744
Epoch [00/20] Step [035/127]: acc=0.5000 g_loss=0.4776 d_loss=0.7221 kd_loss=0.0440
Epoch [00/20] Step [040/127]: acc=0.5000 g_loss=0.4779 d_loss=0.7215 kd_loss=0.0323
Epoch [00/20] Step [045/127]: acc=0.5000 g_loss=0.4778 d_loss=0.7216 kd_loss=0.0856
Epoch [00/20] Step [050/127]: acc=0.5000 g_loss=0.4783 d_loss=0.7210 kd_loss=0.0212
Epoch [00/20] Step [055/127]: acc=0.5000 g_loss=0.4786 d_loss=0.

Epoch [03/20] Step [045/127]: acc=0.5000 g_loss=0.5049 d_loss=0.7137 kd_loss=0.0474
Epoch [03/20] Step [050/127]: acc=0.5000 g_loss=0.4997 d_loss=0.7170 kd_loss=0.0356
Epoch [03/20] Step [055/127]: acc=0.5000 g_loss=0.5015 d_loss=0.7159 kd_loss=0.1408
Epoch [03/20] Step [060/127]: acc=0.5000 g_loss=0.4983 d_loss=0.7177 kd_loss=0.0300
Epoch [03/20] Step [065/127]: acc=0.5000 g_loss=0.4964 d_loss=0.7169 kd_loss=0.0544
Epoch [03/20] Step [070/127]: acc=0.5000 g_loss=0.4980 d_loss=0.7150 kd_loss=0.0443
Epoch [03/20] Step [075/127]: acc=0.5000 g_loss=0.4958 d_loss=0.7145 kd_loss=0.0414
Epoch [03/20] Step [080/127]: acc=0.5000 g_loss=0.4934 d_loss=0.7133 kd_loss=0.0045
Epoch [03/20] Step [085/127]: acc=0.5000 g_loss=0.4913 d_loss=0.7175 kd_loss=0.0165
Epoch [03/20] Step [090/127]: acc=0.5000 g_loss=0.4893 d_loss=0.7195 kd_loss=0.0201
Epoch [03/20] Step [095/127]: acc=0.5000 g_loss=0.4892 d_loss=0.7181 kd_loss=0.0154
Epoch [03/20] Step [100/127]: acc=0.5000 g_loss=0.4845 d_loss=0.7238 kd_loss

Epoch [06/20] Step [090/127]: acc=0.5000 g_loss=0.4860 d_loss=0.7209 kd_loss=0.0137
Epoch [06/20] Step [095/127]: acc=0.5000 g_loss=0.4858 d_loss=0.7208 kd_loss=0.0214
Epoch [06/20] Step [100/127]: acc=0.5000 g_loss=0.4858 d_loss=0.7211 kd_loss=0.0593
Epoch [06/20] Step [105/127]: acc=0.5000 g_loss=0.4857 d_loss=0.7208 kd_loss=0.0177
Epoch [06/20] Step [110/127]: acc=0.5000 g_loss=0.4854 d_loss=0.7209 kd_loss=0.0065
Epoch [06/20] Step [115/127]: acc=0.5000 g_loss=0.4853 d_loss=0.7211 kd_loss=0.0076
Epoch [06/20] Step [120/127]: acc=0.5000 g_loss=0.4849 d_loss=0.7213 kd_loss=0.0067
Epoch [06/20] Step [125/127]: acc=0.5000 g_loss=0.4848 d_loss=0.7211 kd_loss=0.0107
German Test:
Validation loss:  1.2111209630966187
Accuracy: 0.6783980582524272
F1 score (Macro): 0.5412046821196905
F1 score (Per class): [0.36363636 0.46315789 0.79681979]
Precision score (Per class): [0.34146341 0.56896552 0.77094017]
Recall score (Per class): [0.38888889 0.39053254 0.82449726]
Epoch: 7/20
Epoch [07/20] Step

Epoch [10/20] Step [000/127]: acc=0.5000 g_loss=0.4907 d_loss=0.7192 kd_loss=0.0129
Epoch [10/20] Step [005/127]: acc=0.5000 g_loss=0.4908 d_loss=0.7192 kd_loss=0.0143
Epoch [10/20] Step [010/127]: acc=0.5000 g_loss=0.4908 d_loss=0.7194 kd_loss=0.0369
Epoch [10/20] Step [015/127]: acc=0.5000 g_loss=0.4910 d_loss=0.7191 kd_loss=0.0087
Epoch [10/20] Step [020/127]: acc=0.5000 g_loss=0.4910 d_loss=0.7192 kd_loss=0.0144
Epoch [10/20] Step [025/127]: acc=0.5000 g_loss=0.4910 d_loss=0.7191 kd_loss=0.0130
Epoch [10/20] Step [030/127]: acc=0.5000 g_loss=0.4910 d_loss=0.7191 kd_loss=0.0288
Epoch [10/20] Step [035/127]: acc=0.5000 g_loss=0.4912 d_loss=0.7191 kd_loss=0.0168
Epoch [10/20] Step [040/127]: acc=0.5000 g_loss=0.4912 d_loss=0.7191 kd_loss=0.0103
Epoch [10/20] Step [045/127]: acc=0.5000 g_loss=0.4911 d_loss=0.7191 kd_loss=0.0083
Epoch [10/20] Step [050/127]: acc=0.5000 g_loss=0.4910 d_loss=0.7192 kd_loss=0.0084
Epoch [10/20] Step [055/127]: acc=0.5000 g_loss=0.4910 d_loss=0.7192 kd_loss

Epoch [13/20] Step [045/127]: acc=0.5000 g_loss=0.4953 d_loss=0.7179 kd_loss=0.0085
Epoch [13/20] Step [050/127]: acc=0.5000 g_loss=0.4957 d_loss=0.7180 kd_loss=0.0049
Epoch [13/20] Step [055/127]: acc=0.5000 g_loss=0.4960 d_loss=0.7178 kd_loss=0.0214
Epoch [13/20] Step [060/127]: acc=0.5000 g_loss=0.4966 d_loss=0.7176 kd_loss=0.0111
Epoch [13/20] Step [065/127]: acc=0.5000 g_loss=0.4968 d_loss=0.7176 kd_loss=0.0143
Epoch [13/20] Step [070/127]: acc=0.5000 g_loss=0.4969 d_loss=0.7175 kd_loss=0.0830
Epoch [13/20] Step [075/127]: acc=0.5000 g_loss=0.4971 d_loss=0.7175 kd_loss=0.0099
Epoch [13/20] Step [080/127]: acc=0.5000 g_loss=0.4971 d_loss=0.7175 kd_loss=0.0077
Epoch [13/20] Step [085/127]: acc=0.5000 g_loss=0.4968 d_loss=0.7176 kd_loss=0.0119
Epoch [13/20] Step [090/127]: acc=0.5000 g_loss=0.4963 d_loss=0.7177 kd_loss=0.0086
Epoch [13/20] Step [095/127]: acc=0.5000 g_loss=0.4959 d_loss=0.7178 kd_loss=0.0172
Epoch [13/20] Step [100/127]: acc=0.5000 g_loss=0.4959 d_loss=0.7178 kd_loss

Epoch [16/20] Step [090/127]: acc=0.5000 g_loss=0.4997 d_loss=0.7168 kd_loss=0.0073
Epoch [16/20] Step [095/127]: acc=0.5000 g_loss=0.4996 d_loss=0.7168 kd_loss=0.0060
Epoch [16/20] Step [100/127]: acc=0.5000 g_loss=0.4995 d_loss=0.7168 kd_loss=0.0081
Epoch [16/20] Step [105/127]: acc=0.5000 g_loss=0.4996 d_loss=0.7168 kd_loss=0.0107
Epoch [16/20] Step [110/127]: acc=0.5000 g_loss=0.4996 d_loss=0.7168 kd_loss=0.0030
Epoch [16/20] Step [115/127]: acc=0.5000 g_loss=0.4995 d_loss=0.7168 kd_loss=0.0039
Epoch [16/20] Step [120/127]: acc=0.5000 g_loss=0.4992 d_loss=0.7169 kd_loss=0.0059
Epoch [16/20] Step [125/127]: acc=0.5000 g_loss=0.4989 d_loss=0.7170 kd_loss=0.0080
German Test:
Validation loss:  1.218925952911377
Accuracy: 0.6735436893203883
F1 score (Macro): 0.5440293690539146
F1 score (Per class): [0.3776824  0.46052632 0.79387939]
Precision score (Per class): [0.352      0.51851852 0.78191489]
Recall score (Per class): [0.40740741 0.41420118 0.80621572]
Epoch: 17/20
Epoch [17/20] Step

In [27]:
tgt_encoder = adapt(src_encoder, discriminator,
                    bi_lstm_classifier, train_dataloader, train_translated_dataloader)

Epoch: 0/10
Epoch [00/10] Step [000/127]: acc=0.5000 g_loss=0.5002 d_loss=0.7170 kd_loss=0.0350
Epoch [00/10] Step [005/127]: acc=0.5000 g_loss=0.4990 d_loss=0.7181 kd_loss=0.0651
Epoch [00/10] Step [010/127]: acc=0.5000 g_loss=0.5015 d_loss=0.7163 kd_loss=0.0548
Epoch [00/10] Step [015/127]: acc=0.5000 g_loss=0.5041 d_loss=0.7150 kd_loss=0.0203
Epoch [00/10] Step [020/127]: acc=0.5000 g_loss=0.5032 d_loss=0.7132 kd_loss=0.0276
Epoch [00/10] Step [025/127]: acc=0.5000 g_loss=0.5040 d_loss=0.7117 kd_loss=0.0560
Epoch [00/10] Step [030/127]: acc=0.5000 g_loss=0.5046 d_loss=0.7135 kd_loss=0.0270
Epoch [00/10] Step [035/127]: acc=0.5000 g_loss=0.5022 d_loss=0.7147 kd_loss=0.0333
Epoch [00/10] Step [040/127]: acc=0.5000 g_loss=0.5003 d_loss=0.7155 kd_loss=0.0091
Epoch [00/10] Step [045/127]: acc=0.5000 g_loss=0.4879 d_loss=0.7244 kd_loss=0.0291
Epoch [00/10] Step [050/127]: acc=0.5000 g_loss=0.4892 d_loss=0.7219 kd_loss=0.0198
Epoch [00/10] Step [055/127]: acc=0.5000 g_loss=0.4887 d_loss=0.

Epoch [03/10] Step [045/127]: acc=0.5000 g_loss=0.4889 d_loss=0.7181 kd_loss=0.0053
Epoch [03/10] Step [050/127]: acc=0.5000 g_loss=0.4888 d_loss=0.7164 kd_loss=0.0044
Epoch [03/10] Step [055/127]: acc=0.5000 g_loss=0.4836 d_loss=0.7209 kd_loss=0.0285
Epoch [03/10] Step [060/127]: acc=0.5000 g_loss=0.4864 d_loss=0.7177 kd_loss=0.0113
Epoch [03/10] Step [065/127]: acc=0.5000 g_loss=0.4856 d_loss=0.7193 kd_loss=0.0078
Epoch [03/10] Step [070/127]: acc=0.5000 g_loss=0.4878 d_loss=0.7170 kd_loss=0.0385
Epoch [03/10] Step [075/127]: acc=0.5000 g_loss=0.4847 d_loss=0.7193 kd_loss=0.0089
Epoch [03/10] Step [080/127]: acc=0.5000 g_loss=0.4885 d_loss=0.7163 kd_loss=0.0056
Epoch [03/10] Step [085/127]: acc=0.5000 g_loss=0.4863 d_loss=0.7183 kd_loss=0.0107
Epoch [03/10] Step [090/127]: acc=0.5000 g_loss=0.4883 d_loss=0.7180 kd_loss=0.0068
Epoch [03/10] Step [095/127]: acc=0.5000 g_loss=0.4898 d_loss=0.7191 kd_loss=0.0063
Epoch [03/10] Step [100/127]: acc=0.5000 g_loss=0.4905 d_loss=0.7174 kd_loss

Epoch [06/10] Step [090/127]: acc=0.5000 g_loss=0.5018 d_loss=0.7205 kd_loss=0.0275
Epoch [06/10] Step [095/127]: acc=0.5000 g_loss=0.4991 d_loss=0.7201 kd_loss=0.0110
Epoch [06/10] Step [100/127]: acc=0.5000 g_loss=0.4953 d_loss=0.7194 kd_loss=0.0215
Epoch [06/10] Step [105/127]: acc=0.5000 g_loss=0.4953 d_loss=0.7190 kd_loss=0.0206
Epoch [06/10] Step [110/127]: acc=0.5000 g_loss=0.4947 d_loss=0.7191 kd_loss=0.0032
Epoch [06/10] Step [115/127]: acc=0.5000 g_loss=0.4956 d_loss=0.7176 kd_loss=0.0039
Epoch [06/10] Step [120/127]: acc=0.5000 g_loss=0.4974 d_loss=0.7193 kd_loss=0.0069
Epoch [06/10] Step [125/127]: acc=0.5000 g_loss=0.5017 d_loss=0.7197 kd_loss=0.0097
French Test: 

Validation loss:  1.763452172279358
Accuracy: 0.5438175270108043
F1 score (Macro): 0.49945463429548415
F1 score (Per class): [0.34640523 0.51086957 0.64108911]
Precision score (Per class): [0.30994152 0.51648352 0.66580977]
Recall score (Per class): [0.39259259 0.50537634 0.61813842]
Epoch: 7/10
Epoch [07/10] St

In [21]:
tgt_encoder = adapt(src_encoder, discriminator,
                    bi_lstm_classifier, train_dataloader, train_translated_dataloader)

Epoch: 0/15
Epoch [00/15] Step [000/146]: acc=0.5000 g_loss=0.7005 d_loss=0.6907 kd_loss=0.3532
Epoch [00/15] Step [005/146]: acc=0.5000 g_loss=0.7021 d_loss=0.6922 kd_loss=0.1627
Epoch [00/15] Step [010/146]: acc=0.5000 g_loss=0.7035 d_loss=0.6921 kd_loss=0.3191
Epoch [00/15] Step [015/146]: acc=0.5000 g_loss=0.7039 d_loss=0.6897 kd_loss=0.1806
Epoch [00/15] Step [020/146]: acc=0.5000 g_loss=0.7065 d_loss=0.6895 kd_loss=0.0698
Epoch [00/15] Step [025/146]: acc=0.5000 g_loss=0.7020 d_loss=0.6930 kd_loss=0.0240
Epoch [00/15] Step [030/146]: acc=0.5000 g_loss=0.7039 d_loss=0.6898 kd_loss=0.1064
Epoch [00/15] Step [035/146]: acc=0.5000 g_loss=0.6994 d_loss=0.6919 kd_loss=0.1433
Epoch [00/15] Step [040/146]: acc=0.5000 g_loss=0.6982 d_loss=0.6918 kd_loss=0.2200
Epoch [00/15] Step [045/146]: acc=0.5000 g_loss=0.6928 d_loss=0.6933 kd_loss=0.1643
Epoch [00/15] Step [050/146]: acc=0.5000 g_loss=0.6909 d_loss=0.6916 kd_loss=0.3013
Epoch [00/15] Step [055/146]: acc=0.5000 g_loss=0.6973 d_loss=0.

Validation loss:  1.8916826248168945
Accuracy: 0.6310679611650486
F1 score (Macro): 0.52744611601506
F1 score (Per class): [0.37162162 0.4604811  0.75023563]
Precision score (Per class): [0.29255319 0.54918033 0.77431907]
Recall score (Per class): [0.50925926 0.3964497  0.72760512]
Epoch: 3/15
Epoch [03/15] Step [000/146]: acc=0.5000 g_loss=0.5983 d_loss=0.7115 kd_loss=0.0591
Epoch [03/15] Step [005/146]: acc=0.5000 g_loss=0.5879 d_loss=0.7148 kd_loss=0.0776
Epoch [03/15] Step [010/146]: acc=0.5000 g_loss=0.5973 d_loss=0.7021 kd_loss=0.1140
Epoch [03/15] Step [015/146]: acc=0.5000 g_loss=0.6240 d_loss=0.6935 kd_loss=0.0547
Epoch [03/15] Step [020/146]: acc=0.5000 g_loss=0.6571 d_loss=0.6787 kd_loss=0.0311
Epoch [03/15] Step [025/146]: acc=0.5000 g_loss=0.6575 d_loss=0.6998 kd_loss=0.0151
Epoch [03/15] Step [030/146]: acc=0.5000 g_loss=0.6930 d_loss=0.6766 kd_loss=0.0577
Epoch [03/15] Step [035/146]: acc=0.5000 g_loss=0.6136 d_loss=0.7287 kd_loss=0.1000
Epoch [03/15] Step [040/146]: acc

Epoch [05/15] Step [135/146]: acc=0.5000 g_loss=0.6655 d_loss=0.6942 kd_loss=0.0619
Epoch [05/15] Step [140/146]: acc=0.5000 g_loss=0.6683 d_loss=0.6842 kd_loss=0.0359
Epoch [05/15] Step [145/146]: acc=0.2727 g_loss=0.6429 d_loss=0.6841 kd_loss=0.0538
German Test:
Validation loss:  1.9258086681365967
Accuracy: 0.6735436893203883
F1 score (Macro): 0.5320129484051602
F1 score (Per class): [0.35193133 0.4535316  0.79057592]
Precision score (Per class): [0.328      0.61       0.75626043]
Recall score (Per class): [0.37962963 0.36094675 0.82815356]
Epoch: 6/15
Epoch [06/15] Step [000/146]: acc=0.5000 g_loss=0.6185 d_loss=0.7186 kd_loss=0.0487
Epoch [06/15] Step [005/146]: acc=0.5000 g_loss=0.6240 d_loss=0.7142 kd_loss=0.0382
Epoch [06/15] Step [010/146]: acc=0.5000 g_loss=0.6346 d_loss=0.7038 kd_loss=0.0237
Epoch [06/15] Step [015/146]: acc=0.5000 g_loss=0.6483 d_loss=0.7053 kd_loss=0.0454
Epoch [06/15] Step [020/146]: acc=0.5000 g_loss=0.6662 d_loss=0.6961 kd_loss=0.0931
Epoch [06/15] Step

Epoch [08/15] Step [120/146]: acc=0.5000 g_loss=0.6714 d_loss=0.6923 kd_loss=0.0713
Epoch [08/15] Step [125/146]: acc=0.5000 g_loss=0.6723 d_loss=0.6935 kd_loss=0.0977
Epoch [08/15] Step [130/146]: acc=0.5000 g_loss=0.6821 d_loss=0.6920 kd_loss=0.0696
Epoch [08/15] Step [135/146]: acc=0.5000 g_loss=0.6815 d_loss=0.6937 kd_loss=0.1262
Epoch [08/15] Step [140/146]: acc=0.5000 g_loss=0.6811 d_loss=0.6895 kd_loss=0.0697
Epoch [08/15] Step [145/146]: acc=0.2727 g_loss=0.6647 d_loss=0.6893 kd_loss=0.0424
German Test:
Validation loss:  2.0403969287872314
Accuracy: 0.6881067961165048
F1 score (Macro): 0.5413818818267494
F1 score (Per class): [0.37614679 0.4469697  0.80102916]
Precision score (Per class): [0.37272727 0.62105263 0.75444265]
Recall score (Per class): [0.37962963 0.34911243 0.85374771]
Epoch: 9/15
Epoch [09/15] Step [000/146]: acc=0.5000 g_loss=0.6525 d_loss=0.7073 kd_loss=0.0415
Epoch [09/15] Step [005/146]: acc=0.5000 g_loss=0.6566 d_loss=0.7038 kd_loss=0.0736
Epoch [09/15] Step

Epoch [11/15] Step [105/146]: acc=0.5000 g_loss=0.6858 d_loss=0.6933 kd_loss=0.0766
Epoch [11/15] Step [110/146]: acc=0.5000 g_loss=0.6911 d_loss=0.6915 kd_loss=0.0172
Epoch [11/15] Step [115/146]: acc=0.5000 g_loss=0.6838 d_loss=0.6933 kd_loss=0.0368
Epoch [11/15] Step [120/146]: acc=0.5000 g_loss=0.6823 d_loss=0.6933 kd_loss=0.0604
Epoch [11/15] Step [125/146]: acc=0.5000 g_loss=0.6852 d_loss=0.6928 kd_loss=0.0232
Epoch [11/15] Step [130/146]: acc=0.5000 g_loss=0.6896 d_loss=0.6935 kd_loss=0.1214
Epoch [11/15] Step [135/146]: acc=0.5000 g_loss=0.6879 d_loss=0.6942 kd_loss=0.0302
Epoch [11/15] Step [140/146]: acc=0.5000 g_loss=0.6891 d_loss=0.6910 kd_loss=0.0436
Epoch [11/15] Step [145/146]: acc=0.2727 g_loss=0.6805 d_loss=0.6924 kd_loss=0.0425
German Test:
Validation loss:  2.0727107524871826
Accuracy: 0.6856796116504854
F1 score (Macro): 0.5282706646750389
F1 score (Per class): [0.34123223 0.44357977 0.8       ]
Precision score (Per class): [0.34951456 0.64772727 0.74565561]
Recall 

Epoch [14/15] Step [090/146]: acc=0.5000 g_loss=0.6908 d_loss=0.6944 kd_loss=0.1178
Epoch [14/15] Step [095/146]: acc=0.5000 g_loss=0.6914 d_loss=0.6931 kd_loss=0.0371
Epoch [14/15] Step [100/146]: acc=0.5000 g_loss=0.6903 d_loss=0.6928 kd_loss=0.0322
Epoch [14/15] Step [105/146]: acc=0.5000 g_loss=0.6915 d_loss=0.6932 kd_loss=0.0686
Epoch [14/15] Step [110/146]: acc=0.5000 g_loss=0.6951 d_loss=0.6922 kd_loss=0.0094
Epoch [14/15] Step [115/146]: acc=0.5000 g_loss=0.6913 d_loss=0.6931 kd_loss=0.0103
Epoch [14/15] Step [120/146]: acc=0.5000 g_loss=0.6902 d_loss=0.6932 kd_loss=0.0299
Epoch [14/15] Step [125/146]: acc=0.5000 g_loss=0.6911 d_loss=0.6933 kd_loss=0.0925
Epoch [14/15] Step [130/146]: acc=0.5000 g_loss=0.6937 d_loss=0.6935 kd_loss=0.0575
Epoch [14/15] Step [135/146]: acc=0.5000 g_loss=0.6932 d_loss=0.6937 kd_loss=0.0445
Epoch [14/15] Step [140/146]: acc=0.5000 g_loss=0.6935 d_loss=0.6918 kd_loss=0.0240
Epoch [14/15] Step [145/146]: acc=0.2727 g_loss=0.6889 d_loss=0.6943 kd_loss

In [24]:
torch.save(tgt_encoder.state_dict(), 'model/french_adapted_encoder_12_12_2.pth')

In [29]:
print("French Test: \n")
evaluate_validation(tgt_encoder, bi_lstm_classifier, french_dataloader)
print("\n\n")
print("German Test:")
evaluate_validation(tgt_encoder, bi_lstm_classifier, german_dataloader)
print("\n\n")
print("Italian Test: \n")
evaluate_validation(tgt_encoder, bi_lstm_classifier, italian_dataloader)

French Test: 

Validation loss:  1.8512077331542969
Accuracy: 0.5330132052821128
F1 score (Macro): 0.4866059870873501
F1 score (Per class): [0.33918129 0.46613546 0.65450122]
Precision score (Per class): [0.28019324 0.52466368 0.6674938 ]
Recall score (Per class): [0.42962963 0.41935484 0.64200477]



German Test:
Validation loss:  1.3572468757629395
Accuracy: 0.6298543689320388
F1 score (Macro): 0.5376748284908066
F1 score (Per class): [0.36781609 0.49867374 0.74653465]
Precision score (Per class): [0.31372549 0.45192308 0.81425486]
Recall score (Per class): [0.44444444 0.55621302 0.68921389]



Italian Test: 

Validation loss:  2.2205429077148438
Accuracy: 0.4626218851570964
F1 score (Macro): 0.44481746429594815
F1 score (Per class): [0.37220844 0.55120101 0.41104294]
Precision score (Per class): [0.29761905 0.65465465 0.3964497 ]
Recall score (Per class): [0.49668874 0.47598253 0.42675159]


In [18]:
print("German Test:")
evaluate_validation(tgt_encoder, src_classifier, german_dataloader)

German Test:
Validation loss:  2.7417564392089844
Accuracy: 0.36771844660194175
F1 score (Macro): 0.37011527679462075
F1 score (Per class): [0.27287854 0.41025641 0.42721088]
Precision score (Per class): [0.1663286  0.44755245 0.83510638]
Recall score (Per class): [0.75925926 0.37869822 0.28702011]


In [19]:
print("Italian Test: \n")
evaluate_validation(tgt_encoder, src_classifier, italian_dataloader)

Italian Test: 

Validation loss:  3.206312417984009
Accuracy: 0.3488624052004334
F1 score (Macro): 0.34640687429795575
F1 score (Per class): [0.37837838 0.35233161 0.30851064]
Precision score (Per class): [0.24094203 0.84297521 0.348     ]
Recall score (Per class): [0.8807947  0.22270742 0.27707006]
