In [None]:
#@title mount
# Mount Google Drive inside the Colab runtime and make the project folder the
# working directory, so the relative paths used later (embeddings, datasets,
# saved models) resolve against it.
from google.colab import drive
drive.mount('/gdrive')
# IPython magic: changes the kernel's working directory (not plain Python).
%cd '/gdrive/My Drive/Colab Notebooks/Work'

In [None]:
#@title options.py

#!/usr/bin/env ipython

import tensorflow as tf

import os, sys, logging, argparse
from pathlib import Path

# Notebook emulation of a command-line run: a notebook has no __file__ and no
# real argv, so both are faked here. Keep these lines only when running as a
# notebook; as a script, __file__ and sys.argv are supplied by the interpreter.
__file__ = '/gdrive/My Drive/Colab Notebooks/Work/LAN.ipynb'   # uncomment only when in notebook
# Work from the notebook's directory so relative data/model paths resolve.
os.chdir(os.path.dirname(__file__))
# Options for this run. Flags such as '--no_F_bn' rely on argparse prefix
# matching, because the parser registers them with a trailing '/'.
sys.argv = [__file__, '--learning_rate', '0.1', '--Q_learning_rate', '0.1', '--clipvalue', '10', '--epochs', '5', '--n_vecs', '-1', '--train_size_src', '-1', '--train_size_tgt', '-1', '--batch_size', '50000', '--no_F_bn', '--no_P_bn', '--no_Q_bn', '--vector_length', '10']
# Marks the run as a notebook; the per-"module" smoke tests check opt.notebook.
sys.argv += ['--notebook', 'True']
def _str2bool(value):
    """
    argparse converter for boolean options.

    `type=bool` is a trap: bool('False') is True because any non-empty string
    is truthy. This converter maps the usual spellings explicitly and rejects
    anything else, so a typo fails loudly instead of silently enabling a flag.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


parser = argparse.ArgumentParser()

#platform arguments
parser.add_argument('--notebook', type=_str2bool, default=False)

# dataset arguments
parser.add_argument('--data_path', default=None)
parser.add_argument('--src_lang', default='en')
parser.add_argument('--tgt_lang', default='fr')
parser.add_argument('--train_size_src', type=int, default=None)        # use all
parser.add_argument('--train_size_tgt', type=int, default=None)        # use all
parser.add_argument('--num_labels', type=int, default=5+1)            # max reviews rating
parser.add_argument('--iterate', action='store_true')                # read through iterations
# NOTE: no `type=` here, so a value given on the command line arrives as a
# plain string; only the default is a real tf dtype.
parser.add_argument('--label_dtype', default=tf.int32)

# sequences and vocab arguments
parser.add_argument('--max_seq_len', type=int, default=100)            # None for no truncate
parser.add_argument('--unk_tok', type=str, default='<unk>')
parser.add_argument('--bos_tok', type=str, default='<s>')
parser.add_argument('--eos_tok', type=str, default='</s>')

# training arguments
parser.add_argument('--epochs', type=int, default=5)
parser.add_argument('--random_seed', type=int, default=1)
parser.add_argument('--model_save_file', default='./saved_models/adan')
parser.add_argument('--batch_size', type=int, default=10000)
parser.add_argument('--buffer_size', type=int, default=40000)
parser.add_argument('--learning_rate', type=float, default=0.05)
parser.add_argument('--Q_learning_rate', type=float, default=0.05)

# bwe arguments
parser.add_argument('--emb_filename', default='')
parser.add_argument('--n_vecs', type=int, default=-1)
parser.add_argument('--random_emb', action='store_true')
parser.add_argument('--fix_unk', action='store_true')                # use a fixed <unk> token for all words without pretrained embeddings when building vocab
parser.add_argument('--emb_size', type=int, default=300)
parser.add_argument('--pre_trained_src_emb_file', type=str, default='bwe/vectors/wiki.multi.en.vec')
parser.add_argument('--pre_trained_tgt_emb_file', type=str, default='bwe/vectors/wiki.multi.fr.vec')

# Feature Extractor
parser.add_argument('--model', default='dan')                        # dan or lstm or cnn
parser.add_argument('--fix_emb', action='store_true')
parser.add_argument('--vector_length', type=int, default=1)
# for LSTM model
parser.add_argument('--attn', default='dot')                        # attention mechanism (for LSTM): avg, last, dot
parser.add_argument('--bidir_rnn', dest='bidir_rnn', action='store_true', default=True)        # bi-directional LSTM
# The option strings below (and the *_bn / debug ones further down) end in '/'.
# They are kept as-is for compatibility; the slash-less spelling still works
# through argparse's unambiguous-prefix matching.
parser.add_argument('--sum_pooling/', dest='avg_pooling', action='store_false')
parser.add_argument('--avg_pooling/', dest='avg_pooling', action='store_true')
# for CNN model
parser.add_argument('--kernel_num', type=int, default=400)
parser.add_argument('--kernel_sizes', type=int, nargs='+', default=[3,4,5])

# for layers and all models
parser.add_argument('--F_layers', type=int, default=1)
parser.add_argument('--P_layers', type=int, default=1)
parser.add_argument('--Q_layers', type=int, default=1)

parser.add_argument('--q_critic', type=int, default=5)    # Q iterations
parser.add_argument('--_lambda', type=float, default=0.1)

parser.add_argument('--F_bn/', dest='F_bn', action='store_true')
parser.add_argument('--no_F_bn/', dest='F_bn', action='store_false')
parser.add_argument('--P_bn/', dest='P_bn', action='store_true', default=True)
parser.add_argument('--no_P_bn/', dest='P_bn', action='store_false')
parser.add_argument('--Q_bn/', dest='Q_bn', action='store_true', default=True)
parser.add_argument('--no_Q_bn/', dest='Q_bn', action='store_false')

parser.add_argument('--hidden_size', type=int, default=900)
parser.add_argument('--dropout', type=float, default=0)
parser.add_argument('--activation', type=str, default='linear')

parser.add_argument('--clip_Q', type=_str2bool, default=False)
parser.add_argument('--clipvalue', type=float, default=0.01)
parser.add_argument('--clip_lim_FP', type=float, default=None)

parser.add_argument('--device', type=str, default='cuda')
parser.add_argument('--debug/', dest='debug', action='store_true')

# Parses sys.argv (faked above when running inside the notebook).
opt = parser.parse_args()

# Without a visible GPU, point the device option at the CPU.
if len(tf.config.list_physical_devices('GPU')) == 0:
    opt.device = 'CPU'

# Console logging: DEBUG when --debug was given, INFO otherwise.
logging.basicConfig(
    stream=sys.stderr,
    level=logging.INFO if not opt.debug else logging.DEBUG,
)
log = logging.getLogger(__name__)
import errno
# The log file lives inside the model save directory.
filename = Path(opt.model_save_file) / 'log.txt'
if not os.path.exists(os.path.dirname(filename)):
    try:
        os.makedirs(os.path.dirname(filename))
    except OSError as exc:
        # Another process may have created the directory in the meantime;
        # only that specific failure is tolerated.
        if exc.errno != errno.EEXIST:
            raise
# Create the log file, emptying it if it already exists.
f = open(filename, "w")
f.close()
# Mirror every record of this logger into the log file.
fh = logging.FileHandler(filename)
log.addHandler(fh)

# Sanity check of the parsed options: prints a few of them and writes one
# record through the logger configured above.
if __name__ == "__main__":
    print("src_embeddings: ", opt.pre_trained_src_emb_file)
    print("tgt_embeddings: ", opt.pre_trained_tgt_emb_file)
    print("debugging: ", opt.debug)
    print(opt.n_vecs)
    log.info('start...')

In [7]:
#@title vocab.py

#!/usr/bin/env ipython

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, preprocessing

import numpy as np
import os, io, sys, logging
from pathlib import Path
from tqdm import tqdm, trange
os.chdir(os.path.dirname(__file__))

#from options import *

class Vocab:
    """
    Vocabulary mapping words to indices and indices to embedding vectors.

    Words present in the loaded pre-trained embedding file(s) reuse the
    pre-trained vector; any other word gets the caller-supplied vector or a
    random unit-length one (or is mapped to <unk> when opt.fix_unk is set).

attributes: (self)
        vocab_size      number of registered words
        emb_size        embedding dimension (opt.emb_size)
        embeddings      [idx => emb_vector] for registered words
        w2vvocab        {word : idx}
        v2wvocab        [idx => word]
        pt_w2vvocab     {word : row of self.pretrained}
        pt_v2wvocab     [row of self.pretrained => word]
        pretrained      array of pre-trained vectors (set by add_pre_trained_emb)
        cnt             number of pre-trained vectors read so far
        unk_tok, unk_idx
        bos_tok, bos_idx
        eos_tok, eos_idx

methods: (self)
        __init__(self, pre_train_infile, vecs)
        add_pre_trained_emb(self, pre_train_infile, vecs)
        base_form(self, word)
        new_rand_emb(self)
        add_word(self, word, vec)
        lookup(self, word)
        get_word(self, i)
        hash_fit_on_text(self, line)
        text_to_sequence(self, line, update_vocab)
        text_list_to_sequence(self, text_list, update_vocab)
        fit_on_text(self, line_list)
        fit_on_text_list(self, texts_list)
        pad_text_list(self, text_list, ...)
        pad_sequences(self, dataset, ...)
        clear_pretrained_vectors(self)
        init_embed_layer(self, clear_pt)

    """

    def __init__(self, pre_train_infile = None, vecs = opt.n_vecs):
        """
        register the special tokens, then load the pre-trained vectors of
        pre_train_infile into the pt_***vocabs

        side effect: stores unk_idx, bos_idx and eos_idx on the global opt
        """
        self.vocab_size = 0
        self.emb_size = opt.emb_size
        self.embeddings = []
        self.w2vvocab = {}
        self.v2wvocab = []

        self.pt_v2wvocab = []
        self.pt_w2vvocab = {}
        self.cnt = 0

        # add <unk>; its vector is forced to all zeros
        self.unk_tok = opt.unk_tok
        self.add_word(self.unk_tok)
        opt.unk_idx = self.unk_idx = self.w2vvocab[self.unk_tok]
        self.embeddings[self.unk_idx][:] = 0
        # add BOS token; its index deliberately aliases <unk> (zero vector)
        self.bos_tok = opt.bos_tok
        self.add_word(self.bos_tok)
        opt.bos_idx = self.bos_idx = self.unk_idx #self.w2vvocab[self.bos_tok]
        #self.embeddings[self.bos_idx][:] = 0
        # add EOS token; its index also aliases <unk>
        self.eos_tok = opt.eos_tok
        self.add_word(self.eos_tok)
        opt.eos_idx = self.eos_idx = self.unk_idx #self.w2vvocab[self.eos_tok]
        #self.embeddings[self.eos_idx][:] = 1    # 0
        log.info("vocab initializing...done.")
        # add pre trained embeddings
        self.add_pre_trained_emb(pre_train_infile, vecs)

    def add_pre_trained_emb(self, pre_train_infile = None, vecs = opt.n_vecs):
        """
        read a word2vec-style text file (first line: "<n_vecs> <emb_dim>", then
        one "<word> <v1> ... <vd>" per line) and append its vectors to
        self.pretrained / pt_***vocabs

        vecs : read at most this many vectors (None or <= 0 reads all)
        raises ValueError if no file is given, FileNotFoundError if it is missing
        """
        if pre_train_infile is None:
            raise ValueError('file not specified...')
        if not os.path.isfile(pre_train_infile):
            # build the message once; log.info() returns None and must not be
            # used as the exception argument
            message = f"pre_train_file {Path(pre_train_infile)} does not exist..."
            log.error(message)
            raise FileNotFoundError(message)

        log.info(f'reading pre-trained embeddings from {pre_train_infile}...')
        with io.open(Path(pre_train_infile), 'r', encoding='utf-8') as infile:
            first_line = infile.readline().split()
            assert len(first_line) == 2, "first line must be '<n_vecs> <emb_dim>'"
            n_vecs, emb_dim = map(int, first_line)    # first line has total number of vectors and embedding dimensions
            assert emb_dim == self.emb_size, f"file has {emb_dim}-d vectors, expected {self.emb_size}"
            if vecs is not None and vecs > 0: n_vecs = min(n_vecs, vecs)
            # builtin float == float64; the np.float alias was removed from NumPy
            new_rows = np.empty(shape=(n_vecs, emb_dim), dtype=float)
            if hasattr(self, 'pretrained'):
                self.pretrained = np.append(self.pretrained, new_rows, axis=0)
            else:
                self.pretrained = new_rows
            for _ in trange(n_vecs):
                line = infile.readline()
                if not line: break
                parts = line.rstrip().split(' ')
                word = parts[0]
                #if word in self.pt_v2wvocab: continue        # no need to check if assumed no repetition mistake
                # add to vocabs
                self.pt_v2wvocab.append(word)
                self.pt_w2vvocab[word] = self.cnt
                self.pretrained[self.cnt] = [float(x) for x in parts[1:]]
                self.cnt += 1
        # drop rows left uninitialised when the file held fewer vectors than
        # its header announced
        self.pretrained = self.pretrained[:self.cnt]
        log.info("embedding vectors imported...")

    def base_form(self, word):
        """
        return stripped and lowercased word
        """
        return word.strip().lower()


    def new_rand_emb(self):
        """
        return a random emb_vector (normal with mean -1, std 1) scaled to unit length
        """
        vec = np.random.normal(-1, 1, size=self.emb_size)
        vec /= np.linalg.norm(vec)
        return vec


    def add_word(self, word, vec=None):
        """
        add new word to the ***vocab and return its index. \nUse this to only add new words or used words from pt_***vocabs to ***vocabs.

        vec : vector to use when the word has no pre-trained embedding
              (default: a new random vector)
        with opt.fix_unk, a word without pre-trained embedding is not added and
        unk_idx is returned instead
        """
        word = self.base_form(word=word)
        if word not in self.w2vvocab:
            use_pretrained = not opt.random_emb and hasattr(self, 'pt_w2vvocab')
            has_pretrained = use_pretrained and word in self.pt_w2vvocab
            # The hasattr guard lets __init__ register <unk> itself before
            # unk_idx exists; afterwards unknown words collapse onto it.
            if use_pretrained and opt.fix_unk and not has_pretrained and hasattr(self, 'unk_idx'):
                return self.unk_idx
            if has_pretrained:
                vector = self.pretrained[self.pt_w2vvocab[word]].copy()
            else:
                vector = self.new_rand_emb() if vec is None else vec
            self.v2wvocab.append(word)
            self.w2vvocab[word] = self.vocab_size
            self.embeddings.append(vector)
            self.vocab_size += 1
        return self.w2vvocab[word]


    def lookup(self, word):
        """
        return value of word (word_idx) from w2vvocab, or unk_idx if the word is unknown
        """
        word = self.base_form(word)
        return self.w2vvocab.get(word, self.unk_idx)


    def get_word(self, i):
        """
        return the word at index i
        """
        return self.v2wvocab[i]


    def hash_fit_on_text(self, line):
        """
        fit on text (sentence) with tf.keras.preprocessing.text.hashing_trick\n(NOT TESTED)
        """
        return preprocessing.text.hashing_trick(line, n=self.vocab_size, hash_function=self.lookup, filters='')


    def text_to_sequence(self, line, update_vocab=True):
        """
        convert text (line) to sequence; unseen words are added when
        update_vocab is True and mapped to unk_idx otherwise
        """
        words = line.strip().split()
        return self.text_list_to_sequence(words, update_vocab=update_vocab)


    def text_list_to_sequence(self, text_list, update_vocab=True):
        """
        convert text (word list) to sequence; unseen words are added when
        update_vocab is True and mapped to unk_idx otherwise
        """
        to_index = self.add_word if update_vocab else self.lookup
        return [to_index(w) for w in text_list]


    def fit_on_text(self, line_list):    # tf.keras.preprocessing.text.hashing_trick
        """
        add new texts (sentences) to the vocabularies and return their sequences
        """
        return [[self.add_word(w) for w in line.strip().split()] for line in line_list]


    def fit_on_text_list(self, texts_list):    # tf.keras.preprocessing.text.hashing_trick
        """
        add new texts (word lists) to the vocabularies and return their sequences
        """
        return [[self.add_word(w) for w in line] for line in texts_list]


    def pad_text_list(self, text_list, max_len=opt.max_seq_len, pad='pre', truncate='post', add_eos_tok=False):
        """
        pad single text (words) list with the BOS token to max_len words

        pad / truncate : 'pre' or 'post', where to pad / which end to cut
        add_eos_tok : append the EOS token (counted inside max_len)
        max_len=None leaves the length unchanged
        """
        text_list = list(text_list)
        if max_len is not None:
            if add_eos_tok: max_len -= 1
            max_len = max(max_len, 0)
            # list[-0:] would keep everything, hence the explicit zero case
            if truncate == 'pre': text_list = text_list[-max_len:] if max_len else []
            else: text_list = text_list[:max_len]
            filler = [self.bos_tok] * (max_len - len(text_list))
            text_list = text_list + filler if pad == 'post' else filler + text_list
        if add_eos_tok: text_list += [self.eos_tok]
        return text_list


    def pad_sequences(self, dataset, max_len=opt.max_seq_len, pad='pre', truncate='post', add_eos=False):
        """
        turn a list of (words, length, label) examples into a tf.data.Dataset of
        (padded index sequence, length, label); words are added to the vocab and
        padded with keras.preprocessing.sequence.pad_sequences

        pad / truncate : 'pre' or 'post', forwarded to keras pad_sequences
        add_eos : append eos_idx to every sequence (counted inside max_len)
        """
        if add_eos: max_len -= 1
        seq_list, lengths, stars = zip(*dataset)
        seq_list = self.fit_on_text_list(seq_list)
        seq_list = preprocessing.sequence.pad_sequences(tqdm(seq_list), maxlen=max_len, padding=pad, truncating=truncate, value=opt.bos_idx)
        # one extra 'post' padded slot holds the EOS index
        if add_eos: seq_list = preprocessing.sequence.pad_sequences(tqdm(seq_list), maxlen=max_len+1, padding='post', value=opt.eos_idx)
        return tf.data.Dataset.from_tensor_slices((tf.convert_to_tensor(seq_list, name='seq'), tf.convert_to_tensor(lengths, name='len'), tf.convert_to_tensor(stars, name='label')))

    def clear_pretrained_vectors(self):
        """
        clear the pretrained vectors and pt_***vocab (frees memory; words added
        afterwards get random vectors)
        """
        for attr in ('pretrained', 'pt_w2vvocab', 'pt_v2wvocab'):
            if hasattr(self, attr): delattr(self, attr)


    def init_embed_layer(self, clear_pt=True):
        """
        clear pretrained vectors (unless clear_pt is False) and return a frozen
        embedding layer initialized with self.embeddings
        """
        if clear_pt: self.clear_pretrained_vectors()
        emb_layer = layers.Embedding(input_dim=self.vocab_size, output_dim=self.emb_size, input_length=opt.max_seq_len, name='vocab_embedding')
        emb_layer.build(input_shape=(None, self.vocab_size, self.emb_size))
        # set_weights expects a list holding the (vocab_size, emb_size) matrix
        emb_layer.set_weights([np.asarray(self.embeddings, dtype=float)])
        emb_layer.trainable = False
        assert emb_layer.weights[0].shape[0] == self.vocab_size, "layer weights len not equal to vocab size in layer " + emb_layer.name
        return emb_layer


# Smoke test for Vocab, skipped inside the notebook (opt.notebook is True).
if __name__ == "__main__" and not opt.notebook:
    """
    run as main
    """
    print(opt.pre_trained_src_emb_file)
    # Load at most 5000 vectors from each of the source and target files.
    vocab = Vocab(opt.pre_trained_src_emb_file, vecs=5000)
    vocab.add_pre_trained_emb(opt.pre_trained_tgt_emb_file, vecs=5000)
    vocab.add_word('the')
    vocab.add_word('of')
    vocab.add_word('this')
    # clear_pt=False keeps the pre-trained table after building the layer.
    emb_layer = vocab.init_embed_layer(clear_pt=False)
    print(emb_layer.variables)

# Important links: https://www.tensorflow.org/tfx/tutorials/transform/census , https://www.tensorflow.org/api_docs/python/tf/numpy_function , https://www.tensorflow.org/api_docs/python/tf/py_function

In [8]:
#@title utils.py

#!/usr/bin/env ipython

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, preprocessing

import pdb
import numpy as np

#from options import *

# Minimal debug helper: prints its argument.
DEBUG = lambda x: print(x)

def freeze(net):
    """Mark `net` as non-trainable by setting its `trainable` attribute to False."""
    setattr(net, 'trainable', False)

def unfreeze(net):
    """Mark `net` as trainable by setting its `trainable` attribute to True."""
    setattr(net, 'trainable', True)

def get_lines(infile, encoding='utf-8'):
    """
    Return the number of lines in the text file `infile`.

    The count is done in Python on every platform. The previous POSIX branch
    built a `wc -l` command string and ran it through a shell, which is unsafe
    for file names containing quotes and does not count a final line that has
    no trailing newline.

    infile : str or Path of the file to count
    encoding : text encoding used to read the file
    """
    # Imported locally so the function does not depend on another notebook
    # cell having imported these names first.
    import io
    from pathlib import Path

    with io.open(Path(infile), encoding=encoding) as handle:
        return sum(1 for _ in handle)

def argmax32(arr, axis=-1, dtype=opt.label_dtype):
    """
    Return the indices of the maximum values of `arr` along `axis`, cast to `dtype`.

    arr : array-like accepted by np.argmax
    axis : axis to reduce (previously ignored: the last axis was always used)
    dtype : tf dtype of the result (default: opt.label_dtype)
    """
    return tf.cast(np.argmax(arr, axis=axis), dtype=dtype)

def pad(x : list, y : list, eos_idx : int, sort:bool=False):
    """
    Right-pad a batch of index sequences with `eos_idx` to a common length.

    The previous body used PyTorch idioms that do not exist in TensorFlow
    (`tf.fill(..., dtype=)`, item assignment on a tensor, `Tensor.sort`). The
    padded matrix is now built in NumPy and converted once.

    x : list of index sequences (none may contain eos_idx)
    y : labels, one per sequence
    eos_idx : index used as padding value
    sort : if True, order the batch by decreasing sequence length

    returns ((padded_inputs, lengths), y) as int64 tensors; padded_inputs has
    shape (len(x), longest sequence), lengths and y are 1-D
    """
    #inputs, lengths = zip(*x)
    inputs = x
    lengths = [len(l) for l in inputs]
    max_len = max(lengths, default=0)    # an empty batch gives a (0, 0) matrix
    # pad sequences
    padded = np.full((len(inputs), max_len), eos_idx, dtype=np.int64)
    for i, row in enumerate(inputs):
        assert eos_idx not in row, f'EOS in sequence {row}'
        padded[i, :len(row)] = row
    padded_inputs = tf.convert_to_tensor(padded, dtype=tf.int64)
    lengths = tf.convert_to_tensor(lengths, dtype=tf.int64)
    y = tf.reshape(tf.convert_to_tensor(y, dtype=tf.int64), -1)
    if sort:
        # sort by length: one permutation reorders lengths, inputs and labels
        sorting_idx = tf.argsort(lengths, axis=0, direction='DESCENDING')
        sorted_lengths = tf.gather(params=lengths, indices=sorting_idx, axis=0)
        padded_inputs = tf.gather(params=padded_inputs, indices=sorting_idx, axis=0)
        y = tf.gather(params=y, indices=sorting_idx, axis=0)
        return (padded_inputs, sorted_lengths), y
    else:
        return (padded_inputs, lengths), y


def my_collate(batch : list, sort : bool):
    """
    Collate a list of (sequence, label) pairs into one padded batch.

    The padding runs on opt.device and uses opt.eos_idx as padding value.
    Returns whatever pair `pad` produces for the batch.
    """
    sequences, labels = zip(*batch)
    with tf.device(opt.device):
        padded, targets = pad(sequences, labels, opt.eos_idx, sort)
    return padded, targets


def sorted_collate(batch):
    """Collate `batch` with my_collate, sorting the batch (sort=True)."""
    result = my_collate(batch, True)
    return result

def unsorted_collate(batch):
    """Collate `batch` with my_collate, keeping the given order (sort=False)."""
    result = my_collate(batch, False)
    return result


# Smoke test, skipped inside the notebook: prints the line count of the
# English embedding file.
if __name__ == "__main__" and not opt.notebook:
    print(get_lines("bwe/vectors/wiki.multi.en.vec"))

In [9]:
#@title data.py

#!/usr/bin/env ipython

import tensorflow as tf

import sys, os, io, json, subprocess
from pathlib import Path
from tqdm import tqdm, trange
os.chdir(os.path.dirname(__file__))

#from options import opt
#from vocab import *

# Module-level alias of the configured label dtype.
label_dtype = opt.label_dtype

def decode_json(infile, lines=None, reviews_data=None, max_seq_len=None):
    """
    Read a JSON-lines file into memory.

    infile : str or Path of the file (one JSON object per line)
    lines : maximum number of lines to read; None or a negative value reads all
    reviews_data : 'Amazon reviews' selects the review-specific parsing
    max_seq_len : keep at most this many words per review (None keeps all)

    returns
        for 'Amazon reviews': (examples, max_stars), where every example is
            [words, len(words), stars as opt.label_dtype] and words is
            review_title + review_body split on whitespace
        otherwise: the list of decoded JSON objects
    """
    path = Path(infile)
    # Path(os.getcwd()) keeps the message valid for plain-string paths too
    assert os.path.isfile(path), str(Path(os.getcwd()) / path) + " doesn't exist, extract_data first"
    if lines is None or lines < 0: lines = get_lines(infile)
    log.info(f'Reading {lines} lines from {infile}')
    with io.open(path, 'r', encoding='utf-8') as handle:
        if reviews_data == 'Amazon reviews':
            ret = []
            max_stars = 0
            for _ in trange(lines):
                raw = handle.readline()
                if not raw: break                # file shorter than requested
                if not raw.strip(): continue     # tolerate blank lines
                dic = json.loads(raw)
                stars = int(dic['stars'])
                words = str(dic["review_title"] + dic["review_body"]).strip().split()[:max_seq_len]
                ret.append([words, len(words), tf.cast(stars, dtype=opt.label_dtype)])
                max_stars = max(max_stars, stars)
            return ret, max_stars
        return [json.loads(line) for line in tqdm(handle.read().strip().split('\n')[:lines])]


def decode_json_iterate(infile, lines=None, reviews_data=None, max_seq_len=None):
    """
    Lazily decode a JSON-lines file, one object at a time.

    infile : str or Path of the file (one JSON object per line)
    lines : maximum number of lines to read; None, 0 or a negative value reads
        all. The previous version stopped before the first line for lines=-1,
        which is the default passed by AmazonReviews.load_data_generator.
    reviews_data : 'Amazon reviews' selects the review-specific parsing
    max_seq_len : keep at most this many words per review (None keeps all)

    yields
        for 'Amazon reviews': [words, len(words), stars as opt.label_dtype]
        otherwise: the decoded JSON object
    """
    from itertools import islice

    path = Path(infile)
    assert os.path.isfile(path), str(Path(os.getcwd()) / path) + " doesn't exist, extract_data first"
    limit = lines if lines is not None and lines > 0 else None
    total = limit if limit is not None else get_lines(infile)
    # a single progress bar, advanced by one per line read
    with tqdm(total=total) as pbar:
        with io.open(path, 'r', encoding='utf-8') as handle:
            for raw in islice(handle, limit):
                pbar.update(1)
                if not raw.strip(): continue     # tolerate blank lines
                dic = json.loads(raw)
                if reviews_data != 'Amazon reviews':
                    yield dic
                else:
                    words = str(dic["review_title"] + dic["review_body"]).strip().split()[:max_seq_len]
                    yield [words, len(words), tf.cast(int(dic["stars"]), dtype=opt.label_dtype)]


class AmazonReviews:
    """
    access to the extracted Amazon reviews data: each example is
    review title + review body with its star rating
parameters:
    path : str => path to 'Amazon reviews' directory with '/' as separator
                  (defaults to 'Amazon reviews' in the working directory)
    eos_tok : str
    max_seq_len : int
    star_rating : int => also stored on the global options as opt.labels
    """
    def __init__(self, path:str=None, eos_tok=opt.eos_tok, max_seq_len=opt.max_seq_len, star_rating=5):
        super().__init__()
        self.path = Path(path) if path else Path('Amazon reviews')
        # one sub-directory per split
        self.dats = {split: self.path / split for split in ('train', 'dev', 'test')}
        self.eos_tok = eos_tok
        self.max_seq_len = max_seq_len
        self.star_rating = star_rating
        opt.labels = star_rating


    def _dataset_file(self, lang, dat):
        """
        path of the json file holding split `dat` for language `lang`
        """
        return self.dats[dat] / str('dataset_' + lang + '_' + dat + '.json')


    def load_data(self, lang, dat, lines=-1):
        """
        read a whole split into memory
parameters:
        lang : str => de, en, es, fr, ja, zh
        dat : str => train, dev, test
        lines : int
return:
        list of examples as produced by decode_json for 'Amazon reviews'
side effect:
        self.star_rating is replaced by the largest star rating read
        """
        source = self._dataset_file(lang, dat)
        examples, max_stars = decode_json(source, lines=lines, reviews_data='Amazon reviews', max_seq_len=self.max_seq_len)
        self.star_rating = max_stars
        return examples


    def load_data_generator(self, lang, dat, lines=-1):
        """
        read a split lazily, line by line (for low-RAM machines - not completely implemented)
parameters:
        lang : str => de, en, es, fr, ja, zh
        dat : str => train, dev, test
        lines : int
return:
        generator which produces one review with its star rating at a time
        """
        source = self._dataset_file(lang, dat)
        return decode_json_iterate(source, lines=lines, reviews_data='Amazon reviews', max_seq_len=self.max_seq_len)


# Smoke test for the data pipeline, skipped inside the notebook.
if __name__ == "__main__" and not opt.notebook:
    infile = 'Amazon reviews/test/dataset_en_test.json'
    #assert os.path.isfile(infile), str(os.getcwd() / infile) + " doesn't exist"
    #for x in decode_json_iterate(infile, 30): print(x)
    #print(decode_json(infile, 30))
    # The vocabulary has to exist before pad_sequences is called below; this
    # line used to be commented out, leaving `vocab` undefined in this block.
    vocab = Vocab(opt.pre_trained_src_emb_file)
    rev = AmazonReviews()
    data = rev.load_data(lang='en', dat='train')
    data = vocab.pad_sequences(data)
    print('\n', data)
    for dat in data.take(3):
        print(dat)
    #sequence_input = keras.Input(input_shape=(opt.max_seq_len,), dtype='int32')(np.array([x for x, y in data.as_numpy_iterator()], dtype='int32'))
    #emb_layer = vocab.init_embed_layer()
    #emb_layer(sequence_input)
    #print(emb_layer(tf.convert_to_tensor([x for x, y in data.as_numpy_iterator()], dtype='int32')))

In [None]:
#@title train_data.py

#!/usr/bin/env ipython

#import torch
#import torch.nn as nn
#import torch.nn.functional as functional
#import torch.optim as optim
#from torch.utils.data import DataLoader
#from torchnet.meter import ConfusionMeter


import tensorflow as tf
from tensorflow.keras import optimizers, losses
import json

import os, random, sys, logging, argparse
from tqdm import tqdm
from pathlib import Path
os.chdir(os.path.dirname(__file__))

#from options import *
#from data import *
#from vocab import *
#from utils import *
#from models import *

#tf.logging.set_verbosity(tf.logging.INFO)
#tf.logging.set_verbosity(True)

#random.seed(opt.random_seed)
#torch.manual_seed(opt.random_seed)

# save logs
os.makedirs(opt.model_save_file, exist_ok=True)
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG if opt.debug else logging.INFO)
log = logging.getLogger(__name__)
# getLogger(__name__) returns the same logger object each time. In the
# notebook an earlier cell already attached a FileHandler for this log file,
# and re-running this cell would attach yet another; every record would then
# be written several times. Reuse an existing handler for the file if present.
_log_path = os.path.abspath(os.path.join(opt.model_save_file, 'log.txt'))
fh = next(
    (h for h in log.handlers
     if isinstance(h, logging.FileHandler) and h.baseFilename == _log_path),
    None,
)
if fh is None:
    fh = logging.FileHandler(_log_path)
    log.addHandler(fh)

# output options
log.info('Training ADAN with options:')
log.info(opt)

def get_train_data(opt):
    """
    Build the vocabulary and the batched source/target datasets.

    Steps: load both pre-trained embedding files into one Vocab, read the
    train/dev/test splits of the source and target language, pad them into
    tf.data.Datasets (which also registers their words in the vocab), then
    shuffle and batch.

    opt : parsed options; opt.max_seq_len is overwritten with the longest
        training review when it is unset (None/0) or negative

    returns (vocab, train_src, dev_src, test_src, train_tgt, dev_tgt, test_tgt,
             train_src_Q, train_tgt_Q, train_src_Q_iter, train_tgt_Q_iter, length)
        *_Q are copies of the training datasets, *_Q_iter iterators over
        them, and length maps '<split>_<src|tgt>' to the number of examples.
    """
    #opt.n_vecs = 20000; opt.train_size_src = -1; opt.train_size_tgt = -1
    # vocab
    log.info(f'Loading Embeddings...')
    vocab = Vocab(opt.pre_trained_src_emb_file, opt.n_vecs)
    vocab.add_pre_trained_emb(opt.pre_trained_tgt_emb_file, opt.n_vecs)
    log.info(f'Done.')

    # datasets
    length = {}

    # src_lang datasets
    log.info(f'Loading src datasets...')
    reviews_src_obj = AmazonReviews(path=opt.data_path, max_seq_len=opt.max_seq_len)
    train_src = reviews_src_obj.load_data(lang=opt.src_lang, dat='train', lines=opt.train_size_src); length['train_src'] = len(train_src)
    dev_src = reviews_src_obj.load_data(lang=opt.src_lang, dat='dev', lines=-1); length['dev_src'] = len(dev_src)
    test_src = reviews_src_obj.load_data(lang=opt.src_lang, dat='test', lines=-1); length['test_src'] = len(test_src)
    log.info('Done loading src datasets.')

    # tgt_lang datasets
    log.info(f'Loading tgt datasets...')
    reviews_tgt_obj = AmazonReviews(path=opt.data_path, max_seq_len=opt.max_seq_len)
    train_tgt = reviews_tgt_obj.load_data(lang=opt.tgt_lang, dat='train', lines=opt.train_size_tgt); length['train_tgt'] = len(train_tgt)
    dev_tgt = reviews_tgt_obj.load_data(lang=opt.tgt_lang, dat='dev', lines=-1); length['dev_tgt'] = len(dev_tgt)
    test_tgt = reviews_tgt_obj.load_data(lang=opt.tgt_lang, dat='test', lines=-1); length['test_tgt'] = len(test_tgt)

    log.info('Done loading tgt datasets.')

    #opt.num_labels = max(reviews_src_obj.star_rating, reviews_tgt_obj.star_rating)
    # Test for "unset" first: comparing None with 0 raises TypeError.
    if not opt.max_seq_len or opt.max_seq_len < 0:
        # every example is [words, n_words, label]; x[1] is the review length
        # (len(x) is always 3)
        maxlen_src = max(x[1] for x in train_src)
        maxlen_tgt = max(x[1] for x in train_tgt)
        opt.max_seq_len = max(maxlen_src, maxlen_tgt)
    del reviews_src_obj, reviews_tgt_obj

    # pad src datasets (-> Dataset)
    log.info('Padding src datasets...')
    train_src = vocab.pad_sequences(train_src, max_len=opt.max_seq_len)
    dev_src = vocab.pad_sequences(dev_src, max_len=opt.max_seq_len)
    test_src = vocab.pad_sequences(test_src, max_len=opt.max_seq_len)
    log.info('Done padding src datasets...')

    # pad tgt datasets (-> Dataset)
    log.info('Padding tgt datasets...')
    train_tgt = vocab.pad_sequences(train_tgt, max_len=opt.max_seq_len)
    dev_tgt = vocab.pad_sequences(dev_tgt, max_len=opt.max_seq_len)
    test_tgt = vocab.pad_sequences(test_tgt, max_len=opt.max_seq_len)
    log.info('Done padding tgt datasets...')

    def _train_batches(dataset, n_examples):
        # Shuffle examples, batch, then shuffle the batches three times (as
        # before). A shuffle buffer must be >= 1, also when the dataset holds
        # fewer examples than one batch.
        n_batches = max(1, n_examples // opt.batch_size)
        dataset = dataset.shuffle(buffer_size=opt.buffer_size, reshuffle_each_iteration=True).batch(opt.batch_size)
        for _ in range(3):
            dataset = dataset.shuffle(n_batches)
        return dataset

    def _eval_batches(dataset):
        # Shuffle examples and batch.
        return dataset.shuffle(buffer_size=opt.buffer_size, reshuffle_each_iteration=True).batch(opt.batch_size)

    # dataset loaders
    log.info('Shuffling and batching...')
    train_src = _train_batches(train_src, length['train_src'])
    train_tgt = _train_batches(train_tgt, length['train_tgt'])
    with tf.device('CPU'):
        train_src_Q = tf.identity(train_src)
        # was tf.identity(train_src): the target-side Q data silently
        # duplicated the source language
        train_tgt_Q = tf.identity(train_tgt)
    train_src_Q_iter = iter(train_src_Q)
    train_tgt_Q_iter = iter(train_tgt_Q)

    dev_src = _eval_batches(dev_src)
    dev_tgt = _eval_batches(dev_tgt)

    test_src = _eval_batches(test_src)
    test_tgt = _eval_batches(test_tgt)
    log.info('Done shuffling and batching.')

    return vocab, train_src, dev_src, test_src, train_tgt, dev_tgt, test_tgt, train_src_Q, train_tgt_Q, train_src_Q_iter, train_tgt_Q_iter, length

# Runs only inside the notebook (opt.notebook is True): resets the Keras
# session and loads everything into notebook-level variables that the later
# cells (models, training) rely on.
if __name__ == "__main__" and opt.notebook:
    # clear dumps
    tf.keras.backend.clear_session()
    # NOTE(review): set_learning_phase / learning_phase are deprecated in
    # TF 2.x; confirm they exist in the TensorFlow version in use.
    tf.keras.backend.set_learning_phase(0)
    print(tf.keras.backend.learning_phase())
    vocab, train_src, dev_src, test_src, train_tgt, dev_tgt, test_tgt, train_src_Q, train_tgt_Q, train_src_Q_iter, train_tgt_Q_iter, length = get_train_data(opt)

In [12]:
#@title layers.py

#!/usr/bin/env ipython

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models

import numpy as np

#from options import *
#from vocab import *
#from data import *
#from utils import *

class Averaging(layers.Layer):
    """
    Averages token embeddings over the sequence axis.

    toks : token indices stored on the layer (defaults to the unk/bos/eos
        indices from opt); stored only, not used by call()
    vector_length : factor applied to the averaged vector
    """
    def __init__(self, toks=None, vector_length=1, **kwargs):
        super().__init__(**kwargs)
        if toks is None:
            toks = [opt.unk_idx, opt.bos_idx, opt.eos_idx]
        self.toks = toks
        self.vl = vector_length

    def call(self, embeddings, lengths):
        """
        Sum `embeddings` over axis 1, divide by `lengths` (one per example)
        and scale by vector_length. The summed embeddings stay on self.W.
        """
        summed = tf.reduce_sum(embeddings, axis=1)
        self.W = tf.cast(summed, dtype=tf.float32)            # (BSZ, EMBDIM)
        per_example = tf.reshape(lengths, (-1, 1))
        divisor = tf.cast(per_example, dtype=tf.float32)
        return self.W / divisor * self.vl   # (BSZ, EMBDIM)
    
# Smoke test for the Averaging layer, skipped inside the notebook: embeds and
# averages ten padded English training reviews.
if __name__ == "__main__" and not opt.notebook:
    infile = 'Amazon reviews/test/dataset_en_test.json'  # not used below
    vocab = Vocab(opt.pre_trained_src_emb_file, vecs=10000)
    rev = AmazonReviews()
    data = rev.load_data(lang='en', dat='train', lines=100)
    data = vocab.pad_sequences(data)
    emb_layer = vocab.init_embed_layer()
    avg = Averaging()
    # each dataset element is (padded sequence, length, label)
    for x, l, y in data.take(10):
        print(avg(emb_layer(x), l))

In [13]:
#@title models.py

#!/usr/bin/env ipython

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers, preprocessing, losses
from tensorflow.keras import backend as K
from tensorflow.data import Dataset

import numpy as np
import os, io
from pathlib import Path
os.chdir(os.path.dirname(__file__))

#from options import opt
#from layers import *

def absexp_1(x):
    """
    Sign-preserving expm1: sign(x) * (exp(|x|) - 1), computed in float64 and
    clipped to [-1.7e+308, 1.7e+308].
    """
    magnitude = tf.math.expm1(tf.cast(tf.abs(x), dtype=tf.float64))
    signed = magnitude * tf.sign(x)
    return tf.clip_by_value(signed, -1.7e+308, 1.7e+308)

def scce(y_true, y_pred):
    """Sparse categorical cross-entropy of `y_pred` against the labels `y_true`."""
    loss_fn = losses.SparseCategoricalCrossentropy()
    return loss_fn(y_true, y_pred)

def hinge(ll_lang, ll_pred):
    """
    Hinge loss of the predictions `ll_pred` against the targets `ll_lang`.

    The body previously referenced an undefined name (`ll_true`) and raised
    NameError on every call; it now uses the `ll_lang` parameter.
    """
    return losses.Hinge()(ll_lang, ll_pred)

def total_loss(y_ll_true, y_ll_pred):
    """
    Combined loss: scce on the first component of each pair plus
    opt._lambda times hinge on the second component.

    y_ll_true, y_ll_pred : iterables of 2-item pairs, unzipped into the two
    components before the losses are applied
    """
    label_true, lang_true = zip(*y_ll_true)
    label_pred, lang_pred = zip(*y_ll_pred)
    classification = scce(label_true, label_pred)
    weighted_hinge = opt._lambda * hinge(lang_true, lang_pred)
    return classification + weighted_hinge

# Experiment overrides applied on top of the parsed options. They replace the
# command-line values of num_labels, _lambda and the *_layers counts; the
# per-network *_activation attributes are not parser options and exist only
# from this point on.
opt.num_labels = 6
opt._lambda = 1.
opt.F_layers = 2
opt.P_layers = 2
opt.Q_layers = 2
opt.F_activation = 'relu'
opt.P_activation = 'relu'
opt.Q_activation = 'relu'

# F: stack of Dense layers applied to the averaged embedding. Per layer:
# optional Dropout, Dense(hidden_size), optional BatchNorm; then one more Dense.
# Requires `vocab` from the train_data cell (hidden notebook state).
num_layers, hidden_size, dropout, batch_norm, activation = opt.F_layers, opt.hidden_size, opt.dropout, opt.F_bn, opt.F_activation
F = keras.Sequential()
for i in range(num_layers):
    if dropout > 0: F.add(layers.Dropout(rate=dropout, name=f'Dropout_{i}'))
    # the first Dense layer takes the embedding size as input, the rest hidden_size
    if i == 0: F.add(layers.Dense(units=hidden_size, input_shape=(vocab.emb_size,), activation=activation, name=f'DenseAbsExp_{i}'))
    else: F.add(layers.Dense(units=hidden_size, input_shape=(hidden_size,), activation=activation, name=f'DenseAbsExp_{i}'))
    if batch_norm: F.add(layers.BatchNormalization(input_shape=(hidden_size,), name=f'BatchNorm_{i}'))    # same shape as input    # use training=False when making inference from model (model.predict, model.evaluate?)
#F.add(layers.LeakyReLU(alpha=0.3))
#F.add(layers.ReLU())
# `i` is the last loop index here, so the layer name depends on num_layers >= 1.
F.add(layers.Dense(units=hidden_size, input_shape=(hidden_size,), activation=activation, name=f'DenseAbsExpFinal_{i}'))

# P: classifier head on top of F's output, ending in a softmax over
# opt.num_labels classes.
num_layers, hidden_size, output_size, dropout, batch_norm, activation = opt.P_layers, opt.hidden_size, opt.num_labels, opt.dropout, opt.P_bn, opt.P_activation
P = models.Sequential()
P.add(keras.Input((opt.hidden_size,)))
for i in range(num_layers):
    if dropout > 0: P.add(layers.Dropout(rate=dropout))
    # NOTE(review): this uses opt.activation (parser default 'linear'), not the
    # local `activation` unpacked from opt.P_activation above, which is
    # otherwise unused in this block. Confirm which one is intended.
    P.add(layers.Dense(units=hidden_size, input_shape=(hidden_size,), activation=opt.activation, name=f'DenseAbsExp_{i}'))
    if batch_norm: P.add(layers.BatchNormalization())
    #P.add(layers.ReLU())
#P.add(layers.Dense(units=output_size, input_shape=(hidden_size,), activation='tanh'))
P.add(layers.Dense(units=output_size, input_shape=(hidden_size,), activation='softmax', name=f'DenseSoftmax_{i}'))

# Q: head on top of F's output ending in a single tanh unit.
num_layers, hidden_size, dropout, batch_norm, activation = opt.Q_layers, opt.hidden_size, opt.dropout, opt.Q_bn, opt.Q_activation
Q = keras.Sequential()
for i in range(num_layers):
    if dropout > 0: Q.add(layers.Dropout(rate=dropout, name=f'Dropout_{i}'))
    Q.add(layers.Dense(units=hidden_size, input_shape=(hidden_size,), activation=activation, name=f'DenseAbsExp_{i}'))
    # The layer name below is spelled 'BathcNorm_'; it is a runtime string and
    # is left untouched here.
    if batch_norm: Q.add(layers.BatchNormalization(input_shape=(hidden_size,), name=f'BathcNorm_{i}'))
Q.add(layers.Dense(units=hidden_size, input_shape=(hidden_size,), activation=activation, name=f'DenseAbsExpFinal_{i}'))
Q.add(layers.Dense(units=1, input_shape=(hidden_size,), activation='tanh', name=f'DenseTanh'))
#Q.add(layers.Softmax())

# Embedding layer supplied by the vocabulary, and the Averaging layer configured
# with the unk/bos/eos token indices and opt.vector_length.
E = vocab.init_embed_layer()
A = Averaging(
    toks=[vocab.unk_idx, vocab.bos_idx, vocab.eos_idx],
    vector_length=opt.vector_length,
)

# Symbolic inputs shared by every model below: one of width opt.max_seq_len and
# one scalar per example (handed to A as `lengths`).
shape = (opt.max_seq_len,)
inputs = keras.Input(shape)
lengths = keras.Input(())
model_inputs = [inputs, lengths]

# E -> A
embeddings = E(inputs)
outputs_EA = A(embeddings, lengths)
EA = keras.Model(inputs=model_inputs, outputs=outputs_EA)

# E -> A -> F
outputs_EAF = F(outputs_EA)
EAF = keras.Model(inputs=model_inputs, outputs=outputs_EAF, name="FeatureExtractor_AE")

# E -> A -> F -> P
outputs_EAFP = P(outputs_EAF)
EAFP = keras.Model(inputs=model_inputs, outputs=outputs_EAFP, name="SemanticClassifier_FAE")

# E -> A -> F -> Q
outputs_EAFQ = Q(outputs_EAF)
EAFQ = keras.Model(inputs=model_inputs, outputs=outputs_EAFQ, name="LanguageDetector_FAE")

# Two-headed model exposing both the P and the Q outputs on the shared E/A/F trunk.
LAN = keras.Model(
    inputs=model_inputs,
    outputs=[outputs_EAFP, outputs_EAFQ],
    name="LAN",
)

In [None]:
#@title train.py : setup
TRAIN1 = True
if TRAIN1:
    log.info('Checking outputs and initializing...')
    # Freeze the embedding layer and the E->A model that wraps it.
    for frozen in (E, EA):
        frozen.trainable = False
    # Smoke test on at most one batch: show the raw inputs and both LAN outputs.
    # (Chaining take(1) twice yields the same single batch as one take(1).)
    for (inputs, lengths, labels) in train_src.take(1):
        print(inputs)
        print(LAN([inputs, lengths]))

In [None]:
#@title Training statistics : learning_rates
# Override the command-line learning rates for this run (0.001 is default).
opt.learning_rate = 1e-2
opt.Q_learning_rate = 1e-4
# Echo the pair as the cell output.
(opt.learning_rate, opt.Q_learning_rate)

In [None]:
#@title Setting the embeddings non-trainable
# Freeze the embedding layer (and the E->A model wrapping it) for the first
# training phase. The previous code set both flags to True, contradicting this
# cell's title and the "fixed embeddings" cells that follow. The flag only
# takes effect for models compiled after it is set, so run this before the
# compile calls below.
E.trainable = False
EA.trainable = False

In [44]:
#@title Trainable layers for EAFP training with fixed embeddings
# Names of the variables EAFP currently exposes as trainable.
[variable.name for variable in EAFP.trainable_variables]

['DenseAbsExp_0/kernel:0',
 'DenseAbsExp_0/bias:0',
 'DenseAbsExp_1/kernel:0',
 'DenseAbsExp_1/bias:0',
 'DenseAbsExpFinal_1/kernel:0',
 'DenseAbsExpFinal_1/bias:0',
 'DenseAbsExp_0_1/kernel:0',
 'DenseAbsExp_0_1/bias:0',
 'DenseAbsExp_1_1/kernel:0',
 'DenseAbsExp_1_1/bias:0',
 'DenseSoftmax_1/kernel:0',
 'DenseSoftmax_1/bias:0']

In [None]:
#@title train.py : Training F and P : sparse categorical
EAFP.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
epochs = 5
if TRAIN1:
    log.info('Training EAFP model with src data with fixed embeddings...')
    for epoch in trange(epochs):
        batch_no = 0
        for (inputs, lengths, labels) in train_src:
            log.info(f"Training on batch no : {batch_no}")
            batch_no += 1
            # Loss on this batch before the weights are updated on it.
            loss_before_fit = scce(labels, EAFP.predict([inputs, lengths]))
            print(loss_before_fit)
            # One pass of fit() over the current dataset batch.
            history = EAFP.fit(x=[inputs, lengths], y=labels, epochs=1)

In [None]:
#@title  P results : Unseen target data loss and  accuracy
# NOTE(review): inputs_tgt / lengths_tgt / labels_tgt are not assigned in this
# cell; within the visible notebook they are loop variables of the EAFQ training
# cells. Confirm they are defined before this cell on a fresh kernel.
EAFP.evaluate(x=[inputs_tgt, lengths_tgt], y=labels_tgt, verbose=2)

In [43]:
#@title Trainable layers for EAFQ training with fixed embeddings
# Names of the variables EAFQ currently exposes as trainable.
[weight.name for weight in EAFQ.trainable_variables]

['DenseAbsExp_0/kernel:0',
 'DenseAbsExp_0/bias:0',
 'DenseAbsExp_1/kernel:0',
 'DenseAbsExp_1/bias:0',
 'DenseAbsExpFinal_1/kernel:0',
 'DenseAbsExpFinal_1/bias:0',
 'DenseAbsExp_0_2/kernel:0',
 'DenseAbsExp_0_2/bias:0',
 'DenseAbsExp_1_2/kernel:0',
 'DenseAbsExp_1_2/bias:0',
 'DenseAbsExpFinal_1_1/kernel:0',
 'DenseAbsExpFinal_1_1/bias:0',
 'DenseTanh/kernel:0',
 'DenseTanh/bias:0']

In [None]:
#@title train.py : Training F and Q : adversarial : hinge loss
#opt.Q_learning_rate = 1e-5
EAFQ.compile(
    optimizer=optimizers.Adam(opt.Q_learning_rate),
    loss='hinge',
    metrics=['accuracy'],
)
opt.Q_iterations = 5
if TRAIN1:
    log.info('Training EAFQ model with src and tgt data with lang_labels and fixed embeddings...')
    for epoch in trange(opt.Q_iterations):
        batch_no = 0
        # Walk source and target batches in lockstep; zip stops at the shorter dataset.
        for batch_src, batch_tgt in zip(train_src, train_tgt):
            inputs_src, lengths_src, labels_src = batch_src
            inputs_tgt, lengths_tgt, labels_tgt = batch_tgt
            log.info(f"Training on batch no : {batch_no}")
            batch_no += 1

            # Stack source examples first, then target examples.
            inputs = tf.concat([inputs_src, inputs_tgt], axis=0)
            lengths = tf.concat([lengths_src, lengths_tgt], axis=0)

            # Language labels for the hinge loss: +1 for source, -1 for target,
            # shaped like the corresponding sentiment labels.
            lang_labels_src = tf.broadcast_to([1], shape=labels_src.shape)
            lang_labels_tgt = tf.broadcast_to([-1], shape=labels_tgt.shape)
            lang_labels = tf.concat([lang_labels_src, lang_labels_tgt], axis=0)
            #print(lang_labels)

            history = EAFQ.fit(x=[inputs, lengths], y=lang_labels, epochs=1)

In [None]:
#@title Q results
# Re-compile so that the hinge value is also reported as a metric.
EAFQ.compile(
    optimizer=optimizers.Adam(opt.Q_learning_rate),
    loss='hinge',
    metrics=['accuracy', keras.metrics.Hinge()],
)
# Uses the src/tgt tensors and language labels left over from the last batch of
# the EAFQ training loop. Calls run in the order: src predict, src evaluate,
# tgt predict, tgt evaluate.
pred_src = EAFQ.predict([inputs_src, lengths_src])
eval_src = EAFQ.evaluate([inputs_src, lengths_src], lang_labels_src, verbose=2)
pred_tgt = EAFQ.predict([inputs_tgt, lengths_tgt])
eval_tgt = EAFQ.evaluate([inputs_tgt, lengths_tgt], lang_labels_tgt, verbose=2)
print(pred_src, '\n', eval_src, '\n', pred_tgt, '\n', eval_tgt)

In [None]:
#@title  P results : Unseen target data loss and  accuracy after F-Q training
# Evaluates on the target tensors left bound by the last iteration of the EAFQ
# training loop (they are not assigned in this cell).
tgt_model_inputs = [inputs_tgt, lengths_tgt]
EAFP.evaluate(tgt_model_inputs, labels_tgt, verbose=2)

In [54]:
#@title Setting the embeddings trainable
# Unfreeze the embedding layer first, then the E->A model that wraps it.
for unfrozen in (E, EA):
    unfrozen.trainable = True

In [56]:
#@title Trainable layers for EAFP training with trainable embeddings
# Same listing as in the fixed-embedding phase; compare the two outputs to see
# which variables the unfreeze added.
[tensor.name for tensor in EAFP.trainable_variables]

['embeddings:0',
 'DenseAbsExp_0/kernel:0',
 'DenseAbsExp_0/bias:0',
 'DenseAbsExp_1/kernel:0',
 'DenseAbsExp_1/bias:0',
 'DenseAbsExpFinal_1/kernel:0',
 'DenseAbsExpFinal_1/bias:0',
 'DenseAbsExp_0_1/kernel:0',
 'DenseAbsExp_0_1/bias:0',
 'DenseAbsExp_1_1/kernel:0',
 'DenseAbsExp_1_1/bias:0',
 'DenseSoftmax_1/kernel:0',
 'DenseSoftmax_1/bias:0']

In [None]:
#@title train.py : Training Embeddings, F and P : sparse categorical
# Re-compile so the model picks up the current `trainable` flags of E / EA.
EAFP.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
epochs = 5
if TRAIN1:
    # This phase trains the embeddings as well (see the cell title); the log
    # message previously still claimed "fixed embeddings".
    log.info('Training EAFP model with src data with trainable embeddings...')
    for epoch in trange(epochs):
        batch_no = 0
        for (inputs, lengths, labels) in train_src:
            log.info(f" Training on batch no : {batch_no}"); batch_no += 1
            # Loss on this batch before the weights are updated on it.
            print(scce(labels, EAFP.predict([inputs, lengths])))
            history = EAFP.fit(x=[inputs, lengths], y=labels, epochs=1)

INFO:__main__:Training EAFP model with src data with fixed embeddings...


  0%|          | 0/5 [00:00<?, ?it/s][A[AINFO:__main__:Training on batch no : 0


tf.Tensor(2.8555007, shape=(), dtype=float32)


INFO:__main__:Training on batch no : 1


tf.Tensor(8.91328, shape=(), dtype=float32)
 328/1563 [=====>........................] - ETA: 29:36 - loss: 1.2935 - accuracy: 0.4892

In [None]:
#@title Trainable layers for EAFQ training with trainable embeddings
# Names of the variables EAFQ exposes as trainable at this point.
[var.name for var in EAFQ.trainable_variables]

In [None]:
#@title train.py : Training Embeddings, F and Q : adversarial : hinge loss
#opt.Q_learning_rate = 1e-5
# Re-compile so the model picks up the current `trainable` flags of E / EA.
EAFQ.compile(optimizer=optimizers.Adam(opt.Q_learning_rate), loss='hinge', metrics=['accuracy'])
opt.Q_iterations = 5
if TRAIN1:
    # This phase trains the embeddings as well (see the cell title); the log
    # message previously still claimed "fixed embeddings".
    log.info('Training EAFQ model with src and tgt data with lang_labels and trainable embeddings...')
    for epoch in trange(opt.Q_iterations):
        batch_no = 0
        # Source and target batches are consumed in lockstep; zip stops at the shorter one.
        for (inputs_src, lengths_src, labels_src), (inputs_tgt, lengths_tgt, labels_tgt) in zip(train_src, train_tgt):
            log.info(f" Training on batch no : {batch_no}"); batch_no += 1
            inputs = tf.concat([inputs_src, inputs_tgt], axis=0)
            lengths = tf.concat([lengths_src, lengths_tgt], axis=0)
            # Language labels for the hinge loss: +1 for source, -1 for target.
            lang_labels_src = tf.broadcast_to([1], shape=labels_src.shape)
            lang_labels_tgt = tf.broadcast_to([-1], shape=labels_tgt.shape)
            lang_labels = tf.concat([lang_labels_src, lang_labels_tgt], axis=0)
            history = EAFQ.fit(x=[inputs, lengths], y=lang_labels, epochs=1)

In [None]:
#@title Trainable layers for LAN training with trainable embeddings
# Names of the variables the two-headed LAN model exposes as trainable.
[param.name for param in LAN.trainable_variables]

In [None]:
#@title train.py : Training F and Q : sparse categorical + adversarial : total loss
#opt.Q_learning_rate = 1e-5
# The language detector is compiled with its own learning rate, as in the other
# EAFQ training cells. This cell previously passed opt.learning_rate, which made
# opt.Q_learning_rate (and the override commented out above) have no effect here.
EAFQ.compile(optimizer=optimizers.Adam(opt.Q_learning_rate), loss='hinge', metrics=['accuracy'])
LAN.compile(optimizer=optimizers.Adam(opt.learning_rate), loss=total_loss, metrics=['accuracy'])
opt.Q_iterations = 5
if TRAIN1:
    # Both models are trained in this loop, with the embeddings unfrozen; the log
    # message previously named only EAFQ and claimed "fixed embeddings".
    log.info('Training EAFQ and LAN models with src and tgt data with lang_labels and trainable embeddings...')
    for epoch in trange(opt.Q_iterations):
        batch_no = 0
        for (inputs_src, lengths_src, labels_src), (inputs_tgt, lengths_tgt, labels_tgt) in zip(train_src, train_tgt):
            log.info(f" Training on batch no : {batch_no}"); batch_no += 1
            inputs = tf.concat([inputs_src, inputs_tgt], axis=0)
            lengths = tf.concat([lengths_src, lengths_tgt], axis=0)
            # NOTE(review): `labels` is not consumed by either fit() call below.
            labels = tf.concat([labels_src, labels_tgt], axis=0)
            # Language labels for the hinge loss: +1 for source, -1 for target.
            lang_labels_src = tf.broadcast_to([1], shape=labels_src.shape)
            lang_labels_tgt = tf.broadcast_to([-1], shape=labels_tgt.shape)
            lang_labels = tf.concat([lang_labels_src, lang_labels_tgt], axis=0)
            # Step 1: language detector on the mixed src+tgt batch.
            history_EAFQ = EAFQ.fit(x=[inputs, lengths], y=lang_labels, epochs=1)
            # Step 2: two-headed model on the labeled source batch only.
            history_LAN = LAN.fit(x=[inputs_src, lengths_src], y=[labels_src, lang_labels_src], epochs=1)

In [None]:
#@title Sentiment classifier results on unseen target data
# Evaluates on the target tensors left bound by the last training-loop iteration
# (they are not assigned in this cell).
tgt_model_inputs = [inputs_tgt, lengths_tgt]
EAFP.evaluate(x=tgt_model_inputs, y=labels_tgt)

In [None]:
#@title Overall LAN results on unseen target data
# One target per LAN output: sentiment labels for the P head, language labels for
# the Q head. All four tensors are leftovers of the last training-loop iteration.
tgt_model_inputs = [inputs_tgt, lengths_tgt]
tgt_targets = [labels_tgt, lang_labels_tgt]
LAN.evaluate(x=tgt_model_inputs, y=tgt_targets)

In [None]:
#@title train.py (original thesis model - incomplete) - DO NOT RUN

#!/usr/bin/env ipython
# foo = open('train.py', 'r'); foo.readline(); exec(foo.read()); foo.close()
#import torch
#import torch.nn as nn
#import torch.nn.functional as functional
#import torch.optim as optim
#from torch.utils.data import DataLoader
#from torchnet.meter import ConfusionMeter


import tensorflow as tf
from tensorflow.keras import optimizers, losses
import json

import os, random, sys, logging, argparse
from tqdm import tqdm
from pathlib import Path
# Switch the working directory to the folder containing this notebook.
# `__file__` is assigned by hand in the options cell at the top of the notebook,
# so that cell has to run first.
os.chdir(os.path.dirname(__file__))

#from options import *
#from data import *
#from vocab import *
#from utils import *
#from models import *
#from train_data import *

#tf.logging.set_verbosity(tf.logging.INFO)
#tf.logging.set_verbosity(True)

#random.seed(opt.random_seed)
#torch.manual_seed(opt.random_seed)

# save logs
# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(opt.model_save_file, exist_ok=True)
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG if opt.debug else logging.INFO)
log = logging.getLogger(__name__)
# Re-running this cell used to attach another FileHandler each time, so every
# message was written to log.txt once per re-run. Reuse an existing handler for
# the same file when there is one.
log_path = os.path.abspath(os.path.join(opt.model_save_file, 'log.txt'))
fh = next(
    (h for h in log.handlers if isinstance(h, logging.FileHandler) and h.baseFilename == log_path),
    None,
)
if fh is None:
    fh = logging.FileHandler(log_path)
    log.addHandler(fh)

#vars = ['vocab', 'train_src', 'dev_src', 'test_src', 'train_tgt', 'dev_tgt', 'test_tgt', 'train_src_Q', 'train_tgt_Q', 'train_src_Q_iter', 'train_tgt_Q_iter', 'length']
#for var in vars:
#    if var not in locals() and var not in globals(): print(var, 'not imported '); exit()
#    print(var, 'imported')


#def train(opt):
if __name__ == "__main__" and not TRAIN1:
    # Original thesis training loop (incomplete). Guarded by `not TRAIN1`, so it
    # is skipped whenever the notebook pipeline above is enabled.
    #
    # Train process:
    #   Require => labeled SOURCE corpus Xsrc; unlabeled TARGET corpus Xtgt;
    #              hyperparameters λ > 0, k ∈ N, c > 0; Lp(ŷ, y) cross-entropy loss.
    #   => Main iteration
    #   repeat
    #       => Q iterations
    #       for qiter = 1 to k do
    #           Sample unlabeled batch x_src ~ X_src
    #           Sample unlabeled batch x_tgt ~ X_tgt
    #           f_src = F(x_src)
    #           f_tgt = F(x_tgt)                      (feature vectors)
    #           lossq = -Q(f_src) + Q(f_tgt)
    #           Update Q parameters to minimize lossq
    #           ClipWeights(Q, -c, c)
    #
    #       => F&P iteration
    #       Sample labeled batch (x_src, y_src) ~ Xsrc
    #       Sample unlabeled batch x_tgt ~ Xtgt
    #       f_src = F(x_src)
    #       f_tgt = F(x_tgt)
    #       loss = Lp(P(f_src); y_src) + λ * (Q(f_src) - Q(f_tgt))
    #       Update F, P parameters to minimize loss
    #   until convergence
    def DEBUG(x):
        """Print a debug marker followed by `x`."""
        print('__DEBUG__ : ', x)

    # data
    if not opt.notebook:
        vocab, train_src, dev_src, test_src, train_tgt, dev_tgt, test_tgt, train_src_Q, train_tgt_Q, train_src_Q_iter, train_tgt_Q_iter, length = get_train_data(opt)

    # models
    log.info('Initializing models...')
    if opt.model.lower() == 'dan': F = DAN_Feature_Extractor(vocab, opt.F_layers, opt.hidden_size, opt.dropout, opt.F_bn)
    elif opt.model.lower() == 'lstm': F = LSTM_Feature_Extractor(vocab, opt.F_layers, opt.hidden_size, opt.dropout, opt.bdrnn, opt.attn)
    elif opt.model.lower() == 'cnn': F = CNN_Feature_Extractor(vocab, opt.F_layers, opt.hidden_size, opt.kernel_num, opt.kernel_sizes, opt.dropout)
    else: raise Exception('Unknown model')

    P = Sentiment_Classifier(opt.P_layers, opt.hidden_size, opt.num_labels, opt.dropout, opt.P_bn)
    Q = Language_Detector(opt.Q_layers, opt.hidden_size, opt.dropout, opt.Q_bn)
    log.info('Done...')

    optimizer_FP = Optimizer_FP(models=[F, P, Q], lr=opt.learning_rate, clip_lim=opt.clip_lim_FP)
    # `learning_rate` is the supported keyword of the Keras Adam optimizer; `lr`
    # is a deprecated alias.
    if not opt.clip_Q: optimizer_Q = optimizers.Adam(learning_rate=opt.Q_learning_rate)
    else: optimizer_Q = optimizers.Adam(learning_rate=opt.Q_learning_rate, clipvalue=opt.clipvalue)

    F.fcnet.compile(optimizer=optimizer_FP)
    P.net.compile(optimizer=optimizer_FP)
    Q.compile(optimizer=optimizer_Q)

    best_acc = 0.0
    # train tgt iterator
    train_tgt_iter = iter(train_tgt)
    log.info('Main Iteration begin...')
    # Main iterations
    for epoch in trange(opt.epochs):
        F.unfreeze()
        P.unfreeze()
        Q.unfreeze()
        F.freeze_emb_layer()

        # for training accuracy
        correct, total = 0, 0
        sum_src_q, sum_tgt_q = (0, 0.0), (0, 0.0)    # qiter number, loss_q
        grad_norm_p, grad_norm_q = (0, 0.0), (0, 0.0)

        # train src iterator
        train_src_iter = iter(train_src)
        log.info('Q iteration begin...')
        for i, (inputs_src, lengths_src, labels_src) in tqdm(enumerate(train_src_iter), total=(length['train_src'] + opt.batch_size - 1)//opt.batch_size):
            # sample batches: labeled (xsrc, ysrc) in Xsrc
            # sample unlabeled xtgt in Xtgt
            try:
                inputs_tgt, _, _ = next(train_tgt_iter)  # tgt labels not used
            except StopIteration:
                # Only iterator exhaustion triggers a restart; the previous bare
                # `except:` also swallowed every other error raised here.
                train_tgt_iter = iter(train_tgt)
                inputs_tgt, _, _ = next(train_tgt_iter)

            # Q iterations:
            q_critic = 1 #opt.q_critic
            #if q_critic>0 and ((epoch==0 and i<=25) or (i%500==0)): q_critic = 10
            #F.freeze()
            #P.freeze()
            #Q.unfreeze()
            #F.freeze_emb_layer()
            #Q.clip_weights()

            for qiter in range(q_critic):
                # sample unlabeled batches: xsrc in Xsrc, xtgt in Xtgt
                # get a minibatch of data
                try:
                    # labels are not used
                    inputs_src_Q, lengths_src_Q, _ = next(train_src_Q_iter)
                except StopIteration:
                    # check if dataloader is exhausted
                    train_src_Q_iter = iter(train_src_Q)
                    inputs_src_Q, lengths_src_Q, _ = next(train_src_Q_iter)
                try:
                    inputs_tgt_Q, lengths_tgt_Q, _ = next(train_tgt_Q_iter)
                except StopIteration:
                    train_tgt_Q_iter = iter(train_tgt_Q)
                    inputs_tgt_Q, lengths_tgt_Q, _ = next(train_tgt_Q_iter)

                DEBUG(""" extract features : f_src, f_tgt = F(x_src), F(x_tgt) """)
                features_src = F(inputs_src_Q, lengths_src_Q)
                features_tgt = F(inputs_tgt_Q, lengths_tgt_Q)

                # calculate loss_q : loss_q = -Q(f_src) + Q(f_tgt)
                DEBUG(""" update Q to minimise loss_q """)
                l_src_ad = Q.train_step(features_src, 'src', loss='scce')['loss']
                l_tgt_ad = Q.train_step(features_tgt, 'tgt', loss='scce')['loss']
                # summed Q losses
                #sum_src_q = (sum_src_q[0] + 1, sum_src_q[1] + l_src_ad)
                #sum_tgt_q = (sum_tgt_q[0] + 1, sum_tgt_q[1] + l_tgt_ad)

                DEBUG(""" clip Q weights """)
                #Q.clip_weights()

            log.info('Q iteration done.')

            # F&P iteration (not implemented yet: the update call is still commented out)
            #F.unfreeze()
            #P.unfreeze()
            #Q.freeze()
            #if opt.fix_emb: F.freeze_emb_layer()
            #elif epoch>3: F.unfreeze_emb_layer()
            #F.unfreeze_emb_layer()

            # extract features : f_src, f_tgt = F(x_src), F(x_tgt)
            DEBUG(""" calculate loss : loss = Lp(P(f_src); y_src) + λ * (Q(f_src) - Q(f_tgt)) """)
            #metrices = optimizer_FP.call(inputs_src, inputs_tgt, labels_src, labels_tgt=None, _lambda=opt._lambda, supervised=False)
            #pred = argmax32(o_src_sent)
            #total += len(labels_src)
            #correct += np.sum(pred == labels_src)

        #log.info('\n\nl_src_ad = \n' + str(l_src_ad))
        #log.info('\n\nl_tgt_ad = \n' + str(l_tgt_ad))
        #log.info(f'\n\n result :\n' + str(metrices))

    log.info('\nMain iteration done.')
    # Score the last Q-iteration feature batches once and reuse the two counts,
    # instead of re-running Q for every log line.
    q_src, q_tgt = Q(features_src), Q(features_tgt)
    n_src_below_tgt = np.sum(q_src < q_tgt)
    n_src_above_tgt = np.sum(q_src > q_tgt)
    log.info(f' (Q(features_src) < Q(features_tgt)) : {n_src_below_tgt}')
    log.info(f' (Q(features_src) > Q(features_tgt)) : {n_src_above_tgt}')
    log.info(f' Q precision in differentiating src-tgt : {n_src_above_tgt / (n_src_below_tgt + n_src_above_tgt)}')
    log.info(f' Q accuracy : unknown')
    #log.info(f'\n\n RESULT :\n' + str(metrices))

# train.py
#if __name__ == "__main__":
#    train(opt)