In [None]:
!pip install -q parlai
!pip install wandb

In [None]:
# !pip install git+https://github.com/thunlp/OpenMatch.git
!pip install transformers

In [None]:
# Import ParlAI's interactive-chat entry point, plus `os` for building the model path.
from parlai.scripts.interactive import Interactive
import os
# Project folder holding the checkpoint (presumably a mounted Google Drive path - confirm).
working_dir = "/content/drive/MyDrive/Dev/ECEN689/ECEN689"
print(working_dir)


# Launch an interactive session with the checkpoint stored under working_dir,
# overriding the decoding options (beam search, n-gram blocking) explicitly.
Interactive.main(
    model_file=os.path.join(working_dir, "from_pretrained/model"),
    # model_file='zoo:dodecadialogue/empathetic_dialogues_ft/model',
    inference="beam", 
    beam_size=5, beam_min_length=10, beam_block_ngram=3, beam_context_block_ngram=3
)
# Smoke test: this cell is used to try out my pre-trained transformer.

In [6]:
import logging
import sys
import numpy as np
from sklearn.metrics import ndcg_score

class User:
    """
    Simulated user that answers the agent from a database of conversations.

    ``dataset`` maps a conversation id to the ordered sequence of utterances of
    that conversation. The user answers a question by looking it up verbatim in
    the conversation and replying with the utterance that follows it.

    Attributes:
        dataset: mapping ``conversation_id -> sequence of utterances``.
        cq_reward: reward returned when the agent asks a useful clarifying question.
        cq_penalty: reward returned when the agent asks an irrelevant question.
        tolerance, patience: stored but not read by any method of this class.
        anger: counter reset to 0 by ``initialize_state``.
    """
    def __init__(self, dataset, cq_reward, cq_penalty, tolerance = 2, patience = 5):
        self.dataset = dataset
        self.tolerance = tolerance
        self.patience = patience
        self.anger = 0
        self.cq_reward = cq_reward
        self.cq_penalty = cq_penalty

    def respond_to_question(self, conversation_id, question):
        """
        Respond to an utterance within the context of a given conversation.

        Returns:
            0 if ``question`` does not occur in the conversation,
            1 if it is the last utterance of the conversation,
            otherwise the utterance that directly follows it.
            When the text occurs several times, the last occurrence is used.

        Raises:
            KeyError: if ``conversation_id`` is not in the dataset.
        """
        conversation = self.dataset[conversation_id]
        question_pos = None
        for pos, utterance in enumerate(conversation):
            if question == utterance:
                # No early exit on purpose: the last matching position wins.
                question_pos = pos

        if question_pos is None:
            return 0
        if question_pos + 1 < len(conversation):
            return conversation[question_pos + 1]
        # The question is the last utterance in the conversation.
        return 1

    def initialize_state(self, conversation_id):
        '''
        Reset the user state and return the initial query of the conversation.

        A single trailing newline is removed from the query. An empty (or
        non-indexable) first utterance yields ``''``. ``anger`` is reset in
        every case, including the empty-query one.
        '''
        initial_query = self.dataset[conversation_id][0]
        self.anger = 0
        try:
            ends_with_newline = initial_query[-1] == '\n'
        except (IndexError, TypeError):
            # Empty string or a value that cannot be indexed (e.g. None).
            return ''
        return initial_query[:-1] if ends_with_newline else initial_query

    def update_state(self, conversation_id, context, action, top_n_question, top_n_answer, use_top_k):
        '''
        Read the agent action, update the context and compute the reward.

        Args:
            conversation_id: key of the current conversation in the dataset.
            context: conversation text so far.
            action: 0 (return an answer) or 1 (ask a clarifying question).
            top_n_question: ranked candidate clarifying questions.
            top_n_answer: ranked candidate answers.
            use_top_k: a matching question only counts if its rank is below this.

        Returns:
            ``(context_, reward, done, question, patience_used)``.
            For action 0 ``question`` is None and ``done`` is True.
            Any other action value falls through and returns None.
        '''
        patience_used = 0
        if action == 0:
            # Agent answers: score the ranked answers with a reciprocal-rank sum.
            context_ = context + ' [SEP] ' + top_n_answer[0]
            reward = 0
            for rank, answer in enumerate(top_n_answer, start=1):
                try:
                    rel = self.respond_to_question(conversation_id, answer)
                except Exception:
                    print("Error in conversation: " + str(conversation_id))
                    continue
                # Only numeric relevance (0 or 1) counts; a text reply means the
                # candidate was a mid-conversation utterance and contributes 0.
                if isinstance(rel, (int, float)):
                    reward += rel / rank

            if reward > 1:
                reward = 1
            return context_, reward, True, None, patience_used
        elif action == 1:
            # Agent asks a clarifying question: find the first candidate that the
            # conversation actually contains a reply for.
            done = False
            correct_question_id = -1
            user_response_text = ''
            for qid, candidate in enumerate(top_n_question):
                response = self.respond_to_question(conversation_id, candidate)
                if isinstance(response, int):
                    continue
                correct_question_id = qid
                user_response_text = response
                break
            if 0 <= correct_question_id <= (use_top_k - 1):
                reward = self.cq_reward
                context_ = context + ' [SEP] ' + top_n_question[correct_question_id] + ' [SEP] ' + user_response_text
                patience_used = correct_question_id
            else:
                # The agent asks a bad question.
                reward = self.cq_penalty
                done = True
                context_ = context + ' [SEP] ' + top_n_question[0] + ' [SEP] ' + 'This question is not relevant.'
            # NOTE(review): when no candidate matched, correct_question_id is -1
            # and this returns the LAST candidate; kept as-is because callers
            # may rely on it - confirm whether None was intended.
            return context_, reward, done, top_n_question[correct_question_id], patience_used


In [7]:
import json
import csv
import re
import glob

class ConversationDataset():
    '''
    The conversation database class.

    Reads every file matching ``path_to_dataset + '*'`` (sorted, truncated to
    ``max_size`` files) and groups them into batches of ``batch_size`` files.
    Each batch is a dict with:
        'conversations':  file name -> list of stripped lines (utterances),
        'responses_pool': utterances at odd positions, excluding a last line,
        'answers_pool':   the last line of every conversation.
    '''
    def __init__(self, path_to_dataset, batch_size, max_size):
        self.batches = []
        self.max_len = 512
        print("Reading data from", path_to_dataset, "batch size", batch_size)
        all_data_list = sorted(glob.glob(path_to_dataset + '*'))[:max_size]
        files_in_batch = 0
        for data_file in all_data_list:
            # Context manager so the handle is closed even if reading fails.
            with open(data_file) as f:
                data = [line.strip() for line in f]
            if not data:
                # An empty file has no final answer; skip it instead of
                # crashing on data[-1].
                print("Skipping empty conversation file", data_file)
                continue
            data_id = data_file.split('/')[-1]
            if files_in_batch == 0:
                self.batches.append({'conversations':{}, 'responses_pool':[], 'answers_pool':[]})

            current = self.batches[-1]
            current['conversations'][data_id] = data
            last_turn = len(data) - 1
            # Odd positions strictly before the last line.
            current['responses_pool'].extend(data[ut_num] for ut_num in range(1, last_turn, 2))
            current['answers_pool'].append(data[-1])
            files_in_batch += 1
            if files_in_batch == batch_size:
                files_in_batch = 0

In [14]:
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch as T
import transformers
import warnings 
import random
import time
warnings.filterwarnings("ignore")

class LinearDeepQNetwork(nn.Module):
    '''
    Two-layer Q network used by the agents: Linear -> ReLU -> Linear.

    The module owns its Adam optimizer (``self.optimizer``), an MSE loss
    (``self.loss``) and the device it was moved to (``self.device``).
    ``lr_decay`` is accepted but not used.
    '''
    def __init__(self, lr, lr_decay, weight_decay, n_actions, input_dims, hidden_size = 16):
        super().__init__()
        # Layer creation order is kept (fc1, then fc2) so seeded initialisation is unchanged.
        self.fc1 = nn.Linear(input_dims, hidden_size)
        self.fc2 = nn.Linear(hidden_size, n_actions)

        self.optimizer = optim.Adam(self.parameters(), lr=lr, weight_decay = weight_decay)
        self.loss = nn.MSELoss()
        # Prefer the GPU when one is visible, otherwise stay on the CPU.
        if T.cuda.is_available():
            self.device = T.device('cuda')
        else:
            self.device = T.device('cpu')
        self.to(self.device)

    def forward(self, state):
        '''Return one raw (unnormalised) score per action for ``state``.'''
        return self.fc2(F.relu(self.fc1(state)))

class LinearDeepNetwork(nn.Module):
    '''
    Two-layer classifier used by the baseline agent: Linear -> ReLU -> Linear -> softmax.

    The module owns its Adam optimizer (``self.optimizer``), an MSE loss
    (``self.loss``) and the device it was moved to (``self.device``).
    ``lr_decay`` is accepted but not used.
    '''
    def __init__(self, lr, lr_decay, weight_decay, n_actions, input_dims, hidden_size = 16):
        super(LinearDeepNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dims, hidden_size)
        self.fc2 = nn.Linear(hidden_size, n_actions)

        self.optimizer = optim.Adam(self.parameters(), lr=lr, weight_decay = weight_decay)
        self.loss = nn.MSELoss()
        self.device = T.device('cuda' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state):
        '''Return a probability distribution over actions for ``state``.'''
        hidden = F.relu(self.fc1(state))
        # Normalise over the action axis explicitly. Without `dim`, PyTorch
        # falls back to a deprecated implicit choice that is dim 0 for 3-D input.
        score = F.softmax(self.fc2(hidden), dim=-1)

        return score


class Agent():
    '''
    The conversational QA agent.

    Chooses between action 0 (return an answer) and action 1 (ask a clarifying
    question) with an epsilon-greedy policy over a LinearDeepQNetwork, and
    learns from a replay list of past experiences.

    A "state" is the 6-tuple
    ``(query_embedding, context_embedding, questions_embeddings,
    answers_embeddings, question_scores, answer_scores)``; a terminal next state
    carries ``None`` for the question/answer embeddings.
    '''
    def __init__(self, input_dims, n_actions, lr, gamma=0.25, lr_decay = 1e-10, weight_decay = 1e-3,
                 epsilon=1.0, eps_dec=1e-3, eps_min=0.01, top_k = 1, data_augment = 10):
        self.lr = lr
        self.lr_decay = lr_decay
        self.input_dims = input_dims
        self.n_actions = n_actions
        self.gamma = gamma
        self.weight_decay = weight_decay
        self.epsilon = epsilon
        self.eps_dec = eps_dec
        self.eps_min = eps_min
        self.top_k = top_k
        self.data_augment = data_augment
        self.action_space = [i for i in range(self.n_actions)]
        self.experiences = []
        self.experiences_replay_times = 3
        self.loss_history = []

        self.Q = LinearDeepQNetwork(self.lr, self.lr_decay, self.weight_decay, self.n_actions, self.input_dims)
        # Use the device the network picked (CUDA when available, else CPU)
        # instead of hard-coding "cuda", which fails on CPU-only machines.
        self.device = self.Q.device
        self.Q.to(self.device)

    def _encode_state(self, query_embedding, context_embedding, questions_embeddings, answers_embeddings, question_scores, answer_scores):
        '''
        Concatenate one state into a single float vector on ``self.device``.

        Layout: query, context, top-k question embeddings, first answer
        embedding, top-k question scores, first answer score.
        Assumes every part is 1-D (they are concatenated along dim 0).
        '''
        parts = [query_embedding, context_embedding, questions_embeddings[0]]
        parts.extend(questions_embeddings[i] for i in range(1, self.top_k))
        parts.append(answers_embeddings[0])
        parts.append(T.as_tensor(question_scores)[:self.top_k])
        parts.append(T.as_tensor(answer_scores)[:1])
        # Bring every part to one dtype/device before concatenating.
        parts = [T.as_tensor(p, dtype=T.float, device=self.device) for p in parts]
        return T.cat(parts, dim=0).detach()

    def choose_action(self, query_embedding, context_embedding, questions_embeddings, answers_embeddings, question_scores, answer_scores):
        '''Epsilon-greedy action: argmax of Q with probability 1 - epsilon, else random.'''
        state = self._encode_state(query_embedding, context_embedding, questions_embeddings,
                                   answers_embeddings, question_scores, answer_scores)

        if np.random.random() > self.epsilon:
            # Pure inference: no autograd graph needed.
            with T.no_grad():
                actions = self.Q.forward(state)
            action = T.argmax(actions).item()
        else:
            action = np.random.choice(self.action_space)
        return action

    def decrement_epsilon(self):
        '''Lower epsilon by ``eps_dec``, never going below ``eps_min``.'''
        self.epsilon = max(self.epsilon - self.eps_dec, self.eps_min)

    def joint_learn(self, state, a_reward, q_reward, state_):
        '''
        Store one experience and run a few replayed Q-learning updates.

        The target is ``[a_reward, q_reward + gamma * max Q(state_)]``, or
        ``[a_reward, q_reward]`` when ``state_`` is terminal. Experiences where
        asking beats answering (``a_reward < q_reward``) are stored
        ``data_augment`` extra times. Epsilon is decremented once per call.
        '''
        # save to experiences for experience replay
        experience = [state, a_reward, q_reward, state_]
        self.experiences.append(experience)

        if a_reward < q_reward:
            for _ in range(self.data_augment):
                self.experiences.append(experience)

        # sample from past experiences, then always train on the newest one too
        exps = random.sample(self.experiences, min(self.experiences_replay_times, len(self.experiences)))
        exps.append(experience)

        for exp_state, exp_a_reward, exp_q_reward, exp_state_ in exps:
            # The current state is encoded from its own tuple; the original
            # reused the next state's query embedding here.
            states = self._encode_state(*exp_state[:6])
            has_next = exp_state_[2] is not None and exp_state_[3] is not None

            self.Q.optimizer.zero_grad()
            pred = self.Q.forward(states)

            q_value = float(exp_q_reward)
            if has_next:
                states_ = self._encode_state(*exp_state_[:6])
                # The bootstrap value is a constant target: no gradient through it.
                with T.no_grad():
                    q_value += self.gamma * self.Q.forward(states_).max().item()
            q_target = T.tensor([float(exp_a_reward), q_value], dtype=T.float, device=self.device)

            # MSELoss takes (input, target).
            loss = self.Q.loss(pred, q_target)
            # l1 penalty
            l1 = sum(p.abs().sum() for p in self.Q.parameters())

            loss = loss + self.weight_decay * l1
            self.loss_history.append(loss.item())
            loss.backward()
            self.Q.optimizer.step()

        self.decrement_epsilon()


class BaseAgent():
    '''
    The Baseline conversational QA agent.

    A supervised two-way classifier over the concatenated query and context
    embeddings; it always acts greedily (no exploration).
    '''
    def __init__(self, input_dims, n_actions, lr, lr_decay = 1e-10, weight_decay = 1e-3):
        self.lr = lr
        self.lr_decay = lr_decay
        self.input_dims = input_dims
        self.n_actions = n_actions
        self.weight_decay = weight_decay
        self.loss_history = []

        self.Q = LinearDeepNetwork(self.lr, self.lr_decay, self.weight_decay, self.n_actions, self.input_dims)
        # Use the device the network picked (CUDA when available, else CPU)
        # instead of hard-coding "cuda", which fails on CPU-only machines.
        self.device = self.Q.device
        self.Q.to(self.device)

    def _encode_state(self, query_embedding, context_embedding):
        '''Concatenate query and context embeddings into one float vector on ``self.device``.'''
        parts = [T.as_tensor(p, dtype=T.float, device=self.device)
                 for p in (query_embedding, context_embedding)]
        return T.cat(parts, dim=0).detach()

    def choose_action(self, query_embedding, context_embedding):
        '''Return the index of the highest-scoring action for the given state.'''
        state = self._encode_state(query_embedding, context_embedding)
        with T.no_grad():
            actions = self.Q.forward(state)
        action = T.argmax(actions).item()

        return action

    def learn(self, query_embedding, context_embedding, true_label):
        '''
        Take one gradient step towards the one-hot encoding of ``true_label``.

        ``true_label == 0`` maps to target ``[1, 0]``; any other value maps to
        ``[0, 1]``. The loss (MSE plus an L1 penalty weighted by
        ``weight_decay``) is appended to ``loss_history``.
        '''
        states = self._encode_state(query_embedding, context_embedding)

        self.Q.optimizer.zero_grad()

        pred = self.Q.forward(states)
        # Float target: an integer tensor does not match the float prediction in MSELoss.
        one_hot = [1.0, 0.0] if true_label == 0 else [0.0, 1.0]
        q_target = T.tensor(one_hot, dtype=T.float, device=self.device)
        # MSELoss takes (input, target).
        loss = self.Q.loss(pred, q_target)

        # l1 penalty
        l1 = sum(p.abs().sum() for p in self.Q.parameters())

        loss = loss + self.weight_decay * l1

        self.loss_history.append(loss.item())

        loss.backward()
        self.Q.optimizer.step()
            


class ScoreAgent():
    '''
    Agent that uses only the ranking scores.

    The state vector is the top-k question scores followed by the top answer
    score. A "state" passed to ``joint_learn`` is the pair
    ``(question_scores, answer_scores)``; a terminal next state carries ``None``
    for either entry.
    '''
    def __init__(self, input_dims, n_actions, lr, gamma=0.25, lr_decay = 1e-10, weight_decay = 1e-3,
                 epsilon=1.0, eps_dec=1e-3, eps_min=0.01, top_k = 1, data_augment = 10):
        self.lr = lr
        self.lr_decay = lr_decay
        self.input_dims = input_dims
        self.n_actions = n_actions
        self.gamma = gamma
        self.weight_decay = weight_decay
        self.epsilon = epsilon
        self.eps_dec = eps_dec
        self.eps_min = eps_min
        self.top_k = top_k
        self.data_augment = data_augment
        self.action_space = [i for i in range(self.n_actions)]
        self.experiences = []
        self.experiences_replay_times = 3
        self.loss_history = []

        self.Q = LinearDeepQNetwork(self.lr, self.lr_decay, self.weight_decay, self.n_actions, self.input_dims)
        # Use the device the network picked (CUDA when available, else CPU)
        # instead of hard-coding "cuda", which fails on CPU-only machines.
        self.device = self.Q.device
        self.Q.to(self.device)

    def _encode_state(self, question_scores, answer_scores):
        '''Build the float state vector ``[top-k question scores, top answer score]`` on ``self.device``.'''
        q_scores = T.as_tensor(question_scores, dtype=T.float, device=self.device)
        a_scores = T.as_tensor(answer_scores, dtype=T.float, device=self.device)
        return T.cat((q_scores[:self.top_k], a_scores[:1]), dim=0).detach()

    def choose_action(self, question_scores, answer_scores):
        '''Epsilon-greedy action: argmax of Q with probability 1 - epsilon, else random.'''
        state = self._encode_state(question_scores, answer_scores)

        if np.random.random() > self.epsilon:
            # The random number is greater than the exploration threshold:
            # choose the action maximizing Q.
            with T.no_grad():
                actions = self.Q.forward(state)
            action = T.argmax(actions).item()
        else:
            # Explore: randomly choose an action.
            action = np.random.choice(self.action_space)
        return action

    def decrement_epsilon(self):
        '''Lower epsilon by ``eps_dec``, never going below ``eps_min``.'''
        self.epsilon = max(self.epsilon - self.eps_dec, self.eps_min)

    def joint_learn(self, state, a_reward, q_reward, state_):
        '''
        Store one experience and run a few replayed Q-learning updates.

        The target is ``[a_reward, q_reward + gamma * max Q(state_)]``, or
        ``[a_reward, q_reward]`` when ``state_`` is terminal. Experiences where
        asking beats answering (``a_reward < q_reward``) are stored
        ``data_augment`` extra times. Epsilon is decremented once per call.
        '''
        # save to experiences for experience replay
        experience = [state, a_reward, q_reward, state_]
        self.experiences.append(experience)

        if a_reward < q_reward:
            for _ in range(self.data_augment):
                self.experiences.append(experience)

        # sample from past experiences, then always train on the newest one too
        exps = random.sample(self.experiences, min(self.experiences_replay_times, len(self.experiences)))
        exps.append(experience)

        for exp_state, exp_a_reward, exp_q_reward, exp_state_ in exps:
            states = self._encode_state(exp_state[0], exp_state[1])
            has_next = exp_state_[0] is not None and exp_state_[1] is not None

            self.Q.optimizer.zero_grad()
            pred = self.Q.forward(states)

            q_value = float(exp_q_reward)
            if has_next:
                states_ = self._encode_state(exp_state_[0], exp_state_[1])
                # The bootstrap value is a constant target: no gradient through it.
                with T.no_grad():
                    q_value += self.gamma * self.Q.forward(states_).max().item()
            q_target = T.tensor([float(exp_a_reward), q_value], dtype=T.float, device=self.device)

            # MSELoss takes (input, target).
            loss = self.Q.loss(pred, q_target)
            # l1 penalty
            l1 = sum(p.abs().sum() for p in self.Q.parameters())

            loss = loss + self.weight_decay * l1
            self.loss_history.append(loss.item())
            loss.backward()
            self.Q.optimizer.step()
        self.decrement_epsilon()



class TextAgent():
    '''
    Agent that uses only the encoded text.

    The state vector is the query embedding, the context embedding, the top-k
    question embeddings and the first answer embedding. A "state" passed to
    ``joint_learn`` is the tuple ``(query_embedding, context_embedding,
    questions_embeddings, answers_embeddings)``; a terminal next state carries
    ``None`` for the question/answer embeddings.
    '''
    def __init__(self, input_dims, n_actions, lr, gamma=0.25, lr_decay = 1e-10, weight_decay = 1e-3,
                 epsilon=1.0, eps_dec=1e-3, eps_min=0.01, top_k = 1, data_augment = 10):
        self.lr = lr
        self.lr_decay = lr_decay
        self.input_dims = input_dims
        self.n_actions = n_actions
        self.gamma = gamma
        self.weight_decay = weight_decay
        self.epsilon = epsilon
        self.eps_dec = eps_dec
        self.eps_min = eps_min
        self.top_k = top_k
        self.data_augment = data_augment
        self.action_space = [i for i in range(self.n_actions)]
        self.experiences = []
        self.experiences_replay_times = 3
        self.loss_history = []

        self.Q = LinearDeepQNetwork(self.lr, self.lr_decay, self.weight_decay, self.n_actions, self.input_dims)
        # Use the device the network picked (CUDA when available, else CPU)
        # instead of hard-coding "cuda", which fails on CPU-only machines.
        self.device = self.Q.device
        self.Q.to(self.device)

    def _encode_state(self, query_embedding, context_embedding, questions_embeddings, answers_embeddings):
        '''
        Concatenate one state into a single float vector on ``self.device``.

        Layout: query, context, top-k question embeddings, first answer embedding.
        Assumes every part is 1-D (they are concatenated along dim 0).
        '''
        parts = [query_embedding, context_embedding, questions_embeddings[0]]
        parts.extend(questions_embeddings[i] for i in range(1, self.top_k))
        parts.append(answers_embeddings[0])
        # Bring every part to one dtype/device before concatenating.
        parts = [T.as_tensor(p, dtype=T.float, device=self.device) for p in parts]
        return T.cat(parts, dim=0).detach()

    def choose_action(self, query_embedding, context_embedding, questions_embeddings, answers_embeddings):
        '''Epsilon-greedy action: argmax of Q with probability 1 - epsilon, else random.'''
        state = self._encode_state(query_embedding, context_embedding, questions_embeddings, answers_embeddings)

        if np.random.random() > self.epsilon:
            # The random number is greater than the exploration threshold:
            # choose the action maximizing Q.
            with T.no_grad():
                actions = self.Q.forward(state)
            action = T.argmax(actions).item()
        else:
            # Explore: randomly choose an action.
            action = np.random.choice(self.action_space)
        return action

    def decrement_epsilon(self):
        '''Lower epsilon by ``eps_dec``, never going below ``eps_min``.'''
        self.epsilon = max(self.epsilon - self.eps_dec, self.eps_min)

    def joint_learn(self, state, a_reward, q_reward, state_):
        '''
        Store one experience and run a few replayed Q-learning updates.

        The target is ``[a_reward, q_reward + gamma * max Q(state_)]``, or
        ``[a_reward, q_reward]`` when ``state_`` is terminal. Experiences where
        asking beats answering (``a_reward < q_reward``) are stored
        ``data_augment`` extra times. Epsilon is decremented once per call.
        '''
        # save to experiences for experience replay
        experience = [state, a_reward, q_reward, state_]
        self.experiences.append(experience)

        if a_reward < q_reward:
            for _ in range(self.data_augment):
                self.experiences.append(experience)

        # sample from past experiences, then always train on the newest one too
        exps = random.sample(self.experiences, min(self.experiences_replay_times, len(self.experiences)))
        exps.append(experience)

        for exp_state, exp_a_reward, exp_q_reward, exp_state_ in exps:
            # The current state is encoded from its own tuple; the original
            # reused the next state's query embedding here.
            states = self._encode_state(*exp_state[:4])
            has_next = exp_state_[2] is not None and exp_state_[3] is not None

            self.Q.optimizer.zero_grad()
            pred = self.Q.forward(states)

            q_value = float(exp_q_reward)
            if has_next:
                states_ = self._encode_state(*exp_state_[:4])
                # The bootstrap value is a constant target: no gradient through it.
                with T.no_grad():
                    q_value += self.gamma * self.Q.forward(states_).max().item()
            q_target = T.tensor([float(exp_a_reward), q_value], dtype=T.float, device=self.device)

            # MSELoss takes (input, target).
            loss = self.Q.loss(pred, q_target)
            # l1 penalty
            l1 = sum(p.abs().sum() for p in self.Q.parameters())

            loss = loss + self.weight_decay * l1
            self.loss_history.append(loss.item())
            loss.backward()
            self.Q.optimizer.step()
        self.decrement_epsilon()

In [28]:
cp /content/drive/MyDrive/Dev/ECEN689/ECEN689/interactive.py ./

In [29]:
import logging
import numpy as np
import random
import json
import resource
import csv
from transformers import AutoTokenizer, AutoModel
from scipy.special import softmax
import sys
import time
from interactive import Interactive, rerank
from copy import deepcopy
import argparse
import psutil


# Size of one text embedding; main() multiplies it to compute the agents' input_dims.
observation_dim = 768
# Number of actions passed to the agents as n_actions.
action_num = 2
# Reward for a good clarifying question, and the derived penalty (cq_reward - 1) for a bad one.
cq_reward = 0.11
cq_penalty = cq_reward - 1
# Discount factor handed to the agents in main(); defined as the negated penalty.
agent_gamma = -cq_penalty
train_iter = 50
batch_size = 100
max_round = 5 # maximum number of conversation rounds
max_train_size = 10000
# Test set size is a quarter of the training set size.
max_test_size = int(0.25*max_train_size)

def limit_memory(maxsize):
    """Set the address-space soft limit (RLIMIT_AS) of the current process.

    Args:
        maxsize: desired soft limit in bytes. Floats such as ``1e11`` are
            accepted and converted to int, because ``resource.setrlimit``
            requires integers.

    The hard limit is left unchanged. A request above a finite hard limit is
    clamped to the hard limit instead of raising ValueError.
    """
    _, hard = resource.getrlimit(resource.RLIMIT_AS)
    new_soft = int(maxsize)
    if hard != resource.RLIM_INFINITY and (new_soft == resource.RLIM_INFINITY or new_soft > hard):
        new_soft = hard
    resource.setrlimit(resource.RLIMIT_AS, (new_soft, hard))

def generate_embedding_no_grad(text, tokenizer, embedding_model):
    """Embed ``text`` as the model output at the first token position, without gradients.

    Args:
        text: string to embed.
        tokenizer: object with ``encode(text, add_special_tokens=True)``
            returning a list of token ids.
        embedding_model: callable taking a ``(1, seq_len)`` id tensor; its
            first output is assumed to be ``(1, seq_len, dim)`` - TODO confirm
            for the model actually used.

    Returns:
        The ``(dim,)`` vector at batch 0, position 0.
    """
    with T.no_grad():
        token_ids = T.tensor([tokenizer.encode(text, add_special_tokens=True)])
        hidden_states = embedding_model(token_ids)[0]
        # Index batch 0 / position 0 explicitly. squeeze() would also drop a
        # length-1 sequence axis and make the following [0] return a scalar.
        return hidden_states[0, 0]

def read_from_memory(query, context, memory):
    """Fetch the cached entry for ``(query, context)`` from ``memory``.

    Returns an 8-tuple: query embedding, context embedding, questions,
    answers, questions embeddings, answers embeddings, questions scores,
    answers scores. Raises KeyError when the query, context or a field is
    missing.
    """
    query_entry = memory[query]
    query_embedding = query_entry['embedding']
    context_entry = query_entry[context]
    fields = (
        'embedding',
        'questions',
        'answers',
        'questions_embeddings',
        'answers_embeddings',
        'questions_scores',
        'answers_scores',
    )
    return (query_embedding,) + tuple(context_entry[name] for name in fields)

def save_to_memory(query, context, memory, questions, answers, questions_scores, answers_scores, tokenizer, embedding_model, n_questions=3):
    """Cache the retrieval results and embeddings for ``(query, context)``.

    Args:
        query, context: strings used as the two-level key into ``memory``.
        memory: dict updated in place (and returned).
        questions, answers: ranked candidate texts.
        questions_scores, answers_scores: ranking scores, stored as tensors.
        tokenizer, embedding_model: passed to ``generate_embedding_no_grad``.
        n_questions: how many leading questions to embed (default 3, the
            previously hard-coded value). Fewer are embedded when fewer
            questions are given.

    Returns:
        The same ``memory`` dict.
    """
    def embed(text):
        return generate_embedding_no_grad(text, tokenizer, embedding_model)

    if query not in memory:
        # Build the entry fully before storing it, so a failed embedding
        # cannot leave an entry without its 'embedding' key.
        memory[query] = {'embedding': embed(query)}

    entry = {
        'embedding': embed(context),
        'questions_embeddings': [embed(question) for question in questions[:n_questions]],
        # Only the top-ranked answer is embedded.
        'answers_embeddings': [embed(answers[0])],
        'questions': questions,
        'answers': answers,
        'questions_scores': T.tensor(questions_scores),
        'answers_scores': T.tensor(answers_scores),
    }
    memory[query][context] = entry
    return memory

def generate_batch_question_candidates(batch, conversation_id, ignore_questions, total_candidates):
    """Build the clarifying-question candidate list for one conversation.

    Positives are the conversation's own utterances at odd positions
    (excluding a last line), minus ``ignore_questions``. They come first and
    are padded with other responses from the batch pool up to
    ``total_candidates`` entries. All remaining positives are kept even if
    they alone exceed ``total_candidates``; no negatives are added then.
    """
    conversation = batch['conversations'][conversation_id]
    last_turn = len(conversation) - 1
    positives = [utterance for turn_id, utterance in enumerate(conversation)
                 if turn_id % 2 == 1 and turn_id != last_turn]
    filtered_positives = [cand for cand in positives if cand not in ignore_questions]
    # Clamp at 0: a negative slice bound would cut from the END of the list
    # and return almost every negative instead of none.
    n_negatives = max(0, total_candidates - len(filtered_positives))
    negatives = [response for response in batch['responses_pool'] if response not in positives][:n_negatives]
    return filtered_positives + negatives

def generate_batch_answer_candidates(batch, conversation_id, total_candidates):
    """Build the answer candidate list for one conversation.

    The conversation's last line is the single positive and always comes
    first. It is padded with other answers from the batch pool up to
    ``total_candidates`` entries.
    """
    positives = [batch['conversations'][conversation_id][-1]]
    # Clamp at 0: a negative slice bound would cut from the END of the list
    # and return negatives when none were requested.
    n_negatives = max(0, total_candidates - len(positives))
    negatives = [answer for answer in batch['answers_pool'] if answer not in positives][:n_negatives]
    return positives + negatives

def main(args):
    """Set up the experiment: logging, memory cap, seed, agents and the reranker.

    Args:
        args: object with attributes ``cv`` (``-1`` selects the else branch of
            the dataset setup) and ``topn`` (number of top questions; sizes
            the agents' inputs and is passed as ``top_k``).

    The dataset loading is still a placeholder (both branches assign None).
    Relies on the notebook global ``working_dir`` defined in an earlier cell.
    """
    import os  # local import: this cell does not import os itself

    logging.getLogger().setLevel(logging.INFO)
    # setrlimit needs an integer, so the limit is not passed as the float 1e11.
    limit_memory(int(1e11))

    random.seed(2020)
    somestuff = None
    if args.cv != -1:
        train_dataset = somestuff
        test_dataset = somestuff
    else:
        train_dataset = somestuff
        # Was `seomstuff`, an undefined name that raised NameError on this branch.
        test_dataset = somestuff
    # Full agent: query, context, top-n question and top answer embeddings,
    # plus top-n question scores and one answer score.
    agent = Agent(lr = 1e-4,
                  input_dims = (3 + args.topn) * observation_dim + 1 + args.topn,
                  top_k = args.topn,
                  n_actions=action_num, gamma = agent_gamma, weight_decay = 0.01)
    # Scores-only agent.
    score_agent = ScoreAgent(lr = 1e-4,
                             input_dims = 1 + args.topn,
                             top_k = args.topn,
                             n_actions=action_num,
                             gamma = agent_gamma,
                             weight_decay = 0.0)
    # Embeddings-only agent.
    text_agent = TextAgent(lr = 1e-4,
                           input_dims = (3 + args.topn) * observation_dim,
                           top_k = args.topn, n_actions=action_num,
                           gamma = agent_gamma,
                           weight_decay = 0.01)
    # Supervised baseline over query + context embeddings.
    base_agent = BaseAgent(lr = 1e-4,
                           input_dims = 2 * observation_dim,
                           n_actions = 2, weight_decay = 0.01)
    question_reranker = Interactive.main(
        model_file=os.path.join(working_dir, "from_pretrained/model"),
    )
    



In [34]:
# print(Interactive.help(model_file=os.path.join(working_dir, "from_pretrained/model")))
# Start interactive mode with only the checkpoint path given (no decoding overrides);
# relies on `Interactive`, `os` and `working_dir` from earlier cells.
Interactive.main(
    model_file=os.path.join(working_dir, "from_pretrained/model")
)

22:19:16 | [33mOverriding opt["model_file"] to /content/drive/MyDrive/Dev/ECEN689/ECEN689/from_pretrained/model (previously: from_pretrained/model)[0m
22:19:16 | Using CUDA
22:19:16 | loading dictionary from /content/drive/MyDrive/Dev/ECEN689/ECEN689/from_pretrained/model.dict
22:19:16 | num words = 54944
22:19:16 | TransformerGenerator: full interactive mode on.
22:19:18 | Total parameters: 87,508,992 (87,508,992 trainable)
22:19:18 | Loading existing model params from /content/drive/MyDrive/Dev/ECEN689/ECEN689/from_pretrained/model
[1;31mEnter [DONE] if you want to end the episode, [EXIT] to quit.[0;0m
22:19:35 | creating task(s): interactive


<parlai.tasks.interactive.worlds.InteractiveWorld at 0x7f9cb716ac90>