# Interview experiments for blenderbot

In order to get Freja to act as an interviewer we have to constrain her. Blenderbot (the model) is trained as an open-domain chatbot, which makes her very exploratory when it comes to asking questions.

#### Approach:
- Hard code a set of interview questions
- System for determining when it's time to move on to the next question
- Experiment with the dialogue context she gets at each pass of the conversation


## Google translate for translating text back and forth

In [1]:
from googletrans import Translator

# Single googletrans client shared by the translation helpers in this file.
translator = Translator()
def sv_to_en(text):
    """Translate Swedish text to English with the shared `translator`.

    Returns the `.text` attribute of the translation result.
    """
    return translator.translate(text, src='sv', dest='en').text

def en_to_sv(text):
    """Translate English text to Swedish with the shared `translator`.

    Returns the `.text` attribute of the translation result.
    """
    return translator.translate(text, src='en', dest='sv').text


# Blenderbot

In [2]:
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
from torch import no_grad
# Name of the pretrained checkpoint; the model and the tokenizer below are both
# loaded from it so that they match.
mname = 'facebook/blenderbot-1B-distill' # options: 'facebook/blenderbot_small-90M' , 'facebook/blenderbot-400M-distill' ,'facebook/blenderbot-3B'
# Module-level model and tokenizer; the classes and helper functions in this
# file refer to these two names directly.
model = BlenderbotForConditionalGeneration.from_pretrained(mname)
tokenizer = BlenderbotTokenizer.from_pretrained(mname)

## BlenderConversation is a class for storing the conversation with blenderbot

In [15]:
class BlenderConversation:
    """Turn-by-turn record of a conversation between a user and the bot.

    User and bot utterances are stored in two parallel lists and must be added
    strictly alternately, starting with the user. `user_turn` tracks whose
    turn it is, and the add/pop methods keep it in sync with the lists.
    """

    def __init__(self, lang, description='No description'):
        self.lang = lang                # 'sv' selects the Swedish default file name in to_txt
        self.description = description  # header line written by to_txt
        self.bot_text = []              # bot replies, oldest first
        self.user_text = []             # user inputs, oldest first
        self.user_turn = True           # True when the next utterance must come from the user

    def add_user_text(self, text):
        """Append a user utterance.

        Raises:
            ValueError: if it is currently the bot's turn.
        """
        if not self.user_turn:
            raise ValueError("It's the bot's turn to add a reply to the conversation")
        self.user_text.append(text)
        self.user_turn = False

    def add_bot_text(self, text):
        """Append a bot reply.

        Raises:
            ValueError: if it is currently the user's turn.
        """
        if self.user_turn:
            raise ValueError("It's the user's turn to add an input to the conversation")
        self.bot_text.append(text)
        self.user_turn = True

    def pop(self):
        """Remove the most recent utterance and hand the turn back to its author.

        Raises:
            IndexError: if there is nothing to remove (the turn flag is left
                unchanged in that case).
        """
        if self.user_turn:
            # The last utterance was a bot reply.
            self.bot_text.pop()
            self.user_turn = False
        else:
            # The last utterance was a user input.
            self.user_text.pop()
            self.user_turn = True

    def get_bot_replies(self):
        """Return the list of bot replies (the internal list, not a copy)."""
        return self.bot_text

    def get_user_replies(self):
        """Return the list of user inputs (the internal list, not a copy)."""
        return self.user_text

    @staticmethod
    def _count_tokens(text):
        # Token count according to the module-level `tokenizer`.
        return len(tokenizer(text)['input_ids'])

    def _format_turns(self):
        # Renders the dialogue as 'User>>> ...' / ' Bot>>> ...' lines, plus a
        # trailing unanswered user input when it is the bot's turn.
        parts = [
            'User>>> ' + user + '\n Bot>>> ' + bot + '\n'
            for user, bot in zip(self.user_text, self.bot_text)
        ]
        if not self.user_turn:
            parts.append('User>>> ' + self.user_text[-1])
        return ''.join(parts)

    def get_dialogue_history(self, max_len=100):
        """Return the most recent dialogue that fits in a token budget.

        Utterances are separated with '\\n', oldest first. When it is the
        bot's turn the pending user input is always included and ends the
        string; when it is the user's turn the string ends with '\\n' (or is
        empty if nothing has been said).

        Args:
            max_len: token budget for the returned history. The default is
                100, which leaves room for new input below the model's
                maximum input length.

        Returns:
            The history string. This is not fool proof: utterances are
            tokenized one by one here, which need not add up to exactly the
            token count of the joined string.
        """
        history = ''
        tokens_left = max_len

        if not self.user_turn:
            # The unanswered user input always goes in, whatever the budget.
            history = self.user_text[-1]
            tokens_left -= self._count_tokens(history)

        # Walk the completed (user, bot) exchanges from newest to oldest and
        # prepend each one while it still fits. Both turn states measure with
        # the tokenizer so the budget is always expressed in tokens.
        completed = list(zip(self.user_text, self.bot_text))
        for user_text, bot_text in reversed(completed):
            nbr_tokens = self._count_tokens(bot_text) + self._count_tokens(user_text)
            if nbr_tokens >= tokens_left:
                break
            history = user_text + '\n' + bot_text + '\n' + history
            tokens_left -= (nbr_tokens + 2)  # +2 for the two '\n' separators
        return history

    def to_txt(self, file=None):
        """Append the dialogue to a text file under '02_interview_output/'.

        Args:
            file: file name inside the output directory. When None, the name
                is 'interview_sv.txt' if `lang` is 'sv' and
                'interview_en.txt' otherwise.
        """
        import os  # local import keeps this class self-contained

        if file is None:
            file = 'interview_sv.txt' if self.lang == 'sv' else 'interview_en.txt'

        text = (
            '####################################\n'
            + 'Conversation description: ' + self.description + '\n\n'
            + self._format_turns()
            + '\n\n'
        )

        file_path = '02_interview_output/' + file
        # Create the output directory on first use instead of failing in open().
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        # Explicit UTF-8 so non-ASCII (e.g. Swedish) text is written the same
        # way on every platform.
        with open(file_path, 'a', encoding='utf-8') as f:
            f.write(text)

    def print_dialogue(self):
        """Print the whole dialogue to stdout."""
        print(self._format_turns())


def strip_token(line):
    """Return `line` with every '<s>' and then every '</s>' marker removed."""
    for marker in ('<s>', '</s>'):
        line = line.replace(marker, '')
    return line


##  Helper functions for regulating the context 
#### First a helper function for taking care of the conversation

In [5]:
def blender_history_chatting(user_input,convo_sv,convo_en):
    """Run one chat turn, giving the model as much dialogue history as fits.

    The user input is translated with sv_to_en, the model generates a reply
    from the English history plus the new input, and the reply is translated
    back with en_to_sv. Both conversations are updated and the `convo_sv`
    dialogue is printed.

    Args:
        user_input: the user's message (passed to sv_to_en for translation).
        convo_sv: conversation object that stores the untranslated side.
        convo_en: conversation object that stores the English side; its
            history is what the model sees.

    Returns:
        None.
    """
    translated_user_input = sv_to_en(user_input)

    # Whatever the new input does not use of the 126-token budget is offered
    # to the history. It is fetched before the new input is logged, so the
    # history only holds completed exchanges.
    nbr_tokens = len(tokenizer(translated_user_input)['input_ids'])
    context = convo_en.get_dialogue_history(max_len=126 - nbr_tokens)

    convo_sv.add_user_text(user_input)
    convo_en.add_user_text(translated_user_input)

    # The history may be empty or may already end with '\n'. Join with exactly
    # one separator so the prompt never starts with, or contains, a blank line.
    context = context.rstrip('\n')
    if context:
        model_input = context + '\n' + translated_user_input
    else:
        model_input = translated_user_input

    with no_grad():
        inputs = tokenizer([model_input], return_tensors='pt')
        reply_ids = model.generate(**inputs)

    # Decode the first (only) sequence and drop the <s> / </s> markers.
    reply = strip_token(tokenizer.batch_decode(reply_ids)[0])

    convo_en.add_bot_text(reply)
    convo_sv.add_bot_text(en_to_sv(reply))
    convo_sv.print_dialogue()

####  Subject chatting helper

In [4]:
def blender_subject_chatting(user_input,convo_sv,convo_en):
    """Run one chat turn where the model sees only a subject line and the last exchange.

    The prompt is `subject`, then the previous bot reply if there is one,
    then the translated user input, joined with '\\n'. `subject` is read from
    module scope and is not defined in this function -- NOTE(review): confirm
    it is set before this is called.

    Args:
        user_input: the user's message (passed to sv_to_en for translation).
        convo_sv: conversation object that stores the untranslated side.
        convo_en: conversation object that stores the English side.

    Returns:
        None.
    """
    english_input = sv_to_en(user_input)

    # Log the user's turn on both sides before building the prompt.
    convo_sv.add_user_text(user_input)
    convo_en.add_user_text(english_input)

    # Only the most recent bot reply (if any) is carried over as context.
    previous_reply = convo_en.bot_text[-1] + '\n' if convo_en.bot_text else ''
    model_input = subject + '\n' + previous_reply + english_input

    with no_grad():
        encoded = tokenizer([model_input], return_tensors='pt')
        generated = model.generate(**encoded)
        # Decode the first (only) sequence and drop the <s> / </s> markers.
        decoded = tokenizer.batch_decode(generated)[0]
        reply = strip_token(decoded)

    convo_en.add_bot_text(reply)
    convo_sv.add_bot_text(en_to_sv(reply))
    convo_sv.print_dialogue()
    return



## Class for keeping track of the interview

In [14]:
# Just a place to store questions instead of having txt files for now
questions = ['Du har sökt jobbet som {}. Vad är det som du tycker verkar vara roligt med detta arbetet?', 
                       'Om du ska arbeta som {} så är det bra om du har erfarenhet från YY. Kan du berätta lite om du har sådan erfarenhet?',
                       'Vad är din bästa erfarenhet från dina tidigare arbeten?',
                       'Vad gör att du skulle passa bra som {}?',
                       'Är det någonting som du vill fråga om detta arbetet?']
# One flag per question: 1 if the question has a '{}' placeholder to fill in.
format_question = [1,1,0,1,0]
# Materialised as a list: a bare zip object is a one-shot iterator, so the
# comprehension below would leave it empty for every later user.
interview_questions = list(zip(questions, format_question))
# Quick check of the formatting with a sample job title.
test = [question.format('car mech') if to_format else question for (question, to_format) in interview_questions]
test

['Du har sökt jobbet som car mech. Vad är det som du tycker verkar vara roligt med detta arbetet?',
 'Om du ska arbeta som car mech så är det bra om du har erfarenhet från YY. Kan du berätta lite om du har sådan erfarenhet?',
 'Vad är din bästa erfarenhet från dina tidigare arbeten?',
 'Vad gör att du skulle passa bra som car mech?',
 'Är det någonting som du vill fråga om detta arbetet?']

In [7]:
class InterviewWorld:
    """State of one scripted interview (work in progress).

    Holds the formatted interview questions, the greeting, handles to the
    module-level model/tokenizer/translator, and one BlenderConversation per
    language. observe(), _interview_chat() and _check_answer() are still
    stubs.
    """

    def __init__(self, job, name):
        """Set up an interview for the position `job` with the candidate `name`."""
        # TODO: More sophisticated questions/greeting drawn from txt file(?) and formated with name and job
        # NOTE: interview_questions must be re-iterable (e.g. a list). A bare
        # zip object that has already been iterated yields no questions here.
        self.questions = [
            question.format(job) if to_format else question
            for (question, to_format) in interview_questions
        ]
        self.greeting = 'Hello, and welcome to this interview. How are you today, {}?'.format(name)

        self.job = job
        self.human_name = name
        self.freja = model
        self.tokenizer = tokenizer
        self.translator = translator
        self.episode_done = False  # set to True when the interview is over

        desc = 'InterviewWorld: ' + job + ' and  ' + name
        self.conversation_sv = BlenderConversation(lang='sv', description=desc)
        self.conversation_en = BlenderConversation(lang='en', description=desc)

    def observe(self, user_input):
        """Observe the user input and update internal states (not implemented).

        Currently a no-op that returns None.
        """
        # TODO: translate user_input and store it in the conversations.
        # TODO: Check if user wants to quit/no questions left --> self.episode_done = True
        return

    def act(self):
        """Produce the next interviewer message.

        Returns:
            'Tack för din intervju' once `episode_done` is set, after both
            conversations have been written out with to_txt(). Otherwise
            None, because the question/chat branch is not implemented yet.
        """
        # Pull message from queue

        if self.episode_done:
            self.conversation_sv.to_txt()
            self.conversation_en.to_txt()
            return 'Tack för din intervju'

        # TODO: ask the next question / continue the chat.
        return

    @staticmethod
    def _strip_token(line):
        # Removes SOS and EOS tokens from blenderbot reply
        line = line.replace('<s>', '')
        line = line.replace('</s>', '')
        return line

    def _interview_chat(self):
        """Placeholder for the chat step; not implemented."""
        return

    def _check_answer(self):
        """Placeholder for judging the latest answer; not implemented."""
        # TODO: use the English bot replies (self.conversation_en.bot_text);
        # the draft only fetched them without using the result.
        return

In [None]:
questions = []
job = 
name = 