# Lab 4.3 - Chatbot to detect emotions

Copyright, Vrije Universiteit Amsterdam, Faculty of Humanities, CLTL

In this notebook, we will create a Telegram chatbot that detects the emotion expressed in a message and answers with a reply taken from a predefined question-answer set.

**Main goal of this notebook**: The most important goal of this notebook is to have a Telegram chatbot that you can ask factual questions and that replies with predefined answers.

**At the end of this notebook, you will**:
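* **Use a predefined question-answering dataset**: load a set of question keywords and responses, grouped by emotion category, and use it to select the chatbot's reply.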

## Using a question-answer dictionary

In [1]:
import json
import nltk
import random
import pickle
import datetime
import requests
from pprint import PrettyPrinter
from gensim.models import KeyedVectors

In [2]:
def read_token(tokens_path='./data/tokens.json', token_name='CLTL_token'):
    """Read a bot token from a JSON file.

    Args:
        tokens_path: Path to a JSON file containing an object that maps token
            names to token values. Defaults to './data/tokens.json'.
        token_name: Key of the token to return. Defaults to 'CLTL_token'.

    Returns:
        The value stored under ``token_name``.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If ``token_name`` is not present in the file.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's locale.
    with open(tokens_path, encoding='utf-8') as f:
        tokens = json.load(f)

    return tokens[token_name]

def read_qa(qa_path='./data/emotions.json'):
    """Load the question-answer data from a JSON file.

    Args:
        qa_path: Path to the JSON file to load. Defaults to
            './data/emotions.json'.

    Returns:
        The parsed JSON content, unchanged.

    Raises:
        OSError: If the file cannot be opened.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's locale.
    with open(qa_path, encoding='utf-8') as f:
        qa_data = json.load(f)

    return qa_data

In [3]:
class BotHandler:
    """Small wrapper around the Telegram Bot HTTP API for one bot token."""

    def __init__(self, token):
        """Store the token and build the base URL used by every API call."""
        self.token = token
        self.api_url = "https://api.telegram.org/bot{}/".format(token)

    def get_all_messages(self, offset=None, timeout=200):
        """Fetch the updates sent to the bot via the `getUpdates` method.

        Args:
            offset: Sent to Telegram as the `offset` query parameter.
            timeout: Sent to Telegram as the `timeout` query parameter. This
                is a request parameter of the API call, not an HTTP client
                timeout for `requests`.

        Returns:
            The value under the 'result' key of the JSON reply.
        """
        method = 'getUpdates'
        params = {'timeout': timeout, 'offset': offset}
        resp = requests.get(self.api_url + method, params)

        return resp.json()['result']

    def filter_messages_by(self, update, chat_id):
        """Return True if `update` carries a 'message' from chat `chat_id`."""
        return 'message' in update and update['message']['chat']['id'] == chat_id

    def get_last_message_by(self, chat_id):
        """Return the text of the last message sent to the bot from a chat.

        Returns:
            The message text, or None when the chat has no messages among the
            fetched updates or when its last message carries no 'text' field.
        """
        messages = self.get_all_messages()
        messages_by_user = [m for m in messages if self.filter_messages_by(m, chat_id)]

        if not messages_by_user:
            return None

        # Not every message has a 'text' entry; use .get() so such a message
        # yields None instead of raising KeyError.
        return messages_by_user[-1]['message'].get('text')

    def send_message_to(self, chat_id, text):
        """Send `text` to chat `chat_id` via the `sendMessage` method.

        The HTTP response is not inspected and nothing is returned.
        """
        params = {'chat_id': chat_id, 'text': text}
        method = 'sendMessage'
        requests.post(self.api_url + method, params)

In [4]:
def classify_emotion(chat, vectorizer, transformer, classifier, label_encoder):
    """Predict the emotion label for the last utterance in `chat`.

    Args:
        chat: Sequence of utterances; passed to `vectorizer.transform`.
        vectorizer: Object whose `transform` turns `chat` into counts.
        transformer: Object whose `transform` turns the counts into the
            features given to the classifier.
        classifier: Object whose `predict` returns one label index per
            utterance.
        label_encoder: Object whose `classes_` maps a label index to its name.

    Returns:
        The entry of `label_encoder.classes_` for the last prediction.

    Raises:
        ValueError: If the classifier produced no predictions (empty `chat`).
    """
    counts = vectorizer.transform(chat)
    tfidf = transformer.transform(counts)
    predictions = classifier.predict(tfidf)

    if len(predictions) == 0:
        raise ValueError("classify_emotion needs at least one utterance")

    # Only the label of the last utterance is reported.
    return label_encoder.classes_[predictions[-1]]

In [5]:
def get_similar_words(embedding_model, message, topn=10):
    """Collect the nearest embedding neighbours of every token in `message`.

    Args:
        embedding_model: Object exposing `most_similar(positive=[word], topn=n)`
            that returns items whose first element is the neighbouring word and
            raises KeyError for words it does not know.
        message: Text to tokenize with `nltk.tokenize.word_tokenize`.
        topn: Number of neighbours requested per token. Defaults to 10.

    Returns:
        A list with the lower-cased neighbours of all tokens, in token order.
        Tokens missing from the embedding vocabulary contribute nothing.
    """
    # TODO filter by content words
    words = nltk.tokenize.word_tokenize(message)

    similar_words = []
    for word in words:
        try:
            word_neighborhood = embedding_model.most_similar(positive=[word], topn=topn)
        except KeyError:
            print("word '%s' not in embedding vocabulary" % word)
            # Skip this token; otherwise the previous token's neighbours would
            # be appended again (or the name would be unbound on the first token).
            continue

        similar_words.extend(item[0].lower() for item in word_neighborhood)

    return similar_words

In [6]:
def create_response(message, qa_data, vectorizer, transformer, classifier, label_encoder, embedding_model):
    """Pick a reply for `message` from `qa_data`.

    The message's emotion is predicted with `classify_emotion`, and the words
    returned by `get_similar_words` are compared with each intent's
    'questions'. The first intent whose 'category' equals the emotion and
    whose 'questions' overlap those words supplies the reply, chosen at random
    from its 'responses'.

    Args:
        message: Text received from the user; may be None or empty.
        qa_data: Mapping with an 'intents' list; each intent has 'category',
            'questions' and 'responses'.
        vectorizer, transformer, classifier, label_encoder: Forwarded to
            `classify_emotion`.
        embedding_model: Forwarded to `get_similar_words`.

    Returns:
        The selected response, or "I cannot respond to this" when the message
        is empty/None or no intent matches.
    """
    response = "I cannot respond to this"

    # No message (e.g. nothing received yet): there is nothing to classify.
    if not message:
        return response

    emotion = classify_emotion([message], vectorizer, transformer, classifier, label_encoder)
    # Build the set once; it does not change between intents.
    similar_words = set(get_similar_words(embedding_model, message))

    for intent in qa_data['intents']:
        if emotion != intent['category']:
            continue

        print("Emotion detected: {emotion}".format(emotion=emotion))
        word_intersection = list(set(intent['questions']) & similar_words)

        if word_intersection:
            # TODO interpretability: keyword retrieval due to ...
            print("Keywords detected: {intersection}".format(intersection=word_intersection))
            response = random.choice(intent['responses'])
            break

    return response

In [7]:
def load_embeddings(path_to_model='/Users/selbaez/Documents/PhD/CLTL/data/word_embeddings/GoogleNews-vectors-negative300.bin'):
    """Load word embeddings stored in binary word2vec format.

    Args:
        path_to_model: Location of the binary word2vec file. The default is
            the original author's local path; pass your own location when
            running this notebook on another machine.

    Returns:
        The object returned by `KeyedVectors.load_word2vec_format`.
    """
    embedding_model = KeyedVectors.load_word2vec_format(path_to_model, binary=True)

    return embedding_model

In [8]:
def load_models(models_dir='../lab3.machine_learning/models'):
    """Load the pickled vectorizer, transformer, classifier and label encoder.

    Args:
        models_dir: Directory containing 'utterance_vec.sav',
            'utterance_transf.sav', 'svm_linear_clf_bow.sav' and
            'label_encoder.sav'. Defaults to '../lab3.machine_learning/models'.

    Returns:
        A tuple `(vectorizer, transformer, classifier, label_encoder)`.

    Raises:
        OSError: If one of the files cannot be opened.
    """
    def _load(filename):
        # NOTE: pickle.load can execute arbitrary code; only point models_dir
        # at files you created yourself or otherwise trust.
        # The with-block closes the file handle once the object is loaded.
        with open('{}/{}'.format(models_dir, filename), 'rb') as f:
            return pickle.load(f)

    # load the classifier and the vectorizer from disk
    loaded_classifier = _load('svm_linear_clf_bow.sav')
    loaded_vectorizer = _load('utterance_vec.sav')
    loaded_transformer = _load('utterance_transf.sav')
    loaded_label_encoder = _load('label_encoder.sav')

    return loaded_vectorizer, loaded_transformer, loaded_classifier, loaded_label_encoder

In [9]:
# Setup: read the bot token, load the data and models, and create the bot client.
# The names defined here are used by the cell that fetches and answers a message.
CLTL_TOKEN = read_token()
# Chat id whose messages are read and to which the reply is sent.
user_id = 408043639

# Question-answer data, the four emotion-classification objects, and the
# word embeddings used to expand the message into similar words.
qa_data = read_qa()
vectorizer, transformer, classifier, label_encoder = load_models()
embedding_model = load_embeddings()
bot = BotHandler(CLTL_TOKEN)

In [11]:
# Fetch the user's latest message, answer it, and report what happened.
last_message = bot.get_last_message_by(user_id)

if last_message is None:
    # Nothing to answer: get_last_message_by returns None when no message was
    # found, and building a response from None would fail.
    response = None
    print("No message received from user {user}".format(user=user_id))
else:
    response = create_response(last_message, qa_data, vectorizer, transformer, classifier, label_encoder, embedding_model)
    bot.send_message_to(user_id, response)

    print("Received: {message}".format(message=last_message))
    print("Responded: {response}".format(response=response))

word 'of' not in embedding vocabulary
Emotion detected: neutral
Emotion detected: neutral
Keywords detected: ['music']
Received: I hate hip hop music because of the beats that are too much for me
Responded: That music is fine


## End of this notebook