## Esteira de similaridade de textos por Jaccard, Cosseno e Cosseno com Embedding

In [1]:
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import pandas as pd
import re
import gensim
import numpy as np
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import load_model
from scipy.spatial import distance
import math
import random
import bz2
import itertools
import pickle
from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import wordnet

In [2]:
#expand jupyter cells
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

### Opening movie reviews

In [3]:
messages = pd.read_csv('./chatdata/movie_lines_pre_processed_for_test.tsv', delimiter="\t", header = None, quoting=3, encoding='ISO-8859-2')

In [4]:
messages.columns = ['msg_line', 'user_id', 'movie_id', 'msg', 'msg_pre_processed', 'msg_2', 'target']

In [5]:
messages.head()

Unnamed: 0,msg_line,user_id,movie_id,msg,msg_pre_processed,msg_2,target
0,L49,u0,m0,Did you change your hair?,change hair,No.,1
1,L59,u9,m0,I missed you.,miss,It says here you exposed yourself to a group o...,0
2,L60,u8,m0,It says here you exposed yourself to a group o...,say expose group freshman girl,It was a bratwurst. I was eating lunch.,0
3,L61,u9,m0,It was a bratwurst. I was eating lunch.,bratwurst eat lunch,With the teeth of your zipper?,0
4,L63,u7,m0,You the new guy?,new guy,So they tell me...,1


### Defining the list of questions and answers

In [6]:
questions = set(messages[messages['target'] == 1]['msg_pre_processed'])

In [7]:
answers = set(messages[messages['target'] == 0]['msg_pre_processed'])

In [8]:
len(answers)

20262

In [9]:
len(questions)

9738

In [10]:
def get_wordnet_pos(word):
    """Map POS tag to first character lemmatize() accepts"""
    tag = nltk.pos_tag([word])[0][1][0].upper()
    tag_dict = {"J": wordnet.ADJ,
                "N": wordnet.NOUN,
                "V": wordnet.VERB,
                "R": wordnet.ADV}

    return tag_dict.get(tag, wordnet.NOUN)

In [11]:
lemmatizer = WordNetLemmatizer()
def pre_processing_text(corpus):
    #remove html tags
    corpus = re.sub(r'<.*?>', '', str(corpus))
    
    #remove non-alphanumeric characters
    corpus = re.sub(r'[^a-z A-Z 0-9 \s]', '', str(corpus))
    
    #remove duplicated spaces
    corpus = re.sub(r' +', ' ', str(corpus))
    
    #capitalization
    corpus = corpus.lower()
    
    #tokenization
    corpus = re.findall(r"\w+(?:'\w+)?|[^\w\s]", corpus)
    
    #lammatization
    corpus = [lemmatizer.lemmatize(c, get_wordnet_pos(c)) for c in corpus]
    
    #remove punctuation
    corpus = [t for t in corpus if t not in string.punctuation]
    
    #remove stopwords
    #it makes the model worst
    #stopwords_ = stopwords.words("english")
    #corpus = [t for t in corpus if t not in stopwords_]
    
    corpus = ' '.join(corpus)

    return corpus

In [12]:
msg_raw = 'I heard you are a good guy. Is it right?'
#msg_raw = 'yes i heard you all right 20000000 thats quite a lot isnt it'
msg = pre_processing_text(msg_raw)
print(msg)

with open('./chatdata/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)
p = tokenizer.texts_to_matrix([msg])

model = load_model('./chatdata/chatbot_model.h5')
res = model.predict(p)

print(res)

i heard you be a good guy be it right
[[0.61740196]]


## Returning the conversation for the message using Jaccard Similarity

In [13]:
def jaccard_similarity(f1, f2):    
    f1 = set(str(f1).split(' '))
    f2 = set(str(f2).split(' '))
    
    intersecao = f1.intersection(f2)
    uniao = f1.union(f2)
    
    return len(intersecao) / len(uniao)

In [14]:
def return_conversation_by_jaccard(msg, res, questions, answers, threshold=None):
    """
    Return a dictionary of message and similarity sorted by highter similarity
    """
    if res >= 0.5:
        msg_list = questions
        similarity = [jaccard_similarity(msg, str(m)) for m in questions]        
    else:
        similarity = [jaccard_similarity(msg, str(m)) for m in answers]
        msg_list = answers
    
    result = dict(zip(msg_list, similarity))
    
    return {k: v for k, v in sorted(result.items(), key=lambda item: item[1], reverse=True)}    

In [15]:
%%time
conversations = return_conversation_by_jaccard(msg, res, questions, answers)
conversations

CPU times: user 46.9 ms, sys: 15.6 ms, total: 62.5 ms
Wall time: 51.3 ms


{'heard good bunch killer': 0.18181818181818182,
 'guy right get london': 0.18181818181818182,
 'theyre good guy see holly': 0.16666666666666666,
 'cmere get good blow right': 0.16666666666666666,
 'good week mama treat right': 0.16666666666666666,
 'guy television alvy singer right right': 0.16666666666666666,
 'thats right thats say good memory': 0.16666666666666666,
 'youre say guy total wacko right': 0.15384615384615385,
 'good cant mom right middle term': 0.15384615384615385,
 'well one shoot guy right scar kid say real good get every available gun city': 0.14285714285714285,
 'alright good thanks trouble leave sally alone right': 0.13333333333333333,
 'right youve heard emperor caligula perhaps jack ripper': 0.13333333333333333,
 'right love ya listen ya go give u good eh': 0.125,
 'body im afraid guy would disappointed good night doc': 0.125,
 'go back armor right mean guy outer space suppose beat': 0.11764705882352941,
 'well little early id say thursday would good ill know rig

In [16]:
#get the first item in the dict
def get_the_next_conversation(conversations):
    keys_view = conversations.keys()
    keys_iterator = iter(keys_view)
    conversation = next(keys_iterator)
    return conversation

In [17]:
conversation = get_the_next_conversation(conversations)
conversation

'heard good bunch killer'

### The returned message

In [18]:
print('>>> '+msg_raw)
msg2 = list(messages[messages['msg_pre_processed'] == conversation]['msg_2'])[0]
print('<<< '+msg2)

>>> I heard you are a good guy. Is it right?
<<< And I don't intend to get some of them chewed up just to get your tub put in the mouth of the goddman Nung River. You say you don't know Kurtz?


## Calculate PageRank
- create a bi-directional graph of messages using similarity bigger then a threshould

In [19]:
"""
Classe para criação de um nó (página) do grafo.
O cosntrutuor recebe recebe o nome do nó, a lista de nós de entrada e saída.
"""
class Node(object):
    def __init__(self, node_name: str, inlinks: list, outlinks: list):
        self.node_name = node_name
        self.inlinks = inlinks
        self.outlinks = outlinks

"""
Classe para criação do grafo de páginas.
O construtor inicializa um dicionário vazio de lista de adjacência. 
"""
class Graph(object):
    def __init__(self):
        self.adj_list = dict()

    #Adiciona um nó ao grafo com seus nós de entrada e sapida
    def add_node(self, node_name: str, inlinks: list, outlinks: list):
        node = Node(node_name = node_name, inlinks=inlinks, outlinks=outlinks)
        self.adj_list[node_name] = node

    #Imprime os dados do grafo criado
    def print_graph(self):
        for key in self.adj_list:
            print(f"{key}:")
            print(f"\tEntrada: {self.adj_list[key].inlinks}")
            print(f"\tSaída: {self.adj_list[key].outlinks}")

In [20]:
import numpy as np

"""
A classe PageRank possui métodos para computar o Page Rank de cada página dado um número de iterações.
"""
class PageRank(object):
    #Construtor da classe que recebe um objeto Graph, inicializa um dicionário vazio de scores
    def __init__(self, graph: Graph):
        self.graph = graph
        self.scores = dict()
        self.__initialize_scores()

    #Inicializa os scores do Page Rank com o valor inicial 1/n, onde n é o número de nós (páginas) do grafo
    def __initialize_scores(self):
        n = len(self.graph.adj_list)
        for key in self.graph.adj_list:
            self.scores[key] = 1/n

    #Calcula o Page Rank para cada página dado o número de iterações. Ainda não utiliza a convergence_rate no cálculos
    def compute(self, iterations: int = 10, convergence_rate: float = 0.01):
        new_scores = dict()
        for i in range(iterations):
            for  node in self.graph.adj_list:
                in_to_node = np.asarray([
                    self.scores[x] for x in self.graph.adj_list[node].inlinks
                ])
                out_to_node = self.graph.adj_list[node].inlinks
                amount_out_to_node = np.asarray([
                    len(self.graph.adj_list[x].outlinks) for x in out_to_node
                ])
                score = np.sum(in_to_node / amount_out_to_node)
                new_scores[node] = score
            #print(self.scores)
            self.scores = new_scores.copy()
        return self.scores

    def power_method(self, iterations: int = 10):
        #Gera a matrix de probabilidades de navegação a cada nó
        lenght = len(self.graph.adj_list)
        matrix = np.zeros((lenght, lenght))
        i = j = 0
        for node in self.graph.adj_list:
            for link in self.graph.adj_list[node].outlinks:
                for row in self.graph.adj_list:
                    if link == row:
                        matrix[i][j] = 1/len(self.graph.adj_list[node].outlinks)
                    i = i + 1
                i = 0
            j = j + 1
        print("Matriz de probabilidades ")
        print(matrix)
        #muliplica a matrix pelo score inicial
        print("Page Rank das iterações")
        scores_arr = np.asarray([
                    self.scores[key] for key in self.graph.adj_list
                ])
        itn = np.dot(matrix, scores_arr)
        print(itn)
        for i in range(iterations - 1):
            itn = np.dot(matrix, itn)
            print(itn)

In [21]:
def make_in_links(conversations, threshold=0.3):
    li = list()
    if threshold is None:
        li = conversations.keys()
    else:
        dic = {k: v for k, v in conversations.items() if v >= threshold}
        li = [c for c in dic.keys()]
        
    return li

In [22]:
def get_conversations(msg, res):
    return return_conversation_by_jaccard(msg, res, questions, answers)

In [23]:
def make_graph(qea, res, threshold=None):
    g = Graph()
    lenght = len(qea)
    i = 1
    
    for k in qea:
        conversations = get_conversations(k, res)
        if conversations is not None:
            in_links = make_in_links(conversations, threshold=threshold)
            in_links.remove(k)
            g.add_node(k, in_links, in_links)
        if (i % 100) == 0:
            print('Processed '+ str(i) +' of '+ str(lenght))    
        i += 1
    return g

In [24]:
def save_page_compute(qea, res, file_name, threshold=None, iterations=3):

    g = make_graph(qea=qea, res=res, threshold=threshold)

    p = PageRank(graph=g)
    pc = p.compute(iterations=iterations)
    pc = {k: v for k, v in sorted(pc.items(), key=lambda item: item[1], reverse=True)}

    f = open( './chatdata/' + file_name + '.txt', 'w' )
    f.write( repr(pc) )
    f.close()
    return pc

In [25]:
threshold=0.01

In [26]:
%%time
pc_q = save_page_compute(qea=questions, res=1, threshold=threshold, file_name='page_rank_questions')

Processed 100 of 9738
Processed 200 of 9738
Processed 300 of 9738
Processed 400 of 9738
Processed 500 of 9738
Processed 600 of 9738
Processed 700 of 9738
Processed 800 of 9738
Processed 900 of 9738
Processed 1000 of 9738
Processed 1100 of 9738
Processed 1200 of 9738
Processed 1300 of 9738
Processed 1400 of 9738
Processed 1500 of 9738
Processed 1600 of 9738
Processed 1700 of 9738
Processed 1800 of 9738
Processed 1900 of 9738
Processed 2000 of 9738
Processed 2100 of 9738
Processed 2200 of 9738
Processed 2300 of 9738
Processed 2400 of 9738
Processed 2500 of 9738
Processed 2600 of 9738
Processed 2700 of 9738
Processed 2800 of 9738
Processed 2900 of 9738
Processed 3000 of 9738
Processed 3100 of 9738
Processed 3200 of 9738
Processed 3300 of 9738
Processed 3400 of 9738
Processed 3500 of 9738
Processed 3600 of 9738
Processed 3700 of 9738
Processed 3800 of 9738
Processed 3900 of 9738
Processed 4000 of 9738
Processed 4100 of 9738
Processed 4200 of 9738
Processed 4300 of 9738
Processed 4400 of 97

In [27]:
%%time
pc_a = save_page_compute(qea=answers, res=0, threshold=threshold, file_name='page_rank_answers')

Processed 100 of 20262
Processed 200 of 20262
Processed 300 of 20262
Processed 400 of 20262
Processed 500 of 20262
Processed 600 of 20262
Processed 700 of 20262
Processed 800 of 20262
Processed 900 of 20262
Processed 1000 of 20262
Processed 1100 of 20262
Processed 1200 of 20262
Processed 1300 of 20262
Processed 1400 of 20262
Processed 1500 of 20262
Processed 1600 of 20262
Processed 1700 of 20262
Processed 1800 of 20262
Processed 1900 of 20262
Processed 2000 of 20262
Processed 2100 of 20262
Processed 2200 of 20262
Processed 2300 of 20262
Processed 2400 of 20262
Processed 2500 of 20262
Processed 2600 of 20262
Processed 2700 of 20262
Processed 2800 of 20262
Processed 2900 of 20262
Processed 3000 of 20262
Processed 3100 of 20262
Processed 3200 of 20262
Processed 3300 of 20262
Processed 3400 of 20262
Processed 3500 of 20262
Processed 3600 of 20262
Processed 3700 of 20262
Processed 3800 of 20262
Processed 3900 of 20262
Processed 4000 of 20262
Processed 4100 of 20262
Processed 4200 of 20262
P

In [28]:
#checking page rank tends to 1
s = 0
#pc_q = pc_a
for p in pc_q:
    s += pc_q[p]
    
s

0.9835695214623157

## Similary of Jaccard based on Page Rank

In [29]:
def return_conversation_by_page_rank(msg, conversations, page_compute):
    """
    Return a dictionary of message and similarity sorted by highter similarity
    """  
    similarity = {k: v for k, v in conversations.items()}
    
    result = dict()
    for k, v in similarity.items():        
        result[k] = page_compute[k] * v
    
    result = {k: v for k, v in sorted(result.items(), key=lambda item: item[1], reverse=True)}
    return next(iter(result))

In [30]:
conversation = return_conversation_by_page_rank(msg, conversations, page_compute=pc_q)
print('Conversation: '+ conversation)
print('Page compute: '+ str(pc_q[conversation]))
print('Similarity: '+ str(conversations[conversation]))

Conversation: well one shoot guy right scar kid say real good get every available gun city
Page compute: 0.00034386130104737203
Similarity: 0.14285714285714285


In [31]:
print('Original: '+ msg)
print('Most similar: '+conversation)

Original: i heard you be a good guy be it right
Most similar: well one shoot guy right scar kid say real good get every available gun city


In [32]:
print('>>> '+msg_raw)
msg2 = list(messages[messages['msg_pre_processed'] == conversation]['msg_2'])[0]
print('<<< '+msg2)

>>> I heard you are a good guy. Is it right?
<<< Up where?


## Return the result using the Cossine Similarity

In [33]:
bow = CountVectorizer()

In [34]:
def return_conversation_by_cossine(msg, res, questions, answers, bow):
    """
    Return a dictionary of message and similarity sorted by highter similarity
    """
    if res >= 0.5:
        msg_list = questions    
    else:
        msg_list = answers
       
    similarity = []
    for m in msg_list:
        new_msg_list = [msg, m]
        vector_bow = bow.fit_transform(new_msg_list)
        msg_bow = vector_bow.todense()[0]
        m_bow   = vector_bow.todense()[1]
        similarity.append(distance.cosine(msg_bow, m_bow))
    
    result = {} 
    for key in msg_list: 
        for value in similarity: 
            result[key] = value
            similarity.remove(value) 
            break 
    
    return {k: v for k, v in sorted(result.items(), key=lambda item: item[1], reverse=False)}
    

In [35]:
conversations = return_conversation_by_cossine(msg, res, questions, answers, bow)
conversations

  dist = 1.0 - uv / np.sqrt(uu * vv)


{'guy television alvy singer right right': 0.6645898033750315,
 'heard good bunch killer': 0.6837722339831621,
 'guy guy': 0.6837722339831621,
 'guy right get london': 0.6837722339831621,
 'theyre good guy see holly': 0.717157287525381,
 'cmere get good blow right': 0.717157287525381,
 'good week mama treat right': 0.717157287525381,
 'go right right': 0.717157287525381,
 'right right goddamnit dont care whats right': 0.7368825942078912,
 'movie right book right': 0.7418011102528388,
 'youre say guy total wacko right': 0.7418011102528388,
 'right part life right': 0.7418011102528388,
 'good cant mom right middle term': 0.7418011102528388,
 'right right right right whatta yuh mean yuh gonna take picture': 0.7470177871865296,
 'well one shoot guy right scar kid say real good get every available gun city': 0.7550510257216823,
 'alright right right whatta wanna': 0.7609542781331213,
 'good stuff': 0.7763932022500211,
 'love right': 0.7763932022500211,
 'abby right': 0.7763932022500211,
 'l

In [36]:
conversation = get_the_next_conversation(conversations)
conversation

'guy television alvy singer right right'

### Return result

In [37]:
print('>>> '+msg_raw)
msg2 = list(messages[messages['msg_pre_processed'] == conversation]['msg_2'])[0]
print('<<< '+msg2)

>>> I heard you are a good guy. Is it right?
<<< Gimme a break will yuh gimme a break. Jesus Christ!


In [38]:
conversation = return_conversation_by_page_rank(msg, conversations, page_compute=pc_q)
print('Conversation: '+ conversation)
print('Page compute: '+ str(pc_q[conversation]))
print('Similarity: '+ str(conversations[conversation]))

Conversation: thats okay thats okay thats okay thats fine probably walk think go handicap think want people knew something medium maybe even think kind technical mumbo jumbo learn dead wrong interested one thing tell story bart make u laugh make u cry make u wanna break joyous song one thing okay point run dump dont know technical mumbojumbo run ive get horsesense goddamnit showmanship also hope lou told big meaner kike town tell lou dont mean dick big sexual thing although youre writer would know coffee
Page compute: 0.0006971414537937947
Similarity: 1.0


In [39]:
print('>>> '+msg_raw)
msg2 = list(messages[messages['msg_pre_processed'] == conversation]['msg_2'])[0]
print('<<< '+msg2)

>>> I heard you are a good guy. Is it right?
<<< ...Yes thank you.


## Get result using Cossine Similarity with Embedding

In [40]:
def download_embedding(get_it):
    if get_it:
        !gdown https://drive.google.com/uc?id=1zI8pGfbUHuU_0wY_FV4tD6w6ZCUJTQbh
    print('Download finished')

In [41]:
#The embedding is already downloaded
#Change to True to download
download_embedding(True)

Downloading...
From: https://drive.google.com/uc?id=1zI8pGfbUHuU_0wY_FV4tD6w6ZCUJTQbh
To: /mnt/c/Users/Douglas/trainning/chatbot_for_movies/notebooks/ptwiki_20180420_100d.txt.bz2
214MB [01:58, 1.82MB/s] 
Download finished


In [42]:
%%time
#get the embedding
newfilepath = "embedding_wiki_100d_pt.txt"
filepath = "ptwiki_20180420_100d.txt.bz2"
with open(newfilepath, 'wb') as new_file, bz2.BZ2File(filepath, 'rb') as file:
    for data in iter(lambda : file.read(100 * 1024), b''):
        new_file.write(data)

CPU times: user 1min 40s, sys: 2.06 s, total: 1min 42s
Wall time: 1min 48s


In [43]:
%%time
word_vectors = gensim.models.KeyedVectors.load_word2vec_format(filepath, binary=False)

CPU times: user 6min 29s, sys: 11.7 s, total: 6min 41s
Wall time: 6min 56s


In [44]:
word_vectors

<gensim.models.keyedvectors.Word2VecKeyedVectors at 0x7fa7dc69c460>

In [45]:
def calculate_embedding(phrase):
    """
    Return the mean of embeddings of a phrase
    """
    
    arr = np.array([word_vectors[word] for word in phrase if word in word_vectors.vocab])
    
    sum = np.zeros(len(arr[0]))
    for a in arr:
        sum = sum + a
        
    arr_mean = sum / len(arr) 
    
    return arr_mean

In [46]:
def return_conversation_by_cossine_embedding(msg, res, questions, answers, word_vectors):
    """
    Return a dictionary of message and similarity sorted by highter similarity
    """
    if res >= 0.5:
        msg_list = questions    
    else:
        msg_list = answers       
    
    msg = msg.split(' ')
    
    similarity = []
    for m in msg_list:        
        m = m.split(' ')
        
        try:
            msg_vector_embedding = calculate_embedding(msg)
            m_vector_embedding   = calculate_embedding(m)
        
            similarity.append(distance.cosine(msg_vector_embedding, m_vector_embedding))
        except:
            print("An exception occurred")
            print('> '+ ' '.join(m))
    
    result = {} 
    for key in msg_list: 
        for value in similarity: 
            result[key] = value
            similarity.remove(value) 
            break 
    
    return {k: v for k, v in sorted(result.items(), key=lambda item: item[1], reverse=False)}
    

In [47]:
%%time
conversations = return_conversation_by_cossine_embedding(msg, res, questions, answers, word_vectors)
conversations

An exception occurred
> jammed
An exception occurred
> microorganism
An exception occurred
> tturned
An exception occurred
> kanita
An exception occurred
> whaddo
An exception occurred
> howd
An exception occurred
> infatuate
An exception occurred
> whered
An exception occurred
> youve
An exception occurred
> yex
An exception occurred
> ttell
An exception occurred
> hows
An exception occurred
> manyd
An exception occurred
> employ
An exception occurred
> youdidnt
An exception occurred
> wheres irth
An exception occurred
> wheres
An exception occurred
> wheres restroom
An exception occurred
> uhuhwhy
An exception occurred
> divinely
An exception occurred
> humiliate
An exception occurred
> whatja
An exception occurred
> hows verdell
An exception occurred
> chigro
An exception occurred
> youre
An exception occurred
> whyd
An exception occurred
> gaiijin
An exception occurred
> whaat
An exception occurred
> youre notbusy
An exception occurred
> yeeees
An exception occurred
> wantem
An exc

{'gonna disassemble spiffy old batmobile reassemble hbomb wheel capiche yesterday victor tomorrow vapor': 0.047029245815001564,
 'see lose contact couple day ago around somewhere sight little suppose look': 0.04766920387875284,
 'get wrong man get right man wrong man deliver right man accepted trust right man wrong anyway add confusion die u right man wouldnt': 0.047681741589255755,
 'yeah im religious im atheist like guy gonna arrest': 0.04895275418142031,
 'like body': 0.049188713558023234,
 'doctor mccurdy cause death': 0.04978913408300867,
 'like rough like beatin': 0.0500186070300479,
 'never ambitious excellency ambition virtue among noble fault rest u': 0.05060419683490347,
 'whats name man': 0.050734306550926256,
 'ready start columbia next fall course like year premed stuff whatever im really excite rene give job secretary poem get publish': 0.05073753467420017,
 'yes isnt': 0.05082468717775479,
 'think girl innocent': 0.05196322792142982,
 'mother blind': 0.05220879828229008,

In [48]:
conversation = get_the_next_conversation(conversations)
conversation

'gonna disassemble spiffy old batmobile reassemble hbomb wheel capiche yesterday victor tomorrow vapor'

### Return result

In [49]:
print('>>> '+msg_raw)
msg2 = list(messages[messages['msg_pre_processed'] == conversation]['msg_2'])[0]
print('<<< '+msg2)

>>> I heard you are a good guy. Is it right?
<<< He'd have more power as a martyr. No to destroy Batman we must first turn him into what he hates most. Meaning us.


In [50]:
conversation = return_conversation_by_page_rank(msg, conversations, page_compute=pc_q)
print('Conversation: '+ conversation)
print('Page compute: '+ str(pc_q[conversation]))
print('Similarity: '+ str(conversations[conversation]))

Conversation: know paper father husband alive think back year two ago ben call say want publish pentagon paper next day time already stop publishing anymore legal counsel say dont dont frighten knew say id lose whole fifth floor publish night id told ben go ahead woke darkness thought oh lord newspaper woke last night question right story
Page compute: 0.0005693780034919376
Similarity: 0.471290099186642


In [51]:
print('>>> '+msg_raw)
msg2 = list(messages[messages['msg_pre_processed'] == conversation]['msg_2'])[0]
print('<<< '+msg2)

>>> I heard you are a good guy. Is it right?
<<< I think so.
