# Aula 6

## Conteúdo

### Bot usando random

In [16]:
import random

# Palavras esperadas
welcome_words_input = ('hey', 'hello', 'hi')

# Palavras que responderão as esperadas
welcome_words_output = ('key', 'hello', 'how are you?',
                        'welcome', 'how are you doing?')

def welcome_message(text: str) -> str:
    # <str>.split() -> gera uma lista a partir do texto, quebrando
    #                  as palavras nos espaços ou no delimitador passado
    words = text.split()

    for word in words:
        # Valida se a palavra é uma das palavras esperadas
        #
        # <str>.lower() -> converte texto para minusculas
        if word.lower() in welcome_words_input:
            # random.choice(...) -> escolhe um elemento aleatório da lista
            return random.choice(welcome_words_output)


print('R:', welcome_message('hey'))
print('R:', welcome_message('hello'))
print('R:', welcome_message('hi'))

R: welcome
R: how are you doing?
R: how are you?


### Bot usando semelhança de cossenos

In [20]:
import nltk
import spacy
import numpy as np
from goose3 import Goose
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer


nltk.download('punkt')
nlp = spacy.load('en_core_web_sm')

article = Goose().extract('https://en.wikipedia.org/wiki/Natural_language_processing')
article_sentences = nltk.sent_tokenize(article.cleaned_text)

[nltk_data] Downloading package punkt to /home/gustavo/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [29]:
def preprocessing(sentence: str) -> str:
    '''
    Função de pré processamento das sentenças
    '''
    
    sentence_lower = sentence.lower()

    tokens = [token.text for token in nlp(sentence) if not (token.is_stop
                                                            or token.like_num
                                                            or token.is_punct
                                                            or token.is_space
                                                            or len(token) == 1)]
    return ' '.join(tokens)


def answer(user_text: str, threshold: float = 0.25) -> str:
    '''
    Função que obtém a resposta para um texto do usuário
    '''
    
    preprocessed_sentences = [preprocessing(sentence) for sentence in article_sentences]
    preprocessed_user_text = preprocessing(user_text)

    # Adiciona o texto do usuário processado ao final das
    # sentenças limpas
    preprocessed_sentences.append(user_text)

    # Instancia vetorizador de TF-IDF
    vectorizer = TfidfVectorizer()

    # Vetoriza e transforma as sentenças preprocessadas
    vectorized_sentences = vectorizer.fit_transform(preprocessed_sentences)

    # Calcula a similaridade de conseno entre a ultima posição (pergunta usuário)
    # e as demais sentenças
    similarity = cosine_similarity(vectorized_sentences[-1], vectorized_sentences)

    # Obtém o indice da penúltima posição (maior corresopndencia)
    similarity_index = similarity.argsort()[0][-2]

    # Obtém o valor de similaridade
    similarity_score = similarity[0][similarity_index]

    if similarity_score < threshold:
        return 'sorry, no answer was found'

    # Acessa a lista de sentenças originais
    return article_sentences[similarity_index]

In [34]:
exit_inputs = ('quit', 'close', 'exit', 'q')

while True:
    user_text = input()

    if user_text.lower() in exit_inputs:
        print('Chatbot: Bye! See you soon...')
        break

    if (msg := welcome_message(user_text)) != None:
        print(f'Chatbot: {msg}')
    else:
        print(f'Chatbot: {answer(user_text)}')

 hello


Chatbot: how are you?


 hi


Chatbot: key


 hey


Chatbot: key


 what is natural language processing?


Chatbot: Natural language processing has its roots in the 1950s.


 who is alan turing?


Chatbot: Already in 1950, Alan Turing published an article titled "Computing Machinery and Intelligence" which proposed what is now called the Turing test as a criterion of intelligence, though at the time that was not articulated as a problem separate from artificial intelligence.


 does artificial intelligence and natural language processign are connected?


Chatbot: sorry, no answer was found


 q


Chatbot: Bye! See you soon...
