In [None]:
# imports
import tensorflow as tf
import tensorflow_hub as hub
from transformers import BertTokenizer

In [None]:
# Load the model and tokenizer
# Name of the pretrained checkpoint handed to BertTokenizer.from_pretrained.
tokenizer_to_use = "bert-large-uncased-whole-word-masking-finetuned-squad"
# Module-level tokenizer: question_answer uses it to split text into tokens,
# map tokens to ids and join answer tokens back into a string.
tokenizer = BertTokenizer.from_pretrained(tokenizer_to_use)
# Module-level model loaded from TF Hub: question_answer calls it with
# [input_word_ids, input_mask, input_type_ids] and reads outputs[0] and
# outputs[1] to locate the start and end of the answer span.
model = hub.load("https://tfhub.dev/see--/bert-uncased-tf2-qa/1")

In [None]:
def question_answer(question, reference, max_seq_len=512):
    """
    Finds a snippet of text within a reference document that answers a
    question, using the module-level BERT tokenizer and QA model.

    Args:
        question: string containing the question to answer
        reference: string containing the reference document to find answer
        max_seq_len: maximum number of tokens sent to the model, counting
            [CLS], the question, both [SEP] markers and the reference.
            The reference is truncated to fit. Defaults to 512, the usual
            BERT position limit (NOTE(review): confirm against the hub
            model's documentation).
    Returns:
        String containing the answer or None if no answer is found
    """
    quest_toks = tokenizer.tokenize(question)
    ref_toks = tokenizer.tokenize(reference)
    # Nothing to ask, or nothing to search in: there can be no answer.
    if not quest_toks or not ref_toks:
        return None

    # Layout is [CLS] question [SEP] reference [SEP], i.e. three special
    # tokens on top of the question; whatever is left is the room available
    # for the reference.
    ref_budget = max_seq_len - len(quest_toks) - 3
    if ref_budget <= 0:
        return None
    ref_toks = ref_toks[:ref_budget]

    toks = ['[CLS]'] + quest_toks + ['[SEP]'] + ref_toks + ['[SEP]']
    input_word_ids = tokenizer.convert_tokens_to_ids(toks)
    input_mask = [1] * len(input_word_ids)
    # Segment 0 covers [CLS] + question + [SEP]; segment 1 covers the
    # reference and its closing [SEP].
    input_type_ids = [0] * (len(quest_toks) + 2) + [1] * (len(ref_toks) + 1)

    # The model expects a batch dimension, hence the extra list nesting.
    outputs = model([
        tf.convert_to_tensor([input_word_ids]),
        tf.convert_to_tensor([input_mask]),
        tf.convert_to_tensor([input_type_ids]),
    ])

    # Skip position 0 ([CLS]) when picking the span, then shift the index
    # back so it lines up with `toks`.
    short_start = int(tf.argmax(outputs[0][0][1:])) + 1
    short_end = int(tf.argmax(outputs[1][0][1:])) + 1
    # An end before the start is not a usable span.
    if short_end < short_start:
        return None

    answer = tokenizer.convert_tokens_to_string(
        toks[short_start: short_end + 1])
    return answer or None

In [None]:
# 0-main
# Read the article with an explicit encoding, matching how
# load_files_from_directory reads the same corpus, so the text does not
# depend on the platform's default encoding.
with open('ZendeskArticles/PeerLearningDays.md', encoding='utf-8') as f:
    reference = f.read()

# Ask each sample question against the article and print question + answer.
for question in ('When are PLDs?',
                 'What does PLD stand for?',
                 'What are Mock Interviews?'):
    print(question + "\n\t" + str(question_answer(question, reference)))

In [None]:
# Minimal prompt loop: echo every question, print an empty answer line,
# and stop with a goodbye once an exit command is typed (case-insensitive).
exit_commands = ['exit', 'quit', 'goodbye', 'bye']
while True:
    d = input('Q: ')
    print("Q: " + d)
    if d.lower() not in exit_commands:
        print("A: ")
        continue
    print('A: Goodbye')
    break


In [None]:
def answer_loop(reference):
    """
    Runs an interactive question/answer prompt over one reference text.

    Args:
        reference: the reference text passed to question_answer
    Each question is read from the 'Q: ' prompt and echoed back. Typing
    exit, quit, goodbye or bye (any casing) prints 'A: Goodbye' and ends
    the loop. If question_answer finds no answer, the reply is:
        'Sorry, I do not understand your question'
    """
    farewells = ('exit', 'quit', 'goodbye', 'bye')
    while True:
        question = input('Q: ')
        print('Q: ' + question)
        if question.lower() in farewells:
            print('A: Goodbye')
            return
        reply = question_answer(question, reference)
        if not reply:
            reply = 'Sorry, I do not understand your question'
        print('A: ' + reply)


In [None]:
# Read the article with an explicit encoding, matching how
# load_files_from_directory reads the same corpus, then start the
# interactive question loop on it.
with open('ZendeskArticles/PeerLearningDays.md', encoding='utf-8') as f:
    reference = f.read()

answer_loop(reference)

In [None]:
import os
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

In [None]:
def load_files_from_directory(directory):
    """
    Reads every markdown file directly inside a directory.

    Args:
        directory: path of the directory to scan (not recursive)
    Returns:
        List with the UTF-8 decoded content of each regular file whose
        name ends in ".md", ordered by file name so the result is the
        same on every run and platform. Empty list if there are none.
    """
    files = []
    # os.listdir gives no ordering guarantee; sort for reproducibility.
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        # Skip other extensions and anything that is not a regular file
        # (e.g. a sub-directory that happens to be named "*.md").
        if not filename.endswith(".md") or not os.path.isfile(path):
            continue
        with open(path, "r", encoding="utf-8") as file:
            files.append(file.read())
    return files

In [None]:
# Encoders already loaded from TF Hub, keyed by URL, so repeated searches
# (one per question in an interactive loop) do not reload the model.
_SENTENCE_ENCODERS = {}


def _get_sentence_encoder(model_url):
    """Returns the TF Hub model for model_url, loading it on first use only."""
    if model_url not in _SENTENCE_ENCODERS:
        _SENTENCE_ENCODERS[model_url] = hub.load(model_url)
    return _SENTENCE_ENCODERS[model_url]


def semantic_search(corpus_path, sentence):
    """
    Finds the corpus document most similar to a sentence.

    Args:
        corpus_path: path to the directory of ".md" reference documents
        sentence: string to compare against every document
    Returns:
        The full text of the document whose embedding has the highest
        cosine similarity with the embedding of `sentence`
    Raises:
        ValueError: if no markdown document is found in corpus_path
    """
    # Load the Universal Sentence Encoder model (cached after first call)
    model_url = "https://tfhub.dev/google/universal-sentence-encoder-large/5"
    encoder = _get_sentence_encoder(model_url)

    # Load the corpus documents
    corpus = load_files_from_directory(corpus_path)
    if not corpus:
        raise ValueError(
            "No markdown documents found in {!r}".format(corpus_path))

    # Encode the query sentence (one row) and the corpus (one row per doc)
    sentence_embedding = np.asarray(encoder([sentence]))
    corpus_embeddings = np.asarray(encoder(corpus))

    # cosine_similarity L2-normalises its inputs itself, so no separate
    # normalisation step is needed; row 0 holds the scores for the query.
    similarity_scores = cosine_similarity(
        sentence_embedding, corpus_embeddings)[0]

    # Find the index of the most similar document
    most_similar_idx = int(np.argmax(similarity_scores))

    return corpus[most_similar_idx]

In [None]:
# Print the full text of the corpus document that semantic_search ranks
# most similar to the question.
print(semantic_search('ZendeskArticles', 'When are PLDs?'))

In [None]:
# Same check with a second question: print the best-matching document.
print(semantic_search('ZendeskArticles', 'What is a Mock Interview'))

In [None]:
def semantic_answer(corpus_path):
    """
    Answers questions interactively from a corpus of reference documents.

    Args:
        corpus_path: path to the corpus of reference documents; it is
            passed to semantic_search for every question
    For each question read from the 'Q: ' prompt, semantic_search picks a
    reference document and question_answer looks for the answer in it.
    Typing exit, quit, goodbye or bye (any casing) prints 'A: Goodbye'
    and ends the loop.
    If the answer cannot be found in the selected document respond with:
        'Sorry, I do not understand your question'
    """
    exit_commands = ['exit', 'quit', 'goodbye', 'bye']
    while True:
        question = input('Q: ')
        if question.lower() in exit_commands:
            print('A: Goodbye')
            break
        # Pick a document for this question, then extract the answer
        # span from that document only.
        reference = semantic_search(corpus_path, question)
        answer = question_answer(question, reference)
        if answer:
            print('A: ' + answer)
        else:
            print('A: Sorry, I do not understand your question')

In [None]:
#4-main
# Start the interactive multi-document Q&A loop over the article corpus.
# question_answer requires both a question and a reference text, so calling
# it with only the corpus path raises TypeError; semantic_answer is the
# function that takes a corpus path.
semantic_answer('ZendeskArticles')