In [30]:
# imports
import tensorflow as tf
import tensorflow_hub as hub
from transformers import BertTokenizer

In [4]:
# Load the model and tokenizer
tokenizer_to_use = "bert-large-uncased-whole-word-masking-finetuned-squad"
tokenizer = BertTokenizer.from_pretrained(tokenizer_to_use)
model = hub.load("https://tfhub.dev/see--/bert-uncased-tf2-qa/1")

In [5]:
def question_answer(question, reference):
    """
    Args:
        question: string containing the question to answer
        reference: string containing the reference document to find answer
    Returns:
        String containing the answer or None if no answer is found
    """
    quest_toks = tokenizer.tokenize(question)
    ref_toks = tokenizer.tokenize(reference)
    toks = ['[CLS]'] + quest_toks + ['[SEP]'] + ref_toks + ['[SEP]']
    input_word_ids = tokenizer.convert_tokens_to_ids(toks)
    input_mask = [1] * len(input_word_ids)
    quest_len = len(quest_toks)
    ref_len = len(ref_toks)
    input_type_ids = [0] * (1 + quest_len + 1) + [1] * (ref_len + 1)

    input_word_ids = tf.convert_to_tensor([input_word_ids])
    input_mask = tf.convert_to_tensor([input_mask])
    input_type_ids = tf.convert_to_tensor([input_type_ids])

    outputs = model([input_word_ids, input_mask, input_type_ids])

    short_start = tf.argmax(outputs[0][0][1:]) + 1
    short_end = tf.argmax(outputs[1][0][1:]) + 1
    answer_tokens = toks[short_start: short_end + 1]
    answer = tokenizer.convert_tokens_to_string(answer_tokens)

    if answer:
        return answer
    return None

In [6]:
# 0-main
with open('ZendeskArticles/PeerLearningDays.md') as f:
    reference = f.read()

question = 'When are PLDs?'
print(question + "\n\t" + str(question_answer(question, reference)))

question = 'What does PLD stand for?'
print(question + "\n\t" + str(question_answer(question, reference)))

question = 'What are Mock Interviews?'
print(question + "\n\t" + str(question_answer(question, reference)))

When are PLDs?
	on - site days from 9 : 00 am to 3 : 00 pm
What does PLD stand for?
	peer learning days
What are Mock Interviews?
	None


In [7]:
exit_commands = ['exit', 'quit', 'goodbye', 'bye']
while(True):
    d = input('Q: ')
    print("Q: " + d)
    if d.lower() in exit_commands:
        print('A: Goodbye')
        break
    print("A: ")


Q: Hello
A: 
Q: How are you?
A: 
Q: BYE
A: Goodbye


In [8]:
def answer_loop(reference):
    """
    Args:
        reference: the reference text
    If the answer cannot be found in the reference text respond with:
        'Sorry, I do not understand your question'
    """
    exit_commands = ['exit', 'quit', 'goodbye', 'bye']
    while(True):
        question = input('Q: ')
        print('Q: ' + question)
        if question.lower() in exit_commands:
            print('A: Goodbye')
            break
        answer = question_answer(question, reference)
        if answer:
            print('A: ' + answer)
        else:
            print('A: Sorry, I do not understand your question')


In [9]:
with open('ZendeskArticles/PeerLearningDays.md') as f:
    reference = f.read()

answer_loop(reference)

Q: When are PLDs?
A: on - site days from 9 : 00 am to 3 : 00 pm
Q: What are Mock Interviews?
A: Sorry, I do not understand your question
Q: What does PLD stand for?
A: peer learning days
Q: EXIT
A: Goodbye


In [38]:
def semantic_search(corpus_path, sentence):
    """
    Performs semantic search on a corpus of documents
    Args:
        corpus_path: path to the corpus of reference documents
        sentence: sentence on which to perform semantic search
    Returns:
        reference text of the document most similar to sentence
    """
    # Convert document text into list
    corpus = []
    for item in os.listdir(corpus_path):
        with open(os.path.join(corpus_path, item), 'r') as f:
            text = f.read()
            corpus.append(text)
    print(corpus[1])



In [39]:
print(semantic_search('ZendeskArticles', 'When are PLDs?'))

Computer and Internet Usage Policy
There is to be no video game play at school before 6:00 PM on weekdays
The intranet is proprietary. Screenshots and copies of projects/assignments are strictly prohibited from being shared. (Please refer to the Intellectual Property - Ownership Policy in the Student Catalog);
Holberton communications (Slack messages, emails, Holberton produced slide decks, etc.) are considered confidential communications and are not permitted to be shared publicly. (Please refer to the Intellectual Property - Ownership Policy in the Student Catalog);
It is required to regularly check your Holberton accounts:
Messages via the intranet on a daily basis
Slack Messages
Emails in your Google account on a daily basis
We have a zero tolerance policy for torrenting illegal data. Torrenting at school could result in immediate dismissal.
None
