In [None]:
import sys
import os
# Extend the module search path with sibling project folders (paths are
# relative to the directory the notebook kernel was started in).
sys.path.append("../data-generator")
sys.path.append("../info-retrieval")
# NOTE(review): presumably resolved through one of the folders appended above — confirm.
import contriever.contriever_final # Asmita's contriever

In [None]:
# Notebook-wide settings, read by the retrieval, generation and re-ranking cells.
NUM_ANSWERS_GENERATED = 5  # number of contexts retrieved == number of answers generated
USER_QUESTION = "What are the inputs and outputs of a Gray code counter?"

## Using Asmita's Contriever

In [None]:
# Build the retriever object; the retrieval cell calls `retrieve_topk` on it.
my_contriever = contriever.contriever_final.ContrieverCB()

In [None]:
# Retrieve the top-k paragraphs for USER_QUESTION and flatten each one into a
# single newline-free string.
#
# NOTE(review): the call below looks like a one-off embedding generation step;
# it is kept disabled exactly as it was — confirm whether it is still needed.
# my_contriever.generate_embeddings("../data-generator/prompt engineering/gpt-3 semantic search/1_top_quality.json")

# Query with the shared USER_QUESTION constant (the question used to be
# duplicated here as a literal, so editing the constant did not change retrieval).
contriever_contexts = my_contriever.retrieve_topk(
    USER_QUESTION,
    path_to_json="../data-generator/split_textbook/paragraphs.json",
    k=NUM_ANSWERS_GENERATED,
)

# Each retrieved value is iterated piece by piece; newlines are stripped from
# every piece and the pieces are concatenated. A new list is built instead of
# mutating one in place so the cell is idempotent.
top_context_list = [
    "".join(piece.replace("\n", "") for piece in context)
    for context in contriever_contexts.values()
]
print('\n\n'.join(top_context_list))

### OPT

In [None]:
# Clear CUDA memory.
# Order matters: run the Python garbage collector first so that tensors that are
# only kept alive by unreachable reference cycles are destroyed, THEN ask
# PyTorch's caching allocator to hand its now-unused blocks back to the driver.
import gc
import torch
gc.collect()
torch.cuda.empty_cache()

In [None]:
from transformers import GPT2Tokenizer, OPTForCausalLM
import torch

# Tokenizer and weights come from the same Hugging Face checkpoint.
# ("opt-350m" was noted as an alternative checkpoint.)
opt_checkpoint = "facebook/opt-1.3b"
tokenizer = GPT2Tokenizer.from_pretrained(opt_checkpoint)
model = OPTForCausalLM.from_pretrained(opt_checkpoint)
# The model is not moved to the GPU here; the generation cell does that.

In [None]:
# Generate one candidate answer per retrieved context.
#
# Reads:  model, tokenizer, top_context_list, USER_QUESTION, NUM_ANSWERS_GENERATED
# Writes: response_list (list of generated answer strings, one per context)

# Fall back to the CPU when no GPU is visible instead of failing on .to("cuda").
device = "cuda" if torch.cuda.is_available() else "cpu"
# Moving the model is loop-invariant, so do it once up front.
model = model.to(device)

response_list = []
# Slice rather than index by range(): if the retriever returned fewer than
# NUM_ANSWERS_GENERATED contexts we simply generate fewer answers instead of
# raising IndexError.
for context in top_context_list[:NUM_ANSWERS_GENERATED]:
  prompt = "Please answer this person's question accurately, clearly and concicely. Context: " + context + '\n' + "Question: " + USER_QUESTION + '\n' + "Answer: "
  inputs = tokenizer(prompt, return_tensors="pt").to(device)

  # NOTE(review): max_length counts the prompt tokens as well, so a very long
  # context leaves little or no room for the answer — consider max_new_tokens.
  generate_ids = model.generate(
    inputs.input_ids,
    attention_mask=inputs.attention_mask,  # pass the mask explicitly so generate() does not have to infer it
    max_length=345,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    temperature=0.95,
    num_return_sequences=1,
    repetition_penalty=1.2,
    length_penalty=1.2,
    pad_token_id=tokenizer.eos_token_id,
  )

  # Decode only the newly generated tokens. Splitting the full decoded text on
  # "Answer:" returned a piece of the context whenever the context itself
  # contained that marker.
  prompt_length = inputs.input_ids.shape[1]
  new_token_ids = generate_ids[:, prompt_length:]
  opt_answer = tokenizer.batch_decode(new_token_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
  response_list.append(opt_answer)

print('\n---------------------------------NEXT---------------------------------\n'.join(response_list))

In [None]:
# Show the generated answers again, separated by a visual divider.
answer_divider = '\n---------------------------------NEXT---------------------------------\n'
print(answer_divider.join(response_list))

### ReRanking (MS-Marco Cross Encoder)

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Score every generated answer against the question with the MS-MARCO
# cross-encoder. Result: `scores`, the model's logits for the
# (question, answer) pairs.
#
# The re-ranker gets its own variable names: binding it to `model` / `tokenizer`
# replaced the OPT objects, so the generation cell could not be re-run after
# this cell had executed.
RERANKER_CHECKPOINT = 'cross-encoder/ms-marco-MiniLM-L-6-v2'
reranker_model = AutoModelForSequenceClassification.from_pretrained(RERANKER_CHECKPOINT)
reranker_tokenizer = AutoTokenizer.from_pretrained(RERANKER_CHECKPOINT)

assert response_list, "no generated answers to re-rank; run the generation cell first"

# Pair the question with each answer that actually exists, rather than assuming
# exactly NUM_ANSWERS_GENERATED answers were produced.
questions = [USER_QUESTION] * len(response_list)
features = reranker_tokenizer(questions, response_list, padding=True, truncation=True, return_tensors="pt")

reranker_model.eval()
with torch.no_grad():  # inference only, no gradients needed
    scores = reranker_model(**features).logits
    print(scores)

#### Working!! Using Jerome's Doc-Query

TODO: figure out how to keep the loaded document in memory between queries.

In [6]:
# The factory is imported under an alias so that binding the built pipeline to
# the name `pipeline` below does not shadow it; with the plain import, running
# this cell a second time called the pipeline object instead of the factory.
from docquery import document, pipeline as build_pipeline
import json
import re
import poppler

# Warning: requires lots of memory and lots of dependencies.

USER_QUESTION = "What is the difference between a synchronous and asynchronous counter?"
NUM_ANSWERS_GENERATED = 5

# Build the DocQuery question-answering pipeline and load the PDF once.
# `pipeline` and `doc` stay bound so that later cells can query them again.
pipeline = build_pipeline('document-question-answering')
doc = document.load_document("../data-generator/notes/Student_Notes_short.pdf")
# doc = document.load_document("../data-generator/notes/Student Notes.pdf")

# Ask for the top NUM_ANSWERS_GENERATED extracted quotes and print each with its score.
answer = pipeline(question=USER_QUESTION, **doc.context, top_k=NUM_ANSWERS_GENERATED)
print("Quotes from textbook: ")
for item in answer:
  print(item['answer'], ": ", item['score'])

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Quotes from textbook: 
No, :  3.0541425076080486e-05
No, :  3.6431545140658272e-06
we do not have to restrict ourselves to non-negative numbers. :  9.0

In [7]:
# Re-run the query against the pipeline and document that are already loaded.
answer = pipeline(question=USER_QUESTION, **doc.context, top_k=NUM_ANSWERS_GENERATED)
print("Quotes from textbook: ")
for quote in answer:
  quote_text, quote_score = quote['answer'], quote['score']
  print(quote_text, ": ", quote_score)

Quotes from textbook: 
No, :  3.0541425076080486e-05
No, :  3.6431545140658272e-06
we do not have to restrict ourselves to non-negative numbers. :  9.00167478334879e-08
This range of integers falls within the representable range :  4.4374765195698274e-08
falls within the representable range for N-bit 2’s complement, :  2.578081037540869e-08


# Try running this 👇 It runs the full pipeline through our `main` module

In [None]:
import torch
import main

# End-to-end run through the project's TA_Pipeline: retrieve contexts,
# generate one answer per context, re-rank the answers, print the best one.
ta = main.TA_Pipeline()

# The question itself — the previous value accidentally embedded the pasted
# assignment text ("user_question = '...'") inside the string.
USER_QUESTION = "What are the inputs and outputs of a Gray code counter?"
NUM_ANSWERS_GENERATED = 5

top_context_list = ta.contriever(user_question=USER_QUESTION, num_answers_generated=NUM_ANSWERS_GENERATED)
generated_answers_list = ta.OPT(top_context_list)
scores = ta.re_ranking_ms_marco(generated_answers_list)
# Convert the argmax result to a plain int before using it as a list index.
index_of_best_answer = int(torch.argmax(scores))  # position of the highest-scoring answer
print("Best answer 👇\n", generated_answers_list[index_of_best_answer])