In [1]:
from haystack import Pipeline
from haystack.nodes import FARMReader
from haystack.nodes import DensePassageRetriever
import os
from haystack.document_stores import ElasticsearchDocumentStore
import pandas as pd
import openai
import json

In [2]:
host = os.environ.get("ELASTICSEARCH_HOST", "localhost")

In [3]:
document_store = ElasticsearchDocumentStore(
        host=host,
        username="",
        password="",
        index="document",
        similarity="dot_product",
        embedding_dim=768,
        return_embedding=True,
    )



In [4]:
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base"
)

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizerFast'.


In [5]:
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)

In [6]:
# initialize qna pipeline for reddit posts
querying_pipeline = Pipeline()
querying_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
querying_pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])

In [7]:
query = "When was Pratt School of Engineering founded?"

In [45]:
query = "What is the tuition fee for Master of Engineering Management at Duke University?"

In [31]:
query = "How long does it take to complete the Master of Engineering Management degree?"

In [59]:
query = "How many graduate students are enrolled in the Master of Engineering Management program?"

In [73]:
query = "How many credits are required for the Master degree?"

In [87]:
query = "What is the requirement for Bachelor of Science in Engineering?"

In [88]:
prediction = querying_pipeline.run(
    query=query,
    params={
        "Retriever": {"top_k": 10},
        "Reader": {"top_k": 5}
        }
)



Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s]

In [89]:
answers = pd.DataFrame([i.to_dict() for i in prediction["answers"]])

In [90]:
answers

Unnamed: 0,answer,type,score,context,offsets_in_document,offsets_in_context,document_ids,meta
0,Undergraduate,extractive,0.686385,## You are here Home » Undergraduate » Bachelor of Science in Engineering (B...,"[{'start': 23, 'end': 36}]","[{'start': 23, 'end': 36}]",[2ab4f8f9985d34065eef52dc05de6b01],{'_split_id': 0}
1,General Degree Requirements,extractive,0.638005,» Undergraduate » Bachelor of Science in Engineering (BSE) # General Degree...,"[{'start': 82, 'end': 109}]","[{'start': 62, 'end': 89}]",[74b3cb4128bf48c3a9f5449be0aa8e21],{'_split_id': 0}
2,Double Majors Many students complete the requirements for a departmental maj...,extractive,0.186097,Double Majors Many students complete the requirements for a departmental maj...,"[{'start': 120, 'end': 372}]","[{'start': 0, 'end': 252}]",[51fcf23736c44c409ab0958071e75e7c],{'_split_id': 0}
3,those that present essential subject matter and substance of the discipline,extractive,0.140724,Courses selected must be those that present essential subject matter and sub...,"[{'start': 25, 'end': 100}]","[{'start': 25, 'end': 100}]",[725320aaf7d6d72b973f25c9ec4aca8c],{'_split_id': 1}
4,masters,extractive,0.120374,**#24** Best Graduate Engineering School (tie)—includes Top 25 PhD and mast...,"[{'start': 378, 'end': 385}]","[{'start': 72, 'end': 79}]",[c42b7dc4cb233ffea48a0f0aaadb9e0a],{'_split_id': 4}


In [91]:
answers['document_ids'] = answers['document_ids'].apply(lambda x: x[0])

In [92]:
documents = pd.DataFrame([i.to_dict() for i in prediction["documents"]])

In [93]:
merge = pd.merge(documents, answers, left_on="id", right_on="document_ids", how="inner")

In [94]:
merge

Unnamed: 0,content,content_type,score_x,meta_x,id_hash_keys,embedding,id,answer,type,score_y,context,offsets_in_document,offsets_in_context,document_ids,meta_y
0,Courses selected must be those that present essential subject matter and sub...,text,0.695068,{'_split_id': 1},[content],"[0.43349174, 0.33182672, 0.60650444, 0.47958523, 0.42741317, 0.23738325, 0.2...",725320aaf7d6d72b973f25c9ec4aca8c,those that present essential subject matter and substance of the discipline,extractive,0.140724,Courses selected must be those that present essential subject matter and sub...,"[{'start': 25, 'end': 100}]","[{'start': 25, 'end': 100}]",725320aaf7d6d72b973f25c9ec4aca8c,{'_split_id': 1}
1,## You are here Home » Undergraduate » Bachelor of Science in Engineering (B...,text,0.694863,{'_split_id': 0},[content],"[0.4361204, 0.5437646, 0.35926843, 0.14172421, 0.4723882, -0.00055124983, 0....",2ab4f8f9985d34065eef52dc05de6b01,Undergraduate,extractive,0.686385,## You are here Home » Undergraduate » Bachelor of Science in Engineering (B...,"[{'start': 23, 'end': 36}]","[{'start': 23, 'end': 36}]",2ab4f8f9985d34065eef52dc05de6b01,{'_split_id': 0}
2,## You are here Home » Undergraduate » Bachelor of Science in Engineering (B...,text,0.691977,{'_split_id': 0},[content],"[0.3786407, 0.6160215, 0.5768361, 0.34926012, 0.26924872, -0.013151873, 0.69...",74b3cb4128bf48c3a9f5449be0aa8e21,General Degree Requirements,extractive,0.638005,» Undergraduate » Bachelor of Science in Engineering (BSE) # General Degree...,"[{'start': 82, 'end': 109}]","[{'start': 62, 'end': 89}]",74b3cb4128bf48c3a9f5449be0aa8e21,{'_split_id': 0}
3,## You are here Home » Undergraduate » For Current Students » Policies & Pro...,text,0.688841,{'_split_id': 0},[content],"[0.74831176, 0.65101445, 0.48997885, 0.04963132, 0.15922277, 0.067849405, 0....",51fcf23736c44c409ab0958071e75e7c,Double Majors Many students complete the requirements for a departmental maj...,extractive,0.186097,Double Majors Many students complete the requirements for a departmental maj...,"[{'start': 120, 'end': 372}]","[{'start': 0, 'end': 252}]",51fcf23736c44c409ab0958071e75e7c,{'_split_id': 0}
4,* * * ## Rankings ### Academics #### U.S. News & World Report * **#7** onlin...,text,0.688414,{'_split_id': 4},[content],"[0.061269794, 0.11897026, 0.4946767, 0.6576563, 0.48026288, -0.086671084, 0....",c42b7dc4cb233ffea48a0f0aaadb9e0a,masters,extractive,0.120374,**#24** Best Graduate Engineering School (tie)—includes Top 25 PhD and mast...,"[{'start': 378, 'end': 385}]","[{'start': 72, 'end': 79}]",c42b7dc4cb233ffea48a0f0aaadb9e0a,{'_split_id': 4}


In [95]:
info = merge["content"].head(1).values[0]

In [96]:
info

"Courses selected must be those that present essential subject matter and substance of the discipline. ## Engineering and Applied Sciences, Digital Computation (4) This requirement is met by completion of one course from each of four of the following seven areas: digital systems, electrical science, information and computer science, mechanics (solid and fluid), materials science, systems analysis, and thermal science and transfer processes. Students are expected to have acquired digital-computer programming capability before their sophomore year. The programming capability is generally satisfied by Engineering 103L (53L). ## Engineering Department Courses (15) The academic department (BME, CEE, ECE or MEMS) administering the major field of study, or the interdisciplinary curricula for IDEAS students, specifies this requirement. In general, it consists of both required and elective courses planned in consultation with a student's advisor. Including the 4 credits in engineering and appli

In [97]:
config = json.load(open("../config.json"))
openai_api_key = config["openai_api_key"]

In [98]:
openai.api_key = openai_api_key

In [99]:
completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user",
             "content": f"Can you answer the given question based on the provided information? Question: '{query}' Information: '{info}'"}
        ]
    )

In [100]:
dict(completion.choices[0].message)["content"].replace("\n", "")

"The requirement for Bachelor of Science in Engineering includes completing one course from each of the seven areas: digital systems, electrical science, information and computer science, mechanics (solid and fluid), materials science, systems analysis, and thermal science and transfer processes. Additionally, students must have acquired digital-computer programming capability before their sophomore year, which is generally satisfied by Engineering 103L (53L). Furthermore, students must complete 15 engineering department courses, which includes both required and elective courses planned in consultation with a student's advisor. A total of 13.0 credits in engineering work are required, including the 4 credits in engineering and applied sciences listed under general requirements."

In [26]:
# initialize qna pipeline for reddit posts
querying_pipeline = Pipeline()
querying_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
querying_pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])

In [27]:
from haystack.pipelines import GenerativeQAPipeline

In [28]:
pipe = GenerativeQAPipeline(generator=generator, retriever=retriever)

In [40]:
res = pipe.run(query=query, params={"Generator": {"top_k": 2}, "Retriever": {"top_k": 10}})

In [41]:
res

{'query': 'When was Pratt School of Engineering founded?',
 'answers': [<Answer {'answer': ' duke university', 'type': 'generative', 'score': None, 'context': None, 'offsets_in_document': None, 'offsets_in_context': None, 'document_ids': ['8ac5c618edf87ef60f6cfb703ad8d98e', '6ad9a9a0d75efa87f008c3a84094a997', 'b5c13e0776f002cedbfeed927620a055', '7bab54b0d21cad535e31cd66d98f755b', '9b3f892980e6dae18f7b1373533408d7', 'e5b86ef2a9583e4e991a454f1fc8d6a8', 'c880c68f7852ce38087067928f5f70e0', 'e2a2def9477f6c06a011823d3dcc7508', 'f70d9c84d5a60b3530898ca92b273353', '529c8feecb3ac0eff13b3512ab1f4186'], 'meta': {'doc_scores': [0.6842600275997561, 0.6695001093427642, 0.6679870914859393, 0.6649212424771971, 0.6615553298375124, 0.6601689040614747, 0.6599819989821073, 0.6574389459378871, 0.6573049316658068, 0.6568630701384175], 'content': ["## You are here Home » Alumni & Giving # Duke Engineering: A History From its beginnings more than 75 years ago, Duke's Pratt School of Engineering has grown into

In [42]:
from haystack.nodes import PromptNode

In [48]:
prompt_node = PromptNode(default_prompt_template="question-answering")

In [49]:
pipe1 = Pipeline()

In [50]:
pipe1.add_node(component=retriever, name="Retriever", inputs=["Query"])
pipe1.add_node(component=prompt_node, name="prompt_node", inputs=["Retriever"])

In [53]:
pipe1.run(query=query, params={"Retriever": {"top_k": 10}})

Exception: Exception while running node 'prompt_node': Expected prompt parameters ['documents', 'questions'] but got ['top_k', 'query', 'documents', 'stop_words'].
Enable debug logging to see the data that was passed when the pipeline failed.

In [None]:
pipe.add_node(component=node, name="prompt_node", inputs=["Query"])

In [None]:
prompt_node.prompt(prompt_template="question-answering", 
          documents=[Document("Berlin is the capital of Germany."), Document("Paris is the capital of France.")],
          query="What is the capital of Germany?")

In [7]:
from haystack.nodes import PromptNode

# Specify "gpt-3.5-turbo" as the model for PromptNode
prompt_node = PromptNode(model_name_or_path="gpt-3.5-turbo", api_key=openai_api_key)

PromptNode has been potentially initialized with a language model not fine-tuned on instruction following tasks. Many of the default prompts and PromptTemplates will likely not work as intended. Use custom prompts and PromptTemplates specific to the gpt-3.5-turbo model


In [8]:
messages = [{"role": "system", "content": "You are a helpful assistant"}]

def build_chat(user_input: str = "", asistant_input: str = ""):
    if user_input != "":
        messages.append({"role": "user", "content": user_input})
        
    if asistant_input != "":
        messages.append({"role": "assistant", "content": asistant_input})

def chat(input: str):
    build_chat(user_input=input)
    chat_gpt_answer = prompt_node(messages)
    build_chat(asistant_input=chat_gpt_answer[0])
    return chat_gpt_answer

In [9]:
chat("Who is Barack Obama Married to?")

['Barack Obama is married to Michelle Obama.']

In [10]:
from getpass import getpass

In [13]:
from haystack.nodes import PromptTemplate, PromptNode, PromptModel

In [18]:
prompt_open_ai = PromptModel(model_name_or_path="text-davinci-003", api_key=openai_api_key)

In [19]:
pipe1 = Pipeline()
pipe1.add_node(component=retriever, name="Retriever", inputs=["Query"])
pipe1.add_node(component=prompt_open_ai, name="prompt_node_2", inputs=["Retriever"])

In [22]:
pipe1.run(query=query)



Exception: Exception while running node 'prompt_node_2': This method should never be implemented in the derived class
Enable debug logging to see the data that was passed when the pipeline failed.

In [None]:

pipe1.add_node(component=prompt_node, name="prompt_node", inputs=["Retriever"])

In [None]:

pipe.add_node(component=node_openai, name="prompt_node_2", inputs=["prompt_node1"])
output = pipe.run(query="not relevant", documents=[Document("Berlin is the capital of Germany")]