In [3]:
# Enable the Jupyter widgets notebook extension for this environment.
# The leading `!` hands the line to the system shell, so `jupyter` must be on PATH.
!jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


In [2]:
from haystack.document_stores import ElasticsearchDocumentStore

from haystack.nodes import EmbeddingRetriever
import pandas as pd

In [4]:
# Recommended: Start Elasticsearch using Docker via the Haystack utility function.
# NOTE(review): this presumably requires a running Docker daemon; the hash printed
# below looks like the ID of the started container - confirm.
from haystack.utils import launch_es

launch_es()

60dff4dba8f83cd63bae26e9d8c9ae222a011d19c3d395e2209d9d0128714397


In [6]:
from haystack.document_stores import ElasticsearchDocumentStore

# Document store backed by the local Elasticsearch instance started above.
document_store = ElasticsearchDocumentStore(
    # --- connection (no authentication configured for the local instance) ---
    host="localhost",
    username="",
    password="",
    # --- index holding the FAQ documents ---
    index="document",
    # --- embeddings: stored under "question_emb"; the dimension has to match the
    #     vectors produced by the retriever model configured later in the notebook ---
    embedding_dim=384,
    embedding_field="question_emb",
    # Listed so the embedding field is excluded from the documents' meta data.
    excluded_meta_data=["question_emb"],
)

In [7]:
# Load the scraped FAQ data into `data`.
# The cell that builds the DataFrame reads it as a nested mapping:
#   {topic: {question_id: {'question': ..., 'answer': ..., 'question_link': ...}}}
import json

# Pass the encoding explicitly: without it `open` falls back to the platform's
# locale encoding, which can mis-decode non-ASCII characters in the scraped text.
with open('../cci-scrape/cci-scrape.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

In [8]:
# Embedding retriever used both to embed the FAQ questions for indexing and to
# answer queries through the FAQ pipeline. Runs on CPU (use_gpu=False).
retriever = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
    use_gpu=False,
)

INFO - haystack.modeling.utils -  Using devices: CPU
INFO - haystack.modeling.utils -  Number of GPUs: 0
INFO - haystack.nodes.retriever.dense -  Init retriever using embeddings of model sentence-transformers/all-MiniLM-L6-v2
INFO - haystack.modeling.utils -  Using devices: CPU
INFO - haystack.modeling.utils -  Number of GPUs: 0
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find sentence-transformers/all-MiniLM-L6-v2 locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Loaded sentence-transformers/all-MiniLM-L6-v2
INFO - haystack.modeling.data_handler.processor -  Initialized processor without tasks. Supply `metric` and `label_list` to the constructor for using the default task or add a custom task later via processor.add_task()
INFO - haystack.modeling.logger -  ML Logging is turned off. No pa

In [9]:
# Flatten the nested scrape ({topic: {question_id: entry}}) into one row per FAQ entry.
# Every entry must provide 'question', 'answer' and 'question_link'; a missing key
# raises KeyError.
faq_rows = [
    (topic_key, entry['question'], entry['answer'], entry['question_link'])
    for topic_key, entries in data.items()
    for entry in entries.values()
]

# Build the frame in a single step from the collected rows. Passing `columns`
# keeps the expected schema (and column order) even when there are no rows.
df = pd.DataFrame(faq_rows, columns=['topic', 'question', 'answer', 'link'])

In [10]:
# Persist the FAQ table as comma-separated UTF-8 text, without the row index.
df.to_csv('faq.csv', index=False, encoding='utf-8')

In [11]:
# Reload the FAQ table written by the previous cell ("question", "answer" and
# the remaining columns, which travel along as custom metadata).
df = pd.read_csv("faq.csv")
print(df.head())

# Compute one embedding per FAQ question with the retriever's model.
questions = df["question"].tolist()
question_embeddings = retriever.embed_queries(texts=questions)

# Attach the vectors under the document store's embedding field and expose the
# question text under the "content" column.
df = df.assign(question_emb=question_embeddings).rename(columns={"question": "content"})

# One dict per row, then index them all in the DocumentStore.
docs_to_index = df.to_dict(orient="records")
document_store.write_documents(docs_to_index)

                 topic  \
0    advanced-standing   
1          application   
2          application   
3       broaderimpacts   
4  cci-research-office   

                                                                  question  \
0          What's different about transfer credits from Advanced Standing?   
1  I am having a lot of problems with my application.  Whom can I contact?   
2   I'm interested in applying to CIS PhD program.  What should I do next?   
3                What are some activities to consider for broader impacts?   
4             I need help with my external funding.  Who should I contact?   

                                                                            answer  \
0  \n<p>With advanced standing,</p><ul><li>You must have a master's degree in C...   
1  \n<p>For all application questions, please</p><ol><li>For the fastest respon...   
2  \n<p>Admission is competitive. Preference is given to applicants with strong...   
3  \n<p>Refer to <a class="ext

Inferencing Samples: 100%|██████████| 3/3 [00:20<00:00,  6.81s/ Batches]


In [12]:
from haystack.pipelines import FAQPipeline

# Wrap the embedding retriever in an FAQ pipeline; the query cells below call `pipe.run(...)`.
pipe = FAQPipeline(retriever=retriever)

In [13]:
from haystack.utils import print_answers

# Sanity check: a rewording of an indexed FAQ question, limited to the
# retriever's top 3 results.
prediction = pipe.run(
    query="What's different about transfer credits and advanced standing?",
    params={"Retriever": {"top_k": 3}},
)
print_answers(prediction, details='medium')

Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  3.48 Batches/s]



Query: What's different about transfer credits and advanced standing?
Answers:
[   {   'answer': '\n'
                  '<p>With advanced standing,</p><ul><li>You must have a '
                  "master's degree in Computer Science, Software Information "
                  'Systems, or Computer Engineering.</li><li>Advanced standing '
                  'applies only to the Computer Science or Software '
                  'Information Systems Track only.</li><li>You cannot transfer '
                  'any credits</li><li>However, you only have 42 credits left '
                  'to go on your degree.\xa0 Which is 30 credits less than the '
                  'normal 72 credits</li><li>Other than declaring the advanced '
                  'standing during the application process, no other actions '
                  'are needed.</li><li>You must complete your qualifying exam '
                  'after your first year in the program.</li></ul><p>With '
                  'transfer credit

In [14]:
# Second check: a rewording of the external-funding FAQ question, again limited
# to the retriever's top 3 results.
prediction = pipe.run(
    query="Who should I contact for help with external funding?",
    params={"Retriever": {"top_k": 3}},
)
print_answers(prediction, details='medium')

Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  4.23 Batches/s]


Query: Who should I contact for help with external funding?
Answers:
[   {   'answer': '\n'
                  '<div class="table-wrap"><table class="relative-table '
                  'wrapped confluenceTable" style="width: '
                  '66.2512%;"><colgroup><col style="width: 31.7391%;"/><col '
                  'style="width: 68.2609%;"/></colgroup><tbody><tr><th '
                  'class="confluenceTh">Questions</th><th '
                  'class="confluenceTh">Whom to contact</th></tr><tr><td '
                  'class="confluenceTd">For spending external funds</td><td '
                  'class="confluenceTd"><p>Contact Caroline '
                  'Kennedy</p></td></tr><tr><td class="confluenceTd">For '
                  'preparing new submissions</td><td '
                  'class="confluenceTd">Contact Audrey '
                  'Callahan</td></tr></tbody></table></div>\n',
        'context': '\n'
                   '<div class="table-wrap"><table class="relative-table


