# This notebook creates JSON documents for indexing with Pyserini

In [1]:
import pandas as pd

In [2]:
# Load the Q&A workbook and discard every row that has a missing value.
# The workbook is opened in a context manager so the underlying file handle
# is released as soon as the sheet has been read.
with pd.ExcelFile('Q&A BC Immunize vaccine safety 3 topics ISA.xlsx') as xls:
    df0 = pd.read_excel(xls).dropna()

In [3]:
# Each 'Categories' cell is a comma-separated string; flatten all of them
# into one list of whitespace-trimmed category names.
all_labels = [
    piece.strip()
    for cell in df0['Categories'].unique()
    for piece in cell.split(',')
]

# De-duplicate and order alphabetically so the label ids are stable.
labels = sorted(set(all_labels))
print(labels)

# Map every category name to its position in the sorted list.
label_dict = {name: position for position, name in enumerate(labels)}

['Adults', 'Chickenpox vaccines', 'Diphtheria', 'General immunization', 'HPV vaccines', 'Hepatitis A vaccine', 'Hepatitis B vaccine', 'Immunization records', 'Infants & young children', 'Influenza vaccines', 'Measles', 'Meningococcal vaccines', 'Other', 'Pneumococcal vaccines', 'Pregnancy', 'Rotavirus Vaccines', 'School-age children & teens', 'Shingles vaccines', 'TB skin test', 'Travel vaccines', 'Vaccine Schedules', 'Vaccine ingredients', 'Vaccine safety', 'Where to get immunized', 'mumps and rubella vaccines', 'tetanus and pertussis vaccines']


In [4]:
import json
import os

# Write one JSON file per Q&A row into ./vaccine_collection, named after the
# row's DataFrame index.
# NOTE(review): the index queried later lives at
# ./indexes/vaccine_collection_jsonl, so presumably a separate conversion /
# indexing step sits between this folder and that index - confirm.
path = "./vaccine_collection"

# Create the output folder up front; without it open() fails on a fresh checkout.
os.makedirs(path, exist_ok=True)

for idx, row in df0.iterrows():
    text_q = row['Question']
    text_a = row['Answer']
    label = row['Categories']
    new_obj = {
        "text_q": text_q,
        "text_a": text_a,
        "label": label,
    }
    new_path = os.path.join(path, str(idx) + ".json")
    with open(new_path, 'w') as outfile:
        json.dump(new_obj, outfile)

In [5]:
from pyserini.search import SimpleSearcher

# Smoke-test the prebuilt index with a single free-text query.
searcher = SimpleSearcher('./indexes/vaccine_collection_jsonl')
hits = searcher.search('what is a vaccine ?')

# One line per hit: rank, document id, score, and that hit's own raw text
# (previously every line repeated the raw text of the first hit).
for rank, hit in enumerate(hits, start=1):
    print(f'{rank:2} {hit.docid:15} {hit.score:.5f} {hit.raw}')

1 836             1.45960 How often virus of polio, MMR and meningococcal are mutated? How long it to create a new vaccine? How old are vaccines which you used today for vaccination? I mean when was last update them, like a year of charge? What's exactly content in vaccines and what's the risque to take it? Thank you.

	It is important to get your vaccine information from reliable sources. This page provides a list of websites where you can find trustworthy, science-based information about vaccines. 

		How often virus of polio, MMR and meningococcal are mutated-It is not clear what you are asking in this question. 

		It can take 10 years or longer for a vaccine to be developed, tested, and finally approved for use by Health Canada. Read more here.

		Refer to the History of Immunization in BC for which vaccines were used at what time in BC's vaccine programs.

		Refer to Health Canada Drug Product Database to find the product monographs and  information on any changes to vaccines app

In [6]:
class LuceneSearcher:
    """Small convenience wrapper around a pyserini ``SimpleSearcher``.

    Every query goes through the searcher owned by the instance, so the
    index chosen at construction time is the one that is actually searched.
    """

    # Index used when the caller does not supply a path.
    DEFAULT_INDEX = './indexes/vaccine_collection_jsonl'

    def __init__(self, path=None):
        """Open the index at ``path``; a falsy ``path`` selects ``DEFAULT_INDEX``."""
        self.searcher = SimpleSearcher(path or self.DEFAULT_INDEX)

    def print_hits(self, query, k=5):
        """Print rank, the query, score and raw text for up to ``k`` hits."""
        hits = self.searcher.search(query, k=k)
        for i in range(len(hits)):
            print(f'{i+1:2} {query} \n {hits[i].score:.5f} {hits[i].raw}')

    def return_top_hits(self, query, k=5):
        """Return up to ``k`` hits as a list of ``[query, score, raw]`` rows."""
        hits = self.searcher.search(query, k=k)
        return [[query, hit.score, hit.raw] for hit in hits]

In [7]:
# Build a searcher with its default arguments and print the matches for one sample question.
srch = LuceneSearcher()
srch.print_hits(query="what vaccines are safe for me if i am pregnant ?")

1 what vaccines are safe for me if i am pregnant ? 
 7.00290 Is it safe to immunize my 15 month old child with MMR while I am pregnant?
Immunization of household contacts with the MMR vaccine is safe for pregnant women.  Vaccine viruses in the MMR vaccine are not transmitted to contacts so the vaccine does not pose a risk to a pregnant household member.  The MMR vaccine should be administered to children and other household contacts of pregnant women as recommended.  Ensuring that children and other close contacts are up-to-date with their immunizations can help protect the health of the pregnant woman and her baby.  

 2 what vaccines are safe for me if i am pregnant ? 
 6.11810 I am 25 yrs old. My doctor advised me to get Rubella Vaccine before I plan to get pregnant. Where can I go to get Rubella Vaccine for free of cost in Surrey BC.
The rubella vaccine is combined with the measles and mumps vaccine (the MMR vaccine).  You can get the MMR vaccine for free from your local public hea

In [8]:
# Load the chatbot questions, dropping rows with missing values; the context
# manager closes the workbook handle once the sheet has been read.
with pd.ExcelFile("./chatbot_data.xlsx") as xls:
    df = pd.read_excel(xls).dropna()

# Each 'Question' cell is split on '|' (presumably alternative phrasings of
# the same question - confirm). Print the two best hits for every segment.
# This cell only prints; it deliberately does not touch `new_array`, so
# re-running it cannot wipe results collected by another cell.
for idx, row in df.iterrows():
    text_q = row['Question']
    new_questions = text_q.split('|')
    for x in new_questions:
        srch.print_hits(x, k=2)
        print('-'*80)   # separator between segments of one row
    print('*'*80)       # separator between rows

 and this year.Should him still get his second dose of Chickenpox vaccine in grade 6?
By herpes on the face, we assume that you mean cold sores. The virus that causes chickenpox (the varicella zoster virus) is a virus of the herpes family but is not the same virus that causes cold sores (cold sores are caused by another virus of the herpes family, the herpes simplex virus). If a child has had cold sores, it is still recommended that they get the chickenpox (varicella) vaccine.
It is not necessary for people who have had chickenpox or shingles at 1 year of age or older to get the chickenpox vaccine.
If this does not answer your question, please clarify and resubmit your question.  You can also contact your local public health unit and ask to speak with a public health nurse. You can find the contact information for your local public health unit here. 

 2  My child has caught a cold. Should we vaccinate ?  
 6.17410 Hello my daughter has a 18 month immunization tomorrow she taking antib

In [9]:
# Load the chatbot questions, dropping rows with missing values; the context
# manager closes the workbook handle once the sheet has been read.
with pd.ExcelFile("./chatbot_data.xlsx") as xls:
    df = pd.read_excel(xls).dropna()

# Accumulate [query, score, raw] rows: top 3 hits for every '|'-separated
# segment of 'Question' except the first one.
# NOTE(review): the first segment is skipped here but not in the printing
# cell - presumably it is the original question itself; confirm.
new_array = []
for idx, row in df.iterrows():
    text_q = row['Question']
    new_questions = text_q.split('|')
    for x in new_questions[1:]:
        new_array.extend(srch.return_top_hits(x, k=3))

In [10]:
# Column names match the [query, score, raw] rows gathered in `new_array`.
columns = ["question", "score", "closest"]

# Tabulate the collected hits and persist them next to the notebook.
new_df = pd.DataFrame(data=new_array, columns=columns)
new_df.to_csv(path_or_buf="./text_search_results.csv")

In [51]:
import sys
# Load the chatbot questions, dropping rows with missing values; the context
# manager closes the workbook handle once the sheet has been read.
with pd.ExcelFile("./chatbot_data.xlsx") as xls:
    df = pd.read_excel(xls).dropna()

# Write the top-3 hits for every '|'-separated segment after the first to
# closest_preds.txt. The file is passed to print() directly instead of
# rebinding sys.stdout: the previous approach closed sys.stdout without
# restoring it, which breaks every later print in the same kernel.
# UTF-8 is requested explicitly so non-ASCII text in the hits cannot fail
# on platforms whose default encoding is narrower.
with open("closest_preds.txt", "w", encoding="utf-8") as preds_file:
    for idx, row in df.iterrows():
        text_q = row['Question']
        new_questions = text_q.split('|')
        for x in new_questions[1:]:
            print(srch.return_top_hits(x, k=3), file=preds_file)

In [None]:
# NOTE(review): `lol` is not referenced anywhere else in the visible notebook;
# this looks like a leftover scratch cell - confirm and remove.
lol = 10