In [1]:
import os
import sys

In [2]:
IN_COLAB='google.colab' in sys.modules

In [3]:
if IN_COLAB:
    from google.colab import drive

    WORKING_FOLDER="/content/drive/MyDrive/unicamp/ia368v_dd/aula_08"

    drive.mount('/content/drive', force_remount=True)

    os.chdir(WORKING_FOLDER)
    
    !python3.8 -m pip install openai -q

In [4]:
import pandas as pd
import pickle
import numpy as np

import json

import time

import re

import openai

In [5]:
TREC_COVID_MERGED_FILE="trec_covid_merged_data.tsv"
TREC_COVID_DOCUMENTS_FILE="trec_covid_original_title_text_merged.tsv"

TREC_COVID_QRELS="trec_covid_qrels.tsv"

In [6]:
API_KEYS_FILE="../api_keys_20230324.json"

In [7]:
pd.set_option('display.max_colwidth', None)

## Set the random seed

In [8]:
RANDOM_SEED = 6

In [9]:
rng = np.random.default_rng(RANDOM_SEED)

## Explore TREC COVID documents with questions

In [10]:
merged_df = pd.read_csv(TREC_COVID_MERGED_FILE, sep="\t")

In [11]:
merged_df

Unnamed: 0,query-id,corpus-id,score,query-text,corpus-title,corpus-text,query-metadata,corpus-metadata
0,1,005b2j4b,2,what is the origin of COVID-19,Monophyletic Relationship between Severe Acute Respiratory Syndrome Coronavirus and Group 2 Coronaviruses,"Although primary genomic analysis has revealed that severe acute respiratory syndrome coronavirus (SARS CoV) is a new type of coronavirus, the different protein trees published in previous reports have provided no conclusive evidence indicating the phylogenetic position of SARS CoV. To clarify the phylogenetic relationship between SARS CoV and other coronaviruses, we compiled a large data set composed of 7 concatenated protein sequences and performed comprehensive analyses, using the maximum-likelihood, Bayesian-inference, and maximum-parsimony methods. All resulting phylogenetic trees displayed an identical topology and supported the hypothesis that the relationship between SARS CoV and group 2 CoVs is monophyletic. Relationships among all major groups were well resolved and were supported by all statistical analyses.","{'query': 'coronavirus origin', 'narrative': ""seeking range of information about the SARS-CoV-2 virus's origin, including its evolution, animal source, and first transmission into humans""}","{'url': 'https://www.ncbi.nlm.nih.gov/pubmed/15116304/', 'pubmed_id': '15116304'}"
1,16,005b2j4b,0,how long does coronavirus remain stable on surfaces?,Monophyletic Relationship between Severe Acute Respiratory Syndrome Coronavirus and Group 2 Coronaviruses,"Although primary genomic analysis has revealed that severe acute respiratory syndrome coronavirus (SARS CoV) is a new type of coronavirus, the different protein trees published in previous reports have provided no conclusive evidence indicating the phylogenetic position of SARS CoV. To clarify the phylogenetic relationship between SARS CoV and other coronaviruses, we compiled a large data set composed of 7 concatenated protein sequences and performed comprehensive analyses, using the maximum-likelihood, Bayesian-inference, and maximum-parsimony methods. All resulting phylogenetic trees displayed an identical topology and supported the hypothesis that the relationship between SARS CoV and group 2 CoVs is monophyletic. Relationships among all major groups were well resolved and were supported by all statistical analyses.","{'query': 'how long does coronavirus survive on surfaces', 'narrative': 'Studies of time SARS-CoV-2 remains stable after being deposited from an infected person on everyday surfaces in a household or hospital setting, such as through coughing or touching objects.'}","{'url': 'https://www.ncbi.nlm.nih.gov/pubmed/15116304/', 'pubmed_id': '15116304'}"
2,32,005b2j4b,0,"Does SARS-CoV-2 have any subtypes, and if so what are they?",Monophyletic Relationship between Severe Acute Respiratory Syndrome Coronavirus and Group 2 Coronaviruses,"Although primary genomic analysis has revealed that severe acute respiratory syndrome coronavirus (SARS CoV) is a new type of coronavirus, the different protein trees published in previous reports have provided no conclusive evidence indicating the phylogenetic position of SARS CoV. To clarify the phylogenetic relationship between SARS CoV and other coronaviruses, we compiled a large data set composed of 7 concatenated protein sequences and performed comprehensive analyses, using the maximum-likelihood, Bayesian-inference, and maximum-parsimony methods. All resulting phylogenetic trees displayed an identical topology and supported the hypothesis that the relationship between SARS CoV and group 2 CoVs is monophyletic. Relationships among all major groups were well resolved and were supported by all statistical analyses.","{'query': 'coronavirus subtypes', 'narrative': 'Papers that discuss subtypes of the virus, from named subtypes to speculative subtypes based on genomic or geographic clustering.'}","{'url': 'https://www.ncbi.nlm.nih.gov/pubmed/15116304/', 'pubmed_id': '15116304'}"
3,37,005b2j4b,0,What is the result of phylogenetic analysis of SARS-CoV-2 genome sequence?,Monophyletic Relationship between Severe Acute Respiratory Syndrome Coronavirus and Group 2 Coronaviruses,"Although primary genomic analysis has revealed that severe acute respiratory syndrome coronavirus (SARS CoV) is a new type of coronavirus, the different protein trees published in previous reports have provided no conclusive evidence indicating the phylogenetic position of SARS CoV. To clarify the phylogenetic relationship between SARS CoV and other coronaviruses, we compiled a large data set composed of 7 concatenated protein sequences and performed comprehensive analyses, using the maximum-likelihood, Bayesian-inference, and maximum-parsimony methods. All resulting phylogenetic trees displayed an identical topology and supported the hypothesis that the relationship between SARS CoV and group 2 CoVs is monophyletic. Relationships among all major groups were well resolved and were supported by all statistical analyses.","{'query': 'SARS-CoV-2 phylogenetic analysis', 'narrative': 'Looking for a range of studies which provide the results of phylogenetic network analysis on the SARS-CoV-2 genome'}","{'url': 'https://www.ncbi.nlm.nih.gov/pubmed/15116304/', 'pubmed_id': '15116304'}"
4,1,00fmeepz,1,what is the origin of COVID-19,Comprehensive overview of COVID-19 based on current evidence,"In December 2019, twenty-seven pneumonia patients with unknown causes originated in South China seafood market in Wuhan. The virus infection spread rapidly and swept through China in less than a month. Subsequently, the virus was proven a novel coronavirus and named SARS-CoV-2. The outbreak of novel coronavirus has been determined as a Public Health Emergency of International Concern (PHEIC) by WHO on January 31, 2020. Similar to other coronaviruses like the Middle East Respiratory Syndrome (MERS) CoV and Severe Acute Respiratory Syndrome (SARS) CoV, the novel coronavirus was reported to spread via respiratory droplets and close contact from human to human, which means the virus is highly infectious and dangerous. Unfortunately, till now the virus has spread to over 200 countries/territories/areas around the world and the Coronavirus Disease 2019 (COVID-19) outbreak is continuing to grow. Currently, information sharing and transparency are essential for risk assessment and epidemic control in all endemic areas. In this article, we compared SARS-CoV-2 with SARS-CoV and influenza virus, discussed current researching progress of COVID-19, including clinical characteristics, pathological changes, treatment measures, and so on.","{'query': 'coronavirus origin', 'narrative': ""seeking range of information about the SARS-CoV-2 virus's origin, including its evolution, animal source, and first transmission into humans""}","{'url': '', 'pubmed_id': ''}"
...,...,...,...,...,...,...,...,...
66331,50,zn10rnrm,1,what is known about an mRNA vaccine for the SARS-CoV-2 virus?,Characterization of RNA in Saliva,"Background: We have previously shown that human mRNAs are present in saliva and can be used as biomarkers of oral cancer. In this study, we analyzed the integrity, sources, and stability of salivary RNA. Methods: We measured the integrity of salivary RNA with reverse transcription followed by PCR (RT-PCR) or RT-quantitative PCR (RT-qPCR). To study RNA entry sites into the oral cavity, we used RT-PCR analysis of salivary RNA from the 3 major salivary glands, gingival crevice fluid, and desquamated oral epithelial cells. We measured stability of the salivary β-actin mRNA by RT-qPCR of salivary RNA incubated at room temperature for different periods of time. We measured RNA association with other macromolecules by filtering saliva through pores of different sizes before performing RT-qPCR. To assess RNA–macromolecule interaction, we incubated saliva with Triton X-100 for different periods of time before performing RT-qPCR. Results: In most cases, we detected partial- to full-length salivary mRNAs and smaller amounts of middle and 3′ gene amplicons compared with the 5′. RNA was present in all oral fluids examined. Endogenous salivary β-actin mRNA degraded more slowly than exogenous β-actin mRNA, with half-lives of 12.2 and 0.4 min, respectively (P <0.001). Salivary RNA could not pass through 0.22 or 0.45 μm pores. Incubation of saliva with Triton X-100 accelerated degradation of salivary RNA. Conclusions: Saliva harbors both full-length and partially degraded forms of mRNA. RNA enters the oral cavity from different sources, and association with macromolecules may protect salivary RNA from degradation.","{'query': 'mRNA vaccine coronavirus', 'narrative': 'Looking for studies specifically focusing on mRNA vaccines for COVID-19, including how mRNA vaccines work, why they are promising, and any results from actual clinical studies.'}","{'url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7108156/', 'pubmed_id': '16601067'}"
66332,50,zstmdt4n,0,what is known about an mRNA vaccine for the SARS-CoV-2 virus?,Coordinate induction of IFN-α and -γ by SARS-CoV also in the absence of virus replication,"Abstract Background: Severe acute respiratory syndrome (SARS) is an emerging infection caused by a novel coronavirus known as SARS-CoV, characterized by an over-exuberant immune response with lung lymphomononuclear cells infiltration and proliferation that may account for tissue damage more than the direct effect of viral replication. This study is aimed at investigating the capability of SARS-CoV to activate IFN-α and -γ expression in lymphomonocytes (PBMC) from healthy donors, evaluating whether viral replication is necessary for this activation. Results: SARS-CoV virus is able to induce both IFN-α and -γ mRNA accumulation and protein release in a dose-dependent manner, MOI 10 being the most effective. The time course curve indicated that IFN-α mRNA induction peaked at 24 h.p.i,. whereas IFN-γ mRNA was still increasing at 48 h.p.i. Released IFN (both types) reached a plateau after 24–48 h.p.i. and remained rather stable over a 5-day period. A transient peak of negative strand viral RNA was detected after 1–2 days of infection, but neither infectious virus progeny yield nor newly produced viral genomic RNA could be evidenced in infected cultures, even after prolonged observation time (up to 13 days). Cocultivation of PBMC with fixed SARS-CoV-infected Vero cells was even more efficient than exposure to live virus in eliciting IFN-α and -γ induction. A combination of IFN-α and -γ strongly inhibited SARS-CoV replication in Vero cells, while the single cytokines were much less effective. Conclusions: This study provides evidence that SARS-CoV is able to induce in normal PBMC a coordinate induction of IFN-α and -γ gene expression. Virus replication is not necessary for IFN induction since efficient IFN expression could be obtained also by the cocultivation of normal PBMC with fixed SARS-CoV-infected cells. Concomitant activation of IFN-α and -γ gene expression by SARS-CoV in vivo may be relevant for the pathogenesis of the disease, both for the possible involvement in immunomediated damage of the tissues and for the strong inhibition of SARS-CoV replication as a result of combined cytokine action.","{'query': 'mRNA vaccine coronavirus', 'narrative': 'Looking for studies specifically focusing on mRNA vaccines for COVID-19, including how mRNA vaccines work, why they are promising, and any results from actual clinical studies.'}","{'url': 'https://api.elsevier.com/content/article/pii/S0042682205004241; https://www.sciencedirect.com/science/article/pii/S0042682205004241; https://www.ncbi.nlm.nih.gov/pubmed/16095648/', 'pubmed_id': '16095648'}"
66333,50,zth8ffy3,0,what is known about an mRNA vaccine for the SARS-CoV-2 virus?,Vasculopathy and Coagulopathy Associated with SARS-CoV-2 Infection,"The emergence of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2), the causative agent of coronavirus disease 2019 (COVID-19), has resulted in > 500,000 deaths worldwide, including > 125,000 deaths in the U.S. since its emergence in late December 2019 and June 2020. Neither curative anti-viral drugs nor a protective vaccine is currently available for the treatment and prevention of COVID-19. Recently, new clinical syndromes associated with coagulopathy and vasculopathy have emerged as a cause of sudden death and other serious clinical manifestations in younger patients infected with SARS-CoV-2 infection. Angiotensin converting enzyme 2 (ACE2), the receptor for SARS-CoV-2 and other coronaviruses, is a transmembrane protein expressed by lung alveolar epithelial cells, enterocytes, and vascular endothelial cells, whose physiologic role is to induce the maturation of angiotensin I to generate angiotensin 1-7, a peptide hormone that controls vasoconstriction and blood pressure. In this review, we provide the general context of the molecular and cellular mechanisms of SARS-CoV-2 infection with a focus on endothelial cells, describe the vasculopathy and coagulopathy syndromes in patients with SARS-CoV-2, and outline current understanding of the underlying mechanistic aspects.","{'query': 'mRNA vaccine coronavirus', 'narrative': 'Looking for studies specifically focusing on mRNA vaccines for COVID-19, including how mRNA vaccines work, why they are promising, and any results from actual clinical studies.'}","{'url': '', 'pubmed_id': ''}"
66334,50,zv4nbz9p,2,what is known about an mRNA vaccine for the SARS-CoV-2 virus?,"Emerging Technologies for Use in the Study, Diagnosis, and Treatment of Patients with COVID-19","INTRODUCTION: The COVID-19 pandemic has caused an unprecedented health and economic worldwide crisis. Innovative solutions are imperative given limited resources and immediate need for medical supplies, healthcare support and treatments. AIM: The purpose of this review is to summarize emerging technologies being implemented in the study, diagnosis, and treatment of COVID-19. RESULTS: Key focus areas include the applications of artificial intelligence, the use of Big Data and Internet of Things, the importance of mathematical modeling for predictions, utilization of technology for community screening, the use of nanotechnology for treatment and vaccine development, the utility of telemedicine, the implementation of 3D-printing to manage new demands and the potential of robotics. CONCLUSION: The review concludes by highlighting the need for collaboration in the scientific community with open sharing of knowledge, tools, and expertise.","{'query': 'mRNA vaccine coronavirus', 'narrative': 'Looking for studies specifically focusing on mRNA vaccines for COVID-19, including how mRNA vaccines work, why they are promising, and any results from actual clinical studies.'}","{'url': 'https://doi.org/10.1007/s12195-020-00629-w', 'pubmed_id': ''}"


In [12]:
docs_with_questions = merged_df['corpus-id'].unique()

In [13]:
docs_with_questions.shape

(35480,)

In [14]:
which_docs = rng.choice(docs_with_questions, 100, replace=False)

In [15]:
which_docs

array(['70hskj1o', '7a3wdduq', 'd4yidznm', 'kjdcg8nz', 'vpomgedg',
       't3y1w9ef', 'jud53dmv', '6n0ce55n', 'gfyup5aj', 'c813ttt2',
       'vsswxwdi', 'jkqg1qal', 'm8m4il3j', 'k7bh8bf7', 'g12ln2nf',
       'nb9rljzu', '9hgtvm6d', '1z6l12ks', '2ma564ej', '3xw4qjoy',
       'ajlq8s34', 'mz1bof2x', '8yablc7b', 'hus02944', '34m7y2l1',
       'hjzlj8k3', '3gmb3kqd', '3f5h5e0e', '0y53hnve', '7ums36c9',
       'ydtrb6wh', 'q8pagn56', 't4os33em', '5wsj003j', '4g85h6ta',
       'h6gb99fw', 'gxtgttas', 'mm4ngrla', 'fsrdu4tq', 'tmpidjrp',
       '08zf7161', 'fex8sd1t', 'b8f4a7o3', 'wz5pgoq4', 'kdudslre',
       'gjkdm90a', 'kzaowysv', 'cw3jkf3x', '6zfmjq9p', 'kzavc4ez',
       'hyoyjpbd', 'k65501xp', 'dvgqouk2', '6q0y3ewu', '9pb2eqoa',
       'xv3k0irk', 'fen2yodv', 'tdvb0fhv', 'gu5vrd2v', 'yfjlbyn4',
       'ah9cnwzw', 'gf9wripj', '4mx9t5td', 'oz4mvyw8', '8ta7o7fe',
       '7mcfehzc', 't0chpsuh', 'igxdatq1', 'xfi9lzy7', 'k5fh5ujf',
       '15c85zi4', 'zghh0zbd', 'dhwwhsgb', 'au3kcait', 'mh30oc

In [16]:
merged_df[merged_df['corpus-id'] == which_docs[0]]

Unnamed: 0,query-id,corpus-id,score,query-text,corpus-title,corpus-text,query-metadata,corpus-metadata
16685,4,70hskj1o,0,what causes death from Covid-19?,Chatbots in the fight against the COVID-19 pandemic,"We are all together in a fight against the COVID-19 pandemic. Chatbots, if effectively designed and deployed, could help us by sharing up-to-date information quickly, encouraging desired health impacting behaviors, and lessening the psychological damage caused by fear and isolation. Despite this potential, the risk of amplifying misinformation and the lack of prior effectiveness research is cause for concern. Immediate collaborations between healthcare workers, companies, academics and governments are merited and may aid future pandemic preparedness efforts.","{'query': 'how do people die from the coronavirus', 'narrative': 'Studies looking at mechanisms of death from Covid-19.'}","{'url': 'https://www.ncbi.nlm.nih.gov/pubmed/32377576/; https://doi.org/10.1038/s41746-020-0280-0', 'pubmed_id': '32377576'}"
16686,26,70hskj1o,0,what are the initial symptoms of Covid-19?,Chatbots in the fight against the COVID-19 pandemic,"We are all together in a fight against the COVID-19 pandemic. Chatbots, if effectively designed and deployed, could help us by sharing up-to-date information quickly, encouraging desired health impacting behaviors, and lessening the psychological damage caused by fear and isolation. Despite this potential, the risk of amplifying misinformation and the lack of prior effectiveness research is cause for concern. Immediate collaborations between healthcare workers, companies, academics and governments are merited and may aid future pandemic preparedness efforts.","{'query': 'coronavirus early symptoms', 'narrative': 'Studies of patients and the first clinical manifestations they develop upon active infection?'}","{'url': 'https://www.ncbi.nlm.nih.gov/pubmed/32377576/; https://doi.org/10.1038/s41746-020-0280-0', 'pubmed_id': '32377576'}"


## Generate some questions using OpenAI

In [26]:
OPENAI_API_QUERY_PARAMS={"model": None,
                         "prompt": None,
                         "temperature": 0.7,
                         "max_tokens": 512,
                         "top_p": 1.0,
                         "frequency_penalty": 0,
                         "presence_penalty": 0}

In [27]:
ZERO_SHOT_TEMPLATE="Considering the text in quotes below:\n\n\"{}\"\n\nWhich questions can you drawn about non-trivial conclusions from the text? Please write complete and independent questions, without any implicit references to other questions or to the text."

In [28]:
OPENAI_RESPONSE_REGEX="[\n\r]*[0-9\.]+\s+(.+)[\n\r]?"

In [29]:
with open(API_KEYS_FILE) as inputFile:
    api_keys = json.load(inputFile)

In [30]:
openai.api_key = api_keys['OPENAI_API_KEY']

In [31]:
request_params = OPENAI_API_QUERY_PARAMS
request_params['model'] = 'text-davinci-003'

In [32]:
new_dataset = []

for document_id in which_docs:
    
    doc_text = merged_df[merged_df['corpus-id'] == document_id]['corpus-text'].to_numpy()[0]
    
    if doc_text is not np.nan:
        request_params['prompt'] = ZERO_SHOT_TEMPLATE.format(doc_text)

        print(request_params['prompt'])
        
        request_start_time = time.time()

        response = openai.Completion.create(**request_params)

        elapsed_time = time.time() - request_start_time
        
        print("-- Request time: {}".format(elapsed_time))
        
        print(response['choices'][0]['text'])
        
        m = re.finditer(OPENAI_RESPONSE_REGEX, response['choices'][0]['text'])
        
        questions = [match.group(1) for match in m]
        
        for question in questions:
            new_dataset.append({'query': question,
                                'positive_doc_id': document_id,
                                'negative_doc_ids': []})      

    else:
        print("Document {} has empty text...".format(document_id))

-- Request time: 4.114557504653931


1. How can chatbots be designed to effectively share up-to-date information during a pandemic? 
2. What strategies can be used to encourage desired health impacting behaviors through chatbots?
3. What are the risks associated with amplifying misinformation with chatbot technology?
4. What research has been conducted on the effectiveness of chatbots during pandemics?
5. How can collaborations between healthcare workers, companies, academics and governments help prepare for future pandemics?
-- Request time: 9.281001567840576


1. What is the cost of a single TriSilix chip?
2. What is the precision of the temperature cycling capability of TriSilix?
3. How many tests can a TriSilix device perform with a 4000 mAh battery?
4. What is the limit of detection of TriSilix when used to detect M. avium subsp. paratuberculosis?
5. What is the limit of detection of TriSilix when used to detect SARS-CoV-2?
6. How long does it take to produce a single TriSilix chi

-- Request time: 4.358523607254028


1. How does the proposed mathematical model predict the dynamics of COVID-19 in India?
2. What are the effects of quarantining on the basic reproduction number of SARS-CoV-2?
3. What is the estimated inflection point and ending phase of SARS-CoV-2 in India?
4. What are the factors that contribute to the effectiveness of restrictive social distancing and contact tracing?
5. Is it possible to eliminate the SARS-CoV-2 pandemic in India?
-- Request time: 5.817364692687988


1. What are the potential tissue targets of SARS-CoV-2 in the central nervous system?
2. What are the possible routes of entry of SARS-CoV-2 into the central nervous system?
3. What types of clinical neurological complications have been reported in COVID-19 patients?
4. How can recognition and understanding of neurological disorders associated with COVID-19 lead to improved clinical outcomes?
5. What kind of neuropathological studies are necessary to understand the pathogenesis of CO

-- Request time: 3.710998773574829


1.What countries advocated wearing face masks in public in relation to the SARS CoV2 epidemic?
2.How did the national promotion of face masks in public correlate to the number of COVID19 cases per inhabitant?
3.What are the potential effects of testing intensity on the COVID19 cases per inhabitant in a country?
4.What evidence is there to suggest that face mask usage reduces the transmission and acquisition of respiratory viral infections?
-- Request time: 4.206372499465942


1. What tissue systems have been found to contain an autologous renin-angiotensin system?
2. What effects does angiotensin II have on tissue fibrosis?
3. How does the angiotensin type 2 receptor (AT(2)) affect fibrosis in animal models?
4. What clinical reports suggest a beneficial role for modulation of angiotensin II signaling in cutaneous scarring? 
5. What therapies are currently available to target the angiotensin system?
-- Request time: 4.82649040222168


1. What are the

-- Request time: 4.145026683807373


1. What advances have been made in understanding the proteome content of various biofluids?
2. What technologies are being used to analyze biofluids for biomarkers?
3. What are the potential benefits of identifying novel disease-specific biomarkers?
4. What are some of the challenges associated with using noninvasively collected samples for biomarker discovery?
5. How have proteomic approaches been used to find novel biomarkers in serum, plasma, and lymph?
-- Request time: 3.47428560256958


1. What is the relationship between ACEI/ARB therapy and severity of COVID-19 pneumonia?
2. Does the use of chloroquine reduce the risk of developing severe COVID-19 pneumonia?
3. Does the presence of comorbidities increase the risk of developing severe COVID-19 pneumonia?
4. What is the effect of age on the risk of developing severe COVID-19 pneumonia?
-- Request time: 4.690702438354492


1. What is the impact of changes in the behavior of the SIR model on the 

-- Request time: 3.013779401779175


1. What are the potential outcomes of an immune response directed against carbohydrate epitopes?
2. How do carbohydrate epitopes differ in terms of their structure and chemical context?
3. What cellular mechanisms underpin carbohydrate immunity?
4. What new technologies are being developed to facilitate carbohydrate vaccine development?
5. How does the immune system interact with self and nonself glycans?
-- Request time: 5.19819188117981


1. What are the most important elements of the French Public Agency of Health's data regarding the spread of COVID-19?
2. How does the mathematical model account for the impact of confinement measures on the spread of the epidemics?
3. What scenarios are being considered to exit confinement, and how do they impact the spread of the disease?
4. How accurate are the predictions of the mathematical model?
5. What are the implications of the model's predictions on the re-emergence of the epidemics?
Document xfi9lzy7 

-- Request time: 5.768783092498779


1. What resources are available to further investigate the racial associations between African-Americans and COVID-19?
2. What is the correlation between the percentage of Asian-Americans living in counties and the percentage of COVID-19 confirmed cases and deaths?
3. Does the percentage of Whites living in a county correlate to COVID-19 confirmed cases and deaths?
4. What is the relationship between the percentage of African-Americans living in a county and the percentage of COVID-19 confirmed cases and deaths?
5. Are there any other racial groups that show correlations between their percentages living in counties and the percentage of COVID-19 confirmed cases and deaths?
-- Request time: 3.7350807189941406


1. How did Milton portray Pandæmonium in Paradise Lost?
2. How did Enrico Fermi estimate the strength of the Trinity nuclear bomb?
3. How has the Covid-19 pandemic spread from Wuhan, China?
4. Are there any datasets that can help us understand

In [33]:
pd.DataFrame(new_dataset)

Unnamed: 0,query,positive_doc_id,negative_doc_ids
0,How can chatbots be designed to effectively share up-to-date information during a pandemic?,70hskj1o,[]
1,What strategies can be used to encourage desired health impacting behaviors through chatbots?,70hskj1o,[]
2,What are the risks associated with amplifying misinformation with chatbot technology?,70hskj1o,[]
3,What research has been conducted on the effectiveness of chatbots during pandemics?,70hskj1o,[]
4,"How can collaborations between healthcare workers, companies, academics and governments help prepare for future pandemics?",70hskj1o,[]
...,...,...,...
458,What are some common symptoms of MERS-CoV infection?,24lzevco,[]
459,Are there any existing effective anti-MERS-CoV antiviral agents or therapeutics?,24lzevco,[]
460,What are some potential Host-Directed Therapies for MERS-CoV infected patients?,24lzevco,[]
461,Could Host-Directed Therapies improve treatment outcomes for patients with MERS-CoV infection?,24lzevco,[]


In [35]:
with open("eduseiti_100_queries_expansion_20230428_01.jsonl", "w") as outputFile:
    for i, query in enumerate(new_dataset):
        if i > 0:
            outputFile.write("\n")
            
        json.dump(query, outputFile)