In [1]:
import os
from Classes.MetaDataCSVLoader import MetaDataCSVLoader
from Classes.FilteringClient import FilteringClient
from Classes.PubMedClient import PubMedClient
from utils.constants import keywords, study_designs, system_template, metadata_columns
from utils.utils import create_df_fill_full_abstract, extract_keywords_from_results
import pandas as pd


In [2]:

# Fetch PubMed records, persist the PMID/Title/Abstract columns to CSV, reload that
# CSV as documents through MetaDataCSVLoader, and build the FilteringClient from them.
# NOTE(review): cells further down still reference `openai`, whose only assignment is
# the commented-out line below — confirm whether OpenAIClient should be restored.
# openai = OpenAIClient()
pubmed = PubMedClient()

# Retrieving results from PubMed as a df
results = pubmed.search('fever AND covid', max_results=1000)
id_list = results['IdList']
papers = pubmed.fetch_details(id_list)
df = create_df_fill_full_abstract(papers)
# Columns = 'PMID', 'Title', 'Abstract', 'Journal', 'Language', 'Year', 'Month', 'Abstract_1'

# Keep only the PMID, Title and Abstract columns
df_abstracts = pd.DataFrame(df, columns=['PMID', 'Title', 'Abstract'])
display(df_abstracts)

# Single source of truth for the CSV location; it is written and then read back below.
csv_path = 'data/pubmedabstracts.csv'

# exist_ok=True creates the folder only when missing, without a separate
# check-then-create step.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)

# save to csv
df_abstracts.to_csv(csv_path, index=False)

# NOTE(review): this rebinds `metadata_columns`, shadowing the value imported from
# utils.constants at the top of the notebook — confirm the override is intended.
metadata_columns = ['PMID', 'Title', 'Abstract']
# load the csv into a list of documents
loader = MetaDataCSVLoader(file_path=csv_path,
                           metadata_columns=metadata_columns,
                           encoding="utf-8")
bg_data = loader.load()

print(len(bg_data))
print(type(bg_data[0]))
# Create an instance of the FilteringClient using the data
filteringclient = FilteringClient(bg_data)



Unnamed: 0,PMID,Title,Abstract
0,33222680,COVID-19 Outbreak: Neurological Manifestations...,[COVID-19 is one of the most disastrous respir...
1,34117360,Diagnosis of COVID-19 in children guided by la...,"Of all the suspected infected, 2596 tested neg..."
2,33797605,A systematic review of pregnant women with COV...,"[In December 2019, a novel coronavirus disease..."
3,33231411,Fever management in COVID-19 patients.,No Abstract
4,35545266,"Unresolving fever, headache, cough, and negati...",No Abstract
...,...,...,...
995,34398733,"An exploration of parental awareness, knowledg...","The study sample consisted of 141 parents, of ..."
996,32362394,First cases of COVID-19 in heart transplantati...,No Abstract
997,35135793,Multisystem inflammatory syndrome in an adult ...,[Kawasaki-like multisystem inflammatory syndro...
998,34020435,RT-PCR diagnosis of COVID-19 from exhaled brea...,[Current diagnostic testing for coronavirus di...


1000
<class 'langchain_core.documents.base.Document'>


In [3]:
# Research question and how many articles to ask for.
query = "What is the effect of covid on the reproductive system?"
no_of_articles = 2

# PICO elements (Population, Intervention, Comparison, Outcome) for the question.
population, intervention, comparison, outcome = (
    "covid",
    "COVID-19 Vaccine",
    "Placebo",
    "reproductive system complications",
)

# Arguments are passed positionally: question, article count, then P, I, C, O.
output = filteringclient.rag_result(
    query, no_of_articles, population, intervention, comparison, outcome
)

Loading DB

        ### Question: 
        What is the effect of covid on the reproductive system?

        ### Response: 
        As a language model AI, I don't have the ability to access real-time databases or internet to provide specific articles from PubMed. However, I can guide you on how to select the articles based on the given abstracts and PICO.

Since the abstracts provided are all the same, I will use it as an example:

Title: [Assuming the title is] "The Impact of SARS-CoV-2 on Semen Parameters in COVID-19 Patients"
PMID: [Assuming the PubMed ID is] 123456

Reason for Selection: This article is relevant to the research question as it investigates the effect of SARS-CoV-2 on semen parameters, which is a component of the male reproductive system. The study compares semen analyses before and after COVID-19 diagnosis, which aligns with the PICO components of 'Intervention' (COVID-19) and 'Outcome' (reproductive system complications). 

Snippet from the Article: "Clinical and h

In [4]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.prompts import PromptTemplate
from IPython.display import display, Markdown



# Build the retrieval chain: chat model + FilteringClient retriever + PICO prompt.
model = ChatOpenAI(temperature=0)
retriever = filteringclient.get_retriever()


# The declared input variables must match the placeholders actually used in the
# template. The template uses {input} (the research question, which
# create_retrieval_chain passes under the "input" key) and {context} (the retrieved
# documents, which create_stuff_documents_chain requires the prompt to accept).
# The previous list named a non-existent "research_question" and omitted both.
# NOTE(review): population / intervention / comparison / outcome / no_of_articles are
# not filled by the chain itself — presumably they must be supplied in the dict passed
# to chain.invoke(...); confirm at the call site.
prompt = PromptTemplate(
    input_variables=[
        "context",
        "input",
        "population",
        "intervention",
        "comparison",
        "outcome",
        "no_of_articles",
    ],
    template="""
    You are a researcher performing a literature review based on a research question, PICO, and a number of documents to select.
    PICO is an acronym for Population, Intervention, Comparison, and Outcome, and it's a framework used in research to structure questions and facilitate literature reviews.
    Research Question: {input}
    Population: {population}
    Intervention: {intervention}
    Comparison: {comparison}
    Outcome: {outcome}
    Number of Articles to Select: {no_of_articles}

    The dataset provided is a pre-processed dataset of PubMed articles.
    It includes the following columns:
    'PMID': the PubMed ID of the published article,
    'Title': the title of the published article,
    'Abstract': an abstract of the published article,
    ----------------
    {context}
    --------------

    Based on these details, you are required to select {no_of_articles} articles that are most relevant to the research question.
    For each of these articles, provide:
    - The title of the article
    - The PMID of the article
    - A Snippet from the abstract of the article which is most relevant to the research question
    - A brief description of the reason why you selected it


    If there are actual numbers and percentages, state it.
    For the numbers and percentages, provide a reference list stating the title and the year of publication.
    """
)

# Stuff the retrieved documents into {context}, then wrap with the retriever so a
# single invoke performs retrieval followed by answer generation.
question_answer_chain = create_stuff_documents_chain(
    model, prompt)
chain = create_retrieval_chain(retriever, question_answer_chain)


In [9]:
# Quick retriever check: fetch the documents matching a free-text query.
query_los = "Share 1 document detailing COVID treatment"

# `openai` is never assigned in this notebook (its only assignment is commented out
# in the data-loading cell), so calling it fails on a fresh kernel. The retriever is
# taken from `filteringclient`, the same way the chain-building cell obtains it.
retriever = filteringclient.get_retriever()
retriever.invoke(query_los)

[Document(page_content="treatments has the potential to prevent COVID-19 and to decrease the severity of mild and moderate cases of Coronavirus. We propose heat treatments for this uncontrolled worldwide coronavirus pandemic while studies are being done to test the effectiveness of heat treatments in the prevention and treatment of COVID-19.']", metadata={'Abstract': "['COVID-19 is a new contagious disease caused by a new coronavirus known as severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). COVID-19 is a disease that has reached every continent in the world; it has overloaded the medical system worldwide and it has been declared a pandemic by the World Health Organization. Currently there is no definite treatment for COVID-19. We realize that host immunity is a critical factor in the outcome of coronavirus 2 infection. Here, however, we review the pathophysiology of the disease with a focus on searching for what we can do to combat this new disease. From this, we find that

In [None]:
# Print each source document's metadata and text, followed by its PubMed URL.
# NOTE(review): `result` is first assigned in a later cell of this notebook, so this
# cell depends on out-of-order execution — confirm the intended cell order.
for doc in result["source_documents"]:
    print(doc.metadata, doc.page_content, sep="\n")

    # The article URL is derived from the PMID stored in the document metadata.
    pmid = doc.metadata["PMID"]
    link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
    print(link, "\n\n", sep="\n")

In [None]:
# Ask the reproductive-system question and show the returned source documents.
# (Fixed the "Wwhat" typo in the query text that is sent to the model.)
query_los = "What is the effect of covid on the reproductive system?"

# NOTE(review): `openai` is not assigned anywhere in the visible notebook — its only
# assignment is commented out in the data-loading cell. Confirm which client is meant
# to provide print_result before relying on this cell.
result = openai.print_result(query_los, system_template)
print(result["source_documents"])

In [None]:
# Dump the metadata and text of every source document, separated by blank lines.
for doc in result["source_documents"]:
    print(doc.metadata, doc.page_content, "\n\n", sep="\n")

In [None]:
# Small end-to-end run: fetch 10 PubMed records, build the DataFrame, then run the
# keyword / study-design extraction over it.
client = PubMedClient()
results = client.search("fever AND covid", max_results=10)
id_list = results["IdList"]
papers = client.fetch_details(id_list)
print(papers)

# NOTE(review): this rebinds `df`, which an earlier cell also assigns.
df = create_df_fill_full_abstract(papers)
display(df)

extract_abstract = extract_keywords_from_results(
    df,
    keywords,
    study_designs,
)
display(extract_abstract)

In [None]:
# Display the raw `papers` object returned by fetch_details in the previous cell.
papers

In [None]:
# Display the column labels of the keyword-extraction result.
extract_abstract.columns

In [None]:
# Take the PMID of the first entry under "PubmedArticle" in the nested `papers` structure.
pmid = papers["PubmedArticle"][0]["MedlineCitation"]["PMID"]

In [None]:
# Display the string form of `pmid`.
str(pmid)

In [None]:
# Build the PubMed URL for `pmid` by appending its string form to the site root.
link = "https://pubmed.ncbi.nlm.nih.gov/" + str(pmid)