In [1]:
import os
from Classes.MetaDataCSVLoader import MetaDataCSVLoader
from Classes.OpenAIClient import OpenAIClient
from Classes.FilteringClient import FilteringClient
from Classes.PubMedClient import PubMedClient
from utils.constants import keywords, study_designs, system_template, metadata_columns
from utils.utils import create_df_fill_full_abstract, extract_keywords_from_results
import pandas as pd


In [2]:

# Build the abstract corpus: query PubMed, persist PMID/Title/Abstract to CSV,
# reload the CSV as documents and hand them to the FilteringClient.
pubmed = PubMedClient()

# Retrieve the search hits and their details, then turn them into a DataFrame.
results = pubmed.search('fever AND covid', max_results=1000)
id_list = results['IdList']
papers = pubmed.fetch_details(id_list)
df = create_df_fill_full_abstract(papers)
# Columns = 'PMID', 'Title', 'Abstract', 'Journal', 'Language', 'Year', 'Month', 'Abstract_1'

# Keep only the columns that are written to disk: PMID, Title and Abstract.
df_abstracts = pd.DataFrame(df, columns=['PMID', 'Title', 'Abstract'])
display(df_abstracts)

# Define the CSV location once so the write below and the loader further down
# cannot drift apart.
data_dir = 'data'
abstracts_csv = f"{data_dir}/pubmedabstracts.csv"

# exist_ok=True creates the folder when it is missing and is a no-op when it
# already exists, so no separate existence check is needed.
os.makedirs(data_dir, exist_ok=True)

# Write with the same encoding the loader reads with.
df_abstracts.to_csv(abstracts_csv, index=False, encoding="utf-8")

# NOTE: this rebinds `metadata_columns`, which was imported from utils.constants
# in the import cell; from here on the name refers to this three-column list.
metadata_columns = ['PMID', 'Title', 'Abstract']

# Load the CSV into a list of documents.
loader = MetaDataCSVLoader(file_path=abstracts_csv,
                           metadata_columns=metadata_columns,
                           encoding="utf-8")
bg_data = loader.load()

# Quick sanity check: number of loaded documents and their type.
print(len(bg_data))
print(type(bg_data[0]))

# Create the FilteringClient from the loaded documents.
# NOTE: despite its name, `openai` is a FilteringClient instance; the name is
# kept because the following cells call methods on it.
openai = FilteringClient(bg_data)



Unnamed: 0,PMID,Title,Abstract
0,33222680,COVID-19 Outbreak: Neurological Manifestations...,[COVID-19 is one of the most disastrous respir...
1,34117360,Diagnosis of COVID-19 in children guided by la...,"Of all the suspected infected, 2596 tested neg..."
2,33797605,A systematic review of pregnant women with COV...,"[In December 2019, a novel coronavirus disease..."
3,37399831,Associations of COVID-19 symptoms with omicron...,[Previous SARS-CoV-2 infection and vaccination...
4,35545266,"Unresolving fever, headache, cough, and negati...",No Abstract
...,...,...,...
995,35135793,Multisystem inflammatory syndrome in an adult ...,[Kawasaki-like multisystem inflammatory syndro...
996,34020435,RT-PCR diagnosis of COVID-19 from exhaled brea...,[Current diagnostic testing for coronavirus di...
997,33929003,An update on Sars-CoV-2: a review.,[Sars-CoV-2 is a new global health challenge t...
998,33132333,Clinical and Microbiological Features of Asymp...,[Objective To describe the clinical features a...


1000
<class 'langchain_core.documents.base.Document'>


In [3]:
# Load the database behind the FilteringClient. The call is the cell's last
# expression, so its return value is displayed (the recorded output shows a
# langchain Chroma vector store).
openai.load_db()

Loading DB


<langchain_community.vectorstores.chroma.Chroma at 0x137c289b0>

In [5]:
# Obtain the retriever exposed by the FilteringClient.
retriever = openai.get_retriever()

# Question used to probe the retriever.
query_los = "What are the outcomes of covid?"

# Last expression of the cell: the retrieved documents are displayed.
retriever.invoke(query_los)

[Document(page_content='caused over 100,000 deaths in the United States, thus far. The decision to admit a patient must balance the risks of transmission with the benefit of being readily available to provide urgent supportive care should the patient develop complications. Thus, there is a significant benefit to being able to predict poor outcomes. We performed a targeted review of the literature, focusing on clinical and laboratory predictors of poor outcomes in COVID-19. Our case report and narrative review outline', metadata={'Abstract': "[StringElement('In late December 2019, the coronavirus 2 (SARS-CoV-2) emerged in Wuhan, China. It quickly spread and emerged as a global pandemic with far-reaching impacts on society. As clinical research on this novel virus emerges, there is a limited amount of data that review clinical and laboratory predictors of severe disease. We present a case of a patient with severely elevated inflammatory markers who remained clinically stable during his h

In [None]:
#result["source_documents"]

In [None]:
# Question passed to the FilteringClient together with the system template.
query_los = "What is the treatment for covid?"

# `system_template` comes from utils.constants (see the import cell).
# NOTE(review): print_result is defined outside this notebook; presumably it
# also prints the answer itself - confirm against Classes.FilteringClient.
result = openai.print_result(query_los, system_template)
# The returned value is indexed by "source_documents"; print that entry.
print(result["source_documents"])

In [None]:
# Walk over the source documents of the last result and print, for each one,
# its metadata, its text and the matching PubMed link.
for doc in result["source_documents"]:
    # Metadata and content, one per line.
    print(doc.metadata, doc.page_content, sep="\n")

    # The PMID stored in the metadata identifies the article on PubMed.
    pmid = doc.metadata["PMID"]
    link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"

    # Link followed by the blank-line separator between documents.
    print(link, "\n\n", sep="\n")

In [None]:
# Question passed to the FilteringClient together with the system template.
# (Typo "Wwhat" fixed so the question is sent as intended.)
query_los = "What is the effect of covid on the reproductive system?"

# `system_template` comes from utils.constants (see the import cell).
result = openai.print_result(query_los, system_template)
# The returned value is indexed by "source_documents"; print that entry.
print(result["source_documents"])

In [None]:
# Print metadata and text of every source document of the last result,
# followed by a blank-line separator.
for doc in result["source_documents"]:
    print(doc.metadata, doc.page_content, "\n\n", sep="\n")

In [None]:
# Small-scale run (10 results) of the same PubMed query, followed by keyword
# extraction on the resulting DataFrame.
# NOTE(review): this cell rebinds `results`, `id_list`, `papers` and `df`,
# which were first assigned in the corpus-building cell above; after running it
# those names refer to the 10-result fetch.
client = PubMedClient()
results = client.search('fever AND covid', max_results=10)
id_list = results['IdList']
papers = client.fetch_details(id_list)
# Raw fetch result, printed as plain text.
print(papers)
df = create_df_fill_full_abstract(papers)
display(df)

# `keywords` and `study_designs` come from utils.constants (see the import cell).
extract_abstract = extract_keywords_from_results(df, keywords, study_designs)
display(extract_abstract)

In [None]:
# Display the raw fetch result held in `papers`.
papers

In [None]:
# Display the column labels of the keyword-extraction result.
extract_abstract.columns

In [None]:
# Take the PMID of the first entry under "PubmedArticle" in the fetch result.
# NOTE(review): the [0] index presumably fails when the search returned no
# articles - confirm how fetch_details behaves for an empty id list.
pmid = papers["PubmedArticle"][0]["MedlineCitation"]["PMID"]

In [None]:
# Display the PMID converted to a plain string.
str(pmid)

In [None]:
# Build the PubMed article URL for the PMID extracted above. The trailing
# slash matches the link format used when printing source documents earlier in
# the notebook; the f-string already converts `pmid` to text, so no explicit
# str() call is needed.
link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"