In [1]:
import os
import pandas as pd
from langchain.docstore.document import Document

In [2]:
# Build the path to data.csv inside the current working directory and read it
# into a DataFrame. NOTE: `csv_dir` holds a file path, despite the `_dir` suffix.
working_dir = os.getcwd()
csv_dir = os.path.join(working_dir, "data.csv")
df = pd.read_csv(filepath_or_buffer=csv_dir)

In [3]:
# Wrap every CSV row in a Document: the 'Abstract' column becomes the text
# body, the bibliographic columns listed below are carried along as metadata.
METADATA_COLUMNS = ('PMID', 'Date of Publication', 'Title', 'MeSH')
documents = [
    Document(
        page_content=record['Abstract'],
        metadata={column: record[column] for column in METADATA_COLUMNS},
    )
    for _, record in df.iterrows()
]

In [4]:
# Sanity check: number of Document objects built from the CSV (one per row).
len(documents)

929

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [6]:
# Splitter settings, named so they can be tuned in one place: the size cap for
# each chunk and the overlap kept between neighbouring chunks.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

In [7]:
# Split the abstracts into smaller chunks with the splitter configured above.
# NOTE: this rebinds `documents`, so from here on the name refers to the
# chunks, not the original one-per-row documents.
documents = text_splitter.split_documents(documents)

In [8]:
# Sanity check: number of chunks produced by the split.
len(documents)

3477

In [9]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

In [10]:
# Build the Chroma vector store once and reuse it on later runs.
#
# Chroma.from_documents embeds every chunk and writes it into
# `persist_directory`. Running it again against a directory that already holds
# the collection embeds everything a second time and adds the same chunks
# again, so re-executing this cell would fill retrieval results with
# duplicates. When the directory already has content, the existing store is
# reopened instead.
#
# NOTE(review): delete the "database" directory to force a rebuild after the
# CSV, the splitter settings, or the embedding model change.
# NOTE(review): "gemma:2b" is also the generation model used further down; a
# dedicated embedding model would presumably retrieve better -- confirm.
persist_directory = "database"
embedding = OllamaEmbeddings(model="gemma:2b")
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # Store already on disk: open it without re-embedding anything.
    db = Chroma(persist_directory=persist_directory, embedding_function=embedding)
else:
    # First run (or directory wiped): embed all chunks and persist them.
    db = Chroma.from_documents(documents=documents, embedding=embedding, persist_directory=persist_directory)

In [23]:
# Smoke-test retrieval: look up the stored chunks closest to a short query
# and display them as the cell output.
query = "Dengue Flavivirus"
result = db.similarity_search(query=query)
result

[Document(page_content='A widespread epidemic of dengue hemorrhagic fever (DHF) occurred in southern Vietnam in 1998, with 438.98 cases/100,000 population and 342 deaths. The number of DHF cases and deaths per 100,000 population increased 152.4% and 151.8%, respectively, over a 1997 epidemic. Dengue viruses were isolated from 143 patient blood samples; DEN-3 virus was identified as the predominant serotype, although a resurgence of DEN-4 was noted.', metadata={'Date of Publication': '2000 Jul-Aug', 'MeSH': "['Adult', 'Antibodies, Viral/blood', 'Dengue Virus/classification/immunology/isolation & purification', '*Disease Outbreaks', 'Humans', 'Serotyping', 'Severe Dengue/*epidemiology/mortality/virology', 'Vietnam/epidemiology']", 'PMID': 10905983, 'Title': 'Dengue epidemic in southern Vietnam, 1998.'}),
 Document(page_content='Clinical, haematologic, and demographic data were recorded. Of 210 dengue-suspected patients, 107 were confirmed. The disease manifested as Dengue Fever (62%), De

In [27]:
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate

In [26]:
# Ollama LLM wrapper for the "gemma:2b" model; passed to
# create_stuff_documents_chain further down to generate the answers.
llm = Ollama(model="gemma:2b")
llm  # echo the configured object

Ollama(model='gemma:2b')

In [69]:
# Prompt for the answering step. The template has two placeholders, {context}
# and {input}; it is kept as a named constant so the wording can be read and
# edited separately from the template construction.
PROMPT_TEMPLATE = """You will be answering Dengue medical questions using provided contexts. The provided contexts are abstracts from medical papers related to Dengue. Think comprehensively before providing an answer. 
    
    Note: If you do not know something or cannot answer using the provided context just reply "IDK"
    
    <context>
    {context}
    </context>
    
    Question: {input}
    """
prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

In [70]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [76]:
# Retriever over the vector store: "mmr" search returning 10 chunks per query.
retriever = db.as_retriever(search_type="mmr", search_kwargs=dict(k=10))
# Chain that hands retrieved documents to the LLM through `prompt`.
doc_chain = create_stuff_documents_chain(llm, prompt)
retriever

VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x0000017FCAE9A950>, search_type='mmr', search_kwargs={'k': 10})

In [79]:
# End-to-end pipeline: the retriever fetches context for the question, then
# doc_chain produces the answer from that context.
retr_chain = create_retrieval_chain(retriever, doc_chain)

In [85]:
# Ask one question through the full pipeline; the returned dict is displayed
# as the cell output.
query = "What are the antiviral effects of Interferon-inducible transmembrane proteins 1, 2 and 3 (IFITM1, IFITM2 and IFITM3)?"
retr_chain.invoke(dict(input=query))

{'input': 'What are the antiviral effects of Interferon-inducible transmembrane proteins 1, 2 and 3 (IFITM1, IFITM2 and IFITM3)?',
 'context': [Document(page_content='ZIKV genome biology and molecular pathogenesis. The ZIKV genome evolved rapidly from the Flavivirus genus and diverged from the members of this genus, even within the dengue virus cluster to which ZIKV belongs. Genome variations and divergences also exist among ZIKV strains/isolates. These genome divergences might account for the uniqueness of Zika disease. ZIKV infection activates not only the antiviral immune response but also the pro-inflammatory responses associated with disease symptoms.', metadata={'Date of Publication': '2017 Mar 22', 'MeSH': "['Apoptosis', 'Cell Proliferation', 'Dengue Virus/genetics/*pathogenicity', 'Evolution, Molecular', 'Genetic Variation', '*Genome, Viral', 'Humans', 'Neuroglia/immunology/pathology', 'Phylogeny', 'Zika Virus Infection/immunology/*pathology']", 'PMID': 28325921, 'Title': 'Zika