In [2]:
import requests
import pandas
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.pinecone import Pinecone
from supabase.client import Client, create_client
import datetime
from tqdm import tqdm
import pinecone
import os
from dotenv import load_dotenv
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

# Load key/value pairs from a local .env file into the process environment so that
# the os.getenv(...) lookups in the following cells can find the API credentials.
load_dotenv()

  from tqdm.autonotebook import tqdm


True

In [3]:
# Name of the Pinecone index and the namespace inside it used by this notebook.
index_name = "cadmir"
namespace = "cad1"

# Pinecone credentials come from the environment (see load_dotenv() in the import cell);
# os.getenv yields None for a variable that is not set.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
pinecone_environment = os.getenv("PINECONE_ENV")

# Configure the pinecone client for the rest of the session.
pinecone.init(api_key=pinecone_api_key, environment=pinecone_environment)

In [4]:
# Embedding model shared by every vector-store object created below;
# constructed with the library's default settings.
embedding = OpenAIEmbeddings()
# Disabled alternative: a Supabase-backed vector store instead of Pinecone.
# NOTE(review): SupabaseVectorStore is not among the imports visible in this notebook,
# so it would have to be imported before these lines could be re-enabled.
# supabase: Client = create_client(supabase_url=os.environ.get("SUPABASE_URL"), supabase_key=os.environ.get("SUPABASE_SERVICE_KEY"))
# store = SupabaseVectorStore(embedding=embedding, client=supabase, table_name="knowledge")
# store.similarity_search("miRNA",400)

In [5]:
# Handle to the existing Pinecone index; reuse the `index_name` constant from the
# configuration cell instead of repeating the literal.
index = pinecone.Index(index_name)

# LangChain vector store over that index. "text" is the metadata key under which
# each vector's original passage is stored.
# The namespace is passed explicitly so this store queries the same namespace as the
# rest of the notebook; without it the store would search the index's default
# namespace rather than `namespace`.
pinecones = Pinecone(index, embedding.embed_query, "text", namespace=namespace)

In [12]:
# Completion-style OpenAI model created with the library defaults.
llm = OpenAI()

# Vector store attached to the already-populated index, scoped to the notebook's
# namespace. `index_name` and `namespace` are the constants from the configuration cell.
cadmirIndex = Pinecone.from_existing_index(
    index_name=index_name,
    embedding=embedding,
    text_key="text",
    namespace=namespace,
)

In [13]:
# Retriever that fetches the 10 nearest passages for each query.
cadmir_retriever = cadmirIndex.as_retriever(search_kwargs={"k": 10})

# Retrieval-augmented QA chain; return_source_documents=True makes the chain output
# include the retrieved passages alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    retriever=cadmir_retriever,
    return_source_documents=True,
)

In [14]:
# Run the QA chain on one question; the returned dict (query, result, source
# documents) is the last expression and is therefore shown as the cell output.
androgen_inputs = {"query": "what mirna are associated with androgen levels "}
qa(androgen_inputs)

{'query': 'what mirna are associated with androgen levels ',
 'result': " I don't know.",
 'source_documents': [Document(page_content='was observed that in the multivariate anal‑ ysis, although plasma and tissue expression levels of mir‑718, mir‑486, mir‑130 and mir‑122 were included in the model, only the plasmid levels of miR‑486 showed a significant change [odds ratio (or) 2.467; 95% ci, 1.407‑4.327; Table iV]. The or analysis obtained with the logit model, suggested that statistically the increase of mir‑486 increased the risk of damage in the ascending aortic wall (increase of a point of plasma mir‑486 produced an increase equal to 2.467 of the risk of being lGMd or HGMd compared with the cn group).Plasma circulating levels of miR‑718 were significantly decreased in the HGd group compared with the cn and lGd groups (P<0.01; Fig. 5B), whereas mi r‑122 levels were significantly decreased in the LGMD and HGD groups compared with the cn group (P<0.001; Fig. 5B). n otably, plasma expre

In [15]:
# Show the default retriever built from the `pinecones` vector store
# (no search_kwargs are supplied here).
pinecones.as_retriever()

VectorStoreRetriever(tags=['Pinecone'], metadata=None, vectorstore=<langchain.vectorstores.pinecone.Pinecone object at 0x127bd6350>, search_type='similarity', search_kwargs={})

In [28]:
# Question used by the following cells, both as the similarity-search query and
# as the lead-in of the summarisation prompt.
question = "How does the miRNA-33 pathway work?"

In [29]:
# Fetch the 10 passages most similar to the question from the notebook's namespace
# (`namespace` is the constant set in the configuration cell).
docs = cadmirIndex.similarity_search(question, k=10, namespace=namespace)

In [30]:
# Define prompt
# The question is a template variable rather than being interpolated into the
# template source. This removes the stray "$" that used to be emitted in front of the
# question, and it keeps any "{" or "}" inside the question from being parsed as a
# template placeholder.
prompt_template = (
    "{question} \n\n\n\n"
    'Here are the information:"{text}"\n'
    "CONCISE SUMMARY:"
)
# Bind the question up front so that "text" is the only remaining input variable,
# which is what StuffDocumentsChain fills with the concatenated documents.
prompt = PromptTemplate.from_template(prompt_template).partial(question=question)

# Define LLM chain
# temperature=0 keeps the summary as deterministic as the model allows; the 16k-context
# model leaves room for all retrieved passages in a single prompt.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Define StuffDocumentsChain
# All documents are "stuffed" into the prompt's {text} slot in one call.
stuff_chain = StuffDocumentsChain(
    llm_chain=llm_chain, document_variable_name="text"
)

# Summarise the passages retrieved for `question` and print the model's answer.
print(stuff_chain.run(docs))

The miRNA-33 pathway is involved in regulating autophagy, lysosomal function, and lipid homeostasis. It works by repressing multiple genes with related functions, including genes involved in autophagy and lipid metabolism. The pathway is regulated by miR-33 and its passenger strand miR-33*, which both contribute to the inhibitory activity of the pathway. In the context of Mycobacterium tuberculosis (Mtb) infection, Mtb induces the expression of miR-33 and miR-33* to impair xenophagy (the process by which cells eliminate intracellular pathogens) and promote fatty acid stores in lipid bodies, which helps the bacteria survive inside host cells. Additionally, miR-33 is involved in regulating cell proliferation, cell cycle progression, and fatty acid metabolism. The pathway is also implicated in myocardial regeneration and repair, as well as in the pathogenesis of myocardial fibrosis.
