In [None]:
import os
import pdfplumber
from sentence_transformers import SentenceTransformer
import chromadb
from langchain_google_genai import ChatGoogleGenerativeAI

from langchain.schema import HumanMessage

# -------- Settings --------
# Gemini API key read from the environment; os.getenv returns None when
# GEMINI_API_KEY is not set.
api_key = os.getenv("GEMINI_API_KEY")
# Path of the PDF article that gets extracted and indexed below.
PDF_PATH = "genes-15-00975.pdf"

# -------- Extract text from the PDF --------
def extrair_texto_pdf(caminho_pdf):
    """Return the text of every page of a PDF, with a newline after each page.

    Args:
        caminho_pdf: Path of the PDF file, passed to ``pdfplumber.open``.

    Returns:
        The concatenated page texts. A page for which ``extract_text()``
        yields nothing contributes only its trailing newline.
    """
    with pdfplumber.open(caminho_pdf) as pdf:
        # extract_text() may yield None for a page without a text layer
        # (e.g. a scanned image) in some pdfplumber versions; fall back to ""
        # so that one such page does not abort the whole extraction.
        return "".join(
            f"{pagina.extract_text() or ''}\n" for pagina in pdf.pages
        )

# Full text of the article at PDF_PATH, extracted once when this cell runs.
texto = extrair_texto_pdf(PDF_PATH)

# -------- Split text into chunks --------
def dividir_texto(texto, tamanho=500):
    """Break ``texto`` into chunks of at most ``tamanho`` words.

    The text is split on whitespace; the words of each chunk are re-joined
    with single spaces. The final chunk may hold fewer than ``tamanho``
    words, and an empty or whitespace-only text yields an empty list.
    """
    tokens = texto.split()
    inicios = range(0, len(tokens), tamanho)
    return [" ".join(tokens[inicio:inicio + tamanho]) for inicio in inicios]

# Word-based chunks of the article text (default chunk size of dividir_texto).
trechos = dividir_texto(texto)

# -------- Generate embeddings --------
# The same model instance is used below to embed the query in buscar_autores.
modelo = SentenceTransformer('all-MiniLM-L6-v2')
# Embeddings for the chunks; converted with .tolist() when stored in Chroma.
embeddings = modelo.encode(trechos)

# -------- Configure Chroma --------
# NOTE(review): persist_directory is set here, but whether this client really
# writes to disk depends on the chromadb version and settings - confirm.
cliente = chromadb.Client(chromadb.config.Settings(
    persist_directory="meu_banco_chroma"
))

# Create the collection, or reuse it when it already exists. This replaces a
# manual scan of list_collections() that read `.name` on each entry; the
# return type of list_collections() has varied between chromadb releases.
colecao = cliente.get_or_create_collection(name="artigos")

# -------- Insert vectors --------
# upsert instead of add: the ids are just chunk positions ("0", "1", ...), so
# re-running this cell against an existing collection should overwrite the
# stored chunks for those ids rather than collide with them.
colecao.upsert(
    documents=trechos,
    metadatas=[{"arquivo": PDF_PATH} for _ in trechos],
    ids=[str(i) for i in range(len(trechos))],
    embeddings=embeddings.tolist()
)

def buscar_autores(pergunta):
    """Ask Gemini who the article's authors are, using retrieved chunks.

    Args:
        pergunta: Question text. It is embedded and used only to retrieve
            the 3 nearest chunks from the collection; the prompt sent to
            the model is fixed and does not include this text.

    Returns:
        The ``content`` of the model's response message.
    """
    embedding_pergunta = modelo.encode([pergunta])

    resultados = colecao.query(
        query_embeddings=embedding_pergunta.tolist(),
        n_results=3
    )

    # A single query embedding was sent, so take the first (only) result list.
    textos_relevantes = resultados['documents'][0]

    # Gemini LLM
    llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", api_key=api_key)
    # Use invoke(): calling the chat model object directly is deprecated in
    # LangChain and emits a deprecation warning.
    resposta = llm.invoke([HumanMessage(content=f"Com base nesses trechos: {textos_relevantes}, me diga quem são os autores do artigo.")])

    return resposta.content

# -------- Usage example --------
# The question is used for retrieval inside buscar_autores; the printed value
# is the text content of the model's reply.
pergunta = "Quem são os autores deste artigo?"
autores = buscar_autores(pergunta)
print("Autores:", autores)


E0000 00:00:1759615870.643366   24035 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
  resposta = llm([HumanMessage(content=f"Com base nesses trechos: {textos_relevantes}, me diga quem são os autores do artigo.")])


Autores: Com base nos trechos fornecidos, **não é possível identificar os autores do artigo**. As informações sobre os autores não estão presentes nestes segmentos de texto.


Natural killer (NK) cells are an important first-line of defense against malignant cells. Because of the potential for increased cancer risk from astronaut exposure to space radiation, we determined whether microgravity present during spaceflight affects the body’s defenses against leukemogenesis. Human NK cells were cultured for 48 h under normal gravity and simulated microgravity (sμG), and cytotoxicity against K-562 (CML) and MOLT-4 (T-ALL) cells was measured using standard methodology or under continuous sμG. This brief exposure to sμG markedly reduced NK cytotoxicity against both leukemias, and these deleterious effects were more pronounced in continuous sμG. RNA-seq performed on NK cells from two additional healthy donors provided insight into the mechanism(s) by which sμG reduced cytotoxicity. Given our prior report of space radiation-induced human T-ALL in vivo, the reduced cytotoxicity against MOLT-4 is striking and raises the possibility that μG may increase astronaut risk of

2024-08-14
