In [2]:
%pip install pinecone

Collecting pinecone
  Downloading pinecone-6.0.1-py3-none-any.whl.metadata (8.8 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone)
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Downloading pinecone-6.0.1-py3-none-any.whl (421 kB)
Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl (6.2 kB)
Installing collected packages: pinecone-plugin-interface, pinecone
Successfully installed pinecone-6.0.1 pinecone-plugin-interface-0.0.7
Note: you may need to restart the kernel to use updated packages.


In [36]:
%pip install -U langchain-pinecone


Collecting langchain-pinecone
  Downloading langchain_pinecone-0.2.3-py3-none-any.whl.metadata (1.3 kB)
Collecting pinecone<6.0.0,>=5.4.0 (from langchain-pinecone)
  Downloading pinecone-5.4.2-py3-none-any.whl.metadata (19 kB)
Collecting aiohttp<3.11,>=3.10 (from langchain-pinecone)
  Downloading aiohttp-3.10.11-cp39-cp39-win_amd64.whl.metadata (8.0 kB)
Collecting langchain-tests<1.0.0,>=0.3.7 (from langchain-pinecone)
  Downloading langchain_tests-0.3.12-py3-none-any.whl.metadata (3.2 kB)
Collecting pytest<9,>=7 (from langchain-tests<1.0.0,>=0.3.7->langchain-pinecone)
  Downloading pytest-8.3.4-py3-none-any.whl.metadata (7.5 kB)
Collecting pytest-asyncio<1,>=0.20 (from langchain-tests<1.0.0,>=0.3.7->langchain-pinecone)
  Downloading pytest_asyncio-0.25.3-py3-none-any.whl.metadata (3.9 kB)
Collecting syrupy<5,>=4 (from langchain-tests<1.0.0,>=0.3.7->langchain-pinecone)
  Downloading syrupy-4.8.2-py3-none-any.whl.metadata (36 kB)
Collecting pytest-socket<1,>=0.6.0 (from langchain-tests<

  You can safely remove it manually.


In [43]:
import os
import time
import PyPDF2
import openai
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.embeddings.base import Embeddings
from langchain_pinecone import Pinecone as PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec

# Configurar la clave de API de SambaNova
os.environ["SAMBANOVA_API_KEY"] = "f3612483-e9ec-4409-a019-2e4081c0c575"

# Configurar la clave de API y la región de Pinecone
api_key = "pcsk_5bpf1j_3NvVtLhnctqAMsivZy77kuDDdpoAa2CArjnFuNojf2xuX9ZABqResibrbExzPkz"
index_name = "agriculture-index"

# Inicializar el cliente de Pinecone
pc = Pinecone(api_key=api_key)

# Crear o conectar al índice en Pinecone
if index_name in pc.list_indexes():
    print(f"El índice '{index_name}' ya existe en Pinecone.")
    print(f"Conectando al índice existente '{index_name}' en Pinecone.")
    pinecone_index = pc.Index(index_name)
else:
    print(f"Creando un nuevo índice '{index_name}' en Pinecone...")
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )
    pinecone_index = pc.Index(index_name)
    print(f"Índice '{index_name}' creado correctamente en Pinecone.")

print(f"Conectado al índice '{index_name}' en Pinecone.")

# Ruta al archivo PDF
pdf_path = 'libro1.pdf'

# Extraer texto del PDF
text = ""
with open(pdf_path, 'rb') as file:
    pdf_reader = PyPDF2.PdfReader(file)
    for page in pdf_reader.pages:
        text += page.extract_text() if page.extract_text() else ""

# Dividir el texto en fragmentos
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=600,
    chunk_overlap=0,
    length_function=len,
)
chunks = text_splitter.split_text(text)

# Convertir los fragmentos a objetos Document
documents = [Document(page_content=chunk) for chunk in chunks]

# Cargar el modelo de embeddings locales con Sentence Transformers
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Crear una clase de Embeddings compatible con Langchain
class LocalEmbeddings(Embeddings):
    def embed_documents(self, texts):
        return embedding_model.encode(texts, convert_to_tensor=False).tolist()
    
    def embed_query(self, text):
        return embedding_model.encode([text], convert_to_tensor=False).tolist()[0]

embedding = LocalEmbeddings()

# Indexar los documentos en Pinecone
vectorstore = PineconeVectorStore(
    index=pinecone_index,
    embedding=embedding,
    text_key="text"
)

vectorstore.add_documents(documents)

semantic_chunk_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

print("Indexación con Pinecone completada correctamente usando embeddings locales.")

# Configurar el cliente de SambaNova con la API compatible con OpenAI
client = openai.OpenAI(
    api_key=os.environ.get("SAMBANOVA_API_KEY"),
    base_url="https://api.sambanova.ai/v1",
)

# Función para interactuar con Llama 3.3
def query_llama_sambanova(prompt: str, context: str = "") -> str:
    response = client.chat.completions.create(
        model="Meta-Llama-3.3-70B-Instruct",
        messages=[
            {"role": "system", "content": "You are a helpful assistant"},
            {"role": "user", "content": f"Context: {context}\n\nQuestion: {prompt}"}
        ],
        temperature=0.1,
        top_p=0.1
    )
    return response.choices[0].message.content

# Realizar consultas y mostrar resultados
questions = [
    "What is crop rotation and why is it important?",
    "How can soil pH affect plant growth?",
    "What are effective water management practices in agriculture?"
]

for question in questions:
    print(f"\nPregunta: {question}")
    retrieved_docs = semantic_chunk_retriever.get_relevant_documents(question)
    if retrieved_docs:
        combined_context = "\n".join([doc.page_content for doc in retrieved_docs])
        print("\nChunks Recuperados:")
        for i, doc in enumerate(retrieved_docs):
            print(f"\nChunk {i+1}:")
            print(doc.page_content)
        result = query_llama_sambanova(question, combined_context)
        print(f"\nRespuesta del Modelo:\n{result}")
    else:
        print("No se recuperaron chunks relevantes.")


Creando un nuevo índice 'agriculture-index' en Pinecone...
Índice 'agriculture-index' creado correctamente en Pinecone.
Conectado al índice 'agriculture-index' en Pinecone.


  vectorstore = PineconeVectorStore(


Indexación con Pinecone completada correctamente usando embeddings locales.

Pregunta: What is crop rotation and why is it important?


  retrieved_docs = semantic_chunk_retriever.get_relevant_documents(question)



Chunks Recuperados:

Chunk 1:
and War: Security in a World of Conﬂict , The Crawford Fund for International Agricultural Research, Parliament
House, Canberra, August 15, 2000. ACIAR Monograph No. 73, pp. 39–59.
Hammer, G.L. (1998). Crop modelling: Current status and opportunities to advance. Acta Horticulturae ,456:27–36.
Hammer, G., Kropff, M.J., Sinclair, T.R., and Porter, J.R. (2002). Future contributions of crop modelling – from heuristics
and supporting decision-making to understanding genetic regulation and aiding crop improvement. European Journal
of Agronomy ,18:15–31.

Chunk 2:
2Discuss the contributions of the International Agricultural Research Centers to world crop improvement.
3Discuss the role of the International Agricultural Research Centers in germplasm collection and maintenance.
4Discuss plant breeding efforts by national programs in developing countries.
5Discuss the importance of orphan crops and the efforts being made to improve them.
6Discuss the Green Revolutio

In [45]:
%pip install fastapi

Collecting fastapi
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting starlette<0.46.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.45.3-py3-none-any.whl.metadata (6.3 kB)
Downloading fastapi-0.115.8-py3-none-any.whl (94 kB)
Downloading starlette-0.45.3-py3-none-any.whl (71 kB)
Installing collected packages: starlette, fastapi
Successfully installed fastapi-0.115.8 starlette-0.45.3
Note: you may need to restart the kernel to use updated packages.


In [47]:
%pip install uvicorn 

Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Downloading uvicorn-0.34.0-py3-none-any.whl (62 kB)
Installing collected packages: uvicorn
Successfully installed uvicorn-0.34.0
Note: you may need to restart the kernel to use updated packages.


In [48]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
import uvicorn

# Inicializar la aplicación FastAPI
app = FastAPI()

# Modelo de solicitud para las preguntas
class QuestionRequest(BaseModel):
    questions: List[str]

# Endpoint para realizar consultas a la base de datos vectorial
@app.post("/ask")
def ask_questions(request: QuestionRequest):
    responses = []
    for question in request.questions:
        print(f"\n🟢 Pregunta: {question}")
        retrieved_docs = semantic_chunk_retriever.get_relevant_documents(question)
        
        if retrieved_docs:
            combined_context = "\n".join([doc.page_content for doc in retrieved_docs])
            print("\n🔍 Chunks Recuperados:")
            
            for i, doc in enumerate(retrieved_docs):
                print(f"\nChunk {i+1}:")
                print(doc.page_content)
            
            result = query_llama_sambanova(question, combined_context)
            print(f"\n🤖 Respuesta del Modelo:\n{result}")
            
            responses.append({
                "question": question,
                "response": result,
                "chunks": [doc.page_content for doc in retrieved_docs]
            })
        else:
            responses.append({
                "question": question,
                "response": "No se recuperaron chunks relevantes.",
                "chunks": []
            })
    
    return {"responses": responses}

# Ejecutar el servidor en localhost
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)


RuntimeError: asyncio.run() cannot be called from a running event loop