In [1]:

import requests
from bs4 import BeautifulSoup
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
from langchain.docstore.document import Document
from pymongo import MongoClient
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma



import os  # local import: only this configuration step needs it

# Connection string for the MongoDB Atlas cluster. It is read from the
# MONGODB_ATLAS_URI environment variable so the real password never has to be
# written into the notebook. The fallback is the original template, whose
# "<db_password>" part is only a placeholder.
Mongo_Atlas = os.environ.get(
    "MONGODB_ATLAS_URI",
    "mongodb+srv://zayrafemi:<db_password>@cluster0.nhoxg.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0",
)

client = MongoClient(Mongo_Atlas)

DB_NAME = "vectorstore"  # Change this to the name of your database
COLLECTION_NAME = "documentos"  # Change this to the name of your collection
ATLAS_VECTOR_SEARCH_INDEX_NAME = "docs_python"

# Handle to the MongoDB collection that backs the Atlas vector store
MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]

# HuggingFace sentence-embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# MongoDB Atlas Vector Search store.
# NOTE: nothing else in this cell reads `vector_store`; the documents are
# indexed in a Chroma store further down.
vector_store = MongoDBAtlasVectorSearch(
    collection=MONGODB_COLLECTION,
    embedding=embeddings,
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    relevance_score_fn="cosine",
)

url = "https://pythonology.eu/using-pandas_ta-to-generate-technical-indicators-and-signals/"

# Download the page. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being split and indexed as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
datos = soup.get_text()

# Split the page text into overlapping chunks.
# NOTE(review): split_text() yields plain strings that are wrapped in
# Document(page_content=...) later, so add_start_index does not end up as
# metadata on the stored documents (the recorded output shows metadata={}).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1200, chunk_overlap=100, add_start_index=True
)

chunks = text_splitter.split_text(datos)
print(f"Number of chunks: {len(chunks)}")

# Reuse the embedding model already created for the Atlas store instead of
# loading the same "sentence-transformers/all-MiniLM-L6-v2" model a second time.
embedding_model = embeddings

# Wrap every text chunk in a LangChain Document
documents = [Document(page_content=chunk) for chunk in chunks]

# Embed the documents and index them in a Chroma vector store
vectorstore = Chroma.from_documents(
    documents=documents,  # documents to add
    embedding=embedding_model,  # embedding model instance used to vectorise them
)
print("Documents added to the vector store.")

# Ollama LLM served locally. The server address is passed as `base_url`;
# the previous `server_url` keyword is not an OllamaLLM field.
llm = OllamaLLM(model="llama3.2", base_url="http://localhost:11434")

# Prompt template for question answering over retrieved context
prompt = ChatPromptTemplate.from_template(
    template="Use the context below to answer the user's question:\n\n{context}\n\nQuestion: {question}\nAnswer:"
)

# NOTE: `llm` and `prompt` are prepared for a QA chain, but this cell only
# runs the retrieval step; what gets printed is the list of retrieved
# documents, not a generated answer.

# Run a similarity search for the question, keeping the 3 closest chunks
question = "what are the oversold and overbought periods?"
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
response = retriever.invoke(question)
print("Response:", response)

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


Number of chunks: 12
Documents added to the vector store.
Response: [Document(metadata={}, page_content="# Plotting SMAs\nplt.figure(figsize=(12, 6))\nplt.plot(data['Close'], label='Close Price')\nplt.plot(data['SMA_20'], label='20-Day SMA')\nplt.plot(data['SMA_50'], label='50-Day SMA')\nplt.legend()\nplt.show()\nAs you saw in the code we used the following code to calculate the SMA of the last 20 days and then added the results as a column to our data dataframe:ta.sma(data['Close'], length=20)If you want to know which methods are available through pandas_ta on your data, you can use the help function:help(data.ta)Plotting RSIThe Relative Strength Index (RSI) is a momentum oscillator that measures the speed and change of price movements. It oscillates between 0 and 100 and is typically used to identify overbought (above 70) or oversold (below 30) conditions in a market.Here’s how you can calculate and plot RSI using pandas_ta:# we are using the same data as before\n# Calculate RSI\ndat

In [26]:
import os

import PyPDF2
import requests
from bs4 import BeautifulSoup
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_ollama.llms import OllamaLLM
from pymongo import MongoClient

# Connection string for the MongoDB Atlas cluster. It is read from the
# MONGODB_ATLAS_URI environment variable so the real password never has to be
# written into the notebook. The fallback is the original template, whose
# "<db_password>" part is only a placeholder.
Mongo_Atlas = os.environ.get(
    "MONGODB_ATLAS_URI",
    "mongodb+srv://zayrafemi:<db_password>@cluster0.nhoxg.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0",
)

client = MongoClient(Mongo_Atlas)

DB_NAME = "vectorstore"  # Change this to the name of your database
COLLECTION_NAME = "documentos"  # Change this to the name of your collection
ATLAS_VECTOR_SEARCH_INDEX_NAME = "dnd_doc"

# Handle to the MongoDB collection that backs the Atlas vector store
MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]

# HuggingFace embedding model (Spanish similarity fine-tune)
embedding_model = HuggingFaceEmbeddings(model_name="jaimevera1107/all-MiniLM-L6-v2-similarity-es")

# MongoDB Atlas Vector Search store.
# Uses `embedding_model` defined just above. The previous `embeddings` name is
# not defined in this cell, so it either raised NameError on a fresh kernel or
# silently picked up a different model left over from an earlier cell.
vector_store = MongoDBAtlasVectorSearch(
    collection=MONGODB_COLLECTION,
    embedding=embedding_model,
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    relevance_score_fn="cosine",
)

# Load the PDF and extract its text
pdf_path = "D&D5Manual.pdf"

with open(pdf_path, "rb") as file:
    reader = PyPDF2.PdfReader(file)
    # Join the pages with a newline so the last word of one page is not fused
    # with the first word of the next. `or ""` guards against pages for which
    # no text could be extracted.
    text = "\n".join(page.extract_text() or "" for page in reader.pages)

# Split the text into overlapping chunks (chunk size and overlap in characters)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_text(text)

print(f"Number of chunks: {len(chunks)}")

# Ollama LLM served locally. The server address is passed as `base_url`;
# the previous `server_url` keyword is not an OllamaLLM field.
llm = OllamaLLM(model="llama3.2", base_url="http://localhost:11434")

# `embedding_model` was already created earlier in this cell, so the same
# HuggingFace model is reused here instead of being loaded a second time.

# Persistent Chroma vector store. Fail early with a readable message when the
# target directory cannot be written to.
# NOTE(review): the recorded run of this cell ended with
# "OperationalError: attempt to write a readonly database". If the directory
# is writable and the error persists, a stale Chroma client in the running
# kernel is a likely culprit - restart the kernel and confirm.
PERSIST_DIR = "./vectorstore"
os.makedirs(PERSIST_DIR, exist_ok=True)
if not os.access(PERSIST_DIR, os.W_OK):
    raise PermissionError(
        f"Chroma persist directory {PERSIST_DIR!r} is not writable; "
        "adding documents would fail with 'attempt to write a readonly database'."
    )
vectorstore = Chroma(persist_directory=PERSIST_DIR, embedding_function=embedding_model)

# Wrap every text chunk in a LangChain Document
documents = [Document(page_content=chunk) for chunk in chunks]

# Give every chunk a stable id derived from the source file and its position,
# so re-running the cell overwrites the same entries in the persisted store
# instead of appending a fresh copy of the whole manual each time.
document_ids = [f"{pdf_path}:{index}" for index in range(len(documents))]
vectorstore.add_documents(documents, ids=document_ids)
print("Documents added to the vector store.")

# Prompt template for question answering over retrieved context
prompt_template = """
Usa el contexto a continuación para responder la pregunta del usuario:

{context}

Pregunta: {question}

Respuesta:
"""
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# NOTE: `prompt` is prepared for a QA chain, but this cell only runs the
# retrieval step; what gets printed is the list of retrieved documents, not a
# generated answer.

# Example question
question = "¿Cuánto viven los elfos?"

# Run a similarity search for the question, keeping the 3 closest chunks
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
response = retriever.invoke(question)

# Show the retrieved documents
print("Respuesta:", response)

Number of chunks: 3120


OperationalError: attempt to write a readonly database