In [139]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Web pages that form the knowledge base for retrieval
urls = [
    "https://en.wikipedia.org/wiki/Portal:Current_events/February_2025",
    "https://en.wikipedia.org/wiki/Roland_TR-808",
]
# Fetch each page with its own loader (one list of documents per URL), then
# flatten the per-URL lists into a single list of documents
docs = [WebBaseLoader(page_url).load() for page_url in urls]
docs_list = [document for per_url_docs in docs for document in per_url_docs]

In [140]:
# Build a splitter whose length function is a tiktoken encoder:
# chunk size 300, no overlap between consecutive chunks
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=0,
    chunk_size=300,
)
# Cut every loaded document into chunks for embedding
doc_splits = text_splitter.split_documents(docs_list)

In [141]:
from langchain_community.vectorstores import DuckDB
from langchain_ollama import OllamaEmbeddings
# Embed every chunk with the Ollama model and index the vectors in DuckDB
vectorstore = DuckDB.from_documents(
    documents=doc_splits,
    embedding=OllamaEmbeddings(model="llama3.2"),
)
# The number of results has to be passed through `search_kwargs`; a bare
# `k=4` keyword is not a retriever field and is not forwarded to the
# similarity search
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})



In [142]:
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
# Prompt for the answering step: it is filled with the user's question and
# the text of the retrieved documents
prompt = PromptTemplate(
    input_variables=["question", "documents"],
    template="""You are an assistant for question-answering tasks.
    Use the following documents to answer the question.
    If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise:
    Question: {question}
    Documents: {documents}
    Answer:
    """,
)

In [143]:
# Initialize the chat LLM with the Llama 3.2 model served by Ollama;
# temperature=0 keeps sampling randomness to a minimum
llm = ChatOllama(
    model="llama3.2",
    temperature=0,
)

In [144]:
# Compose the pipeline: prompt template -> LLM -> string output parser
rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

In [145]:
# Define the RAG application class
class RAGApplication:
    """Minimal retrieval-augmented generation pipeline.

    Pairs a retriever with an answer-generating chain: the retriever is
    queried with the question, and the page content of what it returns is
    handed to the chain together with the question.

    Args:
        retriever: Object whose ``invoke(question)`` returns an iterable of
            documents exposing a ``page_content`` attribute.
        rag_chain: Object whose ``invoke(inputs)`` accepts a dict with the
            keys ``"question"`` and ``"documents"`` and returns the answer.
    """

    def __init__(self, retriever, rag_chain):
        self.retriever = retriever
        self.rag_chain = rag_chain

    def run(self, question):
        """Answer ``question`` using the retrieved documents as context.

        Args:
            question: The question; passed to both the retriever and the chain.

        Returns:
            Whatever ``rag_chain.invoke`` returns for the question and the
            joined document text.
        """
        # Retrieve relevant documents
        documents = self.retriever.invoke(question)
        # Show what was retrieved so the answer can be checked against it
        print(documents)
        # Put each document's text on its own line. The separator must be a
        # real newline ("\n"); "\\n" would insert a literal backslash and "n".
        doc_texts = "\n".join(doc.page_content for doc in documents)
        # Get the answer from the language model
        answer = self.rag_chain.invoke({"question": question, "documents": doc_texts})
        return answer

In [152]:
# Wire the retriever and the chain into one application object
rag_application = RAGApplication(retriever, rag_chain)
# Ask a sample question and show it next to the generated answer
question = "What is 808?"
answer = rag_application.run(question)
print(f"Question: {question}")
print(f"Answer: {answer}")

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Roland_TR-808', 'title': 'Roland TR-808 - Wikipedia', 'language': 'en', '_similarity_score': np.float64(0.5035594203219714)}, page_content='Other artists who have used the 808 include Damon Albarn, Diplo, Fatboy Slim, David Guetta and New Order.[3][40] It has been referenced in lyrics by artists including the Beastie Boys, Beck, Outkast, Kelis, TI, Lil Wayne, Britney Spears, Beyoncé, R Kelly and Robbie Williams.[3][11] Its bass drum has been used as a metaphor for a heartbeat in songs by artists including Madonna, Rihanna and Kesha.[11]'), Document(metadata={'source': 'https://en.wikipedia.org/wiki/Roland_TR-808', 'title': 'Roland TR-808 - Wikipedia', 'language': 'en', '_similarity_score': np.float64(0.4913470701467913)}, page_content='The British electronic group 808 State took its name from the 808 and used it extensively.[11] 808 State\'s Graham Massey said: "The Roland gear began to be a kind of Esperanto in music. The wh