In [1]:
!pip -qq install langchain
!pip -qq install langchain-core
!pip -qq install langchain-community

In [6]:
!pip install ollama langchain beautifulsoup4 chromadb gradio -q

In [1]:
# Smoke test: send one prompt to the llama3.1 model through LangChain's
# Ollama wrapper.
from langchain_community.llms import Ollama
llm = Ollama(model = "llama3.1")
# Last expression of the cell, so the model's reply is shown as the cell output.
llm.invoke("what is the Meaning of life")

"One of the most profound and intriguing questions humanity has ever pondered. The meaning of life is a complex and multifaceted concept that has been debated by philosophers, theologians, scientists, and scholars across various disciplines for centuries.\n\nThere isn't a single, universally accepted answer to this question. However, here are some perspectives on the meaning of life:\n\n**Philosophical Perspectives:**\n\n1. **Existentialism:** Life has no inherent meaning; it is up to each individual to create their own purpose.\n2. **Hedonism:** The ultimate goal of life is to maximize happiness and pleasure.\n3. **Stoicism:** The meaning of life lies in living a virtuous, self-disciplined life that aligns with reason and virtue.\n\n**Spiritual Perspectives:**\n\n1. **Theism:** The purpose of life is to serve God or achieve spiritual enlightenment.\n2. **Buddhism:** The goal of life is to attain Nirvana (liberation from suffering) through the Four Noble Truths.\n3. **Judaism:** Life h

In [2]:
import ollama
# Pull the two models the notebook uses: the embedding model and the chat model.
# NOTE(review): the previous cell already invokes llama3.1; on a machine where
# the model is not present yet this pull presumably has to run first - confirm.
ollama.pull(model="nomic-embed-text")
ollama.pull(model="llama3.1")

{'status': 'success'}

In [25]:
import gradio as gr
import ollama
from bs4 import BeautifulSoup as bs
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS, Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain.docstore.document import Document
import PyPDF2
import requests
import torch

# URL of the PDF to download and index; fetched by load_pdf() below
pdf_path = 'https://arxiv.org/pdf/2404.11597'

def load_pdf(pdf_path):
    """Download a PDF and return the concatenated text of all its pages.

    The response body is saved as ``document.pdf`` in the current working
    directory (overwriting any existing file) and then parsed with PyPDF2.

    Args:
        pdf_path: URL of the PDF; passed straight to ``requests.get``.

    Returns:
        str: The text of every page, concatenated in page order with no
        separator between pages.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Bound the wait so a stalled server cannot hang the notebook, and fail
    # loudly on an HTTP error instead of saving an error page as "document.pdf".
    response = requests.get(pdf_path, timeout=60)
    response.raise_for_status()

    with open("document.pdf", "wb") as f:
        f.write(response.content)

    with open("document.pdf", "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # Guard against a page yielding no text (None) so one such page does
        # not abort the whole extraction with a TypeError.
        return "".join(page.extract_text() or "" for page in reader.pages)

# Download the PDF and extract its full text as one string.
pdf_text = load_pdf(pdf_path)
# The whole document wrapped as a single Document with empty metadata.
# NOTE(review): `docs` is not referenced again in the visible cells - confirm
# whether it is still needed.
docs = [Document(page_content=pdf_text, metadata={})]

# Function to use LLM for chunking
def llm_chunking(text, chunk_size):
    """Ask the llama3.1 chat model to split ``text`` into chunks.

    The model's reply is treated as plain text and cut at blank lines; each
    piece is whitespace-stripped and empty pieces are discarded so that no
    blank chunk is handed on for embedding.

    Args:
        text: The text to split. It is sent to the model in a single prompt.
        chunk_size: Desired approximate chunk size in characters. It is only
            communicated to the model in the prompt; nothing here enforces it.

    Returns:
        list[str]: The non-empty chunks in the order the model produced them.
        An empty list when ``text`` is empty or whitespace-only (the model is
        not called in that case).
    """
    # Nothing to split: skip the LLM round-trip rather than let the model
    # invent content for an empty prompt.
    if not text or not text.strip():
        return []

    system_message = (
        "You are a helpful assistant. Split the given text into chunks. "
        "Each chunk should be around the specified character size."
    )
    prompt = f"Text: {text}\n\nChunk size: {chunk_size}"
    messages = [
        {'role': 'system', 'content': system_message},
        {'role': 'user', 'content': prompt}
    ]
    # NOTE(review): the whole document goes into one prompt; a long PDF may
    # exceed the model's context window - confirm against the Ollama settings.
    response = ollama.chat(model='llama3.1', messages=messages)

    # The reply is free-form text, not JSON: chunks are taken to be the
    # paragraphs separated by a blank line. Runs of three or more newlines
    # would otherwise produce empty entries, so those are dropped here.
    raw_chunks = response['message']['content'].split("\n\n")
    return [piece.strip() for piece in raw_chunks if piece.strip()]

# Split the extracted PDF text into chunks using the LLM
chunk_size = 4000  # Target chunk size in characters, forwarded to the LLM prompt
chunks = llm_chunking(pdf_text, chunk_size)

# Wrap each chunk in a Document (empty metadata) for the vector store
splits = [Document(page_content=chunk, metadata={}) for chunk in chunks]

# Create the Ollama embedding client (nomic-embed-text) used by the vector store
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Report whether PyTorch can see a CUDA device. This is informational only:
# `device` is not passed to the embeddings or to the vector store below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Embed every chunk and index it in a Chroma vector store
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

# Define the function to call the Ollama Llama3 model
def ollama_llm(question, context):
    """Answer ``question`` from ``context`` with the llama3.1 chat model.

    A system turn tells the model to answer only from the supplied context
    (and to reply 'I do not know' otherwise); the user turn carries the
    question followed by the context.

    Args:
        question: The user's question.
        context: Text the answer should be based on.

    Returns:
        The text content of the model's reply message.
    """
    system_turn = {
        'role': 'system',
        'content': (
            "You are a helpful assistant. Answer the question based on the given context. "
            "If the answer is not in the provided text, just respond with 'I do not know'."
        ),
    }
    user_turn = {
        'role': 'user',
        'content': f"Question: {question}\n\nContext: {context}",
    }
    reply = ollama.chat(model='llama3.1', messages=[system_turn, user_turn])
    return reply['message']['content']

# Define the RAG setup: a retriever over the Chroma store, created with the
# default as_retriever() arguments
retriever = vectorstore.as_retriever()

def rag_chain(question):
    """Run retrieval-augmented generation for a single question.

    Retrieves documents for ``question`` with the module-level ``retriever``,
    joins their contents with blank lines, and asks ``ollama_llm`` to answer
    from that context.

    Args:
        question: The user's question.

    Returns:
        A string containing the model's answer followed by the retrieved text
        that was supplied as context.
    """
    hits = retriever.invoke(question)
    passages = [hit.page_content for hit in hits]
    formatted_context = "\n\n".join(passages)
    answer = ollama_llm(question, formatted_context)
    return f"Answer: {answer}\n\nUsed Text for Inference:\n{formatted_context}"

# Define the Gradio interface
def get_important_facts(question):
    """Gradio callback: forward the question to ``rag_chain``.

    Args:
        question: Text entered in the UI.

    Returns:
        Whatever ``rag_chain`` returns for that question.
    """
    result = rag_chain(question)
    return result

# Build the Gradio UI: a two-line question box wired to the RAG callback,
# with the result rendered as plain text.
iface = gr.Interface(
    title="RAG with Llama3.1",
    description="Ask questions about the provided context",
    fn=get_important_facts,
    inputs=gr.Textbox(
        placeholder="Enter your question here...",
        lines=2,
    ),
    outputs="text",
)

# Start the app
iface.launch()


INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


Using device: cuda
Running on local URL:  http://127.0.0.1:7870


INFO:httpx:HTTP Request: GET http://127.0.0.1:7870/startup-events "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: HEAD http://127.0.0.1:7870/ "HTTP/1.1 200 OK"



To create a public link, set `share=True` in `launch()`.




INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
