In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
# Enable LangSmith tracing only when an API key is actually available.
# os.getenv() returns None for a missing variable, and assigning None into
# os.environ raises TypeError, so the key is checked before anything is set.
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
if langchain_api_key:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_API_KEY"] = langchain_api_key
else:
    print("LANGCHAIN_API_KEY is not set; LangSmith tracing stays disabled.")

In [2]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Tunables for this stage: which PDF to read and how it is chunked
PDF_PATH = "pdfs/RAG.pdf"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100

# Read the PDF into a list of documents
loader = PyPDFLoader(PDF_PATH)
docs = loader.load()

# Splitter configured with the chunk size / overlap declared above
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Chunk the loaded documents; `texts` is what gets embedded later on
texts = text_splitter.split_documents(docs)


In [3]:
# Import from langchain_community (already used by this notebook for PyPDFLoader).
# The `langchain.embeddings` / `langchain.vectorstores` paths are deprecated
# re-exports that emit deprecation warnings and are not guaranteed to remain.
# NOTE(review): newer LangChain releases recommend the separate
# `langchain_huggingface` package for HuggingFaceEmbeddings — consider migrating
# once that dependency is acceptable.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Sentence-transformers model used to embed every chunk
embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Build a FAISS index over the chunks and expose it as a retriever
db = FAISS.from_documents(texts, embed)
retriever = db.as_retriever()

  embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from langchain_core.prompts import ChatPromptTemplate

# Few-shot QA template with two placeholders: {context} and {input}.
# The text is kept verbatim (including the leading and trailing newline).
QA_TEMPLATE = """
Based on the {context} provided answer the query asked by the user in a best possible way.
Example1- Question:"What skill is necessary to become Data Scientist?"
Answer:"SQL, Python, Machine Learning and concepts which help in future values predictions."
Question:{input}
Answer:
"""

prompt = ChatPromptTemplate.from_template(QA_TEMPLATE)

In [5]:
from langchain_ollama import OllamaLLM

# Name of the Ollama model that generates the answers
OLLAMA_MODEL = 'llama3.1'
model = OllamaLLM(model=OLLAMA_MODEL)

In [6]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

# Chain that feeds the retrieved documents plus the question through the prompt into the LLM
combine_docs_chain = create_stuff_documents_chain(llm=model, prompt=prompt)

# Full pipeline: retriever output is handed to the document-combining chain
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combine_docs_chain,
)

In [None]:
# Send one question through the retrieval chain and show only the "answer" field
question = "What is RAG?"
result = retrieval_chain.invoke({'input': question})
print(result["answer"])

In [10]:
!pip install llama-index-readers-file pymupdf
!pip install llama-index-vector-stores-postgres
!pip install llama-index-embeddings-huggingface
!pip install llama-index-llms-llama-cpp

Collecting llama-index-llms-llama-cpp
  Using cached llama_index_llms_llama_cpp-0.4.0-py3-none-any.whl.metadata (4.5 kB)
Collecting llama-cpp-python<0.4.0,>=0.3.0 (from llama-index-llms-llama-cpp)
  Using cached llama_cpp_python-0.3.10.tar.gz (79.0 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Installing backend dependencies: started
  Installing backend dependencies: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting diskcache>=5.6.1 (from llama-cpp-python<0.4.0,>=0.3.0->llama-index-llms-llama-cpp)
  Using cached diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Using cached llama_index_llms_llama_cpp-0.4.0-py3-none-any.whl (7.5 kB)
Using cached diskcache-5.6.3-py3-none-any.whl (45 kB)
Building wheels for collec

  error: subprocess-exited-with-error
  
  × Building wheel for llama-cpp-python (pyproject.toml) did not run successfully.
  │ exit code: 1
  ╰─> [20 lines of output]
      [32m*** [1mscikit-build-core 0.11.5[0m using [34mCMake 4.0.3[39m[0m [31m(wheel)[0m
      [32m***[0m [1mConfiguring CMake...[0m
      loading initial cache file C:\Users\ADMINI~1\AppData\Local\Temp\tmpe8_gz7jw\build\CMakeInit.txt
      -- Building for: NMake Makefiles
      CMake Error at CMakeLists.txt:3 (project):
        Running
      
         'nmake' '-?'
      
        failed with:
      
         no such file or directory
      
      
      CMake Error: CMAKE_C_COMPILER not set, after EnableLanguage
      CMake Error: CMAKE_CXX_COMPILER not set, after EnableLanguage
      -- Configuring incomplete, errors occurred!
      [31m
      [1m***[0m [31mCMake configuration failed[0m
      [end of output]
  
  note: This error originates from a subprocess, and is likely not a problem with pip.
  ERRO

In [None]:
# Embedding model for llama-index, created through its HuggingFace integration
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

EMBED_MODEL_NAME = "BAAI/bge-small-en"
embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)

In [8]:
from langchain_community.document_loaders import PyPDFLoader

# Load data/llama2.pdf; note this rebinds `loader` and defines `documents`
LLAMA2_PDF_PATH = "data/llama2.pdf"
loader = PyPDFLoader(LLAMA2_PDF_PATH)
documents = loader.load()


In [12]:
from llama_index.llms.llama_cpp import LlamaCPP

# Remote location of the Llama-2 13B chat weights (Q4_0 GGUF file)
model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_0.gguf"

# Settings are collected first and unpacked into the constructor below
llama_cpp_settings = dict(
    # model_url is given and model_path left as None; a pre-downloaded file
    # could be supplied through model_path instead
    model_url=model_url,
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # kept below llama2's 4096-token context window to leave some headroom
    context_window=3900,
    # extra keyword arguments forwarded to __call__()
    generate_kwargs={},
    # extra keyword arguments forwarded to __init__();
    # n_gpu_layers must be at least 1 for the GPU to be used
    model_kwargs={"n_gpu_layers": 1},
    verbose=True,
)

llm = LlamaCPP(**llama_cpp_settings)


ModuleNotFoundError: No module named 'llama_index.llms.llama_cpp'

In [None]:
import os
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.openai_like import OpenAILike


# Embedding model llama-index uses for indexing and querying
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Build the LLM inside this cell instead of relying on an `llm` variable left
# over from another cell: the only other definition in this notebook (LlamaCPP)
# fails to import, so on a fresh kernel `llm` would be undefined here.
# NOTE(review): the defaults assume a local Ollama server exposing its
# OpenAI-compatible API with the `llama3.1` model pulled — override them through
# the environment variables below if a different endpoint is intended.
llm = OpenAILike(
    model=os.getenv("OPENAI_LIKE_MODEL", "llama3.1"),
    api_base=os.getenv("OPENAI_LIKE_API_BASE", "http://localhost:11434/v1"),
    api_key=os.getenv("OPENAI_LIKE_API_KEY", "ollama"),
    is_chat_model=True,
    # Generous timeout: a previous run of this cell ended in APITimeoutError
    timeout=300.0,
)

# Read every file under pdfs/ and build a vector index over it
documents = SimpleDirectoryReader("pdfs").load_data()
index = VectorStoreIndex.from_documents(documents)

# Query engine that answers with the LLM defined above
query_engine = index.as_query_engine(llm=llm)

query = "What is RAG?"
response = query_engine.query(query)

print("Answer:", response)


  from .autonotebook import tqdm as notebook_tqdm


APITimeoutError: Request timed out.