In [1]:
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
import sys

In [14]:
def ingest(pdf_path="data/tutorial.pdf", persist_directory="./sql_chroma_db"):
    """Load a PDF, split it into overlapping chunks, embed them and persist them in Chroma.

    Args:
        pdf_path: Path of the PDF to index. Defaults to the tutorial PDF used
            by this notebook.
        persist_directory: Directory of the on-disk Chroma store. The
            retrieval side must open the same directory.

    Returns:
        None. Prints how many page documents were loaded and how many chunks
        were produced.

    Notes:
        Chunk ids are derived from ``pdf_path`` and the chunk position, so
        running this again for the same PDF and splitter settings overwrites
        the same records instead of appending a second copy. A store that was
        populated earlier with random ids (or with a longer version of the
        PDF) may still contain stale records; delete ``persist_directory``
        before re-ingesting in that case.
    """
    # One Document per PDF page. `load()` is used rather than
    # `load_and_split()` so the text is split exactly once (below) and the
    # `start_index` metadata is an offset into the page text.
    loader = PyPDFLoader(pdf_path)
    pages = loader.load()

    # Character-based splitting with a small overlap so a sentence cut at a
    # chunk boundary is still fully present in one of the two chunks.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1024,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )
    chunks = text_splitter.split_documents(pages)
    print(f"Split {len(pages)} documents into {len(chunks)} chunks.")

    embedding = FastEmbedEmbeddings()

    # Deterministic ids make the ingestion repeatable: the LangChain Chroma
    # wrapper writes records by id, so a re-run replaces rather than duplicates.
    ids = [f"{pdf_path}#chunk-{position}" for position in range(len(chunks))]

    # Create (or update) the persisted vector store.
    Chroma.from_documents(
        documents=chunks,
        embedding=embedding,
        ids=ids,
        persist_directory=persist_directory,
    )

In [15]:
# Build the persisted Chroma store from the PDF; prints the page/chunk counts.
# NOTE(review): this embeds every chunk, so it is presumably the slow cell — confirm before re-running.
ingest()

[32m2025-07-02 15:21:40.836[0m | [31m[1mERROR   [0m | [36mfastembed.common.model_management[0m:[36mdownload_model[0m:[36m430[0m - [31m[1mCould not download model from HuggingFace: (ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')), '(Request ID: 0cbaceb8-72db-4f0b-ad13-220767d19af2)') Falling back to other sources.[0m
[32m2025-07-02 15:21:40.837[0m | [31m[1mERROR   [0m | [36mfastembed.common.model_management[0m:[36mdownload_model[0m:[36m452[0m - [31m[1mCould not download model from either source, sleeping for 3.0 seconds, 2 retries left.[0m


Split 169 documents into 445 chunks.


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [16]:
import os
from getpass import getpass

from huggingface_hub import login

# SECURITY: the Hugging Face token must never be written into the notebook.
# It is read from the HF_TOKEN environment variable and, if that is unset,
# requested interactively (getpass does not echo or store the value).
# NOTE(review): a real token was previously committed in this cell — revoke it
# in the Hugging Face account settings and issue a new one.
access_token_read = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
# Kept only so any later cell that still refers to this name keeps working;
# it has always held the same value as the read token.
access_token_write = access_token_read
login(token=access_token_read)

In [17]:
def rag_chain(
    model_name="llama3.2:latest",
    persist_directory="./sql_chroma_db",
    k=3,
    score_threshold=0.5,
):
    """Build the retrieval-augmented QA chain over the persisted Chroma store.

    Args:
        model_name: Ollama model tag passed to ``ChatOllama``.
        persist_directory: Directory of the on-disk Chroma store to query; it
            has to be the directory the ingestion step wrote to.
        k: Maximum number of chunks the retriever returns per question.
        score_threshold: Minimum relevance score a chunk needs in order to be
            returned by the retriever.

    Returns:
        The chain produced by ``create_retrieval_chain``. It is invoked with
        ``{"input": <question>}``; the result is read by the caller through
        its ``"answer"`` and ``"context"`` keys.
    """
    model = ChatOllama(model=model_name)

    # `{input}` is the user question and `{context}` is filled with the
    # retrieved chunks by the stuff-documents chain.
    prompt = PromptTemplate.from_template(
        """
        <s> [Instructions] You are a friendly assistant. Answer the question based only on the following context. 
        If you don't know the answer, then reply, No Context available for this question {input}. [/Instructions] </s> 
        [Instructions] Question: {input} 
        Context: {context} 
        Answer: [/Instructions]
        """
    )

    # Open the existing vector store with the same embedding class that was
    # used at ingestion time, so query vectors and stored vectors are comparable.
    embedding = FastEmbedEmbeddings()
    vector_store = Chroma(persist_directory=persist_directory, embedding_function=embedding)

    # Only chunks that clear the score threshold are returned, at most `k`.
    retriever = vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={
            "k": k,
            "score_threshold": score_threshold,
        },
    )

    # Stuff the retrieved chunks into the prompt, then wire retriever and
    # answer generation together.
    document_chain = create_stuff_documents_chain(model, prompt)
    chain = create_retrieval_chain(retriever, document_chain)
    return chain

In [18]:
# NOTE(review): prints a fixed string only; looks like a leftover kernel sanity check — consider deleting this cell.
print("Test")

Test


In [19]:
# NOTE(review): identical to the previous cell and prints a fixed string only — consider deleting this cell.
print("Test")

Test


In [20]:
def ask(query: str, chain=None):
    """Answer a question with the RAG chain and print the answer and its sources.

    Args:
        query: The question, passed to the chain under the ``"input"`` key.
        chain: Optional pre-built chain exposing ``invoke``. Pass the object
            returned by ``rag_chain()`` to reuse it across several questions;
            when omitted a new chain is built for this call, which reloads the
            embedding model and reopens the vector store.

    Returns:
        None. The answer is printed first, followed by one ``Source:`` line
        per retrieved document.
    """
    # Build the chain only when the caller did not supply one.
    if chain is None:
        chain = rag_chain()

    result = chain.invoke({"input": query})

    print(result["answer"])
    # One line per retrieved chunk; chunks from the same file repeat the path.
    for doc in result["context"]:
        print("Source: ", doc.metadata["source"])

In [23]:
# Example query: prints the model's answer, then the source file of each retrieved chunk.
ask("How do I define a function in python?")

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


To define a function in Python, you can use the `def` keyword followed by the function name and a parameter list. For example:

```python
def ask_ok(prompt, retries=4, reminder='Please try again!'):
    while True:
        reply = input(prompt)
        if reply in {'y', 'ye', 'yes'}:
            return True
        if reply in {'n', 'no', 'nop', 'nope'}:
            return False
        retries = retries - 1
        if retries < 0:
            raise ValueError('invalid user response')
        print(reminder)
```

You can also specify default values for one or more arguments, like this:

```python
def ask_ok(prompt, retries=4, reminder='Please try again!'):
    # function body
```

Additionally, you can define functions with a variable number of arguments using various forms, such as specifying default argument values.
Source:  data/tutorial.pdf
Source:  data/tutorial.pdf
Source:  data/tutorial.pdf


In [26]:
# Second example query, this time about control flow.
ask("How do I write a for loop in python?")

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


To write a for loop in Python, you can use the following syntax:

```python
for variable in iterable:
    # do something with variable
```

For example:

```python
fruits = ['apple', 'banana', 'cherry']
for fruit in fruits:
    print(fruit)
```

This will output: 
```
apple
banana
cherry
```

Note that you can also use the `enumerate` function to iterate over both the index and value of each item in a list:

```python
fruits = ['apple', 'banana', 'cherry']
for i, fruit in enumerate(fruits):
    print(i, fruit)
```

This will output:
```
0 apple
1 banana
2 cherry
```
Source:  data/tutorial.pdf
Source:  data/tutorial.pdf
Source:  data/tutorial.pdf


In [27]:
# Third example query: a conceptual comparison rather than a syntax question.
ask("How is a list different from a tuple?")

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


A list is different from a tuple in that lists are mutable, meaning their contents can be modified after creation, whereas tuples are immutable and cannot be changed once created.
Source:  data/tutorial.pdf
Source:  data/tutorial.pdf
Source:  data/tutorial.pdf
