In [1]:
%%bash 

# Environment sanity checks, run before any Python cell.

# Print the version of the Python interpreter found on PATH.
python.exe --version 

# Show the contents of the notebook's working directory (long format, hidden files included).
ls -la 

# Print the working directory itself.
pwd

# List the models available to the local Ollama installation.
ollama.exe list 


Python 3.12.6


In [2]:
%%time 
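## Optional one-time ingestion step, left disabled in this notebook.
## Output recorded from an earlier run (presumably of ingest_pdfs.py):
##   Created '6709' chunks from 'Data/Blender43Manual.pdf' at './ChromaDB'
## Uncomment the next line to run the ingestion again.
#!python.exe ingest_pdfs.py 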

CPU times: total: 0 ns
Wall time: 0 ns


In [3]:
%%time 
## python.exe -m pip install langchain_ollama
import os

import smolagents
from langchain_ollama.llms import OllamaLLM

# Take the Hugging Face token from the HF_TOKEN environment variable so that no
# credential has to be written into the notebook. When the variable is unset the
# value is the empty string, exactly as before.
hf_token = os.environ.get("HF_TOKEN", "")
model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"

# Model wrapper reused by the agents created in the following cells.
engine = smolagents.HfApiModel(model_id=model_id, token=hf_token, max_tokens=5000)
print( engine.to_dict() )

# Smoke test: send one chat message and print the text of the reply.
messages = [{"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]}]

response = engine(messages, stop_sequences=["END"])
print(response.content)


  from .autonotebook import tqdm as notebook_tqdm


{'max_tokens': 5000, 'last_input_token_count': None, 'last_output_token_count': None, 'model_id': 'Qwen/Qwen2.5-Coder-32B-Instruct', 'provider': None}
Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
CPU times: total: 1.23 s
Wall time: 1.8 s


In [4]:
%%time
import smolagents

# Tool-less code agent, meant to act as the reasoning step for better RAG.
reasoner = smolagents.CodeAgent(
    model=engine,
    tools=[],
    add_base_tools=False,
    max_steps=2,
)
# Show which configuration fields the agent serialises.
print( list(reasoner.to_dict()) )


['tools', 'model', 'managed_agents', 'prompt_templates', 'max_steps', 'verbosity_level', 'grammar', 'planning_interval', 'name', 'description', 'requirements', 'authorized_imports', 'executor_type', 'executor_kwargs', 'max_print_outputs_length']
CPU times: total: 15.6 ms
Wall time: 26.1 ms


In [5]:
%%time 
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader

def load_and_process_pdfs(
    data_dir: str,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
    glob: str = "**/*.pdf",
) -> list:
    """Load the PDFs found under ``data_dir`` and split them into chunks.

    Args:
        data_dir: Directory that is searched for PDF files.
        chunk_size: Upper bound on the length of one chunk, measured with
            ``len`` (i.e. in characters).
        chunk_overlap: Number of characters that consecutive chunks share.
        glob: Pattern selecting the files to load below ``data_dir``; the
            default matches ``.pdf`` files at any depth.

    Returns:
        The chunk documents produced by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    loader = DirectoryLoader(
        data_dir,
        glob=glob,
        loader_cls=PyPDFLoader,
    )
    documents = loader.load()

    # Overlapping chunks keep text that straddles a chunk boundary retrievable
    # from either side.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_documents(documents)

# Folder holding the source PDFs, given relative to the working directory.
data_dir = "Data"
# Module-level list of chunks; later passed to RetrieverTool to build the index.
# The recorded run of this cell took about 33 s of wall time.
chunks = load_and_process_pdfs(data_dir)


CPU times: total: 33.1 s
Wall time: 33.3 s


In [6]:
## python.exe -m pip install rank_bm25
import smolagents
from langchain_community.retrievers import BM25Retriever

class RetrieverTool(smolagents.Tool):
    """Agent tool that returns the best BM25 matches for a text query.

    The documents are indexed once at construction time with
    ``BM25Retriever``; every ``forward`` call ranks them against the query
    and returns the top ``k`` as one formatted string.
    """

    name = "retriever"
    # This text is shown to the agent's model, so it has to describe the real
    # backend (BM25 keyword ranking) and corpus rather than semantic search
    # over unrelated documentation.
    description = "Uses BM25 keyword search over the indexed PDF documentation (e.g. the Blender manual) to retrieve the passages that could be most relevant to answer your query."
    inputs = {
        "query": {
            "type": "string",
            "description": "The query to perform. This should use the keywords you expect to find in your target documents. Use the affirmative form rather than a question.",
        }
    }
    output_type = "string"

    def __init__(self, docs, k: int = 10, **kwargs):
        """Index ``docs`` for retrieval.

        Args:
            docs: Non-empty collection of documents to index.
            k: Number of documents returned per query (default 10).
            **kwargs: Forwarded unchanged to ``smolagents.Tool.__init__``.

        Raises:
            ValueError: If ``docs`` is empty.
        """
        super().__init__(**kwargs)
        # Fail early with a clear message instead of an obscure error raised
        # while the BM25 index is being built.
        if not docs:
            raise ValueError("RetrieverTool needs at least one document to index")
        self.retriever = BM25Retriever.from_documents(docs, k=k)

    def forward(self, query: str) -> str:
        """Return the retrieved documents for ``query`` as one string.

        Each hit is introduced by a ``===== Document <i> =====`` header,
        numbered from 0 in ranking order.
        """
        assert isinstance(query, str), "Your search query must be a string"

        docs = self.retriever.invoke(query)
        sections = (
            f"\n\n===== Document {i} =====\n{doc.page_content}"
            for i, doc in enumerate(docs)
        )
        return "\nRetrieved documents:\n" + "".join(sections)

# Build the BM25-backed tool over the chunks created by load_and_process_pdfs.
retriever_tool = RetrieverTool(chunks)
# Prints the default object repr; it only confirms that construction succeeded.
print( retriever_tool )


<__main__.RetrieverTool object at 0x000001F6F1EFF3B0>


In [7]:
%%time
import smolagents

# Code agent that answers questions with the retriever tool and the shared model.
agent = smolagents.CodeAgent(
    model=engine,
    tools=[retriever_tool],
    max_steps=2,
    verbosity_level=0,
)
print( agent )

<smolagents.agents.CodeAgent object at 0x000001F6F1EFF410>
CPU times: total: 15.6 ms
Wall time: 9.55 ms


In [8]:
%%time 
# First test question for the agent.
query = "List the Principled BSDF Inputs"  
agent_output = agent.run(query)
# NOTE(review): the trailing "22" is presumably the expected number of inputs — TODO confirm.
print(f"Final output: '{agent_output}' ") ## 22

Final output: 'It seems the previous search query did not retrieve the specific information we need regarding the Principled BSDF inputs. Let me look up the documentation directly to provide you with the correct details.

The Principled BSDF shader in Blender is used to create realistic materials by handling various lighting and surface properties. Here are the inputs for the Principled BSDF shader:

1. **Base Color** - The base color of the surface.
2. **Subsurface** - The amount of light that diffuses beneath the surface.
3. **Subsurface Radius** - The radius over which the light diffuses. It can be defined in terms of RGB to provide different diffusion rates for different wavelengths.
4. **Subsurface Color** - The color of the subsurface scattering.
5. **Metallic** - Determines the amount of metallicity in the material, where 1 is fully metallic and 0 is fully non-metallic.
6. **Specular** - Controls the reflectivity of the material.
7. **Specular Tint** - Adjusts the color of the h

In [9]:
%%time 
# Second test question; the trailing comment on the print line records the
# expected expansion of the acronym.
query = "What is the meaning of BSDF?"
agent_output = agent.run(query)
print(f"Final output: '{agent_output}' ")  ## Bidirectional Scattering Distribution Function

Final output: 'BSDF stands for Bidirectional Scattering Distribution Function. It is a fundamental concept in computer graphics and physics that describes how light is reflected and refracted at a surface. In the context of rendering and shading, a BSDF defines the probability of light being scattered in a specific direction given its incoming direction. This allows for a more realistic simulation of light interactions with surfaces, considering both reflection and transmission of light.' 
CPU times: total: 15.6 ms
Wall time: 21.2 s
