In [1]:
!pip install crewai crewai-tools langchain openai

Collecting crewai
  Downloading crewai-1.7.2-py3-none-any.whl.metadata (36 kB)
Collecting crewai-tools
  Downloading crewai_tools-1.7.2-py3-none-any.whl.metadata (11 kB)
Collecting aiosqlite~=0.21.0 (from crewai)
  Downloading aiosqlite-0.21.0-py3-none-any.whl.metadata (4.3 kB)
Collecting appdirs~=1.4.4 (from crewai)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting chromadb~=1.1.0 (from crewai)
  Downloading chromadb-1.1.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting click~=8.1.7 (from crewai)
  Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Collecting instructor>=1.3.3 (from crewai)
  Downloading instructor-1.13.0-py3-none-any.whl.metadata (11 kB)
Collecting json-repair~=0.25.2 (from crewai)
  Downloading json_repair-0.25.3-py3-none-any.whl.metadata (7.9 kB)
Collecting json5~=0.10.0 (from crewai)
  Downloading json5-0.10.0-py3-none-any.whl.metadata (34 kB)
Collecting jsonref~=1.1.0 (from crewai)
  Downlo

In [None]:
!pip install sentence_transformers

In [3]:
import os
from google.colab import userdata
# Keys are read from the Colab "Secrets" panel so nothing sensitive is hardcoded in the notebook.
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")

# Optional: only needed if you'll use web search tools.
# userdata.get raises SecretNotFoundError when the secret is absent, so the lookup is
# guarded to keep this key genuinely optional instead of aborting the cell.
try:
    os.environ["SERPER_API_KEY"] = userdata.get("SERPER_API_KEY")
except userdata.SecretNotFoundError:
    print("SERPER_API_KEY not found in Colab secrets; skipping web search setup.")

# Default model name exported for the agents defined later in the notebook.
os.environ["OPENAI_MODEL_NAME"] = "gpt-4o-mini"

In [4]:
import crewai
from crewai import Agent, Task, Crew, Process
from crewai_tools import SerperDevTool

In [5]:
# Show the installed CrewAI version so readers know which release produced the outputs below.
print(crewai.__version__)

1.7.2


Initializing the tool

In [7]:
# Location of the PDF that is handed to PDFSearchTool below.
PDF_FILE_PATH = "/content/2. Essentials of LLMs.pdf"

In [8]:
from crewai_tools import PDFSearchTool

# PDFSearchTool takes a config with two required sections:
#   - embedding_model: provider + provider-specific config
#   - vectordb: vector DB provider + its config

# Embedding side.
# Supported providers: "openai", "azure", "google-generativeai", "google-vertex",
# "voyageai", "cohere", "huggingface", "jina", "sentence-transformer", "text2vec",
# "ollama", "openclip", "instructor", "onnx", "roboflow", "watsonx", "custom".
# Provider-specific keys that can go into "config" when switching provider:
#   google-generativeai: "model_name": "gemini-embedding-001",
#                        "task_type": "RETRIEVAL_DOCUMENT", "title": "Embeddings"
#   cohere:              "model": "embed-english-v3.0"
#   ollama (local):      "model": "nomic-embed-text"
# An optional "api_key" entry may be added; if omitted, the tool will use
# provider-specific env vars (e.g., OPENAI_API_KEY or EMBEDDINGS_OPENAI_API_KEY for OpenAI).
embedding_model_config = {
    "provider": "sentence-transformer",
    "config": {
        # Model identifier; "model" is described as being auto-mapped to "model_name" internally.
        # all-MiniLM-L6-v2 is a possible alternative.
        # NOTE(review): the recorded run downloaded a 90.9M model.safetensors, which looks
        # small for BAAI/bge-m3 — confirm this setting is actually the model being loaded.
        "model": "BAAI/bge-m3",
    },
}

# Vector store side ("chromadb" or "qdrant"). The config is left empty to rely on defaults.
#   ChromaDB: pass "settings" (chromadb.config.Settings), e.g.
#       Settings(persist_directory="/content/chroma", allow_reset=True, is_persistent=True)
#   Qdrant:   pass "vectors_config" (qdrant_client.models.VectorParams), e.g.
#       VectorParams(size=384, distance=Distance.COSINE)
# Note: the collection name is controlled by the tool (default: "rag_tool_collection"), not set here.
vectordb_config = {
    "provider": "chromadb",
    "config": {},
}

pdf_tool = PDFSearchTool(
    pdf=PDF_FILE_PATH,
    config={
        "embedding_model": embedding_model_config,
        "vectordb": vectordb_config,
    },
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Defining the Agents

In [9]:
# Researcher agent: looks up material on {topic} in the PDF through pdf_tool.
# {topic} and {PDF_file} are placeholders filled from the inputs given to crew.kickoff().
researcher = Agent(
    role='Researcher of Data Science',
    goal="Get the relevant content for the topic {topic} from {PDF_file}",
    # NOTE(review): the backstory mentions "videos" although this agent searches a PDF — confirm wording.
    backstory='You are an expert in understanding the videos on A.I., Data Science, Machine Learning and allied topics',
    tools=[pdf_tool],
    # The model is selected through `llm`; `llm_name` is not an Agent parameter, so the
    # previous setting had no effect and the model came from OPENAI_MODEL_NAME instead.
    llm="gpt-4o-mini",
    verbose=True,
    memory=True,
    # Allowed to hand work over to other agents in the crew.
    allow_delegation=True
)

In [10]:
# Writer agent: narrates the answer on {topic} with reference to the PDF.
# {topic} and {PDF_file} are placeholders filled from the inputs given to crew.kickoff().
writer = Agent(
    role='Writer',
    goal="Narrate about {topic} referring to the {PDF_file}",
    backstory=(
        "Anyone having any query about requires a precise answer to the asked question"
        ),
    tools=[pdf_tool],
    # The model is selected through `llm`; `llm_name` is not an Agent parameter, so the
    # previous setting had no effect and the model came from OPENAI_MODEL_NAME instead.
    llm="gpt-4o-mini",
    verbose=True,
    memory=True,
    # The writer answers on its own and does not delegate to other agents.
    allow_delegation=False
)

Defining a task

In [11]:
# The crew's only task: produce a short answer on {topic} drawn from {PDF_file}.
# Both placeholders are substituted from the inputs passed to crew.kickoff().
research_task = Task(
    agent=researcher,
    tools=[pdf_tool],
    description="Based on searched contents about the {topic} from the {PDF_file}, provide an insight.",
    expected_output="An answer in short on the {topic} in the {PDF_file}.",
    # File name given to the task for its output.
    output_file="my-research-report.md",
    async_execution=False,
)

Building the crew

In [12]:
from crewai import llm
# Assemble the crew. Only research_task is scheduled, and it is assigned to the researcher;
# the writer is a crew member without a task of its own.
crew = Crew(
    tasks=[research_task],
    agents=[researcher, writer],
    process=Process.sequential,
    max_rpm=100,
    cache=True,
    memory=True,
    # NOTE(review): share_crew=True presumably shares crew/execution details with the
    # CrewAI team — confirm this is intended for this document.
    share_crew=True,
)

Running the crew

In [13]:
# Run the crew. The input keys fill the {topic} and {PDF_file} placeholders used in the
# agent goals and the task text. PDF_FILE_PATH is reused so the path in the prompts
# always matches the file that pdf_tool was built on.
result = crew.kickoff(inputs={'topic': "Why is Quantization required?",
                              'PDF_file': PDF_FILE_PATH})
print(result)

Quantization is a technique for compressing large language models (LLMs) by reducing the precision of their weights and activations. It converts high-precision data types, like 32-bit floating point (FP32), to lower-precision data types, like 8-bit integer (INT8). This reduces the number of bits required to represent the model's weights, which makes the model smaller. Quantized models require less memory and storage, and can run on less powerful hardware.


Would you like to view your execution traces? [y/N] (20s timeout): 