<a href="https://colab.research.google.com/github/frank-morales2020/Cloud_curious/blob/master/AGENT_ORCHESTRARION_DEMO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Pytorch & other libraries
!pip install torch tensorboard --quiet

# Install Hugging Face libraries
!pip install  --upgrade transformers datasets accelerate evaluate bitsandbytes --quiet



#FlashAttention only supports Ampere GPUs or newer. #NEED A100 OR L4 IN GOOGLE COLAB
#!pip install -U transformers
!pip install -U flash-attn --no-build-isolation --quiet


! pip install peft --quiet
! pip install datasets trl ninja packaging --quiet

# Uncomment only if you're using A100 GPU
#!pip install flash-attn --no-build-isolation
!pip install diffusers safetensors  --quiet
!pip install colab-env --quiet

!pip install crewai --quiet
!pip install langchain --quiet
!pip install faiss-gpu --quiet
!pip install sentence-transformers --quiet

!pip install langchain_community -q

!pip install faiss-cpu -q

!pip install crewai-tools -q

!pip install transformers -U -q

In [2]:
!nvidia-smi

Sat Mar 22 11:20:06 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   34C    P0             42W /  400W |       0MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import torch
import os
import sys
import json
import IPython
from datetime import datetime
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
    TrainingArguments,
    pipeline,
)

from crewai import Agent, Task, Crew, Process
from langchain.tools import Tool
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

from langchain.callbacks.manager import CallbackManager
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

import warnings
warnings.filterwarnings("ignore")

import colab_env
from openai import OpenAI
client = OpenAI(api_key = os.getenv("OPENAI_API_KEY"))

In [3]:
# BitsAndBytesConfig int-4 config
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Setup LangChain components for RAG (using Mistral via Transformers)
embeddings = HuggingFaceEmbeddings()
vectorstore = FAISS.from_texts(["Example product information", "More example info"], embeddings)
retriever = vectorstore.as_retriever()

model_id = "mistralai/Mistral-7B-Instruct-v0.1" #01 march 2024 AND 10/03/2024

# Mistral pipeline setup with 4-bit quantization
mistral_pipeline = pipeline(
    "text-generation",
    model=model_id,
    quantization_config=quantization_config
)

mistral_llm = HuggingFacePipeline(pipeline=mistral_pipeline)

# Modified RAG logic using LLMChain
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer:"""
prompt_template = PromptTemplate(
    template=template, input_variables=["context", "question"]
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


In [8]:
# Set the environment variable for LiteLLM to use Hugging Face
os.environ["LITELLM_PROVIDER"] = "huggingface"

from crewai.tools import BaseTool  # Import BaseTool from crewai.tools

# Define a CrewAI Tool
from crewai.tools import BaseTool
from crewai.agents.crew_agent_executor import CrewAgentExecutor  # Replace with actual outer class

# Then, you may need to access AgentExecutor like this:
#agent_executor = CrewAgentExecutor.AgentExecutor(llm=model_id, provider="huggingface", tool=self)

from crewai.tools import BaseTool
import litellm
litellm.provider = "huggingface"

class CrewTool(BaseTool):
    name: str = "Product Information Retriever"
    description: str = "Useful for retrieving product information from a database."

    def _run(self, query: str) -> str:
        """
        Retrieves product information based on the query using Mistral pipeline.
        """

        docs = retriever.get_relevant_documents(query)
        context = "\n".join([doc.page_content for doc in docs])

        # Use mistral_pipeline directly (no litellm)
        try:
            # Call mistral_pipeline with query
            response = mistral_pipeline(query, max_new_tokens=256)
            # Extract generated text from the response
            content = response[0]['generated_text']
            return content
        except Exception as e:
            print(f"Error during product information retrieval: {e}")
            return ""  # Return empty string on error



# Define Agents using CrewAI
retrieval_agent = Agent(
    role='Product Information Retriever',
    goal='Retrieve product details from internal databases.',
    backstory='You are an expert in accessing product specifications.',
    tools=[CrewTool()],  # Use the CrewTool class
    llm=model_id,  # Pass the HuggingFacePipeline object directly
    # Explicitly provide model and provider information
    llm_config={"model": model_id, "provider": "huggingface"}
)

from transformers import AutoModelForCausalLM, AutoTokenizer

llama2_model_id = "meta-llama/Llama-2-7b-chat-hf"

# Use from_pretrained to download the model
model = AutoModelForCausalLM.from_pretrained(llama2_model_id,
                                             trust_remote_code=True,
                                             quantization_config=quantization_config)

tokenizer = AutoTokenizer.from_pretrained(llama2_model_id,
                                             trust_remote_code=True)

# Now, update the pipeline to use the downloaded model
llama2_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    quantization_config=quantization_config
)

llama2_llm = HuggingFacePipeline(pipeline=llama2_pipeline)

search_agent = Agent(
    role='Web Searcher',
    goal='Gather relevant information from external web sources.',
    backstory='You are an expert at searching the web for information',
    llm=llama2_llm,
    llm_config={"model": llama2_model_id, "provider": "huggingface"}  # Add explicit configuration
)

sentiment_agent = Agent(
    role='Sentiment Analyzer',
    goal='Determine the customer sentiment.',
    backstory='You are an expert at analyzing customer emotions.',
    llm=llama2_llm,
    llm_config={"model": llama2_model_id, "provider": "huggingface"}  # Add explicit configuration
)

rag_agent = Agent(
    role='Response Generator',
    goal='Formulate a comprehensive and helpful response.',
    backstory='You are an expert at combining retrieved information into coherent answers.',
    llm=llama2_llm,
    llm_config={"model": llama2_model_id, "provider": "huggingface"}  # Add explicit configuration
)


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0



[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



In [9]:
# Define Tasks
retrieval_task = Task(description="Retrieve product information based on the refined query.",
                      agent=retrieval_agent,
                      expected_output="str")  # Change expected_output to "str"
search_task = Task(description="Search the web for any related information.",
                    agent=search_agent,
                    expected_output="str") # Change expected_output to "str"
sentiment_task = Task(description="Determine the sentiment of the customer's query.",
                      agent=sentiment_agent,
                      expected_output="str") # Change expected_output to "str"
rag_task = Task(description="Combine all information to generate a final response.",
                 agent=rag_agent,
                 expected_output="str") # Change expected_output to "str"

# Create Crew
crew = Crew(
    agents=[retrieval_agent, search_agent, sentiment_agent, rag_agent],
    tasks=[retrieval_task, search_task, sentiment_task, rag_task],
    process=Process.sequential
)


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



In [10]:
# Run Crew
user_query = "Where can I find information about my product?"
result = crew.kickoff(inputs={"user_query": user_query})  # Remove model_config
print(result)

ERROR:root:LiteLLM call failed: litellm.BadRequestError: LLM Provider NOT provided. Pass in the LLM provider you are trying to call. You passed model=mistralai/Mistral-7B-Instruct-v0.1
 Pass model as E.g. For 'Huggingface' inference endpoints pass in `completion(model='huggingface/starcoder',..)` Learn more: https://docs.litellm.ai/docs/providers



[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

[91m Error during LLM call: litellm.BadRequestError: LLM Provider NOT provided. Pass in the LLM provider you are trying to call. You passed model=mistralai/Mistral-7B-Instruct-v0.1
 Pass model as E.g. For 'Huggingface' inference endpoints pass in `completion(model='huggingface/starcoder',..)` Learn more: https://docs.litellm.ai/docs/providers[00m
[91m An unknown error occurred. Please check the details below.[00m
[91m Error details: litellm.BadRequestError: LLM Provider NOT provided. Pass in the LLM provider you are trying to call. You passed model=mistralai/Mistral-7B-Instruct-v0.1
 Pa

BadRequestError: litellm.BadRequestError: LLM Provider NOT provided. Pass in the LLM provider you are trying to call. You passed model=mistralai/Mistral-7B-Instruct-v0.1
 Pass model as E.g. For 'Huggingface' inference endpoints pass in `completion(model='huggingface/starcoder',..)` Learn more: https://docs.litellm.ai/docs/providers