<a href="https://colab.research.google.com/github/graphlit/graphlit-samples/blob/main/python/Notebook%20Examples/Graphlit_2024_12_07_CrewAI_Web_Marketing_Analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Description**

This example shows how to integrate with CrewAI and the Graphlit Agent Tools to analyze a corporate web marketing strategy.

**Requirements**

Prior to running this notebook, you will need to [sign up](https://docs.graphlit.dev/getting-started/signup) for Graphlit, and [create a project](https://docs.graphlit.dev/getting-started/create-project).

You will need the Graphlit organization ID, preview environment ID and JWT secret from your created project.

Assign these properties as Colab secrets: GRAPHLIT_ORGANIZATION_ID, GRAPHLIT_ENVIRONMENT_ID and GRAPHLIT_JWT_SECRET.

For CrewAI, assign this property as a Colab secret: OPENAI_API_KEY.

---

Install CrewAI

In [19]:
!pip install --upgrade crewai



Install LangChain OpenAI support

In [20]:
!pip install --upgrade langchain-openai



Install Graphlit Python agent tools SDK

In [21]:
!pip install --upgrade graphlit-tools[crewai]



In [22]:
!pip install --upgrade isodate



Initialize OpenAI for CrewAI

In [23]:
import os
from google.colab import userdata

# Copy the OPENAI_API_KEY Colab secret into the environment variable of the same name.
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

Initialize Graphlit

In [24]:
import os
from google.colab import userdata
from graphlit import Graphlit
from graphlit_api import input_types, enums, exceptions

# Copy each Graphlit credential from the Colab secrets into the environment
# variable of the same name.
for secret_name in (
    'GRAPHLIT_ORGANIZATION_ID',
    'GRAPHLIT_ENVIRONMENT_ID',
    'GRAPHLIT_JWT_SECRET',
):
    os.environ[secret_name] = userdata.get(secret_name)

# Constructed without arguments; presumably the client reads the credentials
# from the environment variables set above -- TODO confirm against the SDK.
graphlit = Graphlit()

Define Graphlit helper functions

In [25]:
from datetime import datetime, timedelta
import isodate

async def lookup_usage(correlation_id: str):
    """Look up the usage records associated with ``correlation_id``.

    Returns the ``lookup_usage`` field of the client response. Returns
    ``None`` when the Graphlit client is not available, or when the request
    raises ``GraphQLClientError`` (in which case the error text is printed).
    """
    client = graphlit.client
    if client is None:
        return None

    try:
        result = await client.lookup_usage(correlation_id)
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None

    return result.lookup_usage

async def lookup_credits(correlation_id: str):
    """Look up the credits summary associated with ``correlation_id``.

    Returns the ``lookup_credits`` field of the client response. Returns
    ``None`` when the Graphlit client is not available, or when the request
    raises ``GraphQLClientError`` (in which case the error text is printed).
    """
    client = graphlit.client
    if client is None:
        return None

    try:
        result = await client.lookup_credits(correlation_id)
    except exceptions.GraphQLClientError as error:
        print(str(error))
        return None

    return result.lookup_credits

def dump_usage_record(record):
    """Print a human-readable, multi-line summary of one usage record.

    The first line shows the record's date and name. It is followed by
    "- ..." detail lines for whichever optional fields are set (workflow,
    entity, model service, processor, URI), then by details specific to the
    record name (prompt completion, text embedding, document preparation,
    data extraction, GraphQL, uploads), and finally by a blank line.

    ``record.duration`` is parsed with ``isodate.parse_duration`` before
    being printed.
    """
    print(f"{record.date}: {record.name}")

    elapsed = isodate.parse_duration(record.duration)

    # Same line either way; only the subject changes when a workflow is set.
    subject = f"Workflow [{record.workflow}]" if record.workflow else "Operation"
    print(f"- {subject} took {elapsed}, used credits [{record.credits:.8f}]")

    if record.entity_id:
        if not record.entity_type:
            print(f"- Entity [{record.entity_id}]")
        elif record.entity_type == enums.EntityTypes.CONTENT and record.content_type:
            print(f"- {record.entity_type} [{record.entity_id}]: Content type [{record.content_type}], file type [{record.file_type}]")
        else:
            print(f"- {record.entity_type} [{record.entity_id}]")

    if record.model_service:
        print(f"- Model service [{record.model_service}], model name [{record.model_name}]")

    if record.processor_name:
        # Assemble the comma-separated processor line piece by piece.
        details = [f"Processor name [{record.processor_name}]"]

        if record.model_name:
            details.append(f"model name [{record.model_name}]")

        if record.processor_name in ("Deepgram Audio Transcription", "Assembly.AI Audio Transcription"):
            # For these transcription processors the count is passed to
            # timedelta as milliseconds and shown as a length.
            length = timedelta(milliseconds=record.count or 0)
            details.append(f"length [{length}]")
        elif record.count:
            details.append(f"units [{record.count}]")

        print("- " + ", ".join(details))

    if record.uri:
        print(f"- URI [{record.uri}]")

    kind = record.name

    if kind == "Prompt completion":
        if record.prompt:
            print(f"- Prompt [{record.prompt_tokens} tokens (includes RAG context tokens)]:")
            print(record.prompt)

        if record.completion:
            print(f"- Completion [{record.completion_tokens} tokens (includes JSON guardrails tokens)], throughput: {record.throughput:.3f} tokens/sec:")
            print(record.completion)

    elif kind == "Text embedding":
        if record.prompt_tokens is not None:
            print(f"- Text embedding [{record.prompt_tokens} tokens], throughput: {record.throughput:.3f} tokens/sec")

    elif kind in ("Document preparation", "Data extraction"):
        # Both record kinds share one line format, labelled with the record name.
        if record.prompt_tokens is not None and record.completion_tokens is not None:
            print(f"- {kind} [{record.prompt_tokens} input tokens, {record.completion_tokens} output tokens], throughput: {record.throughput:.3f} tokens/sec")

    elif kind == "GraphQL":
        for label, field in (("Request", "request"), ("Variables", "variables"), ("Response", "response")):
            payload = getattr(record, field)
            if payload:
                print(f"- {label}:")
                print(payload)

    # Checked independently of the chain above, as a prefix match on the name.
    if kind.startswith("Upload"):
        print(f"- File upload [{record.count} bytes], throughput: {record.throughput:.3f} bytes/sec")

    print()

async def delete_all_contents():
    """Delete all contents through the Graphlit client, if one is available.

    The delete call is made with ``is_synchronous=True`` and its response is
    discarded. Does nothing when ``graphlit.client`` is ``None``.
    """
    client = graphlit.client
    if client is not None:
        await client.delete_all_contents(is_synchronous=True)


Execute Graphlit example

In [26]:
import nest_asyncio

# Apply the nest_asyncio patch before any coroutine is awaited in this
# notebook. NOTE(review): presumably required because the notebook already
# runs an event loop -- confirm.
nest_asyncio.apply()

# Remove any existing contents so the example starts from a clean project;
# only needed for this notebook example.
await delete_all_contents()

print('Deleted all contents.')


Deleted all contents.


Define and kickoff CrewAI crew

In [27]:
import os
import dotenv
from crewai import Agent, Crew, Process, Task
from langchain_openai import ChatOpenAI
from graphlit_tools import WebSearchTool, WebMapTool, DescribeWebPageTool, CrewAIConverter
from datetime import datetime

# Ask which automaker to analyze; the answer is passed to the crew as the
# "company" input when it is kicked off.
company_name = input('Enter the automaker company name to be analyzed: ')

# Chat model shared by every agent.
llm = ChatOpenAI(model="gpt-4o")

# NOTE: create a unique cost correlation ID (timestamp of this run), so the
# credits and usage of this run can be looked up afterwards
correlation_id = datetime.now().isoformat()

# NOTE: need to convert Graphlit tools to CrewAI tool schema; every tool is
# built with the same client and correlation ID
web_search_tool, web_map_tool, describe_web_page_tool = (
    CrewAIConverter.from_tool(tool_class(graphlit, correlation_id=correlation_id))
    for tool_class in (WebSearchTool, WebMapTool, DescribeWebPageTool)
)

# Settings that are identical for every agent in this crew.
_agent_defaults = dict(verbose=True, llm=llm, allow_delegation=False)

# Backstory shared by the two agents that do the competitive analysis.
_analyst_backstory = "You work for a major automotive manufacturer, and are doing competitive analysis on other automakers websites."

# Finds the company's web site using the web search tool.
web_search_agent = Agent(
    role="Web Researcher",
    goal="Find the {company} website.",
    backstory="",
    tools=[web_search_tool],
    **_agent_defaults,
)

# Enumerates the pages of a web site using the web map tool.
web_map_agent = Agent(
    role="Web Mapping Agent",
    goal="Enumerate all the web page URLs for the provided web site.",
    backstory="",
    tools=[web_map_tool],
    **_agent_defaults,
)

# Describes individual web pages using the describe-web-page tool.
web_page_analyst_agent = Agent(
    role="Web Analyst Agent",
    goal="Visually analyze the {company} web pages, and describe the branding, overall layout and marketing approach. Also extract the company name and any products you find.",
    backstory=_analyst_backstory,
    tools=[describe_web_page_tool],
    **_agent_defaults,
)

# Writes the final report; it is given no tools.
editor_agent = Agent(
    role="Marketing Editor Agent",
    goal="Write marketing strategy reports given provided web page analyses.",
    backstory=_analyst_backstory,
    **_agent_defaults,
)

# Step 1: locate the company's home page with the web search agent.
#
# The description deliberately ends with a line break, like the other task
# descriptions in this notebook. With planning enabled on the crew, the
# generated plan is appended directly to the task description; without the
# trailing line break the first plan step was glued onto the example URL
# (the recorded run shows "...example.com/index.html1. The Web Researcher").
search_web_task = Task(
    description=(
        """Given company named {company}, search the web to find their home page.
        Return the root path for URLs, not individual web pages.
        For example return https://www.example.com, not https://www.example.com/index.html
        """
    ),
    expected_output="A single URL for the {company} home page",
    agent=web_search_agent,
)

# Step 2: enumerate pages under the home page found by search_web_task and
# narrow them down to pages about automobile models (at most 5 URLs expected).
fetch_web_pages_task = Task(
    description=(
        """Fetch the URLs at or beneath the given home page for further analysis.
        Filter the resulting URLs to locate pages which appear to be about automobile models and specifications.
        Select one most relevant page per automobile model.
        """
    ),
    expected_output="A list of web page URLs, maximum 5",
    agent=web_map_agent,
    context=[search_web_task],
)

# Step 3: analyze each selected page from a marketing perspective. The prompt
# tells the agent to omit the tool's 'prompt' argument so that the tool falls
# back to its default analysis prompt.
analyze_web_page_task = Task(
    description=(
        """Analyze the provided web pages from the {company} website from a marketing perspective.

        Execute task once for each provided web page.

        Keep as much of the detail from each web page in your final analysis.

        Do *not* pass a prompt to the provided tool, just skip the 'prompt' argument so the tool uses its default analysis prompt.
        """
    ),
    expected_output="A thorough analysis of the web pages from a marketing perspective.",
    agent=web_page_analyst_agent,
    context=[fetch_web_pages_task],
)

# Step 4: turn the per-page analyses into the final marketing strategy report.
writer_task = Task(
    description=(
        """Write a thorough analysis of the {company} web marketing strategy given the detailed analyses from the provided {company} web pages.

        Keep useful details from the web page analysis in your final summary.
        """
    ),
    expected_output="A thorough and well-structured summary of the {company} web marketing strategy.",
    agent=editor_agent,
    context=[analyze_web_page_task],
)

crew = Crew(
    agents=[web_search_agent, web_map_agent, web_page_analyst_agent, editor_agent],
    tasks=[search_web_task, fetch_web_pages_task, analyze_web_page_task, writer_task],
    process=Process.sequential,
    planning=True,
    verbose=True,
)

# Kickoff the process and print the result
result = await crew.kickoff_async(inputs={"company": company_name})
print("Website Summary Process Completed:")
print(result)

Enter the automaker company name to be analyzed: Land Rover




[1m[93m 
[2024-12-07 22:15:17][INFO]: Planning the crew execution[00m
[1m[95m# Agent:[00m [1m[92mWeb Researcher[00m
[95m## Task:[00m [92mGiven company named Land Rover, search the web to find their home page.
        Return the root path for URLs, not individual web pages.
        For example return https://www.example.com, not https://www.example.com/index.html1. The Web Researcher will initiate the task by accessing the Graphlit web search tool. 2. The Web Researcher will input the search phrase 'Land Rover official site' into the 'search' argument of the tool. 3. The researcher will set the 'search_limit' argument to a practical number (e.g., 5) to limit the number of web pages returned. 4. The tool will execute a web search and return a list of relevant websites. 5. The Web Researcher will scan through the results and identify the root URL for the Land Rover home page, ensuring it is formatted as 'https://www.landrover.com' (or the equivalent valid URL). 6. The Web Rese

Calculate Graphlit credits & usage

In [28]:
import time
from IPython.display import display, HTML, JSON, Markdown

time.sleep(10)

credits = await lookup_credits(correlation_id)

if credits is not None:
    display(Markdown(f"### Credits used: {credits.credits:.6f}"))
    print(f"- storage [{credits.storage_ratio:.2f}%], compute [{credits.compute_ratio:.2f}%]")
    print(f"- embedding [{credits.embedding_ratio:.2f}%], completion [{credits.completion_ratio:.2f}%]")
    print(f"- ingestion [{credits.ingestion_ratio:.2f}%], indexing [{credits.indexing_ratio:.2f}%], preparation [{credits.preparation_ratio:.2f}%], extraction [{credits.extraction_ratio:.2f}%], enrichment [{credits.enrichment_ratio:.2f}%], publishing [{credits.publishing_ratio:.2f}%]")
    print(f"- search [{credits.search_ratio:.2f}%], conversation [{credits.conversation_ratio:.2f}%]")
    print()

usage = await lookup_usage(correlation_id)

if usage is not None:
    display(Markdown(f"### Usage records:"))

    for record in usage:
        dump_usage_record(record)
    print()


### Credits used: 2.267108

- storage [0.11%], compute [2.74%]
- embedding [2.30%], completion [63.02%]
- ingestion [24.76%], indexing [0.00%], preparation [7.06%], extraction [0.00%], enrichment [0.00%], publishing [0.00%]
- search [0.00%], conversation [0.00%]



### Usage records:

2024-12-07T22:17:46.821Z: GraphQL
- Operation took 0:00:12.207807, used credits [0.00000000]
- Request:
mutation DescribeImage($prompt: String!, $uri: URL!, $specification: EntityReferenceInput, $correlationId: String) { describeImage(prompt: $prompt, uri: $uri, specification: $specification, correlationId: $correlationId) { role author message citations { content { id name state originalDate identifier uri type fileType mimeType format formatName fileExtension fileName fileSize masterUri imageUri textUri audioUri transcriptUri summary customSummary keywords bullets headlines posts chapters questions video { width height duration make model software title description keywords author } audio { keywords author series episode episodeType season publisher copyright genre title description bitrate channels sampleRate bitsPerSample duration } image { width height resolutionX resolutionY bitsPerComponent components projectionType orientation description make model software lens focalLength ex