In [3]:
!pip install crewai crewai[tools] google-generativeai python-dotenv
!pip install langchain_google_genai

Collecting crewai
  Downloading crewai-0.175.0-py3-none-any.whl.metadata (35 kB)
Collecting appdirs>=1.4.4 (from crewai)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting chromadb>=0.5.23 (from crewai)
  Downloading chromadb-1.0.20-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 kB)
Collecting instructor>=1.3.3 (from crewai)
  Downloading instructor-1.11.2-py3-none-any.whl.metadata (11 kB)
Collecting json-repair==0.25.2 (from crewai)
  Downloading json_repair-0.25.2-py3-none-any.whl.metadata (7.9 kB)
Collecting json5>=0.10.0 (from crewai)
  Downloading json5-0.12.1-py3-none-any.whl.metadata (36 kB)
Collecting jsonref>=1.1.0 (from crewai)
  Downloading jsonref-1.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting litellm==1.74.9 (from crewai)
  Downloading litellm-1.74.9-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.6/40.6 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting

# Import Libraries & Access Keys

In [1]:
import os
from google.colab import userdata

from crewai import Agent, Task, Crew, Process, LLM

# --- Setup: Get API Keys ---
# Both keys are read from Colab secrets. A SecretNotFoundError raised by the
# lookup is converted to ValueError so the cell fails with a single error type.
try:
    GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
    BROWSERLESS_API_KEY = userdata.get('BROWSERLESS_API_KEY')
except userdata.SecretNotFoundError as e:
    # Chain the original exception so the traceback keeps the underlying cause.
    raise ValueError(f"Error loading secret: {e}. Please ensure both keys are set in Colab secrets.") from e

# A lookup can succeed yet yield an empty value; name the offending key(s)
# so the user knows exactly which secret to fix.
missing_keys = [
    key_name
    for key_name, key_value in (
        ("GOOGLE_API_KEY", GOOGLE_API_KEY),
        ("BROWSERLESS_API_KEY", BROWSERLESS_API_KEY),
    )
    if not key_value
]
if missing_keys:
    raise ValueError(f"One or both API keys are missing: {', '.join(missing_keys)}.")

# Export the keys as environment variables for the rest of the notebook.
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
os.environ["BROWSERLESS_API_KEY"] = BROWSERLESS_API_KEY

# --- Agent and Crew Initialization ---
# The model id uses the litellm "<provider>/<model>" form for the gemini provider.
gemini_llm = LLM(
    model="gemini/gemini-2.5-flash",
    api_key=os.environ["GOOGLE_API_KEY"]
)

## Agent Building Process

1- Define the Agents:

In [2]:
# --- Define Agents ---
# No scraping tool is attached: PlaywrightScrapeTool was removed because of an
# asyncio error and no alternative has been selected yet.
# NOTE(review): with an empty tool list the researcher presumably cannot fetch
# live pages itself — confirm, and attach a scraping tool once one is chosen.

# Settings that are identical for every agent in this notebook.
_shared_agent_settings = dict(
    llm=gemini_llm,  # both agents use the same Gemini-backed LLM
    allow_delegation=False,
    verbose=True,
)

# Agent 1: locates the job listings on each careers page.
researcher = Agent(
    role='Senior Job Researcher',
    goal="Scour a company's career page to find all available software engineering job listings",
    backstory="""You are an expert market researcher. Your specialty is finding and identifying relevant job opportunities from complex, dynamically-loaded career websites.
    You are a master at sifting through HTML to find the core job data.""",
    tools=[],  # intentionally empty (scrape tool removed)
    **_shared_agent_settings
)

# Agent 2: turns the researcher's raw output into structured job entries.
analyst = Agent(
    role='Job Data Analyst',
    goal='Extract key details from the provided job listings HTML content',
    backstory="""You are a meticulous data analyst. You can read raw HTML and extract specific information with perfect accuracy. You focus on job titles, locations, and application links, formatting them neatly.""",
    **_shared_agent_settings
)

2- Design the Tasks:

In [3]:
# --- Define Tasks ---
# Single source of truth: every company is paired with its careers URL, so the
# names and URLs handed to the researcher can never get out of step.
career_pages = {
    "Google": "https://www.google.com/about/careers/applications/jobs/results/",
    "Meta": "https://www.metacareers.com/jobs/",
    "Amazon": "https://hiring.amazon.com/",  # Using hiring.amazon.com for job listings
    "Microsoft": "https://careers.microsoft.com/",
    "Infosys": "https://digitalcareers.infosys.com/",
    "Accenture": "https://www.accenture.com/us-en/careers/jobsearch",
    "TCS": "https://www.tcs.com/careers",
    "Oracle": "https://careers.oracle.com/en/sites/jobsearch/jobs?location=United%20States&locationId=300000000149325",
    "Fidelity": "https://jobs.fidelity.com/en/jobs/?search=&origin=global&lat=&lng=",
}
# TODO: "AutoZone" was named as a target company but no careers URL was given
# for it; add an entry above once the URL is known.

# Derived views, kept under their original names for any cell that uses them.
company_names = list(career_pages)
target_urls = list(career_pages.values())

# One "- Company: URL" line per target, so each name is explicitly tied to its URL.
career_page_listing = "\n".join(
    f"- {company}: {url}" for company, url in career_pages.items()
)

# Task 1: the researcher collects the raw listings from every careers page.
research_task = Task(
    description=(
        "Visit the career page of each company listed below at its URL "
        f"and scrape all job content.\n{career_page_listing}"
    ),
    expected_output='The full, raw HTML content of the job listings pages from all specified URLs.',
    agent=researcher
)

# Task 2: the analyst extracts title, location and application link per job.
analysis_task = Task(
    description="""From the provided HTML content from all scraped career pages, identify and extract the following for each
    available job listing: Job Title, Location, and a direct URL link to the application page.
    Format the final output as a clean list of jobs, clearly indicating which company each job belongs to.""",
    expected_output='A formatted list of all jobs from all specified companies with their title, location, and application link.',
    agent=analyst
)

3- Assemble and Run the Crew:

In [4]:
# --- Create and Run the Crew ---
# The crew is built with Process.sequential; the research task is listed
# before the analysis task.
crew = Crew(
    process=Process.sequential,
    verbose=True,
    tasks=[research_task, analysis_task],
    agents=[researcher, analyst],
)

print("Starting the Job Aggregation Crew...")
result = crew.kickoff()

# Banner followed by the crew's final output, one item per line.
print(
    "\n\n##################################",
    "## Final Report ##",
    result,
    sep="\n",
)

Output hidden; open in https://colab.research.google.com to view.