In [2]:
import os
import time
from typing import Any, Dict, Optional

import pandas as pd
import requests
from bs4 import BeautifulSoup
from smolagents import CodeAgent, GradioUI, LiteLLMModel, OpenAIServerModel, tool

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
@tool
def scrape_real_estate_agents(state: str, city_name: str, num_pages: Optional[int] = 2) -> Dict[str, Any]:
    """Scrapes realtor.com for real estate agent information in specified city and state

    Each agent card produces one record, so position i of the returned names,
    phones and offices lists always describes the same agent. A phone or office
    that is missing on the card is stored as an empty string.

    Args:
        state: State abbreviation (e.g., 'CA', 'NY')
        city_name: City name with hyphens instead of spaces (e.g., 'buffalo')
        num_pages: Number of pages to scrape, at least 1 (default: 2)

    Returns:
        A dictionary with the index-aligned lists names, phones and offices plus
        total_agents, pages_scraped, city and state. A warning entry is added when
        a later page could not be fetched and only partial results are returned.
        On failure the dictionary contains a single error entry instead.
    """
    # The function never raises: every failure is reported through an "error"
    # entry so the calling agent can read and react to it.
    try:
        # The signature allows None; treat it as "use the default".
        if num_pages is None:
            num_pages = 2
        if num_pages < 1:
            return {"error": f"num_pages must be at least 1, got {num_pages}"}

        # Parallel, index-aligned result lists (one entry per agent in each).
        agent_names = []
        agent_phones = []
        agent_offices = []
        seen_records = set()     # (name, phone, office) tuples already collected
        pages_scraped = 0
        warning = None

        # Browser-like headers sent with every page request.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Connection": "keep-alive"
        }

        for page in range(1, num_pages + 1):
            # The first page has no suffix; later pages use "pg-N".
            if page == 1:
                url = f'https://www.realtor.com/realestateagents/{city_name}_{state}/'
            else:
                url = f'https://www.realtor.com/realestateagents/{city_name}_{state}/pg-{page}'
                time.sleep(2)  # Rate limiting: pause only between consecutive requests

            print(f"Scraping page {page}...")

            # A timeout keeps an unresponsive server from hanging the agent forever.
            response = requests.get(url, headers=headers, timeout=30)
            if response.status_code != 200:
                failure = f"Failed to access page {page}: Status code {response.status_code}"
                if not agent_names:
                    return {"error": failure}
                # Keep what earlier pages already produced instead of discarding it.
                warning = failure
                break

            soup = BeautifulSoup(response.text, features="html.parser")

            for card in soup.find_all('div', class_='agent-list-card'):
                name_elem = card.find('div', class_='agent-name')
                name = name_elem.text.strip() if name_elem is not None else ""
                if not name:
                    # Without a name the phone/office cannot be attributed to anyone.
                    continue

                # Try the known phone markers in order of specificity.
                phone_elem = (
                    card.find('a', {'data-testid': 'agent-phone'})
                    or card.find(class_='btn-contact-me-call')
                    or card.find('a', href=lambda x: x and x.startswith('tel:'))
                )
                phone = ""
                if phone_elem is not None:
                    phone = phone_elem.get('href', '').replace('tel:', '').strip()

                office_elem = card.find('div', class_='agent-group') or \
                    card.find('div', class_='text-semibold')
                office = office_elem.text.strip() if office_elem is not None else ""

                # Skip exact repeats (e.g. the same card listed on two pages).
                record = (name, phone, office)
                if record in seen_records:
                    continue
                seen_records.add(record)

                # Append to all three lists together so they stay aligned.
                agent_names.append(name)
                agent_phones.append(phone)
                agent_offices.append(office)
                print(f"Found agent: {name} | phone: {phone or 'n/a'} | office: {office or 'n/a'}")

            pages_scraped += 1

        if not agent_names:
            return {"error": "No agents found. The website structure might have changed or no results for this location."}

        # Return structured data
        result = {
            "names": agent_names,
            "phones": agent_phones,
            "offices": agent_offices,
            "total_agents": len(agent_names),
            "pages_scraped": pages_scraped,
            "city": city_name,
            "state": state
        }
        if warning:
            result["warning"] = warning
        return result

    except Exception as e:
        return {"error": f"Scraping error: {str(e)}"}

In [4]:
@tool
def export_to_csv(scraped_data: Dict[str, Any], output_filename: Optional[str] = None) -> str:
    """Exports scraped real estate agent data to a CSV file

    Args:
        scraped_data: Dictionary containing the results of the scraping
        output_filename: Optional filename for the CSV file (default: cityname.csv)

    Returns:
        A status message naming the written file and its row count, or a message
        starting with Error when the input carries an error or saving fails.
    """
    # Failures are reported as a returned message rather than raised, so the
    # calling agent can read them.
    try:
        # Pass a scraping failure straight through instead of writing a file.
        if "error" in scraped_data:
            return f"Error: {scraped_data['error']}"

        if not output_filename:
            # Default name is the city slug without hyphens, e.g. "new-york" -> "newyork.csv".
            output_filename = f"{scraped_data['city'].replace('-', '')}.csv"

        # Work on copies: padding must not alter the caller's lists.
        columns = {
            'Names': list(scraped_data['names']),
            'Phone': list(scraped_data['phones']),
            'Office': list(scraped_data['offices']),
        }

        # DataFrame columns must have equal length; pad shorter ones with "".
        max_length = max(len(values) for values in columns.values())
        for values in columns.values():
            values.extend([""] * (max_length - len(values)))

        df = pd.DataFrame(columns)

        df.to_csv(output_filename, index=False, encoding='utf-8')
        return f"Data saved to {output_filename}. Total entries: {len(df)}"

    except Exception as e:
        return f"Error saving CSV: {str(e)}"


In [5]:
# Language model reached through an OpenAI-compatible endpoint on localhost.
# NOTE(review): port 11434 and the "ollama" key suggest a local Ollama server,
# where the key is presumably just a placeholder; confirm before reusing elsewhere.
deepseek_model = OpenAIServerModel(
    api_base="http://localhost:11434/v1",
    api_key="ollama",
    model_id="deepseek-r1:7b",
)

# Code-writing agent that may call the two tools defined above and import
# the listed modules in the code it generates.
agent = CodeAgent(
    model=deepseek_model,
    tools=[scrape_real_estate_agents, export_to_csv],
    additional_authorized_imports=["pandas", "bs4", "time"],
)

In [None]:
# Run the agent once on a worked example. The example code must use the exact
# names of the registered tools (scrape_real_estate_agents / export_to_csv);
# any other name is undefined inside the agent's sandbox.
result = agent.run("""
Thought: Let's scrape realtor data
Code:
```python
# Scrape realtor data
data = scrape_real_estate_agents(state="NY", city_name="buffalo", num_pages=2)

# Save to CSV
if "error" not in data:
    message = export_to_csv(data)
    print(message)
else:
    print(f"Error: {data['error']}")
```
""")

print(result)

# Start the interactive Gradio chat front end for the same agent.
GradioUI(agent).launch()