# AI Property Rental Assistant for Durham
This notebook creates an intelligent property rental assistant that:
1. Scrapes rental property listings from OnTheMarket.com
2. Uses OpenAI's GPT-4o-mini to analyze and recommend properties based on user preferences
3. Provides formatted recommendations in markdown for easy reading

Purpose: Help students and professionals find suitable rental properties in Durham, UK

In [None]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [None]:
# =====================================
# STEP 1: ENVIRONMENT SETUP & API KEYS
# =====================================

# Load environment variables from the .env file.
# The file is expected to contain a line such as: OPENAI_API_KEY=your_key_here
# override=True lets values from .env replace variables already set in the environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Sanity-check the key and print a hint for the most likely mistake.
# The whitespace check runs before the prefix check: a key with a leading space or
# tab would otherwise be misreported as "doesn't start sk-proj-".
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

# Initialize the OpenAI client. It is constructed without arguments, so it takes its
# configuration (including OPENAI_API_KEY) from the environment loaded above.
openai = OpenAI()

In [None]:
# =====================================
# STEP 2: WEB SCRAPING SETUP
# =====================================

# Browser-like HTTP headers sent with every scrape request.
# Many websites block requests that arrive without a realistic User-Agent.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    Scrape a webpage and expose its title and visible body text.

    Constructing an instance performs the HTTP request immediately, using the
    module-level ``headers``. On success the page is parsed with BeautifulSoup and
    script, style, img and input elements are removed from the body before its
    text is extracted. If the request fails, the error is printed and placeholder
    strings are stored instead of raising.

    Attributes:
        url (str): The URL that was requested
        title (str): The page title, or a placeholder when it is missing or on error
        text (str): Text content of the page body, or a placeholder when there is
            no body or the request failed
    """

    def __init__(self, url):
        """
        Fetch and parse ``url``, populating ``title`` and ``text``.

        Args:
            url (str): The website URL to scrape
        """
        self.url = url

        # Only the network calls can raise RequestException, so only they are guarded
        try:
            # The timeout keeps an unresponsive server from hanging the notebook
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()  # Raises an HTTPError for bad responses
        except requests.RequestException as e:
            # Handle network errors gracefully: report and fall back to placeholders
            print(f"Error fetching website: {e}")
            self.title = "Error loading page"
            self.text = "Could not load page content"
            return

        # Parse HTML content
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract page title. `.string` is None when <title> is empty or contains
        # nested tags, so fall back to the placeholder in that case as well.
        page_title = soup.title.string if soup.title else None
        self.title = page_title if page_title else "No title found"

        if not soup.body:
            self.text = "No body content found"
            return

        # Remove scripts, styles, images, and input elements before reading the text
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()

        # Extract clean text with one fragment per line
        self.text = soup.body.get_text(separator="\n", strip=True)

In [None]:
# =====================================
# STEP 3: AI ASSISTANT FUNCTIONS
# =====================================

def house_renting(system_prompt, user_prompt):
    """
    Ask the chat model for rental recommendations and return its reply.

    Both prompts are sent as a single system + user exchange through the
    module-level ``openai`` client, using the "gpt-4o-mini" model.

    Args:
        system_prompt (str): Instructions describing how the assistant should behave
        user_prompt (str): The user's request, including the scraped property data

    Returns:
        str: Content of the first choice in the model's response
    """
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )

    # Only the first choice is used
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
# =====================================
# STEP 4: AI SYSTEM CONFIGURATION
# =====================================

# System prompt: fixes the assistant's role, the rules it follows, and the
# markdown output format. It is sent unchanged as the system message.
system_prompt = (
    "\n"
    "You are a helpful real estate assistant specializing in UK property rentals. Your job is to guide users in finding houses to rent, especially in Durham. Follow these rules:\n"
    "\n"
    "1. Always ask clarifying questions if user input is vague. Determine location, budget, number of bedrooms, and tenant type (e.g. student, family, professional).\n"
    "2. Use structured data provided from the website (like property listings) to identify relevant options.\n"
    "3. If listings are provided, filter and rank them based on the user's preferences.\n"
    "4. Recommend up to 5 top properties with rent price, bedroom count, key features, and location.\n"
    "5. Always respond in markdown with clean formatting using headers, bold text, and bullet points.\n"
    '6. If no listings match well, provide tips (e.g. "try adjusting your budget or search radius").\n'
    "7. Stay concise, helpful, and adapt to whether the user is a student, family, couple, or solo tenant.\n"
)

def user_prompt_for_renting(website, user_needs, max_chars=4000):
    """
    Build the user prompt that combines the user's requirements with scraped page text.

    Only the first ``max_chars`` characters of the page text are included, to keep
    the prompt size bounded.

    Args:
        website (Website): The scraped website object; its ``title`` and ``text``
            attributes are read
        user_needs (str): Description of what the user is looking for
        max_chars (int): Maximum number of characters of page text to include
            (defaults to 4000)

    Returns:
        str: Formatted prompt ready to send to the AI
    """
    # Slice outside the f-string: anything written inside the literal, including a
    # "# ..." note, would be sent to the model as part of the prompt.
    page_excerpt = website.text[:max_chars]

    user_prompt = f"""
I want to rent a house and here's what I'm looking for:
{user_needs}

Here are the property listings I found on the website titled: "{website.title}".
Please analyze them and recommend the best 3–5 options that match my needs. If none are suitable, tell me why and offer suggestions.

The page content is below:
{page_excerpt}
"""
    return user_prompt

In [None]:
# =====================================
# STEP 5: MAIN EXECUTION
# =====================================

if __name__ == "__main__":
    print("Starting AI Property Rental Assistant...")
    print("=" * 50)

    # Configure the property search
    website_url = "https://www.onthemarket.com/to-rent/property/durham/"
    print(f"🔍 Scraping properties from: {website_url}")

    # Scrape the website
    website = Website(website_url)

    # Website does not raise on a failed request; it prints the error and stores this
    # placeholder title instead, so compare against it to detect a failed scrape.
    scrape_failed = website.title == "Error loading page"

    # Display scraping results
    print(f"Website Title: {website.title}")
    print(f"Content Length: {len(website.text)} characters")

    if scrape_failed:
        # Without real listings the model would only see the placeholder text,
        # so skip the API call instead of reporting success.
        print("Could not scrape property listings - skipping AI recommendations")
    else:
        print("Successfully scraped property listings")
        print()

        # Define user requirements
        # TODO: Make this interactive by adding input() statements
        user_needs = "I'm a student looking for a 2-bedroom house in Durham under £2,000/month"
        print(f"User Requirements: {user_needs}")
        print()

        # Generate AI prompt
        user_prompt = user_prompt_for_renting(website, user_needs)

        # Get AI recommendations
        print("Generating AI recommendations...")
        output = house_renting(system_prompt, user_prompt)

        # Display results as rendered markdown
        display(Markdown(output))
