This is a starter notebook for the project, you'll have to import the libraries you'll need, you can find a list of the ones available in this workspace in the requirements.txt file in this workspace. 

In [2]:
from dotenv import load_dotenv
load_dotenv()
from langchain.llms import OpenAI
import os




In [27]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import LLMChain

# Step 1: Define the prompt template
real_estate_prompt = PromptTemplate(
    input_variables=["region", "num_listings", "output_format"],
    template="""
Generate a diverse and realistic set of {num_listings} real estate listings in {region}.
Each listing should contain the following fields:

- Property ID  
- Title  
- Address (Street, City, Province)  
- Price (CAD)  
- Bedrooms  
- Bathrooms  
- Area (sqft)  
- Type (e.g., Detached House, Condo, Apartment, Townhouse, Commercial Space, Cottage, Bungalow)  
- Status (For Sale, Sold, Pending, For Rent)  
- Description  
- Image URL  
- Agent Name  
- Agent Phone  
- Agent Email  

Ensure:
- Listings are realistic for the Canadian real estate market
- Data uses Canadian locations and formatting
- Output should be structured as {output_format}

Begin generating the listings now.
"""
)
# model_name = "gpt-3.5-turbo"
# Step 2: Initialize the LLM

llm = OpenAI(
    model_name="gpt-4o",
    temperature=0.7,
    openai_api_base=os.environ["OPENAI_API_BASE"],
    openai_api_key=os.environ["OPENAI_API_KEY"]
)

# Step 3: Create the LLM chain
real_estate_chain = LLMChain(
    llm=llm,
    prompt=real_estate_prompt
)

# Step 4: Run the chain with input values
response = real_estate_chain.run({
    "region": "Canada",
    "num_listings": "10",
    "output_format": "a CSV-formatted table"
})

# Output the generated listings
print(response)
# save the response to a text file
with open("real_estate_listings.txt", "w") as f:
    f.write(response)

with open("real_estate_listings.csv", "w") as f:
    f.write(response)





```csv
Property ID,Title,Address,Price (CAD),Bedrooms,Bathrooms,Area (sqft),Type,Status,Description,Image URL,Agent Name,Agent Phone,Agent Email
101,"Charming Family Home","123 Maple St, Toronto, ON",850000,4,3,2300,"Detached House","For Sale","A spacious family home located in a quiet neighborhood, featuring a large backyard and modern finishes.","https://example.com/image1.jpg","Emily Clark","416-555-1234","emily.clark@example.com"
102,"Luxury Downtown Condo","456 Bay St, Vancouver, BC",1150000,2,2,1100,"Condo","For Sale","A luxurious condo in the heart of downtown Vancouver with stunning city views and top-notch amenities.","https://example.com/image2.jpg","James Harris","604-555-5678","james.harris@example.com"
103,"Cozy Suburban Bungalow","789 Pine Ave, Calgary, AB",450000,3,1,1400,"Bungalow","Pending","This cozy bungalow is perfect for small families, located in a friendly suburban community.","https://example.com/image3.jpg","Sarah Thompson","403-555-9876","sarah.thompson@exampl

In [2]:
# load_the csv file and print the first 5 lines skipping the first line.
# remve nan values and print the last line
import pandas as pd
df = pd.read_csv("real_estate_listings.csv", skiprows=1)
df = df.dropna()


In [3]:
# print without truncation
pd.set_option('display.max_colwidth', None) 
print(df['Description'].head(1))

0    A spacious family home located in a quiet neighborhood, featuring a large backyard and modern finishes.
Name: Description, dtype: object


In [4]:
# Create structured prompts
def create_embedding_prompt(row):
    return (
        f"Property ID: {row['Property ID']}\n"
        f"Title: {row['Title']}\n"
        f"Address: {row['Address']}\n"
        f"Price: ${row['Price (CAD)']:,} CAD\n"
        f"Bedrooms: {row['Bedrooms']}\n"
        f"Bathrooms: {row['Bathrooms']}\n"
        f"Area: {row['Area (sqft)']} sqft\n"
        f"Type: {row['Type']}\n"
        f"Status: {row['Status']}\n"
        f"Agent: {row['Agent Name']} | {row['Agent Phone']} | {row['Agent Email']}\n"
        f"Image URL: {row['Image URL']}\n"
        f"Description: {row['Description']}"
    )

df["Embedding Prompt"] = df.apply(create_embedding_prompt, axis=1)
df["Embedding Prompt"] 

0               Property ID: 101\nTitle: Charming Family Home\nAddress: 123 Maple St, Toronto, ON\nPrice: $850,000.0 CAD\nBedrooms: 4.0\nBathrooms: 3.0\nArea: 2300.0 sqft\nType: Detached House\nStatus: For Sale\nAgent: Emily Clark | 416-555-1234 | emily.clark@example.com\nImage URL: https://example.com/image1.jpg\nDescription: A spacious family home located in a quiet neighborhood, featuring a large backyard and modern finishes.
1                    Property ID: 102\nTitle: Luxury Downtown Condo\nAddress: 456 Bay St, Vancouver, BC\nPrice: $1,150,000.0 CAD\nBedrooms: 2.0\nBathrooms: 2.0\nArea: 1100.0 sqft\nType: Condo\nStatus: For Sale\nAgent: James Harris | 604-555-5678 | james.harris@example.com\nImage URL: https://example.com/image2.jpg\nDescription: A luxurious condo in the heart of downtown Vancouver with stunning city views and top-notch amenities.
2                          Property ID: 103\nTitle: Cozy Suburban Bungalow\nAddress: 789 Pine Ave, Calgary, AB\nPrice: $450,000.0 CAD\

In [None]:
# Define prompt template
template = PromptTemplate(
    input_variables=["structured_listing"],
    template="""
You are a real estate copywriter. Given the structured property information below, write a detailed and compelling real estate listing description in fluent English. Avoid listing the fields directly—convert them into natural narrative language.

Structured Listing:
{structured_listing}

Your task: Write a paragraph that combines all the details into a smooth and attractive property description suitable for websites or brochures.
"""
)

# Initialize LangChain with OpenAI (set your API key)
llm = OpenAI(temperature=0.7)
chain = LLMChain(prompt=template, llm=llm)

# Generate descriptions
df["Natural Description"] = df["Embedding Prompt"].apply(lambda row: chain.run(structured_listing=row))

#Drop Embedding Prompt

# Save the enriched CSV
df.to_csv("real_estate_with_descriptions.csv", index=False)