In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from dotenv import load_dotenv
from openai import OpenAI
load_dotenv()
PERPLEXITY_API_KEY = os.environ.get('PERPLEXITY_API_KEY')

In [5]:
def print_row_as_dict(index):
    df = pd.read_csv('../final_data_cleaning/final_cleaned_data.csv')
    print(max(df['precinct_safety_rank']))
    row_dict = df.iloc[index].to_dict()
    print(row_dict)
print_row_as_dict(0)

74
{'Unnamed: 0': 0, 'listedAt': '2024-11-25', 'daysOnMarket': 1.0, 'availableFrom': '2024-11-25', 'street': '171 East 96th Street #4A', 'price': 1855.0, 'borough': 'Brooklyn', 'neighborhood': 'brownsville', 'zip': 11212, 'propertyType': 'rental', 'beds': 1, 'baths': 1.0, 'latitude': 40.66218554, 'longitude': -73.92229257, 'amenities': "['fios_available', 'hardwood_floors', 'nyc_evacuation_6']", 'builtIn': 1926.0, 'description': "Large newly renovated 1-bedroom \n\nFeatures:                  \n\n- HARDWOOD FLOORS\n- HIGH CEILINGS\n- LOTS OF NATURAL SUNLIGHT\n- STAINLESS STEEL APPLIANCES\n- TONS OF CLOSET SPACE\n- STEPS FROM TRANSIT\n\nDon't wait on this. Wont last.", 'hasVideo': 1, 'PhotosNum': 4, 'Number of Households': 32091.0, 'Median Household Income': 35840.0, 'Per Capita Income': 21502.0, 'Total Income Distribution': 32091.0, 'Median Gross Rent': 1150.0, 'Median Home Value': 565200.0, 'Occupied Housing Units': 32091.0, 'Vacant Housing Units': 2438.0, 'Owner-Occupied Units (value 

In [33]:
def create_property_context(df_row):
    """Convert DataFrame row to formatted context string"""
    # Helper function to handle null values
    def format_value(value, is_numeric=False):
        if pd.isna(value):
            return "No information available"
        if is_numeric and isinstance(value, (int, float)):
            return f"{value:,.2f}" if isinstance(value, float) else str(int(value))
        return str(value)
    
    return f"""
    Property Details:
    - Address: {format_value(df_row['street'])}
    - Price: ${format_value(df_row['price'], True)}
    - Location: {format_value(df_row['neighborhood'])}, {format_value(df_row['borough'])}
    - Beds/Baths: {format_value(df_row['beds'], True)}/{format_value(df_row['baths'], True)}
    - Built: {format_value(df_row['builtIn'], True)}
    - Description: {format_value(df_row['description'])}
    
    Neighborhood Statistics:
    - Demographics: {format_value(df_row['Black or African American Alone_ratio']*100 if not pd.isna(df_row['Black or African American Alone_ratio']) else None, True)}% Black, 
      {format_value(df_row['Hispanic or Latino_ratio']*100 if not pd.isna(df_row['Hispanic or Latino_ratio']) else None, True)}% Hispanic,
      {format_value(df_row['White Alone_ratio']*100 if not pd.isna(df_row['White Alone_ratio']) else None, True)}% White,
      {format_value(df_row['Asian Alone_ratio']*100 if not pd.isna(df_row['Asian Alone_ratio']) else None, True)}% Asian
    - Income: ${format_value(df_row['Median Household Income'], True)} median household
    - Safety Rank: {format_value(df_row['precinct_safety_rank'])}/74, where higher the rank, safer the area is. Lower the rank, higher the crime rate for the area
    - Transit: {format_value(df_row['nearby_subway_stations'])} nearby subway stations
    """

def generate_llm_report(client, df_row):
    """Generate property report using Perplexity API"""
    context = create_property_context(df_row)
    messages = [{
        "role": "system",
        "content": "You are a real estate analyst. Generate detailed property reports in markdown format."
    }, {
        "role": "user",
        "content": f"""
        Generate a detailed markdown property report based on this data:
        {context}
        The context provided is absolute truth. Do not provide data that is contradictory to it.
        The report should:
        1. Analyze the property's value proposition
        2. Evaluate the neighborhood
        3. Compare rental proposed price to market metrics
        4. Assess transportation and safety
        5. Include demographic insights
        6. Make renting recommendation
        
        Format in markdown with clear sections and bullet points where appropriate.
        """
    }]
    
    try:
        response = client.chat.completions.create(
            model="llama-3.1-sonar-small-128k-online",
            temperature=0.3,
            messages=messages
        )
        print(response.citations)
        citation_str =""
        for i,citation in enumerate(response.citations):
            citation_str = citation_str + f"{i+1}. {citation}\n"
        return response.choices[0].message.content + "\n\nCitations:\n" + citation_str
    except Exception as e:
        print(f"API Error: {str(e)}")
        return None

def generate_single_report(client, df, index):
    """Generate report for a single property at specified index"""
    try:
        row = df.iloc[index]
        property_id = row['street'] if not pd.isna(row['street']) else f"Property_{index}"
        report = generate_llm_report(client, row)
        
        if report:
            reports = {property_id: report}
            # save_reports(reports, f"property_report_{index}.json")
            return reports
        
    except Exception as e:
        print(f"Error processing index {index}: {str(e)}")
        return None

# def save_reports(reports, filename="property_reports.json"):
#     """Save reports to file"""
#     with open(filename, 'w') as f:
#         json.dump(reports, f, indent=4)    

In [34]:
# Initialize Perplexity client
client = OpenAI(
    api_key=PERPLEXITY_API_KEY,
    base_url="https://api.perplexity.ai"
)

# Read your DataFrame
df = pd.read_csv('../final_data_cleaning/final_cleaned_data.csv')

# Specify the index for which you want to generate the report
index = 1000  # Change this to the desired index

# Generate report for specified index
reports = generate_single_report(client, df, index)

# Print the report
if reports:
    property_id = list(reports.keys())[0]
    print(f"Report for {property_id}:")
    print(reports[property_id])


['https://docs.tacticalrmm.com/ee/reporting/functions/reporting_basics/', 'https://www.bkreader.com/neighborhood/ditmas-park/neighborhood-profile-ditmas-park-8087667', 'https://streeteasy.com/building/1234-flatbush-avenue-brooklyn/2a?similar=1&model_name=rental-b&model_version=v1&tracking_id=bf1e1d95-29c6-4701-a671-b857e40a54ff', 'https://www.royfrancis.com/minty/', 'https://www.compass.com/neighborhood-guides/nyc/ditmas-park/']
Report for 1234 Flatbush Avenue #2A:
# Detailed Property Report for 1234 Flatbush Avenue #2A, Ditmas Park, Brooklyn

## Property Details

### Address
1234 Flatbush Avenue #2A

### Price
$3,200.00

### Location
Ditmas Park, Brooklyn

### Beds/Baths
4/1.00

### Built
1940.00

### Description
Welcome to your urban sanctuary This stylish townhouse boasts sleek modern finishes and an abundance of natural light that dances through the generous windows. The heart of this home is the renovated kitchen, a haven for aspiring chefs. Imagine whipping up culinary delights o