In [None]:
pip install bitsandbytes accelerate

In [None]:
import os
from typing import Dict
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
from google.colab import userdata
import gc

import torch

# Report which accelerator, if any, this runtime exposes before loading the model.
if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
    print(f"✅ GPU detected: {gpu_name}")
else:
    print("⚠️ WARNING: GPU is not available. Go to 'Runtime' > 'Change runtime type' and select GPU for better performance.")

# Function to format a property for prediction
def format_property_input(property_dict):
    """Render a property record as the single-line summary used in the prompt.

    Args:
        property_dict: Mapping with the keys ``'Price'``, ``'Bedrooms'``,
            ``'Bathrooms'``, ``'Square Footage'``, ``'Lot Size (Acres)'`` and
            ``'Features'``. ``'Features'`` may be a list/tuple of items or an
            already-formatted string.

    Returns:
        A string such as
        ``"Price: $0.425000M, Bedrooms: 3, ..., Features: Deck, Pool"``.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    price_in_millions = property_dict['Price'] / 1000000
    sq_ft_in_thousands = property_dict['Square Footage'] / 1000

    features = property_dict['Features']
    if isinstance(features, (list, tuple)):
        # Feature lists are often produced with ``str.split(',')``, which
        # leaves surrounding whitespace and empty fragments; normalise them so
        # the prompt does not contain double spaces or dangling commas.
        cleaned = (str(item).strip() for item in features)
        features = ', '.join(item for item in cleaned if item)

    property_text = (
        f"Price: ${price_in_millions:.6f}M, Bedrooms: {property_dict['Bedrooms']}, "
        f"Bathrooms: {property_dict['Bathrooms']}, "
        f"Square Footage: {sq_ft_in_thousands:.4f}K sq ft, "
        f"Lot Size: {property_dict['Lot Size (Acres)']:.3f} acres, "
        f"Features: {features}"
    )

    return property_text

def load_model():
    """Authenticate with Hugging Face and load Llama 3.1 8B Instruct.

    The access token is read from the Colab secret ``llama3.1token``, exported
    as the ``HUGGINGFACE_TOKEN`` environment variable and passed to
    ``huggingface_hub.login``. The tokenizer and the model are then loaded
    from the hub, with the weights requested in float16 and
    ``device_map="auto"``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    checkpoint = "meta-llama/Llama-3.1-8B-Instruct"

    # Authenticate before touching the hub.
    access_token = userdata.get('llama3.1token')
    os.environ['HUGGINGFACE_TOKEN'] = access_token
    login(token=access_token)

    llama_tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    # device_map="auto" leaves device placement to the library.
    llama_model = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    return llama_model, llama_tokenizer

# Load the model and tokenizer once at notebook scope; generate_property_description()
# reads these two globals on every call instead of reloading the weights.
model, tokenizer = load_model()

def generate_property_description(
    property_dict: Dict,
    style: str = "balanced",
    length: str = "medium"
) -> str:
    """Generate a real-estate listing description for one property.

    The request is sent as a single user turn rendered with the tokenizer's
    own chat template, so the prompt markers always match the loaded model
    (hand-written ``[INST]`` tags are Llama-2 syntax and are treated as plain
    text by Llama 3.1). Only the newly generated tokens are decoded, so the
    result never contains the prompt.

    Uses the notebook-level ``model`` and ``tokenizer`` globals.

    Args:
        property_dict: Mapping with ``'Price'``, ``'Bedrooms'``,
            ``'Bathrooms'``, ``'Square Footage'``, ``'Lot Size (Acres)'`` and
            ``'Features'``.
        style: One of ``"professional"``, ``"luxurious"``, ``"friendly"`` or
            ``"balanced"``; unknown values fall back to ``"balanced"``.
        length: One of ``"short"``, ``"medium"`` or ``"long"``; unknown values
            fall back to ``"medium"``.

    Returns:
        The generated description with surrounding whitespace removed.

    Raises:
        KeyError: If a required key is missing from ``property_dict``.
    """
    property_text = format_property_input(property_dict)

    style_guide = {
        "professional": "Write in a professional, matter-of-fact tone highlighting key selling points.",
        "luxurious": "Use elegant, sophisticated language that emphasizes luxury, quality, and exclusivity.",
        "friendly": "Write in a warm, conversational tone that helps buyers envision living in the home.",
        "balanced": "Use a balanced approach with professional language and emotional appeal."
    }

    length_guide = {
        "short": "Write 2-3 concise sentences.",
        "medium": "Write 3-4 detailed sentences.",
        "long": "Write 5-6 comprehensive sentences."
    }

    price = property_dict['Price']
    square_footage = property_dict['Square Footage']
    lot_size = property_dict['Lot Size (Acres)']

    price_tier = "luxury" if price > 750000 else "mid-range" if price > 350000 else "affordable"
    size_descriptor = "spacious" if square_footage > 2000 else "comfortable" if square_footage > 1200 else "cozy"
    lot_descriptor = "expansive" if lot_size > 0.5 else "generous" if lot_size > 0.2 else "well-proportioned"

    instruction = f"""I need you to write a compelling real estate listing description for a property with the following details:

{property_text}

Additional analysis:
- Price tier: {price_tier}
- Size category: {size_descriptor}
- Lot description: {lot_descriptor}

{style_guide.get(style, style_guide["balanced"])}
{length_guide.get(length, length_guide["medium"])}

Focus on the most attractive features and create a unique description that will appeal to potential buyers.
Make sure to highlight spatial qualities, premium features, and neighborhood benefits when applicable.
DO NOT mention the exact price, but do reflect the appropriate quality level for the price point.

Write ONLY the description, with no additional commentary."""

    # Let the tokenizer insert the model-specific role markers and the
    # assistant header that cues the model to start its reply.
    inputs = tokenizer.apply_chat_template(
        [{"role": "user", "content": instruction}],
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    # Default sampling settings, then per-style overrides.
    sampling = {"temperature": 0.7, "top_p": 0.9, "repetition_penalty": 1.1}
    style_overrides = {
        "professional": {"temperature": 0.5, "repetition_penalty": 1.2},
        "luxurious": {"temperature": 0.8, "top_p": 0.95},
        "friendly": {"temperature": 0.75, "repetition_penalty": 1.05},
    }
    sampling.update(style_overrides.get(style, {}))

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            **sampling,
        )

    # generate() returns prompt + completion; keep only the completion rather
    # than searching the decoded text for a marker string.
    prompt_length = inputs["input_ids"].shape[-1]
    description = tokenizer.decode(
        output[0][prompt_length:], skip_special_tokens=True
    ).strip()

    # Free memory
    del inputs, output
    torch.cuda.empty_cache()

    return description

# Disabled example: the code below is wrapped in a triple-quoted string, so it
# is not executed when this cell runs.
'''
# Example usage
if __name__ == "__main__":
    test_property = {
        'Price': 425000,
        'Bedrooms': 3,
        'Bathrooms': 2.5,
        'Square Footage': 1850,
        'Lot Size (Acres)': 0.18,
        'Features': ['Hardwood floors', 'Granite countertops', 'Stainless steel appliances', 'Deck', 'Finished basement']
    }

    print("Professional style:")
    print(generate_property_description(test_property, style="professional"))
    print("\nLuxurious style:")
    print(generate_property_description(test_property, style="luxurious"))
    print("\nFriendly style:")
    print(generate_property_description(test_property, style="friendly"))
'''

In [None]:
import pandas as pd
from google.colab import files

def _property_from_row(row):
    """Build the property dictionary for one CSV row."""
    return {
        'Price': row['Price'],
        'Bedrooms': row['Bedrooms'],
        'Bathrooms': row['Bathrooms'],
        'Square Footage': row['Square Footage'],
        'Lot Size (Acres)': row['Lot Size (Acres)'],
        # Non-string cells are treated as "no features".
        'Features': row['Features'].split(',') if isinstance(row['Features'], str) else []
    }


# 1. Ask for a CSV upload
uploaded = files.upload()

# 2. Read the first uploaded file, whatever it was named
input_df = pd.read_csv(list(uploaded)[0])

# 3. Produce one description per row; a failing row records its error text
#    instead of stopping the run (style/length can be changed here)
descriptions = []
for _, row in input_df.iterrows():
    try:
        listing = generate_property_description(
            _property_from_row(row), style="balanced", length="medium"
        )
    except Exception as e:
        listing = f"Error generating description: {e}"
    descriptions.append(listing)

# 4. Attach the results as a new column
input_df['Listing Description'] = descriptions

# 5. Write the augmented table to disk
output_filename = "generated_listings.csv"
input_df.to_csv(output_filename, index=False)

# 6. Hand the file back to the browser
files.download(output_filename)


In [None]:
def generate_property_description(
    property_dict: Dict,
    style: str = "balanced",
    length: str = "medium"
) -> str:
    """Generate a real-estate listing description for one property.

    The request is sent as a single user turn rendered with the tokenizer's
    own chat template, so the prompt markers always match the loaded model
    (hand-written ``[INST]`` tags are Llama-2 syntax and are treated as plain
    text by Llama 3.1). Only the newly generated tokens are decoded, so the
    result never contains the prompt.

    Uses the notebook-level ``model`` and ``tokenizer`` globals.

    Args:
        property_dict: Mapping with ``'Price'``, ``'Bedrooms'``,
            ``'Bathrooms'``, ``'Square Footage'``, ``'Lot Size (Acres)'`` and
            ``'Features'``.
        style: One of ``"professional"``, ``"luxurious"``, ``"friendly"`` or
            ``"balanced"``; unknown values fall back to ``"balanced"``.
        length: One of ``"short"``, ``"medium"`` or ``"long"``; unknown values
            fall back to ``"medium"``.

    Returns:
        The generated description with surrounding whitespace removed.

    Raises:
        KeyError: If a required key is missing from ``property_dict``.
    """
    property_text = format_property_input(property_dict)

    style_guide = {
        "professional": "Write in a professional, matter-of-fact tone highlighting key selling points.",
        "luxurious": "Use elegant, sophisticated language that emphasizes luxury, quality, and exclusivity.",
        "friendly": "Write in a warm, conversational tone that helps buyers envision living in the home.",
        "balanced": "Use a balanced approach with professional language and emotional appeal."
    }

    length_guide = {
        "short": "Write 2-3 concise sentences.",
        "medium": "Write 3-4 detailed sentences.",
        "long": "Write 5-6 comprehensive sentences."
    }

    price = property_dict['Price']
    square_footage = property_dict['Square Footage']
    lot_size = property_dict['Lot Size (Acres)']

    price_tier = "luxury" if price > 750000 else "mid-range" if price > 350000 else "affordable"
    size_descriptor = "spacious" if square_footage > 2000 else "comfortable" if square_footage > 1200 else "cozy"
    lot_descriptor = "expansive" if lot_size > 0.5 else "generous" if lot_size > 0.2 else "well-proportioned"

    instruction = f"""I need you to write a compelling real estate listing description for a property with the following details:

{property_text}

Additional analysis:
- Price tier: {price_tier}
- Size category: {size_descriptor}
- Lot description: {lot_descriptor}

{style_guide.get(style, style_guide["balanced"])}
{length_guide.get(length, length_guide["medium"])}

Focus on the most attractive features and create a unique description that will appeal to potential buyers.
Make sure to highlight spatial qualities, premium features, and neighborhood benefits when applicable.
DO NOT mention the exact price, but do reflect the appropriate quality level for the price point.

Write ONLY the description, with no additional commentary."""

    # Let the tokenizer insert the model-specific role markers and the
    # assistant header that cues the model to start its reply.
    inputs = tokenizer.apply_chat_template(
        [{"role": "user", "content": instruction}],
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    # Default sampling settings, then per-style overrides.
    sampling = {"temperature": 0.7, "top_p": 0.9, "repetition_penalty": 1.1}
    style_overrides = {
        "professional": {"temperature": 0.5, "repetition_penalty": 1.2},
        "luxurious": {"temperature": 0.8, "top_p": 0.95},
        "friendly": {"temperature": 0.75, "repetition_penalty": 1.05},
    }
    sampling.update(style_overrides.get(style, {}))

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            **sampling,
        )

    # generate() returns prompt + completion; keep only the completion rather
    # than searching the decoded text for a marker string.
    prompt_length = inputs["input_ids"].shape[-1]
    description = tokenizer.decode(
        output[0][prompt_length:], skip_special_tokens=True
    ).strip()

    # Free memory
    del inputs, output
    torch.cuda.empty_cache()

    return description


In [None]:
import pandas as pd
import os
from google.colab import files
from datetime import datetime

# 1. Upload your CSV
uploaded = files.upload()
input_filename = list(uploaded.keys())[0]

# 2. Load input CSV
df = pd.read_csv(input_filename)

# 3. Make sure the 'Listing Description' column exists (it already does when
#    resuming from a previous checkpoint file). Force object dtype: a column
#    that is entirely empty is read back as float64, and writing strings into
#    it is an incompatible-dtype assignment.
if 'Listing Description' not in df.columns:
    df['Listing Description'] = None
df['Listing Description'] = df['Listing Description'].astype(object)

# 4. Set output file (checkpoint version)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_filename = f"generated_listings_{timestamp}.csv"

# 5. Generate and save row-by-row
for idx, row in df.iterrows():
    existing = row['Listing Description']
    # Rows that hold an error message from an earlier run are retried rather
    # than being treated as finished.
    if pd.notnull(existing) and not str(existing).startswith("Error: "):
        print(f"✅ Row {idx} already processed. Skipping.")
        continue

    try:
        property_dict = {
            'Price': row['Price'],
            'Bedrooms': row['Bedrooms'],
            'Bathrooms': row['Bathrooms'],
            'Square Footage': row['Square Footage'],
            'Lot Size (Acres)': row['Lot Size (Acres)'],
            'Features': row['Features'].split(',') if isinstance(row['Features'], str) else []
        }

        description = generate_property_description(property_dict, style="balanced", length="medium")
        df.at[idx, 'Listing Description'] = description
        print(f"✅ Row {idx} done.")

    except Exception as e:
        df.at[idx, 'Listing Description'] = f"Error: {e}"
        print(f"❌ Error on row {idx}: {e}")

    # Save after each row so an interrupted run can be resumed from the file
    df.to_csv(output_filename, index=False)

# Final save: guarantees the file exists even when every row was skipped,
# otherwise the download below would fail.
df.to_csv(output_filename, index=False)

print("🎉 All available rows processed.")
files.download(output_filename)
