In [1]:
from geopy.geocoders import Nominatim
import time
import random
import math
import requests
import json
import pandas as pd
import os
from collections import defaultdict
import google.generativeai as genai
from PIL import Image
import re
import string

  from .autonotebook import tqdm as notebook_tqdm


# Load Data

source: https://github.com/emanhamed/Houses-dataset

In [2]:
DATAPATH = "hdataset"

with open("secret.json", 'r') as f:
    data = json.load(f)

metadata = pd.read_csv(f"{DATAPATH}/HousesInfo.txt", sep=" ")
print(metadata.dtypes)
print(len(metadata))
metadata.head()

nob        int64
noba     float64
area       int64
zip        int64
price      int64
dtype: object
535


Unnamed: 0,nob,noba,area,zip,price
0,4,4.0,4053,85255,869500
1,4,3.0,3343,36372,865200
2,3,4.0,3923,85266,889000
3,5,5.0,4022,85262,910000
4,3,4.0,4116,85266,971226


In [3]:
def group_images_by_id(folder_path):
    grouped_images = defaultdict(list)

    for filename in os.listdir(folder_path):
        if not filename.endswith(".txt"):
            file_id = filename.split("_")[0]
            grouped_images[int(file_id)].append(f"{folder_path}/{filename}")
    
    return grouped_images

imagepaths = group_images_by_id(DATAPATH)
len(imagepaths)

535

# Generate Random Addresses

In [4]:
locgcode = {
    "chennai": "13.0836939,80.270186", 
    "kerala": "10.3528744,76.5120396", 
    "madurai" : "9.9261153,78.1140983", 
    "hyderabad": "24.8296995,67.1279058", 
    "kanchipuram" : "12.9647163,79.9839686", 
    "delhi": "28.6273928,77.1716954",
    "bangalore": "12.98815675,77.62260003796"
}

locrange = {
    "chennai" :(1,13), 
    "kerala" :(1,29), 
    "madurai": (2, 9), 
    "hyderabad": (1, 20), 
    "kanchipuram": (1,8), 
    "delhi": (3,50),
    "bangalore": (1,10),
}


def km_to_lat_lon_offset(km, latitude):
    lat_offset = km / 111.0

    lon_offset = km / (111.0 * math.cos(math.radians(latitude)))

    return lat_offset, lon_offset


def get_nearby_locations(lat, lon, radius_range=(2, 10), n=10):
    geolocator = Nominatim(user_agent="geo_locator")

    nearby_locations = []

    for i in range(n):
        distance_km = random.uniform(*radius_range)

        lat_offset_max, lon_offset_max = km_to_lat_lon_offset(distance_km, lat)

        lat_offset = random.uniform(-lat_offset_max, lat_offset_max)
        lon_offset = random.uniform(-lon_offset_max, lon_offset_max)

        offset_point = (lat + lat_offset, lon + lon_offset)
        try:
            location = geolocator.reverse(offset_point)
            if location and location.address:
                nearby_locations.append(
                    {
                        "address": location.address,
                        "latitude": offset_point[0],
                        "longitude": offset_point[1],
                    }
                )
            time.sleep(1)
        except Exception as e:
            print(f"Error with location {offset_point}: {e}")

    return nearby_locations

fpath = "random_address_dump.json"
if not os.path.exists(fpath):
    print("Generating Addresses...")
    locale_to_address = {}
    for k, v in locgcode.items():
        lat, lon = v.split(',')
        locale_to_address[k] = get_nearby_locations(float(lat), float(lon), locrange[k], n=50)
        
    with open(fpath, "w") as f:
        json.dump(locale_to_address, f, indent=4)
    print("Completed!")
else: 
    with open(fpath, "r") as f:
        locale_to_address = json.load(f)
    print("Loaded saved dump!")

Loaded saved dump!


# Get Property Details with LVLM

In [15]:
genai.configure(api_key=data["GEMINI_API_KEY"])

generation_config = {
  "temperature": 1.42,
  "top_p": 0.95,
  "top_k": 40,
  "max_output_tokens": 8192,
  "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
  model_name="gemini-1.5-flash",
  generation_config=generation_config,
)

proptypes = {
  "apartment",
  "house",
  "room",
  "condo",
  "studio",
  "townhouse",
  "villa",
};

all_amenities = {
  "gym",
  "wifi",
  "swimming pool",
  "parking",
  "ac",
  "pet friendly",
  "security system",
  "laundry service",
  "children's play area",
  "room service",
  "bbq area",
  "rooftop terrace",
  "elevator",
  "balcony",
  "library",
  "game room",
  "tennis court",
  "basketball court",
  "sauna",
  "hot tub",
  "breakfast included",
  "daily housekeeping",
  "in-room safe",
  "mini-bar",
  "24-hour reception",
  "water purifier",
  "grocery delivery",
  # "kitchen",
}

proplist =''.join(f'- {t}\n' for t in proptypes)
aminlist = ', '.join(all_amenities)

allchars = string.ascii_lowercase + string.digits 

def getrands(n=3):
    random_string = ''.join(random.choice(allchars) for _ in range(n))
    return random_string
    
allgeneratedids= {'init', }
def get_legal_documentid():
    legalid = 'init' 
    while legalid in allgeneratedids:
        legalid = f"{getrands(2)}axfhgsd8{getrands(3)}"
    return legalid

    
def sort_and_check_kitchen(data):
    sorted_data = sorted(data, key=lambda x: (not 'frontal' in x, x))
    kitchen_available = any('kitchen' in item for item in data)
    return sorted_data, kitchen_available

def get_property_type(image):
    prompt = f"""
Analyze the given image of the property and tell what type of property it is from the below list:
{proplist}

Just give the property type back don't tell anything except your selected property type.
""".strip()
    response = model.generate_content([prompt, image]).text

    ptype = re.sub(r'[^a-zA-Z]', '', response).lower().strip() 
    return ptype if ptype in proptypes else "house"


def get_amenities(image,ptype, addKitchen):
    amenities = set()
    
    if addKitchen:
        amenities.add("kitchen")

    prompt = f"""
You are given an image of a {ptype}. Based on the characteristics of the property and your assessment, select which amenities it would likely have from the following list: [ {aminlist}]. 
Consider both the visual cues in the image and the common features of similar properties to make your selection. 
Return only the amenities in Python list format like this: ['a1', 'a2', ...].""".strip()
    
    response = model.generate_content([prompt, image]).text
    a = response.find('[')
    b = response.rfind(']')
    retamn = [re.sub(r'[^a-zA-Z]', '', amn).lower().strip() for amn in response[a+1:b].split(',')]
    for amn in retamn:
        if amn in all_amenities:
            amenities.add(amn)

    # default ameinites 
    if len(amenities) <=1:
        for amn in random.choices(list(all_amenities),k=4):
            amenities.add(amn)
    else:
        amenities.add("wifi")
    
    return list(amenities)


def get_description(metad, address, ptype, image):
    prompt = f"""
Examine the provided image of a {ptype.capitalize()}, it has {int(metad['nob'])} bedrooms and {int(metad['noba'])} bathrooms and it is located in {address}. Generate a well-rounded description (up to 200 words) contains distinctive elements visible in the image, aiming to convey a comprehensive sense of the property.

Additionally, create a short, compelling title for this property in no more than 20 characters and two words.

Format your response in JSON as follows: 
{{"description": "<description>", "title": "<title>"}}
""".strip()
    response = model.generate_content([prompt, image]).text.strip()
    a = response.find('{')
    b = response.rfind('}')
    data = json.loads(response[a:b+1])
    desc = data["description"].strip()
    if len(desc) > 500:
        desc = desc[0:500]
    return desc, data["title"]
    
def get_price_inr(ogprice):
    return int(ogprice*.69 / 12)

def get_property_details(metaentry, images, address):
    images, kitchenaval = sort_and_check_kitchen(images)
    
    frontpic = Image.open(images[0])

    ptype = get_property_type(frontpic)
    time.sleep(.3)
    amenities = get_amenities(frontpic, ptype, kitchenaval) 
    time.sleep(.3)
    description, title = get_description(metaentry,  address, ptype, frontpic)
    time.sleep(.3)
    
    rent = get_price_inr(metaentry["price"])
    time.sleep(1)
    return  amenities, ptype, rent, description, title, images

# Fill the database

In [18]:
SERVER_URL = "http://localhost:6969"

def send_property_data(form, image_files, token):
    files = []
    for image_path in image_files:
        filename = os.path.basename(image_path)
        mime_type = 'image/jpeg'
        if filename.lower().endswith('.png'):
            mime_type = 'image/png'
            
        
        files.append(
            ('images', (filename, open(image_path, 'rb'), mime_type))
        )
        
    headers = {
        # "Content-Type": "application/json",
        "authorization": f"Bearer {token}"
    }
    
    form = {key: str(value) for key, value in form.items()}

    try:
        response = requests.post(
            f"{SERVER_URL}/owner/property",
            data=form,
            files=files,
            headers=headers
            
        )
        # 
        response.raise_for_status()
        return
        
    except requests.exceptions.RequestException as e:
        print(f"Error sending request: {e}")
        print(form)
        print("message", response.json())
        raise
    
    finally:
        for _, file_tuple in files:
            # if idx != "images":
                # continue
                
            try:
                file_tuple[1].close()
            except:
                pass

In [31]:
from tqdm import tqdm
import logging
logging.basicConfig(filename="datafiller_errors.log", level=logging.ERROR, 
                    format="%(asctime)s - %(levelname)s - %(message)s")

prop_range = (7, 20)
picked_props = {0, }

def get_random_property():
    propidx = 0
    while propidx in picked_props:
        propidx = random.randint(1, len(metadata))
    picked_props.add(propidx) 
    return propidx 

for uname, info in tqdm(data['USER_DATA'].items(), desc="Users", unit="user"):
    propcount = random.randint(*prop_range)

    print(f"Adding {propcount} properties for @{uname}")
    addresses = random.sample(locale_to_address[info["locale"]], propcount)

    for i in tqdm(range(propcount), desc="Properties", unit="property"):
        try:
            prop = get_random_property() 
            address = addresses[i]["address"]
            metaentry = metadata.loc[prop]
            amenities, ptype, rent, description, title, images = get_property_details(address=address, images=imagepaths[prop], metaentry=metaentry)
            form = {
                "address": address, 
                "title": title, 
                "description": description, 
                "rent": rent, 
                "propertyType": ptype, 
                "amenities": json.dumps(amenities), 
                "numberOfBedrooms": int(metaentry["nob"]), 
                "legalDocumentId": get_legal_documentid()
            }
            send_property_data(form, images, info['token'])
            time.sleep(1)
        except Exception as e:
            logging.error(f"Error adding property {prop} for user {uname}:\n"
                          f"Form: {form}\n"
                          f"Error: {str(e)}")
            raise e
        

    print("Completed!!\n")

Users: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 36381.82user/s]


In [35]:
print("Added:", len(picked_props))
with open("pickpr.json", 'w') as f:
    json.dump(list(picked_props), f)

Added: 115
