In [None]:
import pandas as pd
import numpy as np
import os
import math
import requests
import time
from PIL import Image
from io import BytesIO
from tqdm.notebook import tqdm # Progress bar

In [4]:
# Read the train and test workbooks (in that order) with the openpyxl engine,
# then print the row count of each on its own line.
train_df, test_df = (
    pd.read_excel(workbook, engine="openpyxl")
    for workbook in ("train(1).xlsx", "test2.xlsx")
)
print(len(train_df), len(test_df), sep="\n")

16209
5404


In [None]:


# 2. Convert latitude/longitude to slippy-map tile coordinates (Web Mercator projection)
def deg2num(lat_deg, lon_deg, zoom):
    """Convert a latitude/longitude pair to slippy-map (XYZ) tile indices.

    Applies the standard Web Mercator tiling formula, in which zoom level
    ``zoom`` splits the world into ``2**zoom`` x ``2**zoom`` tiles.

    Args:
        lat_deg: Latitude in decimal degrees.
        lon_deg: Longitude in decimal degrees.
        zoom: Tile zoom level (expected to be a non-negative integer).

    Returns:
        Tuple ``(xtile, ytile)`` of ints, each clamped to ``[0, 2**zoom - 1]``.
    """
    lat_rad = math.radians(lat_deg)
    n = 2.0 ** zoom
    xtile = int((lon_deg + 180.0) / 360.0 * n)
    ytile = int((1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * n)

    # lon == 180, or a latitude beyond the Mercator cut-off (~85.05 degrees),
    # would otherwise yield an index outside the grid, i.e. a tile that
    # cannot exist. Clamp to the nearest edge tile instead.
    max_index = int(n) - 1
    xtile = min(max(xtile, 0), max_index)
    ytile = min(max(ytile, 0), max_index)
    return xtile, ytile

# 3. The Downloader Function
def download_image(lat, lon, house_id, save_dir="property_images", zoom=18):
    """Download the Esri World Imagery tile containing a coordinate and save it as JPEG.

    The tile is written to ``<save_dir>/<house_id>.jpg``. This is a
    best-effort helper: every failure is reported by returning ``False``
    instead of raising, so a batch loop can keep going.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.
        house_id: Identifier used as the output file name (without extension).
        save_dir: Directory to write the image into; created if missing.
        zoom: Tile zoom level passed to ``deg2num`` and the tile URL
            (defaults to 18, the value that was previously hard-coded).

    Returns:
        ``True`` if the tile was fetched (HTTP 200) and saved, ``False`` on a
        non-200 response or on any exception (the exception is printed).
    """
    try:
        # Kept inside the try so that a bad coordinate (e.g. NaN) fails only
        # this one download instead of aborting the caller's whole batch.
        xtile, ytile = deg2num(lat, lon, zoom)

        # Esri World Imagery URL (note the tile path order: zoom / y / x)
        url = f"https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{zoom}/{ytile}/{xtile}"

        # User Agent is important so we look like a browser, not a bot
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
        response = requests.get(url, headers=headers, timeout=10)

        if response.status_code != 200:
            return False

        # Make sure the target directory exists; otherwise every save fails
        # when the caller did not create it beforehand.
        os.makedirs(save_dir, exist_ok=True)

        img = Image.open(BytesIO(response.content))
        # JPEG cannot store alpha or palette modes; normalise to RGB first.
        if img.mode != "RGB":
            img = img.convert("RGB")
        img.save(os.path.join(save_dir, f"{house_id}.jpg"))
        return True

    except Exception as e:
        print(f"Error downloading {house_id}: {e}")
        return False

In [6]:
# 1. Ensure the folder for the training tiles exists (no error if it is already there)
os.makedirs(name="property_images_train_zoom_18", exist_ok=True)

In [None]:
subset_df = train_df

# Output folder for the tiles and pause between consecutive HTTP requests.
TRAIN_IMAGE_DIR = "property_images_train_zoom_18"
REQUEST_DELAY_SECONDS = 0.001  # raise this (e.g. to 0.1) if the server starts throttling

print(f"Starting download for {len(subset_df)} properties...")

success_count = 0
skipped_count = 0
# NOTE(review): iterrows() builds each row as a single-dtype Series; if every
# column were numeric, row['id'] would become a float and change the file
# name. Confirm the frame has at least one non-numeric column.
for _row_index, row in tqdm(subset_df.iterrows(), total=len(subset_df)):
    # Resume support: a tile already on disk (from an earlier run, or from a
    # repeated id earlier in this run) is not fetched again.
    if os.path.exists(os.path.join(TRAIN_IMAGE_DIR, f"{row['id']}.jpg")):
        skipped_count += 1
        continue

    status = download_image(row['lat'], row['long'], row['id'], save_dir=TRAIN_IMAGE_DIR)
    if status:
        success_count += 1

    # Be polite! Pause briefly between requests to reduce the risk of an IP ban
    time.sleep(REQUEST_DELAY_SECONDS)

print(f"Download Complete. Successfully retrieved {success_count} images.")
print(f"Skipped {skipped_count} images that were already on disk.")

In [None]:
from pathlib import Path

# Print how many .jpg files each Drive folder holds, one count per line,
# in the order the folders are listed.
for folder in (
    Path("/content/drive/MyDrive/property_val/property_images_train_zoom_18"),
    Path("/content/drive/MyDrive/property_val/test_property_images_train_zoom_18"),
):
    print(sum(1 for _ in folder.glob("*.jpg")))

16110
5396
