 Satellite Image Download

In [14]:
import requests 
import os
import time
import pandas as pd
import numpy as np
import math
from tqdm import tqdm

In [15]:
# Training table; the download cells read the "id", "lat" and "long" values of each row.
df = pd.read_excel(io="train(1).xlsx")

In [16]:
# Read the Google Maps key from the environment so it is never stored in the notebook.
API_KEY = os.getenv("GOOGLE_MAPS_API_KEY")

# Reject an unset *or* empty variable: an empty string would pass an
# `is None` check and then be sent as the key on every request.
if not API_KEY:
    raise ValueError("API key not found. Set GOOGLE_MAPS_API_KEY environment variable.")

In [17]:
def download_image(lat, lon, image_id, api_key, zoom=18, size="400x400",
                   image_dir=None, timeout=30):
    """Fetch one satellite image from the Google Static Maps endpoint and save it.

    Parameters
    ----------
    lat, lon
        Values interpolated into the ``center`` query parameter.
    image_id
        Used as the file name stem: the image is written to
        ``<image_dir>/<image_id>.png``.
    api_key
        Key sent in the ``key`` query parameter.
    zoom, size
        Forwarded verbatim as the ``zoom`` and ``size`` query parameters.
    image_dir
        Target directory. When ``None`` the module-level ``IMAGE_DIR`` is
        used (looked up at call time, so it may be defined in a later cell).
    timeout
        Seconds passed to ``requests.get`` so a stalled connection cannot
        block a worker indefinitely.

    Returns
    -------
    bool
        ``True`` if the server answered with status 200 and the file was
        written, ``False`` on any other status or on a network error.
    """
    if image_dir is None:
        image_dir = IMAGE_DIR

    url = (
        "https://maps.googleapis.com/maps/api/staticmap"
        f"?center={lat},{lon}"
        f"&zoom={zoom}"
        f"&size={size}"
        f"&maptype=satellite"
        # Use the key that was passed in, not whatever global happens to exist.
        f"&key={api_key}"
    )

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Report only the exception type: the exception text may contain the
        # request URL, which includes the API key.
        print(f"Failed for ID {image_id} | {type(exc).__name__}")
        return False

    if response.status_code == 200:
        path = os.path.join(image_dir, f"{image_id}.png")
        with open(path, "wb") as f:
            f.write(response.content)
        return True
    else:
        print(f"Failed for ID {image_id} | Status {response.status_code}")
        return False


In [18]:
# Throttling / batching knobs.
SLEEP_BETWEEN_CALLS = 1      # seconds a worker sleeps after each download attempt
BATCH_SIZE = 4_000           # rows per batch; only feeds the batch count that is printed
SLEEP_BETWEEN_BATCHES = 60   # presumably seconds; not referenced by any visible cell

In [19]:
# Summarise how many batches of BATCH_SIZE rows the training table splits into.
num_rows = df.shape[0]
num_batches = math.ceil(num_rows / BATCH_SIZE)

print(
    f"Total rows: {num_rows}",
    f"Batch size: {BATCH_SIZE}",
    f"Total batches: {num_batches}",
    sep="\n",
)


Total rows: 16209
Batch size: 4000
Total batches: 5


In [20]:
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm


In [21]:
# Folder the training-table images are written to; created if missing, reused otherwise.
IMAGE_DIR = "Satellite Imagess"
os.makedirs(name=IMAGE_DIR, exist_ok=True)


In [22]:
def download_row(row):
    """Download the image for one table row unless its PNG is already on disk.

    Reads ``row["id"]``, ``row["lat"]`` and ``row["long"]``.

    Returns the row's ``id`` when the download failed, and ``None`` otherwise
    (both after a successful download and when the file already existed).
    """
    row_id = row["id"]

    # An existing file means this row was handled on an earlier run: skip it
    # without calling the API or sleeping.
    if os.path.exists(f"{IMAGE_DIR}/{row_id}.png"):
        return None

    ok = download_image(
        lat=row["lat"], lon=row["long"], image_id=row_id, api_key=API_KEY
    )

    # Pause after every attempt, successful or not, to throttle this worker.
    time.sleep(SLEEP_BETWEEN_CALLS)

    if ok:
        return None
    return row_id


In [23]:
# Thread count handed to ThreadPoolExecutor in the download cell.
MAX_WORKERS: int = 5


In [26]:
# Ids of rows whose image could not be fetched; inspect or retry these after the run.
failed_ids = []

with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:

    # Remember which row every future belongs to, so a task that raised can
    # still be attributed to an id.
    future_to_id = {
        executor.submit(download_row, row): row["id"]
        for _, row in df.iterrows()
    }
    futures = list(future_to_id)

    for future in tqdm(
        as_completed(futures),
        total=len(futures),
        desc="Downloading satellite images (parallel)"
    ):
        try:
            result = future.result()
        except Exception as exc:
            # future.result() re-raises whatever the worker raised. One bad
            # row must not abort the remaining downloads, so record it as
            # failed and carry on. Only the exception type is printed because
            # the message may contain the request URL (and with it the key).
            print(f"Failed for ID {future_to_id[future]} | {type(exc).__name__}")
            result = future_to_id[future]
        if result is not None:
            failed_ids.append(result)


Downloading satellite images (parallel): 100%|█████████████████████████████████| 16209/16209 [3:02:19<00:00,  1.48it/s]


In [28]:
import os

# Second output folder, created if missing. Presumably the target for the
# test-table images; no visible cell references it, so confirm against the
# cell that defines download_test_row.
IMAGE_DIR_TEST = "images_test"
os.makedirs(name=IMAGE_DIR_TEST, exist_ok=True)


In [29]:
import pandas as pd
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm


In [30]:
# Test table; its rows are handed one by one to download_test_row in the download cell.
df_test = pd.read_excel(io="test2.xlsx")

In [31]:
def download_image(lat, lon, image_id, api_key, image_dir=None,
                   zoom=18, size="400x400", timeout=30):
    """Fetch one satellite image from the Google Static Maps endpoint and save it.

    This definition shadows the ``download_image`` from the earlier cell.
    ``image_dir`` is therefore optional, so that ``download_row``'s
    four-argument call keeps working if it is run after this cell.

    Parameters
    ----------
    lat, lon
        Values interpolated into the ``center`` query parameter.
    image_id
        Used as the file name stem: the image is written to
        ``<image_dir>/<image_id>.png``.
    api_key
        Key sent in the ``key`` query parameter.
    image_dir
        Target directory. When ``None`` the module-level ``IMAGE_DIR`` is used.
    zoom, size
        Forwarded verbatim as the ``zoom`` and ``size`` query parameters;
        the defaults are the values that used to be hard-coded here.
    timeout
        Seconds passed to ``requests.get`` so a stalled connection cannot
        block a worker indefinitely.

    Returns
    -------
    bool
        ``True`` if the server answered with status 200 and the file was
        written, ``False`` on any other status or on a network error.
    """
    if image_dir is None:
        image_dir = IMAGE_DIR

    url = (
        "https://maps.googleapis.com/maps/api/staticmap"
        f"?center={lat},{lon}"
        f"&zoom={zoom}"
        f"&size={size}"
        f"&maptype=satellite"
        f"&key={api_key}"
    )

    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Report only the exception type: the exception text may contain the
        # request URL, which includes the API key.
        print(f"Failed ID {image_id} | {type(exc).__name__}")
        return False

    if r.status_code == 200:
        with open(os.path.join(image_dir, f"{image_id}.png"), "wb") as f:
            f.write(r.content)
        return True
    else:
        print(f"Failed ID {image_id} | Status {r.status_code}")
        return False


In [34]:
MAX_WORKERS = 5   # keep low to avoid throttling
# Whatever download_test_row returns for a row that is not None is collected here.
failed_test_ids = []

# NOTE(review): `download_test_row` is not defined in any cell visible in this
# notebook (execution counts jump from 31 to 34). Make sure its defining cell
# is present above, otherwise Restart & Run All stops here with a NameError.
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:

    # Remember which row every future belongs to, so a task that raised can
    # still be attributed to a row. Assumes the test table has an "id" column
    # like the training table (TODO confirm); falls back to the index label.
    future_to_key = {
        executor.submit(download_test_row, row): row.get("id", idx)
        for idx, row in df_test.iterrows()
    }
    futures = list(future_to_key)

    for future in tqdm(
        as_completed(futures),
        total=len(futures),
        desc="Downloading test satellite images"
    ):
        try:
            result = future.result()
        except Exception as exc:
            # future.result() re-raises whatever the worker raised. One bad
            # row must not abort the remaining downloads, so record it as
            # failed and carry on. Only the exception type is printed because
            # the message may contain the request URL (and with it the key).
            print(f"Failed ID {future_to_key[future]} | {type(exc).__name__}")
            result = future_to_key[future]
        if result is not None:
            failed_test_ids.append(result)


Downloading test satellite images: 100%|███████████████████████████████████████████| 5404/5404 [15:39<00:00,  5.75it/s]
