In [15]:
import cv2
import io
import numpy as np
import pandas as pd
from scipy import stats
from urllib import request
from PIL import Image
import matplotlib.pyplot as plt
import glob  # Add this import statement

IMAGE_FOLDER_PATH = "./tmp"
GROUNDTRUTH_FOLDER_PATH = "./tmp"

# Function definitions (get_random_locations, get_random_image_sizes, get_google_maps_image, process_images, get_one_data_pair, save_images, find_file_saving_index) will be provided in the next cell

In [5]:
# Cell 2: Define necessary functions

def get_random_locations(city, n_locations=None):
    """
    Generate random locations within the bounding box of the city.

    :param city: a dict element containing the bounding box information and number of images
    :param n_locations: Number of random locations to generate
    :return: N x 2 numpy array with lat-lon pairs (N = n_locations)
    """
    if n_locations is None:
        n_locations = city['n-images']
    lat_min = city['latitude-min']
    lat_max = city['latitude-max']
    lon_min = city['longitude-min']
    lon_max = city['longitude-max']

    lat_samples = np.random.uniform(lat_min, lat_max, n_locations)
    lon_samples = np.random.uniform(lon_min, lon_max, n_locations)
    location_samples = np.transpose(np.vstack((lat_samples, lon_samples)))
    return location_samples


def get_random_image_sizes(city):
    """
    Generate random image sizes within a specified range.

    :param city: a dict element containing the number of images
    :return: Array of random image sizes
    """
    n_sizes = city['n-images']
    mu, sigma = 350, 50
    lower, upper = 250, 450
    X = stats.truncnorm((lower - mu) / sigma, (upper - mu) / sigma, loc=mu, scale=sigma)
    sizes = np.round(X.rvs(n_sizes)).astype(int)
    return sizes


def get_google_maps_image(api_key, lat, lon, size, maptype):
    """
    Retrieve a Google Maps image for a specified location, size, and map type.

    :param api_key: Google Maps API key
    :param lat: Latitude of the location
    :param lon: Longitude of the location
    :param size: Size of the image
    :param maptype: Type of map (satellite or terrain)
    :return: RGB image as a numpy array
    """
    url = "https://maps.googleapis.com/maps/api/staticmap?" \
          "center=" + str(lat) + "," + str(lon) + \
          "&zoom=18" \
          "&size=" + str(size) + "x" + str(size) + \
          "&maptype=" + maptype + \
          "&key=" + api_key + \
          "&style=feature:all|element:labels|visibility:off" \
          "&scale=2"

    response = request.urlopen(url)
    image_pil = Image.open(io.BytesIO(response.read()))
    image_pil = image_pil.convert("RGB")
    image_BGR = np.asarray(image_pil)
    image_RGB = np.copy(image_BGR)
    image_RGB[:, :, 0] = image_BGR[:, :, 2]
    image_RGB[:, :, 2] = image_BGR[:, :, 0]
    return image_RGB


def process_images(satellite_image, road_image):
    """
    Process satellite and road images to extract road information.

    :param satellite_image: Satellite image as a numpy array
    :param road_image: Road image as a numpy array
    :return: Tuple indicating if the image is valid, processed satellite image, and processed road image
    """
    height = np.size(satellite_image, 0)
    width = np.size(satellite_image, 1)
    satellite_image = satellite_image[0:height - 40, 40:, :]
    road_image = road_image[0:height - 40, 40:, :]
    road_image = cv2.resize(road_image, (400, 400), interpolation=cv2.INTER_NEAREST)
    road_image = cv2.cvtColor(road_image, cv2.COLOR_BGR2GRAY)
    _, road_image = cv2.threshold(road_image, 253, 255, type=cv2.THRESH_BINARY)

    is_road = road_image == 255
    percentage_road = np.sum(is_road.flatten()) / ((height - 40) * (width - 40))

    if percentage_road < 0.03:
        return False, None, None

    road_image = cv2.GaussianBlur(road_image, (13, 13), 5, borderType=cv2.BORDER_REFLECT101)
    _, road_image = cv2.threshold(road_image, 80, 255, type=cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(road_image, mode=cv2.RETR_LIST, method=cv2.CHAIN_APPROX_NONE)

    large_contours = []
    for contour in contours:
        if cv2.contourArea(contour) > 50:
            large_contours.append(contour)

    road_image_final = np.zeros((400, 400, 3))
    cv2.drawContours(road_image_final, large_contours, -1, (255, 255, 255), cv2.FILLED)
    satellite_image = cv2.resize(satellite_image, (400, 400), interpolation=cv2.INTER_AREA)

    return True, satellite_image, road_image_final


def get_one_data_pair(city, api_key, location, size):
    """
    Get a pair of satellite and road images for a given location.

    :param city: City bounding box information
    :param api_key: Google Maps API key
    :param location: Latitude and longitude of the location
    :param size: Size of the image
    :return: Tuple of satellite and road images
    """
    lat = location[0]
    lon = location[1]
    satellite_image = get_google_maps_image(api_key, lat, lon, size, maptype="satellite")
    road_image = get_google_maps_image(api_key, lat, lon, size, maptype="terrain")
    is_valid, satellite_image, road_image = process_images(satellite_image, road_image)
    while not is_valid:
        location = get_random_locations(city, 1).ravel()
        lat = location[0]
        lon = location[1]
        satellite_image = get_google_maps_image(api_key, lat, lon, size, maptype="satellite")
        road_image = get_google_maps_image(api_key, lat, lon, size, maptype="terrain")
        is_valid, satellite_image, road_image = process_images(satellite_image, road_image)

    return satellite_image, road_image

from skimage.io import imsave

def save_images(city_name, satellite_image, road_image, index):
    """
    Save the satellite and road images to disk.

    :param city_name: Name of the city
    :param satellite_image: Satellite image as a numpy array
    :param road_image: Road image as a numpy array
    :param index: Index for the image filenames
    """
  
    filename_image = f"{IMAGE_FOLDER_PATH}/"+city_name + "-" + str(index).zfill(6) + ".png"
    filename_mask = f"{GROUNDTRUTH_FOLDER_PATH}/"+city_name + "-" + str(index).zfill(6) + ".png"


    imsave(filename_image, satellite_image)
    imsave(filename_mask, road_image)


def find_file_saving_index(city_name):
    """
    Find the index for saving new images by checking existing images.

    :param city_name: Name of the city
    :return: Index for the new image filenames
    """
    existing_images = sorted(glob.glob('images/' + city_name + '*'))

    if len(existing_images) == 0:
        return 0
    else:
        last_image_name = existing_images[-1]
        extension_index = last_image_name.rfind(".")
        last_image_index = int(last_image_name[extension_index - 6:extension_index])
        return last_image_index + 1

In [12]:
# Cell 3: Define city bounding boxes and parameters

# Define the bounding boxes and parameters for the cities
cities = {
    "barcelona": {'latitude-min': 41.317144, 'latitude-max': 41.469576, 'longitude-min': 2.035251, 'longitude-max': 2.228779, 'n-images': 5},
    "paris": {'latitude-min': 48.815573, 'latitude-max': 48.915590, 'longitude-min': 2.224199, 'longitude-max': 2.421137, 'n-images': 5},
    "london": {'latitude-min': 51.384940, 'latitude-max': 51.672343, 'longitude-min': -0.510375, 'longitude-max': 0.334015, 'n-images': 5},
    "new_york_city": {'latitude-min': 40.477399, 'latitude-max': 40.917577, 'longitude-min': -74.259090, 'longitude-max': -73.700272, 'n-images': 5}
}
google_api_key = "AIzaSyAxKYVrnXYW2eYN9NuplgdkENGgKB8QqQw"

In [17]:
# Cell 4: Collect and save images for each city

for city_name in cities:
    city = cities[city_name]
    locations = get_random_locations(city=city)
    image_sizes = get_random_image_sizes(city=city)
    start_index = find_file_saving_index(city_name)
    n_images = city["n-images"]
    
    for i in range(n_images):
        image_size = image_sizes[i]
        location = locations[i]
        satellite_image, road_image = get_one_data_pair(city=city, api_key=google_api_key, location=location, size=image_size)
        save_images(city_name, satellite_image, road_image.astype(np.uint8), start_index + i)

        if i % 1 == 0:  # Print progress for every image
            print(city_name, i + 1, '/', n_images)

# Display a sample of the collected images
plt.figure(figsize=(10, 10))
sample_image = cv2.imread('images/barcelona-000001.png')
plt.imshow(cv2.cvtColor(sample_image, cv2.COLOR_BGR2RGB))
plt.title('Sample Image from Barcelona')
plt.show()

barcelona 1 / 5
barcelona 2 / 5
barcelona 3 / 5
barcelona 4 / 5




barcelona 5 / 5
paris 1 / 5
paris 2 / 5
paris 3 / 5
paris 4 / 5
paris 5 / 5
london 1 / 5
london 2 / 5
london 3 / 5
london 4 / 5
london 5 / 5
new_york_city 1 / 5
new_york_city 2 / 5
new_york_city 3 / 5
new_york_city 4 / 5
new_york_city 5 / 5


[ WARN:0@412.879] global loadsave.cpp:241 findDecoder imread_('images/barcelona-000001.png'): can't open/read file: check file path/integrity


error: OpenCV(4.10.0) /io/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


<Figure size 1000x1000 with 0 Axes>