In [None]:
def printf(message):
    """Print ``message`` and flush stdout right away.

    The explicit flush is there because of stdout buffering under Slurm:
    without it, progress lines may not show up until much later.
    """
    print(message, end="\n", flush=True)

printf("Generating Training Model!")

import os

# Dataset name; it selects the output folder and can be overridden from the environment.
TRAIN_DATASET = os.environ.get("TRAIN_DATASET", "milwaukee")
TRAIN_DATASET_FOLDER = f"datasets/{TRAIN_DATASET}"

os.makedirs(TRAIN_DATASET_FOLDER, exist_ok=True)

# Bounds of the rectangle to download, each overridable from the environment.
# NOTE(review): the *_LATTITUDE defaults (about -88) look like longitudes and the
# *_LONGITUDE defaults (about 43) look like latitudes, so the names may be swapped --
# confirm before supplying custom bounds.
MIN_LATTITUDE = float(os.environ.get("MIN_LATTITUDE", -88.52972))
MAX_LATTITUDE = float(os.environ.get("MAX_LATTITUDE", -87.82298))
MIN_LONGITUDE = float(os.environ.get("MIN_LONGITUDE", 42.84222))
MAX_LONGITUDE = float(os.environ.get("MAX_LONGITUDE", 43.19223))

# Used as the figure height when the map is rendered.
# Would be nice to ground this in pixels per degree of latitude or something similar.
RESOLUTION = int(os.environ.get("RESOLUTION", 120))

# Edge length, in pixels, of the square training tiles.
TILE_SIZE = int(os.environ.get("TILE_SIZE", 256))

In [None]:
import networkx as nx
import osmnx as ox

# Log the osmnx version used for this run.
printf(f"Running osmnx: v{ox.__version__}")

In [None]:
# Build the bounding rectangle, then download the street network inside it.
from shapely import Polygon

# Opposite corners of the rectangle, in (*_LATTITUDE, *_LONGITUDE) component order.
MIN = (MIN_LATTITUDE, MIN_LONGITUDE)
MAX = (MAX_LATTITUDE, MAX_LONGITUDE)

# Extent along the first component divided by the extent along the second.
ASPECT_RATIO = (MAX_LATTITUDE - MIN_LATTITUDE) / (MAX_LONGITUDE - MIN_LONGITUDE)
printf(f"Downloading Rect Lattitude: {MIN[0]} to {MAX[0]} Longitude: {MIN[1]} to {MAX[1]}")

from time import time

# Closed ring: the four corners, finishing on the corner it started from.
SHAPE = (MIN, (MIN[0], MAX[1]), MAX, (MAX[0], MIN[1]), MIN)
polygon = Polygon(SHAPE)
printf(f"Downloading...") # Usually takes around 240s - 5 minutes
start_time = time()
G = ox.graph.graph_from_polygon(polygon, network_type="all")
printf(f"Done Downloading")
end_time = time()
printf(f"Time Take: {end_time-start_time}")

In [None]:
# Fetch every feature tagged as a building inside the polygon.
printf("Downloading buildings...") # Usually takes 260s
start_time = time()
building_tags = {"building": True}
bldg = ox.features_from_polygon(polygon, building_tags)
printf(f"Done buildings in {time()-start_time:.1f}s")

# Footprints only: drop every geometry that is not a Polygon or MultiPolygon.
footprint_mask = bldg.geometry.type.isin(["Polygon", "MultiPolygon"])
bldg = bldg[footprint_mask].copy()

In [None]:
# Figure size: RESOLUTION sets the height, the width follows the rectangle's aspect ratio.
HEIGHT = RESOLUTION
WIDTH = ASPECT_RATIO * HEIGHT

import matplotlib.pyplot as plt

# TODO: Matplotlib is probably pretty inefficient at all this. Can we use OpenGL?

printf("Drawing Roads...")
# show=False / close=False: the figure is still being composed (buildings are drawn on
# the same axes below), so osmnx must not display or close it at this point. With the
# default show=True the very large roads-only figure would be shown first.
fig, ax = ox.plot.plot_graph(
    G,
    node_size=0,
    edge_color="#FF0000",
    bgcolor="#FFFFFF",
    edge_linewidth=0.5,
    figsize=(WIDTH, HEIGHT),
    show=False,
    close=False,
)

printf("Drawing Buildings...")
# Buildings are filled solid blue with no outline, on top of the red roads.
bldg.plot(
    ax=ax,
    facecolor="#0000FF",
    edgecolor="none",
    linewidth=0,
    alpha=1.0,
)

# Hide the axes decorations and stretch the axes over the whole canvas.
ax.set_axis_off()
ax.set_position([0, 0, 1, 1])
ax.margins(0.0)
printf(f"Saving {TRAIN_DATASET_FOLDER}/full_train.png")
fig.savefig(f"{TRAIN_DATASET_FOLDER}/full_train.png", transparent=False)

In [None]:
from PIL import Image
import PIL
# Raise Pillow's pixel-count limit so the very large rendered map can be opened.
Image.MAX_IMAGE_PIXELS = 3_000_000_000
import numpy as np

# Read the rendered map back from disk.
full_train_path = f"{TRAIN_DATASET_FOLDER}/full_train.png"
img = Image.open(full_train_path)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Fraction (0.05 = 5%) of each dimension trimmed from every edge of the rendered map.
MARGIN_PERCENT = 0.05

cropped_image = img.crop((
    img.width * MARGIN_PERCENT,
    img.height * MARGIN_PERCENT,
    img.width * (1.0 - MARGIN_PERCENT),
    img.height * (1.0 - MARGIN_PERCENT),
))

tiles = list()

def slice(image_to_slice, slice_size, offset, transposition=None):
    """Cut an image into square tiles and keep those with a useful colour mix.

    Note: this function shadows the built-in ``slice`` for the rest of the notebook.

    Args:
        image_to_slice: Image object providing ``crop``, ``transpose``, ``width`` and
            ``height`` (a PIL image in this notebook). Assumed to be RGBA, because the
            last channel of every tile is overwritten with 255 -- confirm for other inputs.
        slice_size: Edge length of each square tile, in pixels.
        offset: ``(x, y)`` origin of the tile grid; everything left of / above it is
            cropped away before tiling.
        transposition: Optional value handed to ``transpose`` after the offset crop
            (e.g. ``Image.ROTATE_90``). ``None`` leaves the image as is.

    Returns:
        A list of arrays, one per accepted tile, ordered with x as the outer loop and
        y as the inner loop. A tile is accepted when it contains at least one
        red-dominant and one blue-dominant pixel, those two groups together cover less
        than 40% of the tile, and blue-dominant pixels cover more than 2% of it.
    """
    accepted_tiles = []

    region = image_to_slice.crop((offset[0], offset[1], image_to_slice.width, image_to_slice.height))
    if transposition is not None:
        region = region.transpose(transposition)

    total_pixel_count = slice_size * slice_size

    # "+ 1" keeps the tile that ends exactly on the right / bottom edge; without it the
    # last row and column were dropped whenever the size was a multiple of slice_size.
    for x in range(0, region.width - slice_size + 1, slice_size):
        for y in range(0, region.height - slice_size + 1, slice_size):
            tile = np.array(region.crop((x, y, x + slice_size, y + slice_size)))
            tile[:, :, -1] = 255  # force the last channel fully on
            # A pixel counts as blue / red when that channel exceeds its green channel.
            blue_pixel_count = (tile[:, :, 2] > tile[:, :, 1]).sum()
            red_pixel_count = (tile[:, :, 0] > tile[:, :, 1]).sum()
            has_both_colours = red_pixel_count != 0 and blue_pixel_count != 0
            not_too_dense = blue_pixel_count + red_pixel_count < total_pixel_count * 0.4
            enough_blue = blue_pixel_count > total_pixel_count * 0.02  # At least 2% blue
            if has_both_colours and not_too_dense and enough_blue:
                accepted_tiles.append(tile)

    return accepted_tiles

printf(f"{cropped_image.width}, {cropped_image.height}")

# Every orientation is tiled twice: once on the plain grid and once on a grid shifted
# by half a tile in both directions.
tiles = []
for transposition in (None, Image.ROTATE_90, Image.ROTATE_180, Image.ROTATE_270):
    for offset in ((0, 0), (TILE_SIZE//2, TILE_SIZE//2)):
        tiles += slice(cropped_image, TILE_SIZE, offset, transposition)

# TODO: Can probably add flip

printf(f"Total Tiles: {len(tiles)}")

In [None]:
# Draw an A x A grid of randomly chosen tiles as a quick visual check.
A = 10
SAMPLES = A*A
fig,axs = plt.subplots(A,A, figsize=(A,A))

import random
random.seed(42)  # fixed seed so the same tiles are picked on every run

for i in range(SAMPLES):
    row, col = divmod(i, A)
    # randrange excludes its upper bound. randint(0, len(tiles)) includes it and could
    # return len(tiles), which is one past the last valid index.
    axs[row, col].imshow(tiles[random.randrange(len(tiles))])
    axs[row, col].axis("off")

fig.suptitle("Train Tile Samples")
fig.savefig(f"{TRAIN_DATASET_FOLDER}/train_samples.png", transparent=False)

In [None]:
# Convert into a data format
import os
from tqdm import tqdm

os.makedirs(f"{TRAIN_DATASET_FOLDER}/data/train", exist_ok=True)

printf("Saving training data...")

# Each tile is written as tile_<index>.png with a zero-padded, 5-digit index.
# The per-tile image uses its own name so the full-size `img` loaded earlier is not
# overwritten; otherwise re-running the crop cell would operate on a single tile.
for i, tile in enumerate(tqdm(tiles)):
    tile_image = Image.fromarray(tile, 'RGBA')
    tile_image.save(f"{TRAIN_DATASET_FOLDER}/data/train/tile_{i:05}.png")