In [None]:
# NOTE(review): stray expression -- `T` is not defined anywhere in the visible
# part of this notebook, so on a fresh kernel this cell presumably raises a
# NameError. Confirm it is leftover residue and remove the cell.
T

# Setup packages & paths

In [1]:
# numeric packages
import numpy as np
import pandas as pd

# filesystem and OS
import sys, os, time
import glob

# plotting
from matplotlib import pyplot as plt
%matplotlib inline

import seaborn as sns
sns.set_style("whitegrid", {'axes.grid' : False})

# widgets and interaction
from ipywidgets import FloatProgress
from IPython.display import display, clear_output

import warnings
warnings.filterwarnings('ignore')

# these magics ensure that external modules that are modified are also automatically reloaded
%load_ext autoreload
%autoreload 2

In [None]:
import pysatml

# Load locations data

In [None]:
# Root directory under which all extracted data is saved.
outPath = "/home/data/urban-atlas/extracted-data/"

# Make sure the directory tree exists before anything is written to it.
if not os.path.exists(outPath):
    os.makedirs(outPath)

In [None]:
# Directory that holds one sub-folder per city.
locations_path = "/home/data/urban-atlas/extracted-data/"

# Map <sub-folder name> -> path of the raster-grid sample locations file.
grid_location_files = {
    csv_path.split("/")[-2]: csv_path
    for csv_path in glob.glob("%s/*/sample_locations_raster_25.csv" % locations_path)
}

# Map <sub-folder name> -> path of the additional sample locations file.
more_location_files = {
    csv_path.split("/")[-2]: csv_path
    for csv_path in glob.glob("%s/*/additional_sample_locations.csv" % locations_path)
}

# Extract imagery from Google Maps

In [None]:
# satellite imagery modules

import sys
sys.path.append("../../satellite-image-tools/satimage-processing/")
import satimg 
 
import sys
sys.path.append("../../satellite-image-tools/google-maps-api/")
import GoogleMaps as gmaps 

In [None]:
# The Google Maps API key is read from the environment instead of being stored
# in the notebook, so that the credential is never committed or shared.
# NOTE(review): keys that were previously hardcoded in this cell should be
# treated as leaked and rotated.
googleAPIKey = os.environ.get("GOOGLE_MAPS_API_KEY")
if not googleAPIKey:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable before running this cell.")
gmClient = gmaps.GoogleMaps(key=googleAPIKey)

MAX_REQUESTS = 25000   # cap on the number of requests counted by download_images
MAX_TRIES    = 2       # passed as max_tries to gmClient.get_static_map_image
img_size     = 224     # base image size; download_images requests int(img_size*1.18) per side
ZOOM         = 17      # zoom level passed to gmClient.construct_static_url

In [None]:
# Alternative city list (currently disabled):
# cities = ["bucuresti", "berlin", "barcelona", "paris", "athina", \
#           "firenze", "dublin", "london", "tallinn", "bremen"]

# Cities selected in this notebook.
cities = [
    "eindhoven",
    "london",
    "belfast",
    "budapest",
    "bremen",
    "roma",
    "madrid",
]

In [None]:
def load_locations(grid_file="sample_locations_raster_25.csv",
                   more_file="additional_sample_locations.csv"):
    """Load the sample locations of one city.

    With the default arguments both CSV files are looked up relative to the
    current working directory.

    Parameters
    ----------
    grid_file : str
        CSV with the raster-grid samples. It must provide the columns
        "lon", "lat", "grid-i", "grid-j" and "class"; a saved-index column
        named "Unnamed: 0" is dropped when present.
    more_file : str
        CSV with the additional samples. Its "ITEM" column is renamed to
        "class" and an "Unnamed: 1" column is dropped when present. This file
        is currently only read to report its size.

    Returns
    -------
    pandas.DataFrame
        The grid samples restricted to the columns
        ["lon", "lat", "grid-i", "grid-j", "class"], with a fresh 0..n-1 index.
    """
    # `axis` is passed by keyword: the positional form is rejected by recent pandas.
    grid_locations_df = pd.read_csv(grid_file)\
                            .drop("Unnamed: 0", axis=1, errors="ignore")
    more_locations_df = pd.read_csv(more_file)\
                            .rename(columns={"ITEM": "class"})\
                            .drop("Unnamed: 1", axis=1, errors="ignore")
    # single-argument print(...) behaves the same under Python 2 and Python 3
    print("Grid samples: %d. Additional samples: %d" %
          (len(grid_locations_df), len(more_locations_df)))

    # the additional samples carry no grid coordinates
    more_locations_df['grid-i'] = np.nan
    more_locations_df['grid-j'] = np.nan
    columns = ["lon", "lat", "grid-i", "grid-j", "class"]

    # Only the grid samples are returned; the alternatives are kept for reference.
    # locations = more_locations_df[columns]
    # locations = pd.concat([grid_locations_df[columns], more_locations_df[columns]])
    locations = grid_locations_df[columns].reset_index(drop=True)

    return locations

In [None]:
city = "madrid"

# load_locations() reads its CSV files relative to the working directory,
# so switch into this city's data folder first.
workdir = os.path.join(outPath, city)
os.chdir(workdir)

locations = load_locations()
print(len(locations))

# number of sample locations per class
locations.groupby("class").apply(len)

In [None]:
def download_images(locations, prefix="", out_path="./"):
    """Fetch one satellite image per row of `locations` through `gmClient`.

    Each image is stored as
    <out_path>/<class>/<prefix>_z<ZOOM>_<lat>_<lon>[_grid-i<i>_grid-j<j>].jpg;
    the grid suffix is only added when both grid indices are not NaN.
    Locations whose target file already exists are skipped without issuing a
    request.

    The module-level counter `n_requests` is incremented after every image
    that was retrieved. The loop stops as soon as the counter has reached
    MAX_REQUESTS (checked before a request is sent) or as soon as the client
    returns None.

    Parameters
    ----------
    locations : pandas.DataFrame
        Needs the columns "class", "lat", "lon", "grid-i" and "grid-j".
        The row index is used for progress messages and for choosing which
        images to preview (every index divisible by 100).
    prefix : str
        Prepended to every file name.
    out_path : str
        Root output directory; created when missing.
    """
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    global n_requests

    for i, r in locations.iterrows():
        clear_output(wait=True)
        print("Pulling image %d/%d... (# API requests = %d)" % (i, len(locations), n_requests))
        label, lat, lon, grid_i, grid_j = r['class'], r['lat'], r['lon'], r['grid-i'], r['grid-j']

        label_dir = "%s/%s" % (out_path, label)
        basename = "%s/%s_z%d_%2.5f_%2.5f" % (label_dir, prefix, ZOOM, lat, lon)
        if not np.isnan(grid_i) and not np.isnan(grid_j):
            cur_filename = "%s_grid-i%d_grid-j%d.jpg" % (basename, grid_i, grid_j)
        else:
            cur_filename = "%s.jpg" % basename
        print(cur_filename)

        # already downloaded in an earlier run
        if os.path.exists(cur_filename):
            continue

        # Check the request budget BEFORE contacting the API, so that no
        # request is sent once the limit has been reached.
        if n_requests >= MAX_REQUESTS:
            print("API requests quota exceeded!")
            break

        # Make sure the per-class folder exists.
        # NOTE(review): presumably get_static_map_image writes to `filename`
        # without creating parent directories -- confirm against the client.
        if not os.path.exists(label_dir):
            os.makedirs(label_dir)

        # A slightly larger image (factor 1.18) is requested and crop=True is
        # passed to the client.
        req = gmClient.construct_static_url((lat, lon), maptype="satellite", zoom=ZOOM,
                                            imgsize=(int(img_size*1.18), int(img_size*1.18)))
        img = gmClient.get_static_map_image(req, filename=cur_filename,
                                            max_tries=MAX_TRIES,
                                            crop=True)

        if img is None:
            print("Could not retrieve image (API requests quota exceeded?)")
            break
        n_requests += 1

        # display samples every now and then
        if i % 100 == 0:
            plt.imshow(img)
            plt.title("image %d (label = %s)" % (i, label))
            plt.show()
            time.sleep(5)

In [None]:
# Counter of retrieved images; download_images() declares it `global` and
# increments it after each image it obtains. Reset it here before downloading.
n_requests = 0

In [None]:
city = "budapest"

# load_locations() reads its CSV files relative to the working directory,
# and the images are written below "./img" in that same folder.
workdir = os.path.join(outPath, city)
os.chdir(workdir)

locations = load_locations()

download_images(locations, prefix=city, out_path="./img")

In [None]:
# show the current value of the request counter
n_requests

# Example extracted images

In [None]:
def plot_examples(images, labels, classes=None, \
                  nExamples=10, thumbSize = (50,50), title="example"):
    """Show a grid of random example thumbnails, one column per class.

    Parameters
    ----------
    images : numpy.ndarray
        4-D array; the first axis indexes the samples and only the first
        three entries of the last axis are used (presumably RGB channels of
        an RGB(A) image -- TODO confirm).
    labels : array-like
        One label per sample, aligned with the first axis of `images`.
    classes : sequence, optional
        Tick labels for the columns, in the order of the sorted unique
        labels. Defaults to the unique labels themselves.
    nExamples : int
        Maximum number of thumbnails per class. Classes with fewer samples
        leave the remaining cells black.
    thumbSize : tuple of int
        (rows, cols) of each thumbnail.
    title : str
        Figure title.
    """
    # build example canvass
    from skimage.transform import resize

    # positional matching below needs an array (a plain list compared with
    # `==` would yield a single boolean instead of a mask)
    labels = np.asarray(labels)

    clustLabels = np.unique(labels)
    nClusters = clustLabels.size
    thumb_h, thumb_w = thumbSize

    # The canvas is laid out directly in display orientation (rows = examples,
    # columns = classes). Building it the other way round and swapping the
    # axes afterwards would also transpose every single thumbnail.
    canvas = np.zeros((nExamples*thumb_h, nClusters*thumb_w, 3))
    for i, c in enumerate(clustLabels):
        cur_class_samples = np.where(labels == c)[0]
        idx = np.random.choice(cur_class_samples, replace=False,
                               size=min([nExamples, len(cur_class_samples)]))
        for j, sample in enumerate(idx):
            img = resize(images[sample, :, :, :3], thumbSize)
            canvas[j*thumb_h:(j+1)*thumb_h, i*thumb_w:(i+1)*thumb_w] = img

    # plot examples of each class
    fig, ax = plt.subplots(1, figsize=(12,10))
    print(canvas.shape)
    ax.imshow(canvas)
    ax.set_title(title, fontsize=18)
    ax.set_ylabel("-- examples --", fontsize=16)
    ax.set_xlabel("-- land classes --", fontsize=16)
    # one tick per class, centred on its column; no tick labels on the y axis
    if classes is None: classes = clustLabels
    ax.set_xticks([thumb_w*(0.5 + x) for x in range(nClusters)])
    ax.set_xticklabels(classes, fontsize=16, rotation=90)
    ax.set_yticklabels([])
    # adjust the layout once everything has been drawn
    plt.tight_layout()
    plt.show()

In [None]:
img_dir = "/home/data/urban-atlas/extracted-data/berlin/img/"

# Images are stored as <city>/img/<class>/<file>.jpg
files = np.array(glob.glob(os.path.join(img_dir, "*", "*.jpg")))

# Building the frame from a dict keeps the "filename" column even when no file matched.
files_df = pd.DataFrame({"filename": files})

# Split every path into its components once; counted from the end these are
# [-1] file name, [-2] class folder, [-3] the "img" folder, [-4] city folder.
path_parts = files_df['filename'].apply(lambda x: os.path.normpath(x).split(os.sep))
files_df['class'] = path_parts.apply(lambda p: p[-2])
files_df['city'] = path_parts.apply(lambda p: p[-4])

files_df.head()

In [None]:
# Replace every class label by its entry in `consolidate_classes` when it has
# one (labels without an entry are kept unchanged), then keep only the rows
# whose class is listed in `include_classes`.
# NOTE(review): neither `consolidate_classes` nor `include_classes` is defined
# in the visible part of this notebook -- presumably a mapping and a collection
# of class names provided by another cell or module. Confirm they exist on a
# fresh kernel before running this cell.
files_df['class'] = files_df['class'].apply(
    lambda x: consolidate_classes[x] if x in consolidate_classes else x)
files_df = files_df[files_df['class'].isin(include_classes)]

In [None]:
n_samples = 10

# Draw up to n_samples files per class. A class with fewer files contributes
# all of them instead of making DataFrame.sample raise.
sel_df = files_df.groupby("class").apply(lambda x: x.sample(min(n_samples, len(x))))

from skimage.io import imread
# stack the selected images into one array (requires all images to share a shape)
images = np.array(sel_df['filename'].apply(imread).values.tolist())
# NOTE(review): `split_str` is not defined in the visible part of this
# notebook -- confirm where it comes from.
labels = sel_df['class'].apply(split_str)

In [None]:
# up to 10 thumbnails per class
plot_examples(
    images,
    labels,
    nExamples=10,
    thumbSize=(50, 50),
    title="Urban Atlas Dataset: Examples"
)

In [None]:
# up to 5 thumbnails per class
plot_examples(
    images,
    labels,
    nExamples=5,
    thumbSize=(50, 50),
    title="Urban Atlas Dataset: Examples"
)