In [None]:
%matplotlib inline
%load_ext autoreload 
%autoreload 2

# Math Stuff
import ee, scipy.misc, random, os
import numpy as np
from threading import Thread

# GEE stuff
from gee_library import *
ee.Initialize()

# debug stuff
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

def tile_at(center_long_lat, meters, pixels, bands):
    """Fetch one square image tile centered on a GPS point.

    Args:
        center_long_lat: (longitude, latitude) of the tile center.
        meters: requested tile size. It determines the sampling resolution
            (meters / pixels) and is handed to `square_centered_at` as
            `half_distance`.
        pixels: width and height, in pixels, of the returned image.
        bands: list of band names to fetch, e.g. ['R', 'G', 'B'].

    Returns:
        A (pixels, pixels, len(bands)) numpy array with one plane per band,
        stacked in the order given by `bands`.
    """
    # Force true division: under Python 2, int / int truncates, which would
    # silently round the resolution down (to 0 when meters < pixels).
    resolution = float(meters) / pixels

    # Get GPS box
    # NOTE(review): `meters` is passed as `half_distance`, which would make
    # the box 2*meters wide if the helper treats it as a half-width --
    # confirm against gee_library.square_centered_at.
    tile_bounds = square_centered_at(
        point=center_long_lat,
        half_distance=meters
    )

    # Restrict the NAIP collection to images overlapping the box.
    # (Optionally also filter by date, e.g.
    #  .filter(ee.Filter.date('2016-01-01', '2017-01-01')).)
    naip_collection = ee.ImageCollection('USDA/NAIP/DOQQ')\
        .filterBounds(tile_bounds)

    # request imagery; the result is indexed by band name below
    tiles = img_at_region(naip_collection, resolution, bands, tile_bounds)
    # resize every band to exactly the requested size
    np_band_array = [scipy.misc.imresize(tiles[b], (pixels, pixels)) for b in bands]
    # and stack the bands along the last axis
    return np.dstack(np_band_array)


def random_tile_within(coords, meters, pixels, bands):
    """Fetch a tile centered on a random point inside a bounding box.

    `coords` has the form ((longmin, latmin), (longmax, latmax)).
    A longitude and a latitude are drawn uniformly and independently from
    those ranges, and the tile around that point is retrieved with
    `tile_at()`. `meters`, `pixels` and `bands` are forwarded unchanged.
    """
    lower_corner, upper_corner = coords
    long_lo, lat_lo = lower_corner
    long_hi, lat_hi = upper_corner

    # Longitude is sampled first, then latitude.
    center = (random.uniform(long_lo, long_hi),
              random.uniform(lat_lo, lat_hi))

    return tile_at(center, meters, pixels, bands)

def save_random_tile_at(coords, meters, pixels, bands, file_name):
    """Download one random tile from within `coords` and save it to `file_name`.

    Errors are reported on stdout instead of being raised, so a failed
    download does not propagate out of this function. `ServerError` is not
    defined in this notebook; it is presumably supplied by the star import
    from `gee_library`.
    """
    try:
        tile = random_tile_within(coords, meters, pixels, bands)
        image = scipy.misc.toimage(tile, cmin=0.0, cmax=-1)
        image.save(file_name)
    except ServerError as err:
        # Server-side failure: report the error and the target file
        print("%s %s" % (err, file_name))
    except Exception as err:
        # Anything else: also report which region was being sampled
        print("%s %s %s" % (err, file_name, coords))
    

In [None]:
import time
from threading import Thread


# Define the bounds of each geographical area.
# Every region is a bounding box in the form unpacked by
# `random_tile_within`: ((longmin, latmin), (longmax, latmax)).
# NOTE(review): `bc` and `jasper` look like boxes in Canada, while
# `tile_at` queries the 'USDA/NAIP/DOQQ' collection -- confirm that
# collection actually has imagery for those two regions.
# Cities
brooklyn= ((-73.965471, 40.614974), (-73.920207, 40.693991))
longisland= ((-73.918610, 40.713007), (-73.840551, 40.775980))
queens= ((-73.821792, 40.749724), (-73.760813, 40.780303))
# chicago1= ((-87.769646, 41.839960), (-87.646641, 41.967390))
# chicago2= ((-87.799684, 41.667181), (-87.603884, 41.843095))
sf1= ((-122.453085, 37.719024), (-122.394265, 37.789567))
sanjose= ((-122.033408, 37.243222), (-121.832452, 37.414198))
sandiego= ((-117.144966, 32.743224), (-117.098079, 32.761772))
sandiego2= ((-117.098079, 32.690284), (-117.021168, 32.743224))
denver= ((-105.127158, 39.569603), (-104.890206, 39.825191))
neworleans= ((-90.229627, 29.967342), (-90.034561, 30.016651))
baltimore= ((-76.651899, 39.287861), (-76.609940, 39.311176))
# Farmland
kentucky= ((-84.479444, 38.110622), (-84.335569, 38.258371))
kansas= ((-97.533941, 38.105647), (-96.815051, 38.366043))
montana= ((-108.994821, 45.875502), (-108.770538, 46.105918))
california= ((-121.789047, 38.223409), (-121.575699, 38.473852))
virginia= ((-76.838359, 36.483186), (-76.609497, 36.684365))
# Mountains
cascades = ((-121.575448, 48.224966), (-120.395554, 48.955637))
bc = ((-126.871931, 50.727633), (-122.548457, 51.421085))
sierranevadas = ((-120.479266, 38.206113), (-120.198767, 39.346931))
yellowstone = ((-110.042831, 43.716602), (-109.379713, 44.437358))
rockies = ((-106.790375, 38.610576), (-106.352140, 39.315902))
rockies2 = ((-107.872873, 37.627164), (-106.433726, 38.047526))
jasper = ((-118.961940, 51.490493), (-116.910005, 52.873976))
yosemite = ((-119.956063, 37.535445), (-119.282902, 38.176927))


# Train/test split is done per region, so no region appears in both sets.
# Note that `denver` is defined above but is not part of either city list.
training_cities = [brooklyn, longisland, queens, sf1, sanjose, sandiego, sandiego2]
test_cities = [baltimore, neworleans]

training_mountains = [cascades, bc, sierranevadas, yellowstone, rockies, rockies2]
test_mountains=[jasper, yosemite]

training_farms=[montana, kansas, kentucky]
test_farms=[virginia, california]


# Define where we will save the data:
# one image directory and one label file each for train and test.
DATA_DIR="./ch3_data_try_two"
TRAIN_IMG_DIR=os.path.join(DATA_DIR, "train_imgs")
TRAIN_IMG_LABELS=os.path.join(DATA_DIR, "train.txt")
TEST_IMG_DIR=os.path.join(DATA_DIR, "test_imgs")
TEST_IMG_LABELS=os.path.join(DATA_DIR, "test.txt")

# make directories if they don't exist
for d in [DATA_DIR, TRAIN_IMG_DIR, TEST_IMG_DIR]:
    if not os.path.exists(d):
        os.makedirs(d)


def download_data(gps_bound_list, number_of_examples, directory, delay=3,
                  meters=200, pixels=50, bands=None):
    """Download random tiles from a list of regions into `directory`.

    One thread is spawned per image, with `delay` seconds between spawns.
    The function returns only after every spawned thread has finished, so
    the directory contents are complete when the caller lists them.

    Args:
        gps_bound_list: list of ((longmin, latmin), (longmax, latmax))
            boxes; one is picked at random for each image.
        number_of_examples: number of images to request; files are named
            '0.png' .. '<n-1>.png'.
        directory: output directory, created if missing.
        delay: seconds to sleep after starting each thread.
        meters: tile size forwarded to `save_random_tile_at` (default 200).
        pixels: image size forwarded to `save_random_tile_at` (default 50).
        bands: band names to fetch; defaults to ['R', 'G', 'B'].
    """
    if bands is None:
        bands = ['R', 'G', 'B']

    if not os.path.exists(directory):
        os.makedirs(directory)

    workers = []
    for i in range(number_of_examples):
        if i % 100 == 0:
            print(i)
        worker = Thread(target=save_random_tile_at,
                        args=(random.choice(gps_bound_list),
                              meters,
                              pixels,
                              list(bands),  # private copy per thread
                              os.path.join(directory, str(i) + '.png')))
        worker.start()
        workers.append(worker)
        time.sleep(delay)

    # Wait for in-flight downloads; otherwise code that lists `directory`
    # right after this call can miss files that are still being written.
    for worker in workers:
        worker.join()
    

#
# Download training data
#
# The training downloads are currently switched off: only the progress
# messages are printed. Uncomment the `download_data` call inside the loop
# to actually fetch the training set.
number_per_training_class = 4000
for _message, _regions, _subdir in (
        ('Grabbing training farms...', training_farms, 'farms'),
        ('Grabbing training cities...', training_cities, 'cities'),
        ('Grabbing training mountains...', training_mountains, 'mountains')):
    print(_message)
    # download_data(_regions, number_per_training_class, os.path.join(TRAIN_IMG_DIR, _subdir))


#
# Download test data
#
number_per_test_class = 500
for _message, _regions, _subdir in (
        ('Grabbing testing farms...', test_farms, 'farms'),
        ('Grabbing testing cities...', test_cities, 'cities'),
        ('Grabbing testing mountains...', test_mountains, 'mountains')):
    print(_message)
    download_data(_regions, number_per_test_class, os.path.join(TEST_IMG_DIR, _subdir))
        


Grabbing training farms...
Grabbing training cities...
Grabbing training mountains...
Grabbing testing farms...
0
Status code 429 ./ch3_data_try_two/test_imgs/farms/2.png
Status code 429 ./ch3_data_try_two/test_imgs/farms/6.png
Status code 429 ./ch3_data_try_two/test_imgs/farms/7.png
EOF occurred in violation of protocol (_ssl.c:661) ./ch3_data_try_two/test_imgs/farms/3.png ((-121.789047, 38.223409), (-121.575699, 38.473852))
Status code 429 ./ch3_data_try_two/test_imgs/farms/12.png
Status code 429 ./ch3_data_try_two/test_imgs/farms/17.png
Status code 429 ./ch3_data_try_two/test_imgs/farms/22.png
Status code 429 ./ch3_data_try_two/test_imgs/farms/25.png
Status code 429 ./ch3_data_try_two/test_imgs/farms/28.png


In [None]:
#
# Populate label files as comma-separated values (CSV)
#

# (sub-directory name, integer class label) for every class
CLASS_LABELS = (('farms', 0), ('cities', 1), ('mountains', 2))


def write_label_file(label_path, img_dir, class_labels=CLASS_LABELS):
    """Write one "<image path>,<class label>" line per downloaded tile.

    Args:
        label_path: CSV file to (over)write.
        img_dir: directory holding one sub-directory per class.
        class_labels: iterable of (sub-directory name, integer label).

    Raises:
        OSError: if one of the class sub-directories does not exist.
    """
    with open(label_path, "w") as label_file:
        for class_dir, klass in class_labels:
            path = os.path.join(img_dir, class_dir)
            # Sort for a reproducible file; os.listdir order is arbitrary.
            for f in sorted(os.listdir(path)):
                # The downloader only writes '<i>.png'; skip stray entries
                # (e.g. .DS_Store) that an image reader could not open.
                if not f.endswith('.png'):
                    continue
                label_file.write(os.path.join(path, f) + "," + str(klass) + "\n")


# Training Data
write_label_file(TRAIN_IMG_LABELS, TRAIN_IMG_DIR)

# Test Data
write_label_file(TEST_IMG_LABELS, TEST_IMG_DIR)

# Machine Learning

In [None]:
import tensorflow as tf

#
# Create Data Reader
#
# NOTE(review): the training loop in this notebook feeds batches built by
# `gee_batch` through placeholders; nothing visible here consumes `features`
# or `labels`, so this queue-based pipeline looks unused -- confirm.

# Create a string-producer tensor to actually input the filename into the system
filename_queue = tf.train.string_input_producer([TRAIN_IMG_LABELS])

# Connect it to a line-reader tensor
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Connect that raw line-reader to a csv decoder.
# NOTE(review): both record defaults are strings, so `labels` is presumably
# decoded as a string tensor rather than an integer class id -- confirm
# before using it for training.
filename, labels = tf.decode_csv(value, record_defaults=[["error_filename"], ["error_label"]])

# Since the CSV has filenames in it, direct those filenames to another filereader (this time reads
# the whole file since they're png files)
file_contents = tf.read_file(filename)

# connect that file reader to a png decoder
png_data = tf.image.decode_png(file_contents, channels=3)
png_data_resized = tf.image.resize_images(png_data, (50, 50)) # Because we need to define an explicit size

# Since pngs are 8-bit integers, let's convert them to floats for the NN
features = tf.cast(png_data_resized, tf.float32)

# Now `features` is an image queue, and `labels` is a label queue!

# Once you start training, don't forget to start up the queue!
# coord = tf.train.Coordinator()
# threads = tf.train.start_queue_runners(coord=coord)
# https://www.tensorflow.org/programmers_guide/reading_data

In [None]:
from scipy import misc

def gee_batch(data_label_file, batch_size, num_classes=3):
    """Build a random batch of flattened images and one-hot labels.

    Each example is drawn independently (with replacement) from the lines
    of `data_label_file`, which must have the form "<png path>,<label>".

    Args:
        data_label_file: path to the CSV label file.
        batch_size: number of examples to draw.
        num_classes: length of each one-hot label vector (default 3).

    Returns:
        (images, labels): `images` stacks the flattened image arrays along
        axis 0; `labels` has shape (batch_size, num_classes).
    """
    # Close the file promptly; blank lines (e.g. a trailing newline pair)
    # are ignored so they cannot be picked as an example.
    with open(data_label_file) as label_file:
        lines = [line for line in label_file.read().splitlines() if line.strip()]

    img_list = []
    label_list = []
    for _ in range(batch_size):
        # Split on the LAST comma so paths containing commas still parse
        png_path, label = random.choice(lines).rsplit(',', 1)
        img_list.append(misc.imread(png_path).flatten())
        one_hot = np.zeros(num_classes)
        one_hot[int(label)] = 1
        label_list.append(one_hot)

    return np.stack(img_list, axis=0), np.stack(label_list, axis=0)

In [None]:
#
# Model
#

# Flattened 50x50 RGB images, one row per example
x = tf.placeholder(tf.float32, [None, 50*50*3])
# One-hot class labels (3 classes)
y_ = tf.placeholder(tf.float32, [None, 3])
# Fed as True for optimisation steps and False for evaluation/prediction;
# it switches dropout on and off below.
training = tf.placeholder(tf.bool)


# Ensure our images are the correct shape
input_layer = tf.reshape(x, [-1, 50, 50, 3])


#
# Convolutions
#
# With 'valid' padding each 3x3 convolution shrinks the spatial size by 2,
# and each 2x2/stride-2 pool halves it: 50 -> 48 -> 24 -> 22 -> 11 -> 9.

# Convolutional Layer #1
conv1 = tf.layers.conv2d(
    inputs=input_layer,
    filters=32,
    kernel_size=[3, 3],
    padding="valid",
    activation=tf.nn.relu)

# Pooling Layer #1
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

# Convolutional Layer #2 and Pooling Layer #2
conv2 = tf.layers.conv2d(
    inputs=pool1,
    filters=64,
    kernel_size=[3, 3],
    padding="valid",
    activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

# Convolutional Layer #3 (no pooling)
conv3 = tf.layers.conv2d(
    inputs=pool2,
    filters=64,
    kernel_size=[3, 3],
    padding="valid",
    activation=tf.nn.relu)


#
# Fully Connected
#

# Dense Layer (conv3 output is 9 x 9 x 64 per example, see above)
conv3_flat = tf.reshape(conv3, [-1, 9 * 9 * 64])
dense = tf.layers.dense(inputs=conv3_flat, units=1024, activation=tf.nn.relu)
# Dropout must follow the `training` placeholder: hard-coding True keeps
# dropout active during evaluation and makes the fed value meaningless.
dropout = tf.layers.dropout(
    inputs=dense,
    rate=0.4,
    training=training)

# Logits Layer: one unnormalised score per class
logits = tf.layers.dense(inputs=dropout, units=3)


#
# Loss
#

# Labels are already fed as one-hot arrays, so no tf.one_hot conversion
# is needed here.
onehot_labels = y_

# Kept as a module-level name; the op that performs training steps is
# created in the training cell.
train_op = None

loss = tf.losses.softmax_cross_entropy(
    onehot_labels=onehot_labels, # ground truth
    logits=logits) # network output




In [None]:
#
# Train
#

TRAIN_STEPS = 1000        # number of optimisation steps
TRAIN_BATCH_SIZE = 9      # examples per training batch
EVAL_BATCH_SIZE = 200     # random test examples used for the final accuracy

# Despite its name, `optimizer` is the op that performs one Adam update
# step on `loss` (the value returned by `minimize`).
optimizer = tf.train.AdamOptimizer(1e-4).minimize(loss)


# Fraction of examples whose arg-max logit equals the arg-max of the
# one-hot label.
correct_prediction = tf.equal(tf.argmax(logits,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initialize tensorflow
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())


# Run TRAIN_STEPS iterations
for i in range(TRAIN_STEPS):
    # make batch
    example_batch, label_batch = gee_batch(TRAIN_IMG_LABELS, TRAIN_BATCH_SIZE)

    # Debug output: accuracy on the current batch, before it is trained on
    if i%100 == 0:
        train_accuracy = accuracy.eval(feed_dict={x : example_batch,
                                                  y_: label_batch,
                                                  training: False})
        print("step %d, training accuracy %g"%(i, train_accuracy))

    # run an iteration
    optimizer.run(feed_dict={x:example_batch,
                             y_: label_batch,
                             training: True})


# Final accuracy on a random sample of held-out test tiles.
# (The previous code referenced `mnist`, which is never defined in this
# notebook and raised a NameError.)
test_examples, test_labels = gee_batch(TEST_IMG_LABELS, EVAL_BATCH_SIZE)
print("test accuracy %g"%accuracy.eval(feed_dict={
    x: test_examples, y_: test_labels, training: False}))


In [None]:
# Prediction

# Class index order: farm (0), city (1), mountain (2)

# Draw a single random test example together with its one-hot label
example_batch, label_batch = gee_batch(TEST_IMG_LABELS, 1)

# Raw network outputs (logits) for that example
predictions = sess.run(
    logits,
    feed_dict={x: example_batch, y_: label_batch, training: False})

answers = label_batch.argmax(axis=1)
predictions_max = predictions.argmax(axis=1)

# Fraction of examples where the predicted class matches the ground truth
print("%s" % (np.count_nonzero(answers == predictions_max) / float(len(predictions_max)),))

# Show the tile that was classified
img = example_batch.reshape((50,50,3))
plt.imshow(img)
plt.show()

print("predictions: %s" % (predictions,))
print("ground truth: %s" % (label_batch,))
