In [1]:
import numpy as np
import pandas as pd

import sys, os, time
import glob

from matplotlib import pyplot as plt
%matplotlib inline

# these magics ensure that external modules that are modified are also automatically reloaded
%load_ext autoreload
%autoreload 2



In [52]:
import gzip
import cPickle as pickle

In [4]:
# satellite imagery modules

import sys
sys.path.append("../../satellite-image-tools/satimage-processing/")
import satimg 

In [5]:
# Directory that receives every file this notebook writes.
# It is created on first use so later cells can write into it directly.

outPath = "/home/ubuntu/data/2500-cities/"

if not os.path.exists(outPath):
    os.makedirs(outPath)

In [6]:
# Reference (lat, lon) pair kept around for quick manual tests.
# It is not referenced by the other cells visible in this section.

locSF = (37.7749, -122.4194) # San Francisco lat/lon

# Set up sampling locations

#### Load locations of interest: the top 2500 cities in Africa by population

In [7]:
# Tab-separated table of towns: one row per town with its coordinates
# (POINT_X = longitude, POINT_Y = latitude) and a TARGET_FID identifier.
cityInfo = pd.read_csv(
    "/home/ubuntu/data/point_coordinates_town.txt",
    sep="\t",
)

# Force the identifier to integer so it can be used as a clean key later on.
cityInfo['TARGET_FID'] = cityInfo['TARGET_FID'].astype(int)

cityInfo.head()

Unnamed: 0,TARGET_FID,POINT_X,POINT_Y,ISO_CC,Name,Population,DFCC_Descr
0,0,16.427135,-28.552621,,Oranjemund,"10,000 to 50,000",Town
1,1,14.495265,-22.971726,,Walvis Bay,"50,000 to 100,000",Town
2,2,14.530498,-22.678504,,Swakopmund,"10,000 to 50,000",Administrative Division Center (major)
3,3,16.645626,-20.47117,,Otjiwarongo,"10,000 to 50,000",Administrative Division Center (major)
4,4,15.994973,-17.923148,,Ondangwa,"10,000 to 50,000",Town


#### Sample 100 locations within a 400km x 400km bounding box around loc=(lat,lon)

In [8]:
# Fix NumPy's global RNG seed so that re-running the sampling below is repeatable.
# NOTE(review): this only helps if satimg.generate_locations_around_latlon draws
# from np.random -- confirm in the satimg module.
np.random.seed(0)

In [13]:
def _samples_around_city(fid, lat, lon):
    """Return the sampled points for one city as (fid, lat, lon, sample lat, sample lon) tuples.

    The points come from satimg.generate_locations_around_latlon, called with
    W=400 and nSamples=100 around the city's (lat, lon).
    """
    samples = satimg.generate_locations_around_latlon((lat, lon), W=400, nSamples=100)
    return [(fid, lat, lon, pt[0], pt[1]) for pt in samples]


# One list of sample tuples per city, in the same order as the rows of cityInfo.
# POINT_Y is passed as latitude and POINT_X as longitude.
locations = list(map(_samples_around_city,
                     cityInfo['TARGET_FID'],
                     cityInfo['POINT_Y'],
                     cityInfo['POINT_X']))

In [14]:
# Stack the per-city sample lists into a 3-D array (city, sample, field) and
# then collapse the first two axes so that every sample becomes its own row.
locations = np.array(locations)
n_cities, n_per_city, n_fields = locations.shape
locations = locations.reshape((n_cities * n_per_city, n_fields))

In [15]:
# Wrap the flat sample array in a labelled table.
locations = pd.DataFrame(
    locations,
    columns=["sample ID", "city lat", "city lon", "image lat", "image lon"],
)

# "sample ID" carries the city's TARGET_FID, so the same value repeats for
# every sample drawn around that city; it is stored as int and used as index.
locations['sample ID'] = locations['sample ID'].astype(int)
locations = locations.set_index('sample ID')

In [16]:
# Preview the first rows of the per-sample table built above.
locations.head()

Unnamed: 0_level_0,city lat,city lon,image lat,image lon
sample ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,-28.552621,16.427135,-29.114299,17.077197
0,-28.552621,16.427135,-27.064779,14.647832
0,-28.552621,16.427135,-27.69416,17.913133
0,-28.552621,16.427135,-27.944938,16.794878
0,-28.552621,16.427135,-30.096896,14.888792


#### Filter out locations that are not on land

In [17]:
from IPython import parallel

# Connect to the running IPython parallel cluster.
rc = parallel.Client()

# Two views onto the same engines:
#   all_engines - direct view over every engine
#   lbv         - load-balanced view, used below to spread the land checks
all_engines = rc[:]
lbv = rc.load_balanced_view()

# Number of engines available.
print(len(all_engines))

8


In [18]:
%%px

import ogr
from IPython import embed
import sys
from IPython.display import display, clear_output

# Open the land-polygon shapefile on every engine. The objects created here
# (lyr_in, ctran) are looked up by check() when it runs on the engines.
drv = ogr.GetDriverByName('ESRI Shapefile') #We will load a shape file
shp_path = "/home/ubuntu/data/land-polygons-complete-4326/land_polygons.shp"
ds_in = drv.Open(shp_path)    #Get the contents of the shape file
if ds_in is None:
    # OGR signals a failed open by returning None rather than raising, which
    # would otherwise only surface as an AttributeError on the next line.
    raise IOError("could not open shapefile: " + shp_path)
lyr_in = ds_in.GetLayer(0)    #Get the shape file's first layer

#Put the title of the field you are interested in here
#(idx_reg is not referenced by the other cells shown in this section)
idx_reg = lyr_in.GetLayerDefn().GetFieldIndex("P_Loc_Nm")

#If the latitude/longitude we're going to use is not in the projection
#of the shapefile, then we will get erroneous results.
#The following assumes that the latitude longitude is in WGS84
#This is identified by the number "4326", as in "EPSG:4326"
#We will create a transformation between this and the shapefile's
#projection, whatever it may be
geo_ref = lyr_in.GetSpatialRef()
point_ref=ogr.osr.SpatialReference()
point_ref.ImportFromEPSG(4326)
ctran=ogr.osr.CoordinateTransformation(point_ref,geo_ref)


In [19]:
def check(latlon):
    """Tell whether a sampled point overlaps any feature of the land layer.

    Parameters
    ----------
    latlon : tuple
        (index, latitude, longitude). The index is only printed as a
        progress marker.

    Returns
    -------
    bool
        True when at least one feature of ``lyr_in`` passes a spatial filter
        set to the point, False otherwise.

    Notes
    -----
    Relies on ``sys``, ``clear_output``, ``ogr``, ``ctran`` and ``lyr_in``
    existing as globals wherever the function is executed. Calling it
    replaces the spatial filter currently set on ``lyr_in``.
    """
    idx, lat_in, lon_in = latlon

    # Progress marker: replace the previous output with the current index.
    sys.stdout.flush()
    clear_output(wait=True)
    print(idx)

    # Bring the point into the layer's own spatial reference.
    # NOTE(review): passes (lon, lat) order to TransformPoint -- confirm this
    # matches the axis order expected by the installed GDAL version.
    x, y, _ = ctran.TransformPoint(lon_in, lat_in)

    # Build an OGR point geometry at the transformed coordinates.
    point = ogr.Geometry(ogr.wkbPoint)
    point.SetPoint_2D(0, x, y)

    # Restrict the layer to the features selected by the point filter and
    # count how many remain.
    lyr_in.SetSpatialFilter(point)
    n_hits = len(lyr_in)
    return n_hits > 0

In [20]:
# Work items for check(): (running row number, image latitude, image longitude),
# one per row of `locations`.
locs = list(zip(range(len(locations)),
                locations['image lat'],
                locations['image lon']))

In [21]:
# Submit one check() call per entry of locs through the load-balanced view.
# The call is asynchronous: amr is a handle used below to read progress
# output and, once finished, the results.
amr = lbv.map_async(check, locs)

In [50]:
# Progress probe: show the last non-empty entry of the captured stdout.
# check() prints the index of the item it is processing, so this gives a
# rough idea of how far the job has advanced.
[s for s in amr.stdout if len(s)>0][-1]

u'249599\n'

In [53]:
# Persist the raw per-sample land flags so the expensive parallel check does
# not have to be repeated. os.path.join keeps the path correct whether or not
# outPath ends with a separator (plain concatenation relied on the trailing "/").
with gzip.open(os.path.join(outPath, "location_is_on_land.pickle.gz"), "wb") as f:
    pickle.dump(amr.result(), f)

In [60]:
# Collect the results of the parallel land check into an array
# (one entry per item submitted in locs).
isOnLand = np.array(amr.result())

# How many samples were flagged as being on land.
print(isOnLand.sum())

# Attach the flag to the sample table as a new column and preview it.
locations['is on land'] = isOnLand

locations.head()

212154


Unnamed: 0_level_0,city lat,city lon,image lat,image lon,is on land
sample ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,-28.552621,16.427135,-29.114299,17.077197,True
0,-28.552621,16.427135,-27.064779,14.647832,False
0,-28.552621,16.427135,-27.69416,17.913133,True
0,-28.552621,16.427135,-27.944938,16.794878,True
0,-28.552621,16.427135,-30.096896,14.888792,False


In [62]:
# Keep only the samples flagged as on land and write them to CSV.
# os.path.join avoids the doubled separator that outPath + "/..." produces
# when outPath already ends with "/".
locations[locations['is on land']].to_csv(os.path.join(outPath, "sample-locations.csv"))