Skip to content

Commit

Permalink
Merge pull request #15 from WFP-VAM/jb_dev
Browse files Browse the repository at this point in the history
changes in polygon json and standardization of all features at the end
  • Loading branch information
lorenzori authored Jun 6, 2018
2 parents 62f2ec8 + 6ff36b9 commit 679e053
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 41 deletions.
25 changes: 4 additions & 21 deletions Src/osm.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,11 @@

class OSM_extractor:

def __init__(self, df):
self.minlat, self.maxlat, self.minlon, self.maxlon = self.__boundaries(df)

def __boundaries(self, df, buffer=0.05, lat_col="gpsLatitude", lon_col="gpsLongitude"):
'''
Get GPS coordinates of the boundary box of a DataFrame and add some buffer around it.
'''
from numpy import round
minlat = df["gpsLatitude"].min()
maxlat = df["gpsLatitude"].max()
minlon = df["gpsLongitude"].min()
maxlon = df["gpsLongitude"].max()
from utils import df_boundaries

lat_buffer = (maxlat - minlat) * buffer
lon_buffer = (maxlon - minlon) * buffer

minlat = round(minlat - lat_buffer, 5)
maxlat = round(maxlat + lat_buffer, 5)
minlon = round(minlon - lon_buffer, 5)
maxlon = round(maxlon + lon_buffer, 5)
class OSM_extractor:

return minlat, maxlat, minlon, maxlon
def __init__(self, df):
self.minlat, self.maxlat, self.minlon, self.maxlon = df_boundaries(df)

def download(self, tag_key='amenity', tag_value='school'):
'''
Expand Down
3 changes: 1 addition & 2 deletions Src/sentinel_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def gee_sentinel_raster(start_date, end_date, large_area, agg="max", ind="NDVI")
sentinel = ee.ImageCollection('COPERNICUS/S2') \
.filterDate(start_date, end_date) \
.filterBounds(large_area) \
.filterMetadata('CLOUDY_PIXEL_PERCENTAGE', 'less_than', 40) \
.select(['B3', 'B4', 'B8', 'B11'])

def addIndices(image):
Expand All @@ -66,7 +65,7 @@ def addIndices(image):

sentinel_w_indices = sentinel.map(addIndices)

maxraster = sentinel_w_indices.select(ind).reduce(agg)
maxraster = sentinel_w_indices.select(ind).reduce(agg).clip(large_area)
return maxraster


Expand Down
38 changes: 32 additions & 6 deletions Src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,40 @@ def aggregate(input_rst, output_rst, scale):

def squaretogeojson(lon, lat, d):
    """
    Build a GeoJSON square polygon of side ``d`` centred on ``(lon, lat)``.

    :param lon: longitude of the centre, in decimal degrees.
    :param lat: latitude of the centre, in decimal degrees.
    :param d: side length of the square, in the same length unit as
        ``r_earth`` below (presumably metres -- TODO confirm with callers).
    :return: the polygon produced by ``points_to_polygon`` for the
        (minlon, minlat, maxlon, maxlat) bounding box.
    """
    from math import pi, cos

    r_earth = 6378000
    # Half of the side expressed as an angle (degrees) along a great circle.
    half_side_deg = ((d / 2) / r_earth) * (180 / pi)

    # A degree of latitude has a (nearly) constant ground length, so the
    # latitude offset needs no correction. A degree of longitude shrinks with
    # cos(latitude), so the longitude offset is the one that must be widened.
    # The previous code divided the latitude offset by cos(longitude) instead.
    lat_offset = half_side_deg
    lon_offset = half_side_deg / cos(lat * pi / 180)

    minlon = lon - lon_offset
    maxlon = lon + lon_offset
    minlat = lat - lat_offset
    maxlat = lat + lat_offset

    return points_to_polygon(minlon, minlat, maxlon, maxlat)


def df_boundaries(df, buffer=0.05, lat_col="gpsLatitude", lon_col="gpsLongitude"):
    '''
    Get GPS coordinates of the boundary box of a DataFrame and add some buffer around it.

    :param df: DataFrame holding one latitude and one longitude column.
    :param buffer: fraction of the latitude / longitude extent added on each
        side of the box (0.05 widens each side by 5% of the extent).
    :param lat_col: name of the latitude column in ``df``.
    :param lon_col: name of the longitude column in ``df``.
    :return: tuple ``(minlat, maxlat, minlon, maxlon)``, each rounded to
        5 decimal places.
    '''
    import numpy as np

    # Honour the column-name parameters; they used to be ignored in favour of
    # the hard-coded "gpsLatitude" / "gpsLongitude" names.
    latitudes = df[lat_col]
    longitudes = df[lon_col]

    minlat, maxlat = latitudes.min(), latitudes.max()
    minlon, maxlon = longitudes.min(), longitudes.max()

    lat_buffer = (maxlat - minlat) * buffer
    lon_buffer = (maxlon - minlon) * buffer

    return (
        np.round(minlat - lat_buffer, 5),
        np.round(maxlat + lat_buffer, 5),
        np.round(minlon - lon_buffer, 5),
        np.round(maxlon + lon_buffer, 5),
    )


def points_to_polygon(minlon, minlat, maxlon, maxlat):
    """
    Turn a bounding box into a rectangular ``geojson.Polygon``.

    The single ring lists the corners as (lon, lat) pairs in the order
    lower-left, lower-right, upper-right, upper-left, and repeats the
    lower-left corner at the end so the ring is closed.
    """
    from geojson import Polygon

    lower_left = (minlon, minlat)
    lower_right = (maxlon, minlat)
    upper_right = (maxlon, maxlat)
    upper_left = (minlon, maxlat)

    ring = [lower_left, lower_right, upper_right, upper_left, lower_left]
    return Polygon([ring])


Expand Down
27 changes: 15 additions & 12 deletions scripts/master.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from nn_extractor import NNExtractor
from osm import OSM_extractor
from sentinel_utils import gee_sentinel_raster, gee_raster_mean
from utils import squaretogeojson, date_range
from utils import squaretogeojson, date_range, df_boundaries, points_to_polygon


def run(id):
Expand Down Expand Up @@ -79,6 +79,10 @@ def run(id):

data["i"], data["j"] = list_i, list_j

# Get Polygon Geojson of the boundaries
minlat, maxlat, minlon, maxlon = df_boundaries(data, buffer=0.05, lat_col="gpsLatitude", lon_col="gpsLongitude")
area = points_to_polygon(minlon, minlat, maxlon, maxlat)

# --------------------------- #
# GROUP CLUSTERS IN SAME TILE #
# --------------------------- #
Expand Down Expand Up @@ -140,17 +144,11 @@ def wavg(g, df, weight_series):
# --------------- #
# add nightlights #
# --------------- #
from geojson import Polygon
from nightlights import Nightlights

area = Polygon([[(max(data.gpsLongitude), max(data.gpsLatitude)),
(max(data.gpsLongitude), min(data.gpsLatitude)),
(min(data.gpsLongitude), min(data.gpsLatitude)),
(min(data.gpsLongitude), max(data.gpsLatitude))]])
from nightlights import Nightlights

NGT = Nightlights(area, '../Data/Geofiles/nightlights/', nightlights_date_start, nightlights_date_end)
data['nightlights'] = NGT.nightlights_values(data)
data['nightlights'] = (data['nightlights'] - np.mean(data['nightlights'])) / np.std(data['nightlights'])

# ---------------- #
# add OSM features #
Expand All @@ -167,7 +165,6 @@ def wavg(g, df, weight_series):
dist = data.apply(OSM.distance_to_nearest, args=(osm_tree,), axis=1)
#density = data.apply(OSM.density, args=(osm_gdf["value"],), axis=1)
data['distance_{}'.format(value)] = dist.apply(lambda x: np.log(0.0001 + x))
data['distance_{}'.format(value)] = (data['distance_{}'.format(value)] - np.mean(data['distance_{}'.format(value)]))/np.std(data['distance_{}'.format(value)])
osm_features.append('distance_{}'.format(value))
#data['density_{}'.format(value)] = density.apply(lambda x: np.log(0.0001 + x))
#osm_features.append('density_{}'.format(value))
Expand All @@ -178,8 +175,8 @@ def wavg(g, df, weight_series):
# TODO: Use efficiently maxNDBImaxNDVImaxNDWI_sum_todf
print('INFO: getting NDBI, NDVI, NDWI ...')

start_date = nightlights_date_start
end_date = nightlights_date_end
start_date = "2017-01-01" # TODO: Add to config, be careful no image before 2015
end_date = "2018-01-01"
for i in date_range(start_date, end_date, 3):
print('INFO: getting max NDVI between dates: {}'.format(i))
gee_ndvi_max_raster = gee_sentinel_raster(i[0], i[1], area, ind="NDVI")
Expand All @@ -197,13 +194,19 @@ def wavg(g, df, weight_series):
# save features #
# --------------- #

features_list = list(set(data.columns) - set(data_cols) - set(['i', 'j']))

# Standardize Features (0 mean and 1 std)
data[features_list] = (data[features_list] - data[features_list].mean()) / data[features_list].std()

data.to_csv("../Data/Features/features_all_id_{}_evaluation.csv".format(id), index=False)

# --------------- #
# model indicator #
# --------------- #
data = data.sample(frac=1, random_state=1783).reset_index(drop=True) # shuffle data
data_features = data[list(set(data.columns) - set(data_cols) - set(['i', 'j']))] # take only the CNN features

data_features = data[features_list]

# if take log of indicator
if config['log'][0]:
Expand Down

0 comments on commit 679e053

Please sign in to comment.