In [None]:
import json
import geopyspark as gps
import fiona
import pyproj

from functools import partial
from pyspark import SparkContext
from geopyspark_netcdf.datasets import Gddp
from shapely.geometry import shape, MultiPoint
from shapely.ops import transform
from geonotebook.wrappers import TMSRasterData, VectorData

In [None]:
# Build the Spark configuration through GeoPySpark and start the context.
conf = gps.geopyspark_conf(appName="120+ Degrees")
conf.set('spark.ui.enabled', True)
# getOrCreate() reuses an already-running SparkContext, so re-running this
# cell in a live kernel does not fail with "Cannot run multiple SparkContexts
# at once". NOTE: when a context already exists it is returned as-is and
# `conf` is not re-applied.
sc = SparkContext.getOrCreate(conf=conf)

# Reading in the Geometries

In [None]:
# Centre the map widget `M` before any layers are added to it.
# The three positional arguments are presumably (longitude, latitude, zoom)
# -- confirm against the geonotebook API.
# `M` is not defined in this notebook; it is presumably injected by the
# geonotebook kernel.
M.set_center(-112.335, 33.497, 10)

In [None]:
# Load the US outline: the geometry of the first feature as a Shapely shape,
# plus the CRS identifier declared by the file.
# A context manager is used so the file handle is closed as soon as the JSON
# has been parsed instead of being left for the garbage collector.
with open("/tmp/cont_usa.json") as us_file:
    us_geojson = json.load(us_file)
us = shape(us_geojson['features'][0]['geometry'])

# fiona is used only to read the CRS 'init' string of the same file.
with fiona.open("/tmp/cont_usa.json") as source:
    us_crs = source.crs['init']

In [None]:
# Read the airports file once: record its CRS 'init' string and gather the
# geometry of every feature into a single MultiPoint.
with fiona.open("/tmp/airports.geojson") as source:
    airports_crs = source.crs['init']
    airports = MultiPoint([shape(feature['geometry']) for feature in source])

In [None]:
# Reproject each Shapely geometry to EPSG:3857 so it can be
# displayed on the map

def create_partial_reprojection_func(crs):
    """Build a coordinate-reprojection callable that targets EPSG:3857.

    Args:
        crs: Source CRS identifier, forwarded to ``pyproj.Proj(init=...)``
            (for example the ``'init'`` value read from a fiona source).

    Returns:
        A ``functools.partial`` of ``pyproj.transform`` with the source and
        destination projections already bound, so the remaining arguments
        are the coordinates to transform.
    """
    source_proj = pyproj.Proj(init=crs)
    web_mercator_proj = pyproj.Proj(init='epsg:3857')
    return partial(pyproj.transform, source_proj, web_mercator_proj)

In [None]:
# Reproject the airports and the US outline to EPSG:3857.
# The airport reprojection callable is built once and reused for every point
# rather than being rebuilt per point.
reproject_airport = create_partial_reprojection_func(airports_crs)
# Iterate through `.geoms`: iterating a multi-part geometry directly is
# deprecated in Shapely 1.8 and raises TypeError in Shapely 2.0.
reprojected_aps = [transform(reproject_airport, ap) for ap in airports.geoms]
reprojected_us = transform(create_partial_reprojection_func(us_crs), us)

# Reading in the NetCDF Data

In [None]:
# NEX-GDDP `tasmax` NetCDF files (BCSD, rcp85, GFDL-CM3), one file per year.
# The three URIs differ only by year, so they are built from one prefix and
# one file-name template to keep them consistent.
# To read local copies instead, point GDDP_URI_PREFIX at the directory that
# holds the downloaded files (e.g. "/tmp/").
GDDP_URI_PREFIX = 's3://nasanex/NEX-GDDP/BCSD/rcp85/day/atmos/tasmax/r1i1p1/v1.0/'
GDDP_FILE_TEMPLATE = 'tasmax_day_BCSD_rcp85_r1i1p1_GFDL-CM3_{year}.nc'

uri_2017, uri_2040, uri_2075 = [
    GDDP_URI_PREFIX + GDDP_FILE_TEMPLATE.format(year=year)
    for year in (2017, 2040, 2075)
]

In [None]:
# Day indices 0 through 364 (365 values), handed to Gddp.rdd_of_rasters for
# each yearly file. Presumably zero-based offsets into the file's time axis
# -- confirm against geopyspark_netcdf.
days = range(365)

In [None]:
# Read one layer per yearly file, passing the bounding box of the US outline
# (presumably the extent to read -- confirm) and the day indices in `days`.
layer_2017, layer_2040, layer_2075 = [
    Gddp.rdd_of_rasters(uri, us.bounds, days)
    for uri in (uri_2017, uri_2040, uri_2075)
]

# Reclassifying the NetCDF Data

In [None]:
def reclassify_layer(layer, threshold=322.039):
    """Reclassify a layer into two classes split at ``threshold``.

    The break map sent to ``layer.reclassify`` is
    ``{layer_min: 0.0, threshold: 1.0}`` with the
    ``GREATER_THAN_OR_EQUAL_TO`` strategy. The intent appears to be that
    cells at or above ``threshold`` become ``1.0`` and the remaining data
    cells become ``0.0`` -- confirm the exact boundary semantics against the
    GeoPySpark ``reclassify`` documentation.

    Args:
        layer: Object exposing ``get_min_max()``, ``reclassify(...)`` and
            ``layer_metadata.no_data_value``.
        threshold: Break value for the ``1.0`` class. The default, 322.039,
            is approximately 120 degrees Fahrenheit expressed in Kelvin,
            matching the notebook's "120+ Degrees" app name. This assumes
            the layer values are in Kelvin -- confirm against the data.

    Returns:
        Whatever ``layer.reclassify`` returns for the arguments above.
    """
    layer_min, _ = layer.get_min_max()

    # Arguments after the break map are kept positional, exactly as before,
    # so this does not depend on the keyword names of `reclassify`.
    return layer.reclassify(
        {layer_min: 0.0, threshold: 1.0},
        float,
        gps.ClassificationStrategy.GREATER_THAN_OR_EQUAL_TO,
        layer.layer_metadata.no_data_value)

In [None]:
# Apply the same reclassification to each of the three yearly layers.
reclassified_layer_2017, reclassified_layer_2040, reclassified_layer_2075 = [
    reclassify_layer(yearly_layer)
    for yearly_layer in (layer_2017, layer_2040, layer_2075)
]

# Aggregating and Formatting the Data

In [None]:
# Combine the three reclassified yearly layers into one layer via gps.union.
unioned_layer = gps.union(reclassified_layer_2017,
                          reclassified_layer_2040,
                          reclassified_layer_2075)

In [None]:
# Convert the unioned layer to a spatial layer. merge_duplicates=False is
# passed explicitly; presumably this keeps tiles that end up with the same
# spatial key separate so they can be summed in the next step -- confirm
# against the GeoPySpark documentation.
spatial_tiled_layer = unioned_layer.to_spatial_layer(merge_duplicates=False)

In [None]:
# Aggregate the spatial layer cell by cell with gps.Operation.SUM, then
# repartition the result into 32 partitions.
# Since reclassify_layer maps cells to 0.0 / 1.0, the sum presumably counts,
# per cell, the days at or above the threshold across all three years -- confirm.
aggregated_tiled_layer = spatial_tiled_layer.aggregate_by_cell(gps.Operation.SUM).repartition(32)

In [None]:
# Retile to a global layout at zoom 8 with target_crs=3857 (the same EPSG
# code the geometries were reprojected to), then repartition into 32 partitions.
retiled_layer = aggregated_tiled_layer.tile_to_layout(gps.GlobalLayout(zoom=8),
                                                      target_crs=3857
                                                     ).repartition(32)

In [None]:
# Mask the retiled layer with the reprojected US outline geometry.
masked = retiled_layer.mask(reprojected_us)

# Displaying the Results

In [None]:
# Build a pyramid from the masked layer and cache it; the histogram and the
# TMS server below are both derived from this object.
pyramid = masked.pyramid().cache()

In [None]:
# Histogram of the pyramid's values; its bins supply the colour breaks below.
hist = pyramid.get_histogram()

In [None]:
# Keep only the bin value from each (value, count) pair of the histogram;
# these values are used as the colour-map breaks.
breaks = [bin_value for bin_value, _bin_count in hist.bin_counts()]

In [None]:
# Build a colour map over the histogram-derived breaks using the 'plasma' colours.
colormap = gps.ColorMap.build(breaks=breaks, colors='plasma')

In [None]:
# Create a TMS server for the pyramid, rendered with the colour map above.
tms_server = gps.TMS.build(pyramid, display=colormap)

In [None]:
# Add the TMS-served raster to the map as a layer named "Hot Days".
M.add_layer(TMSRasterData(tms_server), name="Hot Days")

In [None]:
# Add the airports GeoJSON file to the map as a vector layer named "Airports".
# 0xff0000 is presumably interpreted as an RGB value (red) -- confirm against
# the geonotebook API.
M.add_layer(VectorData("/tmp/airports.geojson"), name="Airports", colors=[0xff0000])

In [None]:
# Remove every layer currently on the map.
# Snapshot the layers first: indexing into the live collection while removing
# from it would shift positions, skipping layers or raising IndexError if the
# collection shrinks during the loop.
layers_to_remove = [M.layers[index] for index in range(len(M.layers))]
for layer in layers_to_remove:
    M.remove_layer(layer)