# Define system environment

In [1]:
#Add all dependencies to PYTHON_PATH
import sys
sys.path.append("/usr/lib/spark/python")
sys.path.append("/usr/lib/spark/python/lib/py4j-0.10.4-src.zip")
sys.path.append("/usr/lib/python3/dist-packages")
sys.path.append("/data/local/jupyterhub/modules/python")

#Define environment variables
import os
os.environ["HADOOP_CONF_DIR"] = "/etc/hadoop/conf"
os.environ["PYSPARK_PYTHON"] = "python3"
os.environ["PYSPARK_DRIVER_PYTHON"] = "ipython"

#Load PySpark to connect to a Spark cluster
from pyspark import SparkConf, SparkContext

# User imports

In [20]:
import shapely
from shapely import wkt
from shapely.errors import WKTReadingError

# Connect to Spark

In [4]:
appName = "feature_extraction"
masterURL="spark://ecolidar0.eecolidar-nlesc.surf-hosted.nl:7077"

#A context needs to be created if it does not already exist
try:
    sc.stop()
except NameError:
    print("A  new Spark Context will be created.")
    
sc = SparkContext(conf = SparkConf().setAppName(appName).setMaster(masterURL))

A  new Spark Context will be created.


# Configuration

We have now the notebook connected to Spark and to laserchicken. The user should set here the parameters, such as area of interest and list of features, and then request a run of the **feature extraction** phase.

In [27]:
#Data location
data_location = "/user/hadoop/"

#Define the LiDAR data set
lidar_data_set = data_location + "ahn3/"

#Define the area of interest, it can be a complex polygon or a bounding box
#area_interest_polygon_wkt = "POLYGON()"
area_interest_polygon_wkt = "POLYGON(( 243590.0 572110.0, 243640.0 572160.0, 243700.0 572110.0, 243640.0 572060.0, 243590.0 572110.0 ))"


#Choose classification metric < (H = Height) | (C = Cover) | (Vv = Vertical variability) | (Hv = Horizontal variability) >
classification_metric = "Hs"

#Choose object structure < (O = Object) | (SL = Single Layer) | (ML = Multi Layer) >
object_structure = "SL"

#If you chose SL for object structure, the type of single layer < (C = Canopy) | (U = Understory) | (O = Other layer) >
type_of_single_layer = "C"

#Choose the neighborhood structure < (G = Grid Cell) | (V = Voxel) | (S = Sphere) | (C = Cylinder) > and its radius.
neighborhood_structure = "G"
neighborhood_radious = 1.0

# Verification

In this part of the notebook we verify all configuration done by the user. Once it is verified, we will set the last parameters before running feature extraction.

In [28]:
#Verify Polygon
try:
    area_interest_polygon = wkt.loads(area_interest_polygon_wkt)
except WKTReadingError as err:
    print("WKTReadingError: {0}\n".format(err))
else:
    if (isinstance(area_interest_polygon, shapely.geometry.polygon.Polygon)):
        print("The Polygon for the area of interest is a valid one.\n")
    else:
        print("ERROR: The polygon for the area of interest is invalid.\n")
    
#Verify classification metric
if classification_metric in [ "H", "C", "Vv", "Hv"]:
    print("Classification metric is correct.\n")
else:
    print("ERROR: classification metric is incorrect, it should be: \n < (H = Height) | (C = Cover) | (Vv = Vertical variability) | (Hv = Horizontal variability) >.\n")

#Verify object structure
if object_structure in [ "O", "SL", "ML" ]:
    print("Object structure is correct.\n")
else:
    print("ERROR: object structure is incorrect, it should be: \n < (O = Object) | (SL = Single Layer) | (ML = Multi Layer) >.\n")

#Verify object structure
if object_structure == "SL":
    if object_structure in [ "C", "U", "O" ]:
        print("Type of single layer is correct.\n")
    else:
        print("ERROR: type of single layer is incorrect, it should be: \n < (C = Canopy) | (U = Understory) | (O = Other layer) >.\n")

#Verify neighborhood structure
if neighborhood_structure in [ "G", "V", "S", "C" ]:
    print("Neighborhood structure is correct.\n")
    if (neighborhood_radious <= 0.0):
        print("ERROR: the radious for neighborhood structure should be greather than 0.\n")
else:
    print("ERROR: neighborhood structure is incorrect, it should be: \n < (G = Grid Cell) | (V = Voxel) | (S = Sphere) | (C = Cylinder) >.\n")

The Polygon for the area of interest is a valid one.

ERROR: classification metric is incorrect, it should be: 
 < (H = Height) | (C = Cover) | (Vv = Vertical variability) | (Hv = Horizontal variability) >.

Object structure is correct.

ERROR: type of single layer is incorrect, it should be: 
 < (C = Canopy) | (U = Understory) | (O = Other layer) >.

Neighborhood structure is correct.



# Support functions

# Read Data

## Read LiDAR data

## Extract area of interest

# Feature calculation

# Save result