<a href="https://colab.research.google.com/github/jlc2295/DSIRiverProject/blob/master/1_4_Classification_Comparison.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import ee
from IPython.display import Image

# Trigger the authentication flow.
# This is an interactive step: as the recorded cell output shows, it prints an
# authorization URL, prompts for a verification code, and saves a token.
ee.Authenticate()

To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://accounts.google.com/o/oauth2/auth?client_id=517222506229-vsmmajv00ul0bs7p89v5m89qs8eb9359.apps.googleusercontent.com&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fearthengine+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdevstorage.full_control&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&response_type=code&code_challenge=ZffDomER5DeIYZBnch9AxIpapbRIlE4tWGdEGe1BWOY&code_challenge_method=S256

The authorization workflow will generate a code, which you should paste in the box below. 
Enter verification code: 4/2wH_5G2DYT1qFMzsKYqdUd27iWaIQXGj5Do-ZQHV31uErqmVN8KnAXI

Successfully saved authorization token.


In [None]:
# Initialize the Earth Engine client; run this before any other ee.* call in
# the notebook (it follows the ee.Authenticate() cell above).
ee.Initialize()

def get_images(path_list, row_list, satellite, start_date, end_date, max_cloud_percentage):
    """Return the images of a collection that match a path/row/date/cloud query.

    :param path_list: Values accepted for the 'WRS_PATH' image property.
    :param row_list: Values accepted for the 'WRS_ROW' image property.
    :param satellite: Earth Engine image collection id to query.
    :param start_date: Start of the date range passed to filterDate.
    :param end_date: End of the date range passed to filterDate.
    :param max_cloud_percentage: Images are kept only when their 'CLOUD_COVER'
        property is strictly less than this value.
    :return: A Python list of ee.Image objects, one per matching image id.
    """
    collection = ee.ImageCollection(satellite).filterDate(start_date, end_date)
    collection = collection.filter(ee.Filter.inList('WRS_PATH', path_list))
    collection = collection.filter(ee.Filter.inList('WRS_ROW', row_list))
    collection = collection.filter(ee.Filter.lt('CLOUD_COVER', max_cloud_percentage))

    # getInfo() is a blocking call that brings the collection description to
    # the client; only the 'id' of each feature is used to rebuild the images.
    features = collection.getInfo()['features']
    return [ee.Image(feature['id']) for feature in features]


In [None]:
'''
Functions needed to display images and features/featurecollections are given here. 
These functions are all from other sources. Mostly from google tutorials.
'''
!pip install geojson
!pip install pygeoj
import geojson
import json
import pygeoj
import numpy as np

import folium

# Define a method for displaying Earth Engine image tiles to folium map.
def add_ee_layer(self, ee_image_object, vis_params, name):
    """Add an Earth Engine image to this folium map as a tile overlay.

    :param ee_image_object: Object wrapped with ee.Image() before rendering.
    :param vis_params: Visualization parameters passed to getMapId().
    :param name: Layer name given to the folium tile layer.
    """
    map_id = ee.Image(ee_image_object).getMapId(vis_params)
    tile_layer = folium.raster_layers.TileLayer(
        tiles=map_id['tile_fetcher'].url_format,
        attr="Map Data © Google Earth Engine",
        name=name,
        overlay=True,
        control=True,
    )
    tile_layer.add_to(self)


# Add EE drawing method to folium.
folium.Map.add_ee_layer = add_ee_layer

#@title Mapdisplay: Display GEE objects using folium.
def Mapdisplay(center, dicc, Tiles="OpensTreetMap", zoom_start=10):
    '''
    Build a folium map with one layer per entry of ``dicc``.

    :param center: Location passed to folium.Map (latitude and longitude).
    :param dicc: Mapping of layer name -> dictionary. When any value of that
                 dictionary is an ee.image.Image, its "tile_fetcher" entry is
                 used to add a tile layer; otherwise the dictionary itself is
                 handed to folium.GeoJson.
    :param Tiles: Base tile set name passed to folium.Map.
    :param zoom_start: Initial zoom level for the map.
    :return: A folium.Map object.
    '''
    mapViz = folium.Map(location=center, tiles=Tiles, zoom_start=zoom_start)

    for layer_name, layer_data in dicc.items():
        holds_ee_image = any(
            type(value) == ee.image.Image for value in layer_data.values()
        )
        if holds_ee_image:
            layer = folium.TileLayer(
                tiles=layer_data["tile_fetcher"].url_format,
                attr='Google Earth Engine',
                overlay=True,
                name=layer_name,
            )
        else:
            layer = folium.GeoJson(data=layer_data, name=layer_name)
        layer.add_to(mapViz)

    # Layer control lets the viewer toggle the layers added above.
    mapViz.add_child(folium.LayerControl())
    return mapViz



In [None]:
#Create list of images from location

import ee.mapclient

#image = np.array([ee.Image('LANDSAT/LC08/C01/T1_TOA/LC08_044034_20140318')])
# Query parameters for get_images(): path/row are matched against the
# 'WRS_PATH' / 'WRS_ROW' image properties, and cc is the exclusive upper bound
# on the 'CLOUD_COVER' property.
path = [44]
row = [34]
# NOTE(review): this is a Collection 1 ("C01") asset id; confirm it is still
# served by Earth Engine before relying on it.
satellite = 'LANDSAT/LC08/C01/T1_TOA'
start_date = '2017-03-18'
end_date = '2018-03-18'
cc = 10
# Python list of ee.Image objects matching the query above.
image_list = get_images(path, row, satellite, start_date, end_date, cc)

In [None]:
# Rectangular region used as the sampling region for the unsupervised clustering.
# Vertices are [longitude, latitude] and must be listed in ring order; here they
# run NW -> SW -> SE -> NE. Listing opposite corners consecutively (e.g. NW, SE,
# NE, SW) would trace a self-intersecting "bowtie" instead of the rectangle.
sfsite = ee.Geometry.Polygon([[
    [-123.59202891324108, 38.53717638470941],   # NW
    [-123.59202891324108, 37.004492952240454],  # SW
    [-120.70262461636608, 37.004492952240454],  # SE
    [-120.70262461636608, 38.53717638470941],   # NE
]])

In [None]:
#training = inputimg.sample({'region': nycsite, 'scale': 30.0, 'numPixels': 5000});
def trained_cluster (inputimg):
    """Train a 2-cluster k-means clusterer on pixels sampled from ``inputimg``.

    :param inputimg: Image to sample; up to 5000 pixels are requested at a
        30.0 scale from inside the module-level ``sfsite`` region.
    :return: The trained ee.Clusterer (wekaKMeans).
    """
    # Two clusters, presumably meant to correspond to land and water.
    numClusters = 2
    sampled_pixels = inputimg.sample(region=sfsite, scale=30.0, numPixels=5000)
    kmeans = ee.Clusterer.wekaKMeans(numClusters)
    return kmeans.train(sampled_pixels)

def classify (inputimg):
    """Cluster ``inputimg`` with a k-means clusterer trained on that same image.

    The clusterer comes from trained_cluster(), so the sampling region, scale,
    pixel count and number of clusters are defined in exactly one place.

    :param inputimg: Image to sample for training and then to cluster.
    :return: The clustered image produced by ``inputimg.cluster(...)``.
    """
    clusterer = trained_cluster(inputimg)
    return inputimg.cluster(clusterer)

In [None]:
#Supervised Classification starts here

#Import polygons for supervised classification

!pip install -U -q PyDrive
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 1. Authenticate and create the PyDrive client.
# Order matters: the Colab user is authenticated first, then the application
# default credentials are attached to the PyDrive auth object before the
# GoogleDrive client is built from it.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [None]:
# choose a local (colab) directory to store the data.
local_download_path = os.path.expanduser('~/data')
# exist_ok=True tolerates a directory left over from a previous run while still
# surfacing real failures (e.g. permissions) instead of silently ignoring them.
os.makedirs(local_download_path, exist_ok=True)

# 2. Auto-iterate using the query syntax
#    https://developers.google.com/drive/v2/web/search-parameters
file_list = drive.ListFile(
    {'q': "'1Gf4TqCciu4LZFZ7VQBaaZlBpIrBpyZ1l' in parents"}).GetList()

# Path of the most recently downloaded file; stays None when the folder is empty.
fname = None
for f in file_list:
  # 3. Create & download by id.
  print('title: %s, id: %s' % (f['title'], f['id']))
  fname = os.path.join(local_download_path, f['title'])
  print('downloading to {}'.format(fname))
  f_ = drive.CreateFile({'id': f['id']})
  f_.GetContentFile(fname)

# Preview the content of the last file downloaded as a quick sanity check.
# Skipped when nothing was downloaded, so an empty folder no longer raises.
if fname is not None:
  with open(fname, 'r') as downloaded:
    print(downloaded.read())

title: water2.json, id: 1cz2TK_LMBLVBxVmhuEpE3kr9bgkujoGQ
downloading to /root/data/water2.json
title: water1.json, id: 1n4WrjynqhBpN9WtkNwuP_XcaPLxxBH7x
downloading to /root/data/water1.json
title: nonwater2.json, id: 1mb8W8iHemXIm-nVddExrZ4ejDQglo3RP
downloading to /root/data/nonwater2.json
title: nonwater1.json, id: 1E0MJWaY9vwxk3RkTCx0PAYm0l4bae4k0
downloading to /root/data/nonwater1.json
{"type":"FeatureCollection","features":[
{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-122.42123859839,37.7490213426433,0],[-122.418823285194,37.7442911094521,0],[-122.418050993777,37.7429443091202,0],[-122.413332130966,37.7410214428332,0],[-122.412616460707,37.7410252826137,0],[-122.410672628798,37.741312986586,0],[-122.410519780595,37.7414333300924,0],[-122.408674015725,37.7436859960884,0],[-122.408461803442,37.7442934196917,0],[-122.408453225454,37.7447010810645,0],[-122.408436112952,37.7454381757864,0],[-122.42123859839,37.7490213426433,0]]]},"properties":{"name":"nonwater1"

In [None]:
# GeoJSON files holding one hand-drawn polygon each, and the landcover label
# assigned to each file (same order): 1 for the water*.json files, 0 for the
# nonwater*.json files.
jsonfiles = ['water1.json', 'water2.json', 'nonwater1.json', 'nonwater2.json']
classalloc = [1, 1, 0, 0]

# All dictionaries are keyed by the file name plus a suffix
# ('coords', 'ee', 'Pts', 'feature').
coords_dict = {}      # [lon, lat] vertex lists
ee_dict = {}          # ee.Geometry.Polygon per file
randomPts_dict = {}   # labelled random points inside each polygon
features_dict = {}    # labelled ee.Feature per polygon


def _label_points(points, landcover):
    """Return ``points`` with a 'landcover' property set on every feature."""
    return points.map(lambda feature: feature.set({'landcover': landcover}))


# zip() keeps each file paired with its label without a hand-maintained index.
for jsonfile, landcover in zip(jsonfiles, classalloc):
    # NOTE(review): hard-coded to the directory shown in the download cell's
    # output ('/root/data'); confirm if the download path ever changes.
    jsonfilepath = '/root/data/' + jsonfile
    with open(jsonfilepath) as f:
        data = geojson.load(f)

    # Outer ring of the first feature; only the first two values of each
    # vertex (the file stores a third 0 value) are kept.
    ring = data['features'][0]['geometry']['coordinates'][0]
    coords_dict[jsonfile + 'coords'] = np.array(ring)[:, 0:2].tolist()

    # creating a polygon from coordinate list
    ee_dict[jsonfile + 'ee'] = ee.Geometry.Polygon(coords_dict[jsonfile + 'coords'])

    # 30 random points inside the polygon, each tagged with the file's label.
    randomPoints = ee.FeatureCollection.randomPoints(region=ee_dict[jsonfile + 'ee'], points=30)
    randomPts_dict[jsonfile + 'Pts'] = _label_points(randomPoints, landcover)

    features_dict[jsonfile + 'feature'] = ee.Feature(
        ee_dict[jsonfile + 'ee'], {'name': jsonfile, 'landcover': landcover})

# The individual features are combined as shown below to create a feature
# collection. You can get some information about the features in the
# collection using commands as shown below.
sfFC = ee.FeatureCollection(list(features_dict.values()))

# NOTE(review): sfFCpts is built from a list of FeatureCollections rather than
# Features and is not used in the visible code; `fc` below is the flat, merged
# point collection that the training functions sample.
sfFCpts = ee.FeatureCollection(list(randomPts_dict.values()))

fc = ee.FeatureCollection([])
for labelled_points in randomPts_dict.values():
    fc = fc.merge(labelled_points)


print(sfFC.size().getInfo())

# Fetch the collection description once and reuse it for both prints.
sfFC_info = sfFC.getInfo()

print(type(sfFC_info))

print(sfFC_info['features'][3]['properties'])

4
<class 'dict'>
{'landcover': 0, 'name': 'nonwater2.json'}


In [None]:
'''
Second step in any supervised classification process - training using the training data. The corresponding
code in Google EE tutorial is as follows:
// Get the values for all pixels in each polygon in the training.
var training = image.sampleRegions({
  // Get the sample from the polygons FeatureCollection.
  collection: polygons,
  // Keep this list of properties from the polygons.
  properties: ['class'],
  // Set the scale to get Landsat pixels in the polygons.
  scale: 30
});

The code is in javascript. I had to convert it to Python. Mistakes could be created/transmitted because of this
translation
'''
# Band names selected from each image before sampling training data and before
# classifying; the same list is passed to the classifier as its input properties.
bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B10', 'B11']
#bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7']

def trained_classifier (image):
    """Train a CART classifier on ``image`` using the labelled points in ``fc``.

    The module-level ``bands`` are selected from ``image`` and sampled at the
    features of the module-level ``fc`` collection (scale 30.0), keeping each
    feature's 'landcover' property as the class label.

    No client-side getInfo() call is made here, so the function only builds the
    server-side computation and returns immediately.

    :param image: Image to draw training samples from.
    :return: The trained ee.Classifier (smileCart).
    """
    training_samples = image.select(bands).sampleRegions(
        collection=fc, properties=['landcover'], scale=30.0)

    # Train the classifier: 'landcover' is the class property, `bands` the inputs.
    return ee.Classifier.smileCart().train(training_samples, 'landcover', bands)

def training (image):
    """Classify ``image`` with a CART classifier trained on that same image.

    Training is delegated to trained_classifier() so the sampling and training
    settings live in one place.

    :param image: Image to train on and then classify.
    :return: The classified image from ``image.select(bands).classify(...)``.
    """
    trained = trained_classifier(image)

    # NOTE (carried over from the original author): interrogating the returned
    # image on the client, e.g. classified.getInfo(), was observed to never
    # finish. The result is therefore returned without any client-side fetch.
    classified = image.select(bands).classify(trained)
    return classified

In [None]:
#Comparison
for image in image_list:
    unsupervised = trained_cluster(image)
    supervised = trained_classifier(image)

    # An ee.Clusterer has no confusionMatrix(): clustering is unsupervised, so
    # there are no training labels to resubstitute. Instead, cluster the
    # labelled points in `fc` and tabulate cluster id against 'landcover'.
    # NOTE(review): cluster ids are arbitrary, so cluster 0/1 need not line up
    # with landcover 0/1; an accuracy well below 0.5 presumably means the two
    # ids are simply swapped relative to the labels. Confirm before comparing.
    labelled_pixels = image.sampleRegions(
        collection=fc, properties=['landcover'], scale=30.0)
    clusterAgreement = labelled_pixels.cluster(unsupervised).errorMatrix(
        'landcover', 'cluster')
    # getInfo() brings the computed values to the client so that the numbers,
    # rather than a description of the server-side object, are printed.
    print('Cluster vs. landcover error matrix: ', clusterAgreement.getInfo())
    print('Cluster vs. landcover overall accuracy: ', clusterAgreement.accuracy().getInfo())

    #Confusion matrix representing resubstitution accuracy for supervised
    trainAccuracy = supervised.confusionMatrix()
    print('Resubstitution error matrix: ', trainAccuracy.getInfo())
    print('Training overall accuracy: ', trainAccuracy.accuracy().getInfo())

    #Get a confusion matrix representing expected accuracy. Not sure if we can do
    #this because while MODIS satellite has "Land_Cover_Type_1," LANDSAT does not.
    #testAccuracy = validated.errorMatrix('Land_Cover_Type_1', 'classification')
    #print('Validation error matrix: ', testAccuracy)
    #print('Validation overall accuracy: ', testAccuracy.accuracy())

AttributeError: ignored