## BGP Classification Assignment 

Wenjie Cheng

October 2024

### Connect to GEE

In [1]:
import ee
import geemap

In [2]:
# Authenticate with Earth Engine before the library is initialized in the next cell.
ee.Authenticate()

True

In [3]:
# Initialize the Earth Engine client and report the outcome.
# An Earth Engine error is reported (with its reason) but not re-raised, so the
# cell itself does not fail; any other exception is reported and re-raised.
try:
    ee.Initialize()
except ee.EEException as e:
    print('Google Earth Engine has failed to initialize!')
    # Show why initialization failed instead of silently discarding the error.
    print('Reason:', e)
except Exception as e:
    # type(e) is the same value sys.exc_info()[0] would give, without needing
    # the `sys` module (which this notebook never imports).
    print("Unexpected error:", type(e))
    raise
else:
    print('Google Earth Engine has initialized successfully!')

Google Earth Engine has initialized successfully!


### Define Study Area

The study area is a region around London. The map center is London Victoria Station (-0.14, 51.49). The bounding box is a rectangle spanning 4 degrees of longitude by 3 degrees of latitude, which at this latitude is roughly 280 kilometers wide and 330 kilometers high.

In [4]:
# Interactive map centred on London Victoria Station (lon -0.14, lat 51.49) at zoom level 7.
Map = geemap.Map()
Map.setCenter(-0.14, 51.49, 7)

# The region of interest is a bounding box of 4 degrees of longitude by 3 degrees of latitude
# (centre +/- 2 degrees east-west, +/- 1.5 degrees north-south).
# At about 51.5 degrees N that is roughly 280 km wide and 330 km high.
London_Bbox = ee.Geometry.BBox(-2.14, 49.99, 1.86, 52.99)
Map.addLayer(London_Bbox, name = "Study Region")

Map

Map(center=[51.49, -0.14], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGU…

### Get Image

Build a median composite from the Sentinel-2 surface-reflectance images acquired over the study area in summer 2024.

In [5]:
# Median composite of Sentinel-2 surface-reflectance scenes over the study area.
# NOTE(review): only a scene-level cloud filter is applied here; there is no
# per-pixel cloud mask, so residual cloud can survive into the composite.
image = (
    ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
    .filterBounds(London_Bbox)
    # The end date of filterDate is exclusive, so 2024-09-30 itself is not included.
    .filterDate("2024-06-01", "2024-09-30")
    # Keep scenes with less than 20 % cloudy pixels. ee.Filter.lt replaces the
    # deprecated filterMetadata(..., 'less_than', ...) form.
    .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", 20))
    # Band-name pattern; it yields bands B2 through B8.
    .select("B[2-8]")
    .median()
)

In [6]:
# Restrict the composite to the study region.
image = image.clip(London_Bbox)

# Fetch the per-band statistics over the region to the client and display the band means.
# Presumably these means inform the display range chosen for the map layer below.
stats = geemap.image_stats(image,region = London_Bbox).getInfo()
stats["mean"]

{'B2': 701.8527386028205,
 'B3': 828.9991800590358,
 'B4': 746.491308625779,
 'B5': 1008.5319776976058,
 'B6': 1558.4519514594947,
 'B7': 1788.0040997048213,
 'B8': 1833.3729091505409}

In [7]:
# Display settings: bands B4, B3, B2 drive the red, green and blue channels,
# with values stretched between 0 and 2000.
vis_params = dict(min=0, max=2000, bands=["B4", "B3", "B2"])

Map.addLayer(image, vis_params, name="image")
Map

Map(center=[51.49, -0.14], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGU…

### Create training data

In [8]:
# Disabled one-off step: presumably inspects the regions drawn by hand on the map.
# Map.user_rois.getInfo()

In [9]:
# Disabled one-off step: presumably exports the hand-drawn regions to a shapefile.
# NOTE(review): this would write "training.shp", whereas the loading cell reads
# "training/training_data.shp" - confirm how the file on disk was produced.
# geemap.ee_to_shp(Map.user_rois,"training.shp")

In [10]:
# Load the labelled training polygons from a shapefile (path is relative to the
# notebook's working directory) and show them on the map.
training_path = "training/training_data.shp"
training_polygons = geemap.shp_to_ee(training_path)
Map.addLayer(training_polygons, name = "ROI")
Map

Map(center=[51.49, -0.14], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGU…

In [11]:
# Sample the composite inside the training polygons. Each resulting feature carries
# the band values at one sample location plus the polygon's "landcover" label.
samples = image.sampleRegions(
    collection = training_polygons,
    properties = ["landcover"],  # copy only the class label from each polygon
    scale = 50,                  # sampling scale passed to Earth Engine
    tileScale = 16,
    geometries = True,           # keep each sample's point geometry
)

# Disabled preview layer. NOTE(review): `training` is not defined until the
# train/test split cell further down, so this line cannot be enabled here as written.
# Map.addLayer(training, name = "training")

In [12]:
# Number of sampled features (fetched from the server).
samples.size().getInfo()

102664

In [13]:
# Inspect one sample to check its structure: point geometry, band values and class label.
samples.first().getInfo()

{'type': 'Feature',
 'geometry': {'geodesic': False,
  'type': 'Point',
  'coordinates': [-0.12419208802952385, 51.50827549730822]},
 'id': '0_0',
 'properties': {'B2': 772,
  'B3': 825,
  'B4': 918,
  'B5': 950,
  'B6': 950,
  'B7': 993,
  'B8': 1076,
  'landcover': 5}}

### Random Forest Classification

In [14]:
# Threshold on the random column: samples below it go to training, the rest to testing.
split = 0.7

# Attach a "random" column to every sample, then partition on it.
samples = samples.randomColumn()

in_training = ee.Filter.lt("random", split)
in_test = ee.Filter.gte("random", split)
training = samples.filter(in_training)
test = samples.filter(in_test)

# Fetch both subset sizes from the server and report them.
n_training = training.size().getInfo()
n_test = test.size().getInfo()
print("Training:{},Test:{}".format(n_training, n_test))

Training:71908,Test:30756


In [15]:
# Predictor bands fed to the classifier.
bands = ["B2", "B3", "B4", "B5", "B6", "B7", "B8"]

# Random forest with 10 trees, trained on the training subset to predict "landcover".
rf_untrained = ee.Classifier.smileRandomForest(numberOfTrees=10)
rf_classifier = rf_untrained.train(
    features=training,
    classProperty="landcover",
    inputProperties=bands,
)


In [16]:
# Legend entries and their colours, listed in matching order.
legend_keys = ["water", "farmland", "bareland", "vegetation", "urban"]
legend_colors = ["3388ff", "42ff68", "ffc342", "258e31", "b1beb8"]

# Apply the trained classifier to the predictor bands of the composite.
result = image.select(bands).classify(rf_classifier)

# Attach the class values and the palette to the classified image as properties;
# presumably these drive the layer's colours, since no visualisation parameters are passed.
landcover = (
    result
    .set("classification_class_values", [1, 2, 3, 4, 5])
    .set("classification_class_palette", legend_colors)
)

Map.addLayer(landcover, {}, "Land cover")
Map.add_legend(keys=legend_keys, colors=legend_colors, position="bottomleft")
Map

Map(bottom=11199.0, center=[51.49, -0.14], controls=(WidgetControl(options=['position', 'transparent_bg'], wid…

### Assessment

In [17]:
# Confusion matrix of the classifier on its own training data (resubstitution),
# so it measures fit rather than generalization.
train_accuracy = rf_classifier.confusionMatrix()

# The matrix is 6x6: index 0 is empty because the class values used here run from 1 to 5.
train_accuracy.getInfo()

[[0, 0, 0, 0, 0, 0],
 [0, 54395, 0, 0, 0, 4],
 [0, 0, 1663, 0, 4, 1],
 [0, 0, 0, 2803, 0, 1],
 [0, 0, 7, 0, 10149, 1],
 [0, 3, 1, 2, 2, 2872]]

In [18]:
# Overall accuracy on the training samples.
train_accuracy.accuracy().getInfo()

0.9996384268787896

In [22]:
# Kappa statistic on the training samples.
train_accuracy.kappa().getInfo()

0.9991052261076222

In [19]:
# Classify the held-out test samples and compare the known label ("landcover")
# with the predicted label ("classification").
tested = test.classify(rf_classifier)
test_accuracy = tested.errorMatrix("landcover", "classification")
# The matrix is 6x6: index 0 is empty because the class values used here run from 1 to 5.
test_accuracy.getInfo()

[[0, 0, 0, 0, 0, 0],
 [0, 23272, 0, 0, 0, 11],
 [0, 0, 630, 1, 15, 1],
 [0, 0, 2, 1249, 0, 0],
 [0, 0, 23, 0, 4274, 1],
 [0, 5, 3, 14, 1, 1254]]

In [20]:
# Overall accuracy on the held-out test samples.
test_accuracy.accuracy().getInfo()

0.9974964234620887

In [21]:
# Kappa statistic on the held-out test samples.
test_accuracy.kappa().getInfo()

0.9937990275483285

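Comments: The overall accuracy looks good, but the classified map still shows some errors. The most obvious one is that clouds are classified as urban area. Note also that the test samples are drawn from the same polygons as the training samples, so the reported test accuracy is probably optimistic.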