# Random Forest Classification Using r.learn.ml in GRASS GIS

This notebook preprocesses GeoJSON files as training data and GeoTIFF (.tif) files as the rasters to be classified. In a second step, the classification is performed with the GRASS GIS add-on r.learn.ml. All steps are run through PyGRASS, the Python interface to GRASS GIS.

# Preprocessing

__Imports__

In [1]:
import os
from grass.pygrass.modules.shortcuts import raster as r, vector as v, general as g, imagery 

__Install GRASS GIS extensions__

In [2]:
# Install the GRASS GIS add-ons that the later cells call
# (r.clip for clipping the bands, r.learn.ml for the classification).
for addon_name in ("r.learn.ml", "r.clip"):
    g.extension_(extension=addon_name)

Module('g.extension')

__Change working directory__

In [3]:
# Project folder that contains the "osmQueries/" and "satData/" directories.
wd_path = "" # enter your file path

# os.chdir("") raises FileNotFoundError, so an empty placeholder is treated
# as "stay in the current working directory".
if wd_path:
    os.chdir(wd_path)

__Choose Year__

In [4]:
# Year of the data set to process. The later cells read their inputs from
# "osmQueries/<year>" and "satData/<year>", so both folders must exist.
year = "2020" #enter 2020 or 2015

__Training Data__

In [5]:
# Build the training raster: import every OSM GeoJSON file of the chosen
# year, label it with its CORINE class, merge, rasterize and sample.

# Resolve the folder once so that listing and importing always use the same
# location, independent of how wd_path was written (trailing slash or not).
training_dir = os.path.abspath(os.path.join("osmQueries", year))
imported_maps = []  # names of the imported vector maps, used for merging

# os.listdir() gives no guaranteed order; sort for reproducible runs
for file in sorted(os.listdir(training_dir)):
    if not file.endswith(".geojson"):
        continue

    current_file = file # file name with extension
    filepath = os.path.join(training_dir, current_file) # path to file
    output = current_file.split(".")[0] # file name without extension for output names

    # the corine class is the last "_"-separated token of the file name,
    # e.g. "y2020_1112" -> "1112"; it is written to an integer column below
    corine_class = output.split("_")[-1]
    if not corine_class.isdigit():
        raise ValueError(
            "cannot read a numeric corine class from file name: " + current_file
        )

    # import data into grass
    v.import_(input=filepath, output=output, overwrite=True)
    print("imported " + current_file)

    # add column and write the corine class to the new column
    v.db_addcolumn_(map=output, columns="class int")
    v.db_update_(map=output, column="class", value=corine_class)

    imported_maps.append(output)

if not imported_maps:
    raise FileNotFoundError("no .geojson files found in " + training_dir)

# comma-separated list of all output names for merging
all_datasets = ",".join(imported_maps)

# merge all vector datasets
v.patch_(input=all_datasets, output="merged_training_data", flags="e", overwrite=True)
print("merged vector files")

# set region to extent of training data
g.region_(vector="merged_training_data")

# rasterize training data
v.to_rast_(input="merged_training_data",
           output="training_raster", use="attr",
           attribute_column="class",
           overwrite=True)
print("rasterized merged training data")

# random sample of training and test data (fixed seed for repeatable draws)
r.random_(input="training_raster", npoints=100000, raster="training_sample", seed=1, overwrite=True)

print("extracted random test and training sample")

imported y2020_1112.geojson
imported y2020_13.geojson
imported y2020_2.geojson
imported y2020_31.geojson
imported y2020_32.geojson
imported y2020_5.geojson
merged vector files
rasterized merged training data
extracted random test and training sample


__Raster Data__

In [6]:
# Import the satellite bands of the chosen year, clip each one to the
# computational region and collect the clipped bands in one imagery group.
import re  # only needed in this cell, to read the band id from file names

# drop the group from an earlier run so it only holds the bands added below
g.remove_(type="group", name="raster_group", flags="f")

# Resolve the folder once so that listing and importing always use the same
# location, independent of how wd_path was written (trailing slash or not).
raster_dir = os.path.abspath(os.path.join("satData", year))
band_maps = []  # names of the clipped band rasters, used for grouping

# os.listdir() gives no guaranteed order; sort for reproducible runs
for file in sorted(os.listdir(raster_dir)):
    if not file.endswith((".tif", ".TIF")):
        continue

    current_file = file # file name with extension
    filepath = os.path.join(raster_dir, current_file) # path to file
    output = current_file.split(".")[0] # file name without extension for output names

    # Extract the band id from the end of the file name. Taking only the
    # last character would map B11/B12 onto the names of B01/B02, so the
    # whole trailing number (plus an optional letter, e.g. "8A") is used.
    # Leading zeros are dropped: "..._B02" -> "2", "..._B11" -> "11".
    band_match = re.search(r"(\d+[A-Za-z]?)$", output)
    if band_match:
        band_nr = band_match.group(1).lstrip("0") or "0"
    else:
        band_nr = output[-1]  # no trailing number: fall back to last character
    band = "b_" + band_nr

    # two files resolving to the same band name would overwrite each other
    if band in band_maps:
        raise ValueError(
            "more than one file maps to band " + band + ": " + current_file
        )

    r.import_(input=filepath, output=output, overwrite=True)
    print("imported " + current_file)

    # clip to computational region
    r.clip_(input=output, output=band, overwrite=True)

    # remove imported rasters to only keep clipped ones
    g.remove_(type="raster", name=output, flags="f")

    band_maps.append(band)

if not band_maps:
    raise FileNotFoundError("no .tif files found in " + raster_dir)

# comma-separated list of all band names for grouping
all_bands = ",".join(band_maps)

# group all raster bands
imagery.group_(group="raster_group", input=all_bands)


imported T32UMV_20200723T103031_B02.tif
imported T32UMV_20200723T103031_B03.tif
imported T32UMV_20200723T103031_B04.tif
imported T32UMV_20200723T103031_B08.tif


Module('i.group')

__Classification__

In [None]:
# Run r.learn.ml with a RandomForestClassifier of 10 estimators on the
# "raster_group" bands, using "training_sample" as the training map; the
# result is written to the raster "rf_class".
rf_options = {
    "group": "raster_group",
    "trainingmap": "training_sample",
    "output": "rf_class",
    "classifier": "RandomForestClassifier",
    "n_estimators": 10,
    "overwrite": True,
}
r.learn_ml_(**rf_options)