# This code compares different classifiers. The classifiers compared are: random forest, extra-trees, decision tree, SVM and neural network

Note that the default parameters for each model are used in this test. 

You are encouraged to explore more parameter combinations.

More details on model fitting and parameters:

Random Forest: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
Extra trees: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html
Decision Tree: https://scikit-learn.org/stable/modules/tree.html
SVM: https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
Neural network: https://scikit-learn.org/stable/modules/neural_networks_supervised.html

In [1]:
import os,sys
sys.path.append(r"../templates/")
import PYEO_model
sys.path.append(r"/opt/pyeo")
import pyeo.core as pyeo

from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.neural_network import MLPClassifier


  from numpy.core.umath_tests import inner1d


Tenacity, Planet and Multiprocessing are required for Planet data downloading


In [2]:
# Input parameters. Relative paths below are resolved against the working
# directory set here.
os.chdir('/home/jupyter-yaqing/')

# Attribute name handed to PYEO_model for the training and validation shapefiles.
training_filedname = validation_filedname = 'Id'

# Training raster and shapefile; the same raster is also the image to be classified.
training_tif = "Data/Brazil/2018.tif"
tobe_classified = training_tif
training_shp = "Data/Brazil/shp/training/training.shp"

# Shapefile used to validate the classified map.
validation_shp = "Data/Brazil/shp/validation/validation.shp"

# No-data value passed to the validation step.
nodata = 0

In [3]:
# Set the output folder and make sure it exists before anything is written to it.
out_dir = "Output/template4_comparing_models/"
# exist_ok=True keeps re-running this cell harmless once the folder is there.
os.makedirs(out_dir, exist_ok=True)

# CSV file that receives the training-data summary.
out_training_summary = os.path.join(out_dir, 'training_summary.csv')

In [4]:
# Models that will be compared. Add any model you are interested in here, and
# feel free to change the parameters.
# Every active estimator below is given a fixed random_state so that repeated
# runs of the notebook produce the same models and scores.
model_pool = {
    'RF': RandomForestClassifier(n_estimators=10, max_depth=None, min_samples_split=2, random_state=0),

    # SVM is disabled here; uncomment the next line to include it in the comparison.
    # 'SVM': svm.SVC(gamma='auto'),

    'Extree': ExtraTreesClassifier(
        bootstrap=False, criterion="gini", max_features=0.55, min_samples_leaf=2,
        min_samples_split=16, n_estimators=100, n_jobs=-1, class_weight='balanced',
        random_state=0,  # seeded like the other models so its score is reproducible
    ),

    'DTree': DecisionTreeClassifier(max_depth=None, min_samples_split=2, random_state=0),

    'NN': MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1),
}

In [5]:
# Read the training data from the raster/shapefile pair, then write a summary
# of the classes (sumarise_type 'count') to the CSV at out_training_summary.
features, classes = PYEO_model.get_training_data(
    training_tif,
    training_shp,
    attribute=training_filedname,
)

PYEO_model.summarise_training(
    in_classes=classes,
    out_csv=out_training_summary,
    sumarise_type='count',
)

  training_summary.to_csv(out_csv)


In [6]:
# Fit every entry of model_pool, print its mean score and save the fitted model.
# model_name, model and trained_model stay defined after the loop with the
# values from its final iteration.
for model_name, model_format in model_pool.items():
    # Each fitted model is written to out_dir as "<name>_model.pkl".
    trained_model = os.path.join(out_dir, model_name + '_model.pkl')

    print('Fitting model format...' + model_name)
    model, scores = PYEO_model.train_model(features, classes, model_format)
    mean_score = scores.mean()
    print('score is: ' + str(mean_score))

    print('saving the model to: ' + trained_model)
    PYEO_model.save_model(model, trained_model)

Fitting model format...RF
score is: 0.8859979984937958
saving the model to: Output/template4_comparing_models/RF_model.pkl
Fitting model format...Extree
score is: 0.8850761990197666
saving the model to: Output/template4_comparing_models/Extree_model.pkl
Fitting model format...DTree
score is: 0.8681375178691603
saving the model to: Output/template4_comparing_models/DTree_model.pkl
Fitting model format...NN
score is: 0.32028621390167944
saving the model to: Output/template4_comparing_models/NN_model.pkl


In [9]:
# Classify the image in `tobe_classified` with a trained model and write the
# result to "<model_name>_classified.tif" inside out_dir.
# NOTE(review): `model_name` and `model` are not set in this cell. As far as this
# notebook shows, they are left over from the last iteration of the training
# loop, so only that one model is used here rather than each model in
# model_pool. Confirm, and loop over the trained models if every one should be
# classified. The leading indentation suggests these lines were lifted from the
# body of that loop (TODO confirm).
    out_classified = os.path.join(out_dir, model_name + '_classified.tif')
    # num_chunks is hard-coded to 10; the original author flagged it as a value that still needs changing.
    PYEO_model.classify_image(in_image_path= tobe_classified, model=model, out_image_path= out_classified ,num_chunks =10)#need to change num_chunks

Classifying image
Classifying 0
Classifying 1
Classifying 2
Classifying 3
Classifying 4
Classifying 5
Classifying 6
Classifying 7
Classifying 8
Classifying 9


In [10]:
# Validate the classified map against the validation shapefile and generate the
# accuracy matrix; outputs go to out_dir.
# NOTE(review): `out_classified` is set by the classification cell, so only the
# single map produced there is validated. Confirm whether the map of every
# model in model_pool should be validated instead.
    PYEO_model.validate_classes(inRaster=out_classified, shpdir= validation_shp , field_name=validation_filedname, 
                            out_fldr=out_dir,nodata = nodata)

~validating ... Data/Brazil/shp/validation/validation.shp
The clipped shapefile to the extent of the raster, resultant shp is saved in Output/template4_comparing_models/outline_clip.shp
rasterise the shapefile
['0.0', '3.0', 34807252, 1.0]
['1.0', '3.0', 3116, 1.0]
['2.0', '3.0', 801, 1.0]
['4.0', '3.0', 2793, 1.0]
