In [1]:
import otbApplication as otb
import pyotb
import os

2024-01-14 00:00:17 (INFO) [pyotb] Successfully loaded 117 OTB applications


In [2]:
# Root of the project. The original hard-coded location stays the default;
# setting the S1_S2_PROJECT_DIR environment variable overrides it so the
# notebook can run on another machine without editing this cell.
project_dir = os.environ.get(
    "S1_S2_PROJECT_DIR",
    "/home/rustt/Documents/Projects/S1_S2_classification",
)

# Directories derived from the project root.
# data_dir: holds the input raster and the label vector file used below.
data_dir = os.path.join(project_dir, "sentinel_classification/data/processed")
# model_dir: receives the trained model and its confusion matrix.
model_dir = os.path.join(project_dir, "sentinel_classification/models")
# out_dir: not referenced by the cells visible in this part of the notebook.
out_dir = os.path.join(project_dir, "sentinel_classification/data/final")

In [3]:
# Classifier inputs, both located under the processed-data directory.
# Vector file holding the labelled samples (passed to OTB as "io.vd").
labels_path = os.path.join(data_dir, "ROI_Classif_corrected_clf.shp")
# Raster to classify (passed to OTB as "io.il").
s2_stack_path = os.path.join(data_dir, "s2", "clip_s2_pan_sharp.tif")

### OTB classifier general params

#### Maximum training sample size per class `-sample.mt` int Default value: 1000

Maximum size per class (in pixels) of the training sample list (default = 1000) (no limit = -1). If equal to -1, then the maximal size of the available training sample list per class will be equal to the surface area of the smallest class multiplied by the training sample ratio.

####  Maximum validation sample size per class `-sample.mv` int Default value: 1000

Maximum size per class (in pixels) of the validation sample list (default = 1000) (no limit = -1). If equal to -1, then the maximal size of the available validation sample list per class will be equal to the surface area of the smallest class multiplied by the validation sample ratio.

####  Bound sample number by minimum `-sample.bm` int Default value: 1

Bound the number of samples for each class by the number of available samples by the smaller class. Proportions between training and validation are respected. Default is true (=1).

####  Training and validation sample ratio `-sample.vtr` float Default value: 0.5

Ratio between training and validation samples (0.0 = all training, 1.0 = all validation) (default = 0.5).

####  Field containing the class integer label for supervision `-sample.vfn` string

Field containing the class id for supervision. The values in this field shall be cast into integers.

### Sklearn vs otb
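- sklearn `max_features` corresponds to OTB `classifier.rf.var`; its default value `0` means sqrt(p) features are considered at each split (p = number of features)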

### PyOTB

In [4]:
# Training outputs, both written to the models directory.
# Confusion matrix exported as CSV (passed to OTB as "io.confmatout").
out_confmat = os.path.join(model_dir, "rf_test.csv")
# Trained model file (passed to OTB as "io.out").
out_model = os.path.join(model_dir, "rf_test.txt")

In [7]:
# Parameter groups for OTB's TrainImagesClassifier, defined in the same order
# in which they are merged into `params_training` at the end of the cell.

# Files read and written by the training run.
params_io = {
    "io.il": s2_stack_path,        # input image
    "io.vd": labels_path,          # input vector data with the class labels
    "io.out": out_model,           # output model file
    "io.confmatout": out_confmat,  # output confusion matrix
}

# Random-forest classifier settings.
params_clf = {
    "classifier": "rf",
    "classifier.rf.max": 5,    # presumably the maximum tree depth - confirm in OTB docs
    "classifier.rf.min": 10,   # presumably the minimum samples per node - confirm in OTB docs
    "classifier.rf.var": 0,    # number of features used for building a tree (0 <=> sqrt(p))
    "classifier.rf.nbtrees": 100,
}

# Sampling strategy.
params_sample = {
    "sample.vfn": "classid",  # class label column
    "sample.mt": 1000,        # max training samples per class
    "sample.mv": 1000,        # max validation samples per class
    "sample.bm": 1,           # bound every class by the smallest one (balanced dataset)
    "sample.vtr": 0.3,        # share of the samples kept for validation
}

# Execution settings.
params_other = {"ram": 6144}  # presumably in MB - confirm in OTB docs

# Single flat dictionary handed to pyotb.
params_training = {
    **params_io,
    **params_clf,
    **params_sample,
    **params_other,
}

In [9]:
# Display the merged parameter dictionary as a visual check before training.
# NOTE(review): the saved output below shows 'ram': 512 while the defining
# cell sets "ram" to 6144 - the output looks stale; re-run to confirm.
params_training

{'io.il': '/home/rustt/Documents/Projects/S1_S2_classification/sentinel_classification/data/processed/s2/clip_s2_pan_sharp.tif',
 'io.vd': '/home/rustt/Documents/Projects/S1_S2_classification/sentinel_classification/data/processed/ROI_Classif_corrected_clf.shp',
 'io.out': '/home/rustt/Documents/Projects/S1_S2_classification/sentinel_classification/models/rf_test.txt',
 'io.confmatout': '/home/rustt/Documents/Projects/S1_S2_classification/sentinel_classification/models/rf_test.csv',
 'classifier': 'rf',
 'classifier.rf.max': 5,
 'classifier.rf.min': 10,
 'classifier.rf.var': 0,
 'classifier.rf.nbtrees': 100,
 'sample.vfn': 'classid',
 'sample.mt': 1000,
 'sample.mv': 1000,
 'sample.bm': 1,
 'sample.vtr': 0.3,
 'ram': 512}

Erreur avec le paramètre `sample.vfn` => modifier `pyotb/core.py`, ligne 1026 :

Workaround trouvé :
* appeler `SetParameterString(key, obj)` quand `key == "sample.vfn"`
* TODO : créer une issue GitHub

By default, pyotb's code sets every parameter with `SetParameterValue`.

In [10]:
# Instantiate pyotb's TrainImagesClassifier with the merged parameter dict.
# The result is not bound to a name, so the cell only displays the object's
# repr; the log below reports each parameter as it is set.
# NOTE(review): per the markdown note above, "sample.vfn" only works with a
# locally patched pyotb/core.py - confirm the patch is in place before running.
pyotb.TrainImagesClassifier(params_training)

2024-01-14 00:00:59 (INFO) [pyotb] TrainImagesClassifier: argument for parameter "io.il" was converted to list
2024-01-14 00:00:59 (INFO) [pyotb] params io.il set !
2024-01-14 00:00:59 (INFO) [pyotb] TrainImagesClassifier: argument for parameter "io.vd" was converted to list
2024-01-14 00:00:59 (INFO) [pyotb] params io.vd set !
2024-01-14 00:00:59 (INFO) [pyotb] params io.out set !
2024-01-14 00:00:59 (INFO) [pyotb] params io.confmatout set !
2024-01-14 00:00:59 (INFO) [pyotb] params classifier set !
2024-01-14 00:00:59 (INFO) [pyotb] params classifier.rf.max set !
2024-01-14 00:00:59 (INFO) [pyotb] params classifier.rf.min set !
2024-01-14 00:00:59 (INFO) [pyotb] params classifier.rf.var set !
2024-01-14 00:00:59 (INFO) [pyotb] params classifier.rf.nbtrees set !
2024-01-14 00:00:59 (INFO) [pyotb] params sample.vfn set !
2024-01-14 00:00:59 (INFO) [pyotb] params sample.mt set !
2024-01-14 00:00:59 (INFO) [pyotb] params sample.mv set !
2024-01-14 00:00:59 (INFO) [pyotb] params sample.bm

<pyotb.TrainImagesClassifier object, id 140445844211264>

### Remarques
* Comment récupérer les jeux train/test (qui ont été échantillonnés) ?
    * contrôler le sur-apprentissage
    * utiliser séparément les modules de la classification : https://www.orfeo-toolbox.org/CookBook/recipes/pbclassif.html
* Charger le modèle ? => via cv2 CVRTree

### OTB

In [None]:
# Same training run through the raw OTB Python bindings (no pyotb wrapper).
# The bindings are imported as `otb` at the top of the notebook, so the
# registry has to be reached through that alias (the bare name
# `otbApplication` is not defined and would raise NameError).
app = otb.Registry.CreateApplication("TrainImagesClassifier")
if app is None:
    # Fail with a clear message rather than an AttributeError on the next line.
    raise RuntimeError("OTB application 'TrainImagesClassifier' could not be created")

# NOTE(review): unlike the pyotb parameter dict earlier in the notebook, this
# cell does not set classifier.rf.max / rf.min / rf.var, sample.bm or ram;
# confirm that relying on the OTB defaults is intended.

# Inputs: both parameters take a list of paths.
app.SetParameterStringList("io.il", [s2_stack_path])
app.SetParameterStringList("io.vd", [labels_path])
#app.SetParameterString("io.imstat", "EstimateImageStatisticsQB1.xml")
# Sampling.
app.SetParameterInt("sample.mv", 1000)
app.SetParameterInt("sample.mt", 1000)
app.SetParameterFloat("sample.vtr", 0.3)
app.SetParameterString("sample.vfn", "classid")
# Classifier.
app.SetParameterString("classifier","rf")
app.SetParameterInt("classifier.rf.nbtrees", 100)
# Outputs.
app.SetParameterString("io.out", out_model)
app.SetParameterString("io.confmatout", out_confmat)

app.ExecuteAndWriteOutput()

## Annexes

In [4]:
# List the names of the OTB applications that pyotb exposes
# (displayed below as a tuple of application names).
pyotb.get_available_applications()

2024-01-13 19:49:41 (INFO) [pyotb] Successfully loaded 117 OTB applications


('Aggregate',
 'BandMath',
 'BandMathX',
 'BinaryMorphologicalOperation',
 'BlockMatching',
 'BundleToPerfectSensor',
 'ClassificationMapRegularization',
 'ColorMapping',
 'CompareImages',
 'ComputeConfusionMatrix',
 'ComputeImagesStatistics',
 'ComputeModulusAndPhase',
 'ComputeOGRLayersFeaturesStatistics',
 'ConcatenateImages',
 'ConcatenateVectorData',
 'ConnectedComponentSegmentation',
 'ContrastEnhancement',
 'ConvertCartoToGeoPoint',
 'ConvertSensorToGeoPoint',
 'Despeckle',
 'DimensionalityReduction',
 'DisparityMapToElevationMap',
 'DomainTransform',
 'DynamicConvert',
 'EdgeExtraction',
 'EndmemberNumberEstimation',
 'ExtractROI',
 'FastNLMeans',
 'FineRegistration',
 'FusionOfClassifications',
 'GeneratePlyFile',
 'GenericRegionMerging',
 'GrayScaleMorphologicalOperation',
 'GridBasedImageResampling',
 'HaralickTextureExtraction',
 'HomologousPointsExtraction',
 'HooverCompareSegmentation',
 'HyperspectralUnmixing',
 'ImageClassifier',
 'ImageDimensionalityReduction',
 'Image