In [2]:
import pandas as pd

In [3]:
# Leer el archivo CSV
data = pd.read_csv('features_sentinel_urbano.csv')

In [4]:
# Seleccionar las observaciones urbanas con al menos 90% de pixeles urbanos
urbanas = data[(data['klass'] == 1) & (data['klass__1'] >= 0.9)]

# Seleccionar aleatoriamente las observaciones no urbanas con al menos 90% de pixeles no urbanos
no_urbanas = data[(data['klass'] == 2) & (data['klass__2'] >= 0.9)].sample(n=urbanas.shape[0], random_state=42)

# Combinar las observaciones seleccionadas
balanced_data = pd.concat([urbanas, no_urbanas])

In [5]:
# Dividir los datos en conjunto de entrenamiento y prueba
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(balanced_data, test_size=0.3, random_state=42)

In [6]:
train_data

Unnamed: 0,FID,klass,klass__1,klass__2,b1Min,b1Max,b1Mean,b1StdDev,b1Sum,b2Min,...,b10Sum,b11Min,b11Max,b11Mean,b11StdDev,b11Sum,b12Min,b12Max,b12Mean,b12StdDev
708443,708443,2,0.0,1.0,826,1303,1079.13,170.832,107913,408,...,325233,2387,4241,3475.50,393.225,347550,1510,3967,2900.61,467.305
572130,572130,1,1.0,0.0,747,976,899.88,82.332,89988,467,...,260305,1842,3338,2629.10,401.082,262910,1511,2976,2266.24,417.189
586171,586171,1,1.0,0.0,277,463,325.79,42.878,32579,210,...,315038,1997,3413,2636.47,407.337,263647,1208,2490,1766.77,383.180
581294,581294,1,1.0,0.0,384,1233,763.88,245.252,76388,314,...,286983,2116,3654,2850.64,314.174,285064,1455,2977,2226.59,311.830
1572,1572,2,0.0,1.0,303,397,355.66,25.241,35566,423,...,225155,2373,3103,2806.62,135.965,280662,1737,2381,2082.81,120.566
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
449425,449425,2,0.0,1.0,349,698,528.48,99.343,52848,476,...,276900,3022,4483,3749.91,443.885,374991,2204,3405,2802.51,359.138
94126,94126,2,0.0,1.0,420,577,512.47,44.291,51247,517,...,263120,2840,3566,3299.30,203.833,329930,2130,2790,2563.79,186.065
79881,79881,1,1.0,0.0,888,1969,1488.93,314.398,148893,572,...,314520,2753,4943,3554.23,532.259,355423,1922,5291,3124.89,728.497
618450,618450,1,1.0,0.0,1143,4395,2226.85,1234.986,222685,1088,...,333220,3152,8792,4493.55,1723.553,449355,2829,9185,4170.82,2024.290


In [7]:
test_data

Unnamed: 0,FID,klass,klass__1,klass__2,b1Min,b1Max,b1Mean,b1StdDev,b1Sum,b2Min,...,b10Sum,b11Min,b11Max,b11Mean,b11StdDev,b11Sum,b12Min,b12Max,b12Mean,b12StdDev
552264,552264,1,1.0,0.0,688,997,839.26,109.358,83926,508,...,331703,2711,3723,3323.26,206.811,332326,2309,2964,2608.83,191.720
1114,1114,2,0.0,1.0,582,712,668.61,35.486,66861,434,...,277708,3069,3389,3300.80,50.671,330080,2352,2935,2783.57,139.277
826465,826465,2,0.0,1.0,289,391,339.43,30.342,33943,457,...,191255,3057,3470,3248.49,89.270,324849,2319,2691,2484.25,85.662
5239,5239,1,1.0,0.0,496,926,683.29,174.115,68329,333,...,274506,2333,3580,2987.44,318.099,298744,1585,2828,2296.45,340.306
573311,573311,1,1.0,0.0,741,1833,1163.04,419.880,116304,704,...,306129,3216,4101,3653.56,225.987,365356,2434,3744,2957.11,415.343
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284270,284270,1,1.0,0.0,511,898,766.01,96.630,76601,588,...,303894,2933,3958,3498.95,228.446,349895,2122,3245,2754.53,239.180
296578,296578,2,0.0,1.0,23,112,41.28,16.203,4128,98,...,197451,1432,2281,1745.49,239.799,174549,889,1573,1120.68,171.904
259377,259377,2,0.0,1.0,92,132,109.33,9.680,10933,114,...,198164,1327,1914,1653.60,149.072,165360,795,1288,1051.68,120.396
43416,43416,1,1.0,0.0,387,651,440.51,70.878,44051,183,...,266861,2231,3649,2929.42,332.185,292942,1514,3162,2167.37,406.496


In [8]:
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import StandardScaler
from tpot.builtins import StackingEstimator
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import classification_report

In [9]:
def fit_and_test(train_data,testing_data, klass, pipeline, nombre_pipeline, klass_test=''):

    if klass_test == '':
        klass_test = klass
        
    features = train_data.drop(["FID",klass,'klass__1','klass__2'], axis=1)
    target = train_data[klass]
    testing_features = testing_data.drop(["FID",klass_test,'klass__1','klass__2'], axis=1)
    test_target = testing_data[klass_test]
    
    print("inicia entrenamiento")
    pipeline.fit(features, target)
    print("clasificación")
    results = pipeline.predict(testing_features)
    print("El resultado de la clasificación con el pipeline: ",nombre_pipeline)
    print(classification_report(test_target, results, digits=4))
    return pipeline

In [10]:
exported_pipeline = make_pipeline(
    StandardScaler(),
    StackingEstimator(estimator=MLPClassifier(alpha=0.01, learning_rate_init=0.5)),
    ExtraTreesClassifier(bootstrap=False, criterion="entropy", max_features=0.8, min_samples_leaf=3, min_samples_split=5, n_estimators=100)
)

In [11]:
klass = 'klass'
nombre_pipeline = "Extra-Trees para GRID 2020"
fit_and_test(test_data,train_data,klass,exported_pipeline,nombre_pipeline)

inicia entrenamiento
clasificación
El resultado de la clasificación con el pipeline:  Extra-Trees para GRID 2020
              precision    recall  f1-score   support

           1     0.9522    0.9198    0.9357     21003
           2     0.9224    0.9538    0.9378     20973

    accuracy                         0.9368     41976
   macro avg     0.9373    0.9368    0.9368     41976
weighted avg     0.9373    0.9368    0.9368     41976

