In [1]:
from utils import Classes, data_get
import numpy as np

# Vegetation Classifier

The idea behind is pretty simple. Make a classifier to discriminate the vegetation:

- grass_healthy
- grass_streesed
- grass_synthetic
- tree

from the rest of the classes.

Then, if vegetation, classify which type of vegetation.

Else, continue.

### Load labeled unormalized data

In [2]:
X, y = data_get()

### Split data into train and test sets

In [3]:
from sklearn.model_selection import StratifiedShuffleSplit

In [4]:
sss = StratifiedShuffleSplit(n_splits=1, random_state=42, test_size=0.2)
__train_idx, __test_idx = next(sss.split(X, y))
X_train, y_train = X[__train_idx], y[__train_idx]
X_test, y_test = X[__test_idx], y[__test_idx]
print("Train", np.unique(y_train, return_counts=True))
print("Test", np.unique(y_test, return_counts=True))

Train (array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
      dtype=uint8), array([158, 152, 153, 150, 149, 146, 157, 153, 154, 153, 145, 153, 147,
       145, 150]))
Test (array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
      dtype=uint8), array([40, 38, 39, 38, 37, 36, 39, 38, 39, 38, 36, 39, 37, 36, 37]))


In [5]:
train_vegetation_mask = (
    (y_train == Classes.GRASS_HEALTHY)
    | (y_train == Classes.GRASS_STRESSED)
    | (y_train == Classes.GRASS_SYNTHETIC)
    | (y_train == Classes.TREE)
)
test_vegetation_mask = (
    (y_test == Classes.GRASS_HEALTHY)
    | (y_test == Classes.GRASS_STRESSED)
    | (y_test == Classes.GRASS_SYNTHETIC)
    | (y_test == Classes.TREE)
)
y_train_bin = train_vegetation_mask.astype(int)
y_test_bin = test_vegetation_mask.astype(int)

### Choice of classifier

As we want to make a binary classification, we'll use a `sklean.svm.LinearSVC`.

In [6]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.svm import LinearSVC

### Quick test

In [7]:
pipeline = make_pipeline(
    StandardScaler(),
    LinearSVC(random_state=42, max_iter=5000),
)

In [8]:
pipeline.fit(X_train, y_train_bin)



In [9]:
pipeline.score(X_test, y_test_bin)

0.9964726631393298

Wow, 99% accuracy from a pretty naive approach is promissing.

### Meta-parameters optimization

In [10]:
from sklearn.model_selection import GridSearchCV

In [15]:
pipeline = Pipeline(
    [
        ("scaler", StandardScaler()),
        ("linear_svc", LinearSVC()),
    ]
)

### Prepare Grid

In [16]:
grid_params = {
    "linear_svc__random_state": range(0, 100),
    "linear_svc__max_iter": range(5000, 10000, 1000),
    "linear_svc__C": (1, 10, 100, 200, 300),
}

gs = GridSearchCV(pipeline, grid_params, n_jobs=6)

In [17]:
gs.fit(X_train, y_train_bin)



In [19]:
gs.score(X_test, y_test_bin)

0.9964726631393298

### Visualize parameters

In [20]:
import pandas as pd

df = pd.DataFrame(gs.cv_results_)
df = df.sort_values(by="rank_test_score", ascending=True)

In [21]:
df.iloc[0]["params"]

{'linear_svc__C': 200,
 'linear_svc__max_iter': 9000,
 'linear_svc__random_state': 36}

### Confirm

In [22]:
vegetation_discriminator = make_pipeline(
    StandardScaler(), LinearSVC(random_state=36, C=200, max_iter=9000)
)

In [23]:
vegetation_discriminator.fit(X_train, y_train_bin)



In [24]:
vegetation_discriminator.score(X_test, y_test_bin)

0.9964726631393298

# Classify the vegetation

Now that we have a vegetation classifier, it is time to classify the vegetation itself.

In [25]:
from sklearn.linear_model import SGDClassifier

### Prepare Data

In [26]:
X_train_veg = X_train[train_vegetation_mask]
y_train_veg = y_train[train_vegetation_mask]
X_test_veg = X_test[test_vegetation_mask]
y_test_veg = y_test[test_vegetation_mask]
print("Train", np.unique(y_train_veg, return_counts=True))
print("Test", np.unique(y_test_veg, return_counts=True))

Train (array([1, 2, 3, 4], dtype=uint8), array([158, 152, 153, 150]))
Test (array([1, 2, 3, 4], dtype=uint8), array([40, 38, 39, 38]))


### Naive Approach

In [27]:
vegetation_classifier = make_pipeline(
    StandardScaler(),
    SGDClassifier(random_state=42, shuffle=False, early_stopping=True, n_jobs=6),
)

In [28]:
vegetation_classifier.fit(X_train_veg, y_train_veg)

In [29]:
vegetation_classifier.score(X_test_veg, y_test_veg)

1.0

### Save classifiers

In [30]:
from utils import save

In [31]:
save(vegetation_discriminator, "vegetation_discriminator")
save(vegetation_classifier, "vegetation_classifier")