In [1]:
import cv2
from pathlib import Path
import numpy as np
from PIL import Image
from skimage.io import imread
from matplotlib import pyplot as plt
from sklearn import svm, metrics
from sklearn.model_selection import train_test_split, cross_val_score

In [2]:
leaves_dir = Path().resolve().parent / "data" / "leaves"
example_leaf_dir = leaves_dir / "image_0001.jpg"
leaves_classes = [(1, 66), (67, 126), (127, 186)]
leaves_paths = {i :[leaves_dir / 'image_{num:04d}.jpg'.format(num=j) for j in range(*ran)] for i, ran in enumerate(leaves_classes)}

In [3]:
def show_img(img):
    """
    Helper function to display using opencv
    """
    cv2.imshow('image',img)
    k = cv2.waitKey(0)
    if k == 27:         # wait for ESC key to exit
        cv2.destroyAllWindows()

# Testing each step

In [19]:
img = cv2.imread(str(example_leaf_dir),1)
show_img(img)

In [21]:
_,g,_ = cv2.split(img)

In [22]:
ret2,th2 = cv2.threshold(g,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU) # perhaps by threshold instead
show_img(th2)

In [23]:
edges = cv2.Canny(g,300,300)
show_img(edges)

# Applying whole process

In [16]:
h, w = 300, 300

In [17]:
def preprocess(path: Path) -> np.array:
    """
    Preprocess applied to each photo.
    Return preprocessed photo as a numpy array.
    """
    img = cv2.imread(str(example_leaf_dir),1)
    _,g,_ = cv2.split(img)
    edges = cv2.Canny(g,300,300)
    reshaped = cv2.resize(edges, (h, w))
    return reshaped

In [18]:
def generate_x_y_dataset(leaves_paths):
    x, y = [], []
    for id,paths in leaves_paths.items():
        for path in paths:
            x.append(preprocess(path))
            y.append(id)
    assert len(x) == len(y)
    x = np.array(x).reshape((len(x),h*w))
    return x, y

In [19]:
x, y = generate_x_y_dataset(leaves_paths)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=0)

In [20]:
clf = svm.SVC(gamma=0.0001, C=1, kernel='linear')
clf.fit(X_train, y_train)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0001, kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [21]:
clf.score(X_test, y_test)

0.26315789473684209

### By cross validation

In [39]:
clf = svm.SVC(gamma=0.0001)
scores = cross_val_score(clf, x, y, cv=5)
print(scores)

[ 0.35135135  0.35135135  0.35135135  0.35135135  0.37142857]


### Classyfing some example from test_sample

In [64]:
classifier.predict([preprocess(example_leaf_dir)])

array(['0'],
      dtype='<U1')