In [2]:
from pathlib import Path
import numpy as np
import shutil
import os

from skimage import color
from sklearn import svm
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import classification_report

from skimage.io import imread
from skimage.transform import resize

In [2]:
def load_image_files(container_path, dimension=(220, 220, 3)):
    """Load a folder-per-class image collection into a scikit-learn ``Bunch``.

    Every immediate sub-directory of ``container_path`` is treated as one
    class. Each regular file inside it is read, converted to grayscale,
    resized to ``dimension`` and labelled with the lower-cased folder name.
    Folders and files are visited in sorted order so the sample order is
    the same on every run.

    Files that cannot be read or processed are *moved* (not copied) to
    ``errors/<folder name>/`` relative to the current working directory and
    a warning naming the file and the failure is emitted. Nested
    sub-directories inside a class folder are skipped and left in place.

    Parameters
    ----------
    container_path : str or pathlib.Path
        Directory that contains one sub-directory per class.
    dimension : tuple of int, default (220, 220, 3)
        Output shape handed to ``skimage.transform.resize``.
        NOTE(review): the default keeps a trailing 3 even though the image
        is converted to grayscale before resizing; ``(220, 220)`` may be
        what was intended -- confirm before changing, callers may rely on
        the current feature length.

    Returns
    -------
    sklearn.utils.Bunch
        ``data``          -- array with one flattened image per row.
        ``target``        -- array of lower-cased folder names, one per row.
        ``target_names``  -- list of the distinct lower-cased folder names.
        ``images``        -- array of the resized (unflattened) images.
        ``DESCR``         -- short textual description of the dataset.
    """
    import warnings  # local import: only needed to report quarantined files

    image_dir = Path(container_path)
    # Sort so that sample order does not depend on the OS directory listing.
    folders = sorted(entry for entry in image_dir.iterdir() if entry.is_dir())
    # Labels in ``target`` are lower-cased, so the advertised class names must
    # be lower-cased too; dict.fromkeys drops duplicates but keeps the order.
    categories = list(dict.fromkeys(folder.name.lower() for folder in folders))

    descr = "A image classification dataset"
    images = []
    flat_data = []
    target = []
    for direc in folders:
        for file in sorted(direc.iterdir()):
            # A nested directory is not a broken image; leave it where it is.
            if not file.is_file():
                continue
            try:
                img = imread(file)
                gray_scale_image = color.rgb2gray(img)
                img_resized = resize(gray_scale_image, dimension, anti_aliasing=True, mode='reflect')
            except Exception as exc:
                # Quarantine the unreadable file so later runs skip it.
                # ``Exception`` (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate instead of moving a healthy file.
                error_dir = Path("errors") / direc.name
                error_dir.mkdir(parents=True, exist_ok=True)
                shutil.move(str(file), str(error_dir / file.name))
                warnings.warn("Could not load {}; moved to {} ({!r})".format(file, error_dir, exc))
                continue

            flat_data.append(img_resized.flatten())
            images.append(img_resized)
            target.append(direc.name.lower())

    flat_data = np.array(flat_data)
    target = np.array(target)
    images = np.array(images)

    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
                 images=images,
                 DESCR=descr)

In [3]:
# Read every image under the relative "images/" directory into one dataset object.
image_dataset = load_image_files(container_path="images/")

In [4]:
# Flatten each image into a single feature row; labels are taken as-is.
n_samples = len(image_dataset.images)
X = image_dataset.images.reshape(n_samples, -1)
y = image_dataset.target

# Hold out 33% of the samples for evaluation; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

In [None]:
# Hyper-parameter candidates: an RBF kernel over gamma x C, and a linear kernel over C.
param_grid = [
    dict(kernel=['rbf'], gamma=[0.001, 0.0001], C=[1, 10, 100, 1000]),
    dict(kernel=['linear'], C=[1, 10, 100, 1000]),
]

# Exhaustive search with 3-fold cross-validation, ranked by accuracy.
svc = svm.SVC()
clf = GridSearchCV(estimator=svc, param_grid=param_grid, scoring='accuracy', cv=3)
clf.fit(X_train, y_train)
print("Etapa 3 Finalizada")

# Report the winning parameter combination.
print("Best parameters set found on development set:",
      "---------------------------------------------",
      sep="\n")
print(clf.best_params_)
print()

# Score the fitted search object on the held-out test split.
print("Detailed classification report:",
      "---------------------------------------------",
      sep="\n")
y_true = y_test
y_pred = clf.predict(X_test)
print(classification_report(y_true, y_pred))