# Image Classification using `sklearn.svm`

In [127]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
%matplotlib notebook
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split

from skimage.io import imread
from skimage.transform import resize
import skimage

### Load images in structured directory like it's sklearn sample dataset

In [128]:
def load_image_files(container_path, dimension=(64, 64)):
    """
    Load image files with categories as subfolder names 
    which performs like scikit-learn sample dataset
    
    Parameters
    ----------
    container_path : string or unicode
        Path to the main folder holding one subfolder per category
    dimension : tuple
        size to which image are adjusted to
        
    Returns
    -------
    Bunch
    """
    image_dir = Path(container_path)
    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]
    # print('folders before cut',folders)
    folders = folders[1:]
    categories = [fo.name for fo in folders]
    # print('categories',categories)
    # print('folders',folders)
    descr = "A image classification dataset"
    images = []
    flat_data = []
    target = []
    for i, direc in enumerate(folders):
        # print('direc,',direc)
        # print('iterdir',direc.iterdir())
        for file in direc.iterdir(): 
            # print('file:','.ipynb_checkpoints' in str(file))
            if '.ipynb_checkpoints' not in str(file):
              # print(file)
              img = skimage.io.imread(file)
              # print(img.shape)
              img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
              print(img_resized.shape)
              flat_data.append(img_resized.flatten()) 
              images.append(img_resized)
              target.append(i)

    # print('into np array',images)
    flat_data = np.array(flat_data)
    target = np.array(target)
    # images = np.array(images)

    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
                 images=images,
                 DESCR=descr)

In [129]:
image_dataset = load_image_files("/content/images2")
# image_dataset


(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)


### Split data

In [130]:
X_train, X_test, y_train, y_test = train_test_split(
    image_dataset.data, image_dataset.target, test_size=0.3,random_state=109)

### Train data with parameter optimization

In [133]:
param_grid = [
  {'C': [1, 10, 100, 1000,2000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000,2000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
 ]
# param_grid = [
#   {'C': [0.01, 0.1, 1, 10, 100, 1000, 10000], 'kernel': ['linear']},
#   {'C': [0.01, 0.1, 1, 10, 100, 1000, 10000], 'gamma': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], 'kernel': ['rbf']},
#  ]
# param_grid = [
#   {'C': [1, 10, 100, 1000,2000], 'kernel': ['linear']},
#   {'C': [1, 10, 100, 1000,2000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
#   {'C': [1, 10, 100, 1000], 'kernel': ['poly'], 'degree': [2, 3, 4,6,10]},
#   {'C': [1, 10, 100, 1000], 'kernel': ['sigmoid'], 'coef0': [0.0, 0.5, 1.0]}
#  ]
svc = svm.SVC()
# clf = GridSearchCV(svc, param_grid,cv=2)
clf = GridSearchCV(svc, param_grid)

clf.fit(X_train, y_train)
print(clf.score(X_train,y_train),clf.score(X_test,y_test))



0.7777777777777778 0.625


### Predict

### Report

In [134]:
y_pred = clf.predict(X_test)
print("Classification report for - \n{}:\n{}\n".format(
    clf, metrics.classification_report(y_test, y_pred)))

Classification report for - 
GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [1, 10, 100, 1000, 2000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000, 2000],
                          'gamma': [0.001, 0.0001], 'kernel': ['rbf']}]):
              precision    recall  f1-score   support

           0       0.62      1.00      0.77         5
           1       0.00      0.00      0.00         3

    accuracy                           0.62         8
   macro avg       0.31      0.50      0.38         8
weighted avg       0.39      0.62      0.48         8




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [136]:
from sklearn.neural_network import MLPClassifier

clf2 = MLPClassifier(solver='lbfgs', alpha=1e-5,
                     hidden_layer_sizes=(10,), random_state=20)
clf2.fit(X_train, y_train)
from sklearn.metrics import accuracy_score


test_acc=accuracy_score(clf2.predict(X_test), y_test)
# val_acc=accuracy_score(clf.predict(X_test), y_test)
train_acc=accuracy_score(clf2.predict(X_train), y_train)
print(f'train acc: {train_acc}')
print(f'y_train:{y_train}')
print(f'y_train_test:{clf2.predict(X_train)}')
# print(f'val acc: {val_acc}')
print(f'test acc: {test_acc}')
print(f'y_test:{y_test}')
print(f'y_pred_test:{clf2.predict(X_test)}')
# plot_regions(clf, X_val, y_val)
# clf.coefs_

train acc: 1.0
y_train:[1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0]
y_train_test:[1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0]
test acc: 0.875
y_test:[0 0 0 0 0 1 1 1]
y_pred_test:[0 0 0 0 0 1 0 1]


In [138]:
# Tu turno...# modelo final
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
final_degree = 3
final_pipeline_reg = make_pipeline(
    # PolynomialFeatures(degree=final_degree),
    # StandardScaler(),
    LogisticRegression(
        max_iter=100, penalty='l2'        # ,penalty=None
        , C=0.001
        # ,C=1
    )
)

final_pipeline_reg.fit(X_train,y_train)

predict_final_pipe = final_pipeline_reg.predict(X_test)
accuracy_score(predict_final_pipe, y_test)


0.625

In [11]:
# plot_regions(final_pipeline_reg,X_train,y_train)

In [139]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn


def plot_regions(classifier, x, t):
    """Plot results from classification."""
    plt.figure(figsize=(9, 7))

    xx, yy = np.meshgrid(np.linspace(x[:, 0].min()-1, x[:, 0].max()+1, 200),
                         np.linspace(x[:, 1].min()-1, x[:, 1].max()+1, 200))

    # evaluate decision function
    Z = classifier.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # colour regions
    plt.pcolormesh(xx, yy, Z < 0, cmap=plt.cm.bwr, shading='auto', alpha=0.4)
    # decision boundary
    plt.contour(xx, yy, 1/(1 + np.exp(-Z)),
                [0.05, 0.5, 0.95], colors=['0.5', 'k', '0.5'], zorder=1)

    xc1 = x[t == np.unique(t.flatten()).max()]
    xc2 = x[t == np.unique(t.flatten()).min()]

    plt.plot(*xc1.T, 'ob', mfc='None', label='C1')
    plt.plot(*xc2.T, 'or', mfc='None', label='C2')

    # Remove ticks
    plt.xticks(())
    plt.yticks(())
    plt.axis('tight')

    return

In [140]:
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, random_state=20)

# Define parameter grid for grid search
param_grid = {'hidden_layer_sizes': [(10,), (50,), (100,), (10,10), (50,50), (100,100)]}

# Perform grid search
grid_search = GridSearchCV(clf, param_grid=param_grid, cv=5)
grid_search.fit(X_test, y_test)

# Print best parameter and accuracy score
print("Best hidden layer sizes:", grid_search.best_params_['hidden_layer_sizes'])
print("Best accuracy score:", grid_search.best_score_)



Best hidden layer sizes: (50,)
Best accuracy score: 0.7166666666666667
