# Image Classification using `sklearn.svm`

In [128]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
%matplotlib notebook
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split

from skimage.io import imread
from skimage.transform import resize
import skimage

### Load images in structured directory like it's sklearn sample dataset

In [135]:
def load_image_files(container_path, dimension=(64, 64)):
    """
    Load image files with categories as subfolder names 
    which performs like scikit-learn sample dataset
    
    Parameters
    ----------
    container_path : string or unicode
        Path to the main folder holding one subfolder per category
    dimension : tuple
        size to which image are adjusted to
        
    Returns
    -------
    Bunch
    """
    image_dir = Path(container_path)
    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]
    print('folders before cut',folders)
    folders = folders[1:]
    categories = [fo.name for fo in folders]
    print('categories',categories)
    print('folders',folders)
    descr = "A image classification dataset"
    images = []
    flat_data = []
    target = []
    for i, direc in enumerate(folders):
        print('direc,',direc)
        print('iterdir',direc.iterdir())
        for file in direc.iterdir(): 
            print('file:','.ipynb_checkpoints' in str(file))
            if '.ipynb_checkpoints' not in str(file):
              print(file)
              img = skimage.io.imread(file)
              img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
              flat_data.append(img_resized.flatten()) 
              images.append(img_resized)
              target.append(i)
    flat_data = np.array(flat_data)
    target = np.array(target)
    images = np.array(images)

    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
                 images=images,
                 DESCR=descr)

In [138]:
image_dataset = load_image_files("/content/images")
image_dataset.data

folders before cut [PosixPath('/content/images/.ipynb_checkpoints'), PosixPath('/content/images/cut'), PosixPath('/content/images/dont cut')]
categories ['cut', 'dont cut']
folders [PosixPath('/content/images/cut'), PosixPath('/content/images/dont cut')]
direc, /content/images/cut
iterdir <generator object Path.iterdir at 0x7f45ea21beb0>
file: False
/content/images/cut/174369bf-7f59-4d9d-9c6f-e34753419631.jpg
file: False
/content/images/cut/PUNTO-DE-CORTE-TRICOMAS.jpeg
file: False
/content/images/cut/2.jpeg
file: True
file: False
/content/images/cut/images.jpeg
file: False
/content/images/cut/cannabis-trichomes-milky-white-ready-harvest.jpg
file: False
/content/images/cut/images (1).jpeg
file: False
/content/images/cut/b038888d-cloud-and-clear-trichomes-on-cannabis.jpg
file: False
/content/images/cut/main-qimg-8e926056f382a9747c566b5c5e86f75a-lq.jpeg
file: False
/content/images/cut/3.jpeg
file: False
/content/images/cut/Amber-Trichomes__MA6jA6RLdKxIQICG.jpg
direc, /content/images/dont 

array([[0.        , 0.        , 0.        , ..., 0.35023334, 0.2939813 ,
        0.21136205],
       [0.37067714, 0.43014089, 0.18531725, ..., 0.17172926, 0.19226154,
        0.00947196],
       [0.00392157, 0.00392157, 0.00392157, ..., 0.48697885, 0.47947757,
        0.39230689],
       ...,
       [0.3532157 , 0.35895361, 0.35492763, ..., 0.64712754, 0.69663781,
        0.54919546],
       [0.40226899, 0.38046604, 0.33708412, ..., 0.89495691, 0.89048477,
        0.82352825],
       [0.13147557, 0.16604473, 0.00504789, ..., 0.7018407 , 0.80562618,
        0.19511283]])

### Split data

In [139]:
X_train, X_test, y_train, y_test = train_test_split(
    image_dataset.data, image_dataset.target, test_size=0.3,random_state=109)

### Train data with parameter optimization

In [140]:
param_grid = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
 ]
svc = svm.SVC()
clf = GridSearchCV(svc, param_grid,)

clf.fit(X_train, y_train)

In [146]:
# clf.best_score_
clf.return_train_score

False

### Predict

In [141]:
y_pred = clf.predict(X_test)


### Report

In [143]:
print("Classification report for - \n{}:\n{}\n".format(
    clf, metrics.classification_report(y_test, y_pred)))

Classification report for - 
GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']}]):
              precision    recall  f1-score   support

           0       0.29      1.00      0.44         2
           1       0.00      0.00      0.00         5

    accuracy                           0.29         7
   macro avg       0.14      0.50      0.22         7
weighted avg       0.08      0.29      0.13         7




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
