In [18]:
# from google.colab import drive
# drive.mount('/content/drive')

In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
%matplotlib notebook
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split
import skimage
from skimage.io import imread
from skimage.transform import resize

In [2]:
def load_image_files(container_path, dimension=(128, 128)):
    """
    Load image files with categories as subfolder names 
    which performs like scikit-learn sample dataset
    
    Parameters
    ----------
    container_path : string or unicode
        Path to the main folder holding one subfolder per category
    dimension : tuple
        size to which image are adjusted to
        
    Returns
    -------
    Bunch
    """
    image_dir = Path(container_path)
    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]
    categories = [fo.name for fo in folders]

    descr = "A image classification dataset"
    images = []
    flat_data = []
    target = []
    for i, direc in enumerate(folders):
        for file in direc.iterdir():
            print(file)
            img = skimage.io.imread(file)
            img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
            flat_data.append(img_resized.flatten()) 
            images.append(img_resized)
            target.append(i)
    flat_data = np.array(flat_data)
    target = np.array(target)
    images = np.array(images)

    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
                 images=images,
                 DESCR=descr)

In [4]:
basedir = "../CNN_Data/Training"
image_dataset = load_image_files(basedir)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(image_dataset.data, image_dataset.target, test_size=0.3,random_state=42)

In [8]:
param_grid = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
  {'C': [0.1, 1, 100, 1000],'gamma': [0.0001, 0.001, 0.005, 0.1, 1, 3, 5], 'kernel': ['rbf']}
 ]
svc = svm.SVC()
clf = GridSearchCV(svc, param_grid)
clf.fit(X_train, y_train)



GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']},
                         {'C': [0.1, 1, 100, 1000],
                          'gamma': [0.0001, 0.001, 0.005, 0.1, 1, 3, 5],
                          'kernel': ['rbf']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [9]:
y_pred = clf.predict(X_test)

In [10]:
print("Classification report for - \n{}:\n{}\n".format(clf, metrics.classification_report(y_test, y_pred)))

Classification report for - 
GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']},
                         {'C': [0.1, 1, 100, 1000],
                          'gamma': [0.0001, 0.001, 0.005, 0.1, 1, 3, 5],
                          'kernel': ['rbf']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0):
              precision    rec

In [11]:
y_pred

array([1, 0, 1, 0, 3, 3, 3, 1, 1, 1, 0, 1, 1, 1, 0, 1, 3, 1, 0, 2, 2, 0,
       3, 0, 2, 2, 1, 1, 2, 2, 1, 0, 2, 0, 3, 0, 3, 0, 3, 1, 3, 2, 0, 0,
       0, 1, 2, 0, 2, 0, 0, 2, 3, 1, 0, 2, 2, 0, 3, 0, 0, 2, 1, 0, 3, 0,
       1, 1, 1, 0, 2, 3, 0, 1, 3, 3, 0, 0, 1, 0, 1, 3, 0, 1, 0, 0, 0, 0,
       2, 0, 0, 3, 2, 2, 3, 3, 1, 2, 0, 0, 0, 3, 0, 2, 0, 3, 1, 3, 0, 0,
       1, 0, 0, 0, 0, 0, 2, 1, 1, 3, 3, 2, 1, 0, 1, 3, 0, 0, 1, 3])

In [12]:
from sklearn.metrics import accuracy_score

# get the accuracy
print(accuracy_score(y_test, y_pred))

0.8076923076923077


In [13]:
# Saving Model
from sklearn.externals import joblib
filename = 'SVM_SKLEARN.sav'
joblib.dump(clf, filename)



['SVM_SKLEARN.sav']

In [14]:
# Loading Model 
from sklearn.externals import joblib
filename = 'SVM_SKLEARN.sav'
loaded_model = joblib.load(filename)
result = loaded_model.score(X_test, y_test)
print(result)

0.8076923076923077


In [17]:
# Loading Model to Predict
from sklearn.externals import joblib

label_output = ['Healthy', 'Rust', "Mosaic_Virus", "Wooly Aphids"]

def load_image_file(file, dimension=(128, 128)):

    images = []
    flat_data = []
    target = []
 
    img = skimage.io.imread(file)
    img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
    flat_data.append(img_resized.flatten()) 

    flat_data = np.array(flat_data)

    return Bunch(data=flat_data,)

filename = 'SVM_SKLEARN.sav'
loaded_model = joblib.load(filename)

image = 'TestImage.jpg'
input_image = load_image_file(image)
result = loaded_model.predict(input_image.data)[0]
print(label_output[result])

Healthy
