# Machine Learning LE

## Importing the Libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Importing the Libraries for PyImageJ

In [None]:
# To install PyImageJ library follow the link below
# https://pyimagej.readthedocs.io/en/latest/Install.html

In [None]:
# Run this cell only once
import imagej
import scyjava as sj

ij = imagej.init('2.9.0', add_legacy = True)

## Feature Extraction

In [None]:
%%time

from skimage.feature import graycomatrix, graycoprops
from skimage.measure import shannon_entropy
import os

Duplicate = sj.jimport('ij.plugin.Duplicator')
Prefs = sj.jimport('ij.Prefs')
Prefs.blackBackground = True

result = []
rootdir = 'Coffee_Image_Dataset'

for rootdir, dirs, files in os.walk(rootdir):
    for name in files:
        if name.endswith(".jpg"):
            filepath = os.path.join(rootdir, name)
            
            # Preprocess 
            img = ij.IJ.openImage(filepath)
            ij.IJ.run(img, "8-bit", "")
            dup = Duplicate().run(img)
            ij.IJ.run(dup, "Median...", "radius=2")
            
            # Thresholding
            ij.IJ.setAutoThreshold(dup, "Huang no-reset");
            ij.IJ.run(dup, "Convert to Mask", "")
            ij.IJ.run(dup, "Close", "")
            
            # Analyzing
            ij.IJ.run("Set Measurements...", "area perimeter bounding fit shape redirect=None decimal=6")
            ij.py.run_plugin(plugin = "Analyze Particles...",
                             args = "exclude",
                             imp = dup)
            
            img_py = ij.py.from_java(img)
            img_py = img_py.values
            imp_py = img_py.astype(np.uint8)
            
            
            # Getting the Textural Features
            GLCM = graycomatrix(img_py,
                                distances = [1],
                                angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4],
                                levels = 256,
                                symmetric = True)
            
            
            # Append Results to the Dataset
            result.append({'Contrast': graycoprops(GLCM, 'contrast')[0][0],
                           'Dissimilarity': graycoprops(GLCM, 'dissimilarity')[0][0],
                           'Homogeneity': graycoprops(GLCM, 'homogeneity')[0][0],
                           'Energy': graycoprops(GLCM, 'energy')[0][0],
                           'Correlation': graycoprops(GLCM, 'correlation')[0][0],
                           'ASM': graycoprops(GLCM, 'ASM')[0][0],
                           'Entropy': shannon_entropy(img_py),
                           'Species': os.path.basename(rootdir)})
            
# Saving the Morphological Features as csv
ij.IJ.saveAs("Results", "Results.csv") # result save as csv
ij.IJ.run("Clear Results")    

textural_features = pd.DataFrame.from_records(result)

## Combining the Dataset

In [None]:
def get_morphological_features(df, area, perimeter, width, height, major, minor):
    feret_diameter = np.sqrt((4 * df[area]) / np.pi)
    rectangular_AR = df[height] / df[width]
    eccentricity = df[minor] / df[major]
    roundness = (4 * np.pi * df[area]) / (np.power(df[perimeter], 2))
    
    return df.assign(Feret_Diameter = feret_diameter,
                     Rectangular_AR = rectangular_AR,
                     Eccentricity = eccentricity,
                     Roundness = roundness)

In [None]:
morphological_features = pd.read_csv('Results.csv')

X = morphological_features.drop(['BX', 'BY', 'Angle', 'Round'], axis = 1)
X = get_morphological_features(X, "Area", "Perim.", "Width", "Height", "Major", "Minor")
X = pd.concat([X, textural_features], axis = 1)
X.to_csv('Results.csv')

## Importing the Dataset

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
dataset = pd.read_csv('Results.csv')

X = dataset.iloc[:, :-1]
X = X.iloc[:, 2:]
y = dataset.iloc[:, -1]

## Encoding Dependent Variable

In [3]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

## Training and Testing Set Split

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 7)

## Feature Scaling

In [5]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## Hyperparameter Tuning - SVM

In [6]:
%%time

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

parameters = {'C': [10, 20, 30],
              'gamma': ['scale', 1, 5, 'auto'],
              'coef0': [9, 5],
              'kernel': ['linear'],
              'probability': [True],
              'class_weight': ['balanced'],
              'break_ties': [True]}

model = SVC(cache_size = 1500)

grid_search = GridSearchCV(model,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           n_jobs = -1,
                           cv = 10,
                           verbose = 3)

grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

Fitting 10 folds for each of 24 candidates, totalling 240 fits
Best Accuracy: 64.73 %
Best Parameters: {'C': 20, 'break_ties': True, 'class_weight': 'balanced', 'coef0': 9, 'gamma': 'scale', 'kernel': 'linear', 'probability': True}
CPU times: total: 6.03 s
Wall time: 8min 14s


## Classification Report - SVM

In [7]:
from sklearn.metrics import classification_report
best_model = grid_search

y_pred = best_model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.70      0.71      0.71       114
           1       0.76      0.78      0.77        97
           2       0.50      0.48      0.49       100

    accuracy                           0.66       311
   macro avg       0.65      0.66      0.66       311
weighted avg       0.66      0.66      0.66       311



## Hyperparameter Tuning - GaussianNB

In [8]:
%%time

from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV

parameters = {'var_smoothing': [0.000000001, 0.1, 1, 3, 5]}
model = GaussianNB()

grid_search = GridSearchCV(model,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           n_jobs = -1,
                           cv = 10,
                           verbose = 3)

grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

Fitting 10 folds for each of 5 candidates, totalling 50 fits
Best Accuracy: 43.80 %
Best Parameters: {'var_smoothing': 1e-09}
CPU times: total: 125 ms
Wall time: 215 ms


## Classification Report - GaussianNB

In [9]:
from sklearn.metrics import classification_report
best_model = grid_search

y_pred = best_model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.34      0.10      0.15       114
           1       0.45      0.71      0.55        97
           2       0.35      0.44      0.39       100

    accuracy                           0.40       311
   macro avg       0.38      0.42      0.36       311
weighted avg       0.38      0.40      0.35       311



## Hyperparameter Tuning - KNN

In [37]:
%%time

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

parameters = {'n_neighbors': [1, 3, 5, 10],
              'weights': ['uniform', 'distance', None],
              'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
              'leaf_size': [1, 15, 30, 35],
              'p': [1, 2, 5]}

model = KNeighborsClassifier()
grid_search = GridSearchCV(model,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           n_jobs = -1,
                           cv = 10,
                           verbose = 3)

grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

Fitting 10 folds for each of 576 candidates, totalling 5760 fits
Best Accuracy: 39.94 %
Best Parameters: {'algorithm': 'auto', 'leaf_size': 1, 'n_neighbors': 10, 'p': 5, 'weights': 'uniform'}
CPU times: total: 9.58 s
Wall time: 3min 6s


## Classification Report - KNN

In [38]:
from sklearn.metrics import classification_report
best_model = grid_search

y_pred = best_model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.42      0.46      0.44       114
           1       0.44      0.48      0.46        97
           2       0.30      0.25      0.27       100

    accuracy                           0.40       311
   macro avg       0.39      0.40      0.39       311
weighted avg       0.39      0.40      0.39       311

