# **Parkinson Detection using Image Classification**

In [None]:
from skimage import feature
from imutils import paths
import numpy as np
import argparse
import cv2 as cv
import pandas as pd
from google.colab.patches import cv2_imshow
import os

### **Metode Preprocessing**

In [None]:
def preprocess(image, image_size=128):
  """Turn a BGR image into a square, inverted binary image.

  Args:
    image: Colour image in OpenCV BGR channel order.
    image_size: Side length in pixels of the square output (default 128).

  Returns:
    The image element of the ``cv.threshold`` result.
  """
  target_shape = (image_size, image_size)
  threshold_mode = cv.THRESH_BINARY_INV | cv.THRESH_OTSU

  # Convert to greyscale, then resize to the requested square size.
  grey = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
  resized = cv.resize(grey, target_shape)

  # cv.threshold returns a pair; only the thresholded image (index 1) is kept.
  _, binary = cv.threshold(resized, 0, 255, threshold_mode)
  return binary

### **Feature Extraction**

Mengekstrak feature dari gambar dengan image descriptor

1. Histogram of Oriented Gradients (HOG)
2. Local Binary Pattern (LBP)

In [None]:
# From: https://www.pyimagesearch.com/2019/04/29/detecting-parkinsons-disease-with-opencv-computer-vision-and-the-spiral-wave-test/
def quantify_image_hog(image):
  """Return the HOG feature vector of ``image`` as computed by ``feature.hog``."""
  hog_settings = dict(
      orientations=9,
      pixels_per_cell=(10, 10),
      cells_per_block=(2, 2),
      transform_sqrt=True,
      block_norm='L1',
  )
  return feature.hog(image, **hog_settings)

In [None]:
# From: https://www.pyimagesearch.com/2015/12/07/local-binary-patterns-with-python-opencv/
def quantify_image_lbp(image, num_points=24, radius=8):
  """Describe ``image`` by a normalised histogram of uniform LBP codes.

  Args:
    image: Single-channel image passed to ``feature.local_binary_pattern``.
    num_points: Number of neighbour points sampled around each pixel (default 24).
    radius: Radius of the sampling circle (default 8).

  Returns:
    A float array of length ``num_points + 2`` whose entries sum to (almost) 1.
  """
  lbp = feature.local_binary_pattern(image, num_points, radius, method='uniform')

  # The 'uniform' method produces num_points + 2 distinct codes (0 .. num_points + 1).
  # Each code needs its own bin, which takes num_points + 3 bin edges. The previous
  # np.arange(0, 26) supplied only 25 bins, so codes 24 and 25 were merged.
  n_codes = num_points + 2
  (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, n_codes + 1), range=(0, n_codes))

  # Normalise to relative frequencies; the epsilon guards against division by zero.
  hist = hist.astype('float')
  hist /= (hist.sum() + 1e-7)

  return hist

In [None]:
#Testing image

image_test = cv.imread('drive/My Drive/Colab Test (Bootcamp ML 2020)/parkinsons/wave/training/parkinson/V01PO02.png')

image_test_preprocessed = preprocess(image_test, image_size=128)

cv2_imshow(image_test)
cv2_imshow(image_test_preprocessed)

features_hog = quantify_image_hog(image_test_preprocessed)
features_lbp = quantify_image_lbp(image_test_preprocessed)

print('HOG')
print(features_hog, len(features_hog))
print('LBP')
print(features_lbp, len(features_lbp))

### **Split Dataset**

In [None]:
def load_split(path, image_size=200, extraction_method='hog'):
  """Load every image under ``path`` and turn it into a feature vector.

  The label of each image is the name of the directory that directly contains it.

  Args:
    path: Root directory that is searched for images.
    image_size: Side length passed to ``preprocess`` (default 200).
    extraction_method: ``'hog'`` or ``'lbp'``; selects the image descriptor.

  Returns:
    A tuple ``(data, labels)`` of NumPy arrays, one entry per image.

  Raises:
    ValueError: If ``extraction_method`` is unknown or an image cannot be read.
  """
  extractors = {'hog': quantify_image_hog, 'lbp': quantify_image_lbp}
  # Reject unknown methods up front; previously this left `features` unbound
  # and failed with a NameError inside the loop.
  if extraction_method not in extractors:
    raise ValueError(
        "extraction_method must be one of {}, got {!r}".format(sorted(extractors), extraction_method))
  extract = extractors[extraction_method]

  image_paths = list(paths.list_images(path))
  data = []
  labels = []

  for image_path in image_paths:
    # The parent directory name is the class label.
    label = image_path.split(os.path.sep)[-2]

    image = cv.imread(image_path)
    # cv.imread returns None instead of raising when a file cannot be decoded.
    if image is None:
      raise ValueError('Could not read image: {}'.format(image_path))
    image = preprocess(image, image_size=image_size)

    data.append(extract(image))
    labels.append(label)

  return (np.array(data), np.array(labels))

### **Load Dataset**

In [None]:
dataset_dir = 'drive/My Drive/Colab Test (Bootcamp ML 2020)/parkinsons/wave'

training_path = os.path.join(dataset_dir, 'training')
testing_path = os.path.join(dataset_dir, 'testing')

## **HOG dengan Image Size 128**

In [None]:
#HOG 128
resize_image_size_128 = 128
extraction_method = 'hog'

In [None]:
# Build the feature matrices and label arrays for the training and testing splits,
# using the image size and descriptor configured in the previous cell.
(X_train, y_train) = load_split(training_path, image_size=resize_image_size_128, extraction_method=extraction_method)
(X_test, y_test) = load_split(testing_path, image_size=resize_image_size_128, extraction_method=extraction_method)

print('Data berhasil diupload!')

**Label Encoding**

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the training labels only, then apply the same mapping to the
# test labels so both splits share one label-to-integer encoding.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

In [None]:
y_train

In [None]:
y_test

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, precision_score, recall_score

**Machine Learning Model**

**1. LinearSVC**

In [None]:
from sklearn.svm import LinearSVC

lin_svc = LinearSVC()
lin_svc.get_params()

In [None]:
# Candidate regularisation strengths for LinearSVC. C must be strictly positive,
# so the 0 produced by the first linspace point is dropped; all other candidates are unchanged.
C = [int(x) for x in np.linspace(0, 50, 25) if int(x) > 0]

In [None]:
linsvc_grid = {'C' : C}

print(linsvc_grid)

In [None]:
# RandomizedSearchCV has to be imported before this first use; the notebook's only
# other import of it sits in the later Random Forest section, so a fresh
# top-to-bottom run would stop here with a NameError.
from sklearn.model_selection import RandomizedSearchCV

# Randomised search over the C candidates with 3-fold cross-validation.
linsvc = LinearSVC()
linsvc_random = RandomizedSearchCV(estimator=linsvc, param_distributions=linsvc_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
linsvc_random.fit(X_train, y_train)

In [None]:
linsvc_random.best_params_

In [None]:
# Train the final LinearSVC and report its metrics on the test split.
# random_state is fixed (as in the LBP sections) so the reported numbers are
# reproducible; LinearSVC shuffles the data during dual coordinate descent.
lin_svc = LinearSVC(C=2, random_state=10)
lin_svc.fit(X_train, y_train)
prediksi_lin_svc_test = lin_svc.predict(X_test)
cm = confusion_matrix(y_test, prediksi_lin_svc_test)
cr = classification_report(y_test, prediksi_lin_svc_test)
print('Nilai akurasi LinearSVC pada testing data adalah {:.3f}'.format(lin_svc.score(X_test, y_test)))
print('Nilai F1 Score LinearSVC pada testing data adalah {:.3f}'.format(f1_score(y_test, prediksi_lin_svc_test, average='macro')))
print('Nilai Precision Score LinearSVC pada testing data adalah {:.3f}'.format(precision_score(y_test, prediksi_lin_svc_test, average='macro')))
print('Nilai Recall Score LinearSVC pada testing data adalah {:.3f}'.format(recall_score(y_test, prediksi_lin_svc_test, average='macro')))
print()
print(cr)
print(cm)
print('==================================================================================================================================================================')

**2. Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression()
logreg.get_params()

In [None]:
penalty = ['l1', 'l2']
C = np.logspace(-4,4,20)

In [None]:
logreg_grid = {'penalty' : penalty,
               'C' : C}

print(logreg_grid)

In [None]:
# The grid offers both 'l1' and 'l2' penalties. The default 'lbfgs' solver supports
# only 'l2', so every 'l1' candidate would fail to fit; 'liblinear' supports both.
logreg = LogisticRegression(solver='liblinear')
log_random = RandomizedSearchCV(estimator=logreg, param_distributions=logreg_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
log_random.fit(X_train, y_train)

In [None]:
log_random.best_params_

In [None]:
logreg = LogisticRegression(C=1438.44988828766, penalty='l2')
logreg.fit(X_train, y_train)
prediksi_logreg = logreg.predict(X_test)
cm = confusion_matrix(y_test, prediksi_logreg)
cr = classification_report(y_test, prediksi_logreg)
print('Nilai akurasi Logistic Regression pada testing data adalah {:.3f}'.format(logreg.score(X_test, y_test)))
print('Nilai F1 Score Logistic Regression pada testing data adalah {:.3f}'.format(f1_score(y_test, prediksi_logreg, average='macro')))
print('Nilai Precision Score Logistic Regression pada testing data adalah {:.3f}'.format(precision_score(y_test, prediksi_logreg, average='macro')))
print('Nilai Recall Score Logistic Regression pada testing data adalah {:.3f}'.format(recall_score(y_test, prediksi_logreg, average='macro')))
print()
print(cr)
print(cm)

**3. Random Forest**

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
rf = RandomForestClassifier()
rf.get_params()

In [None]:
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# Candidate hyper-parameter values for the Random Forest search.
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# 'auto' was an alias of 'sqrt' for classifiers and is rejected by current
# scikit-learn releases, so only 'sqrt' is kept (same effective search space).
max_features = ['sqrt']
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)  # None lets trees grow without a depth limit
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

In [None]:
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

print(random_grid)

In [None]:
rf = RandomForestClassifier()
rf_random = RandomizedSearchCV(estimator=rf, param_distributions=random_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
rf_random.fit(X_train, y_train)

In [None]:
rf_random.best_params_

In [None]:
# Train the final Random Forest and report its metrics on the test split.
# max_features='sqrt' replaces the removed alias 'auto' (identical behaviour for
# classifiers). random_state is fixed so the reported accuracy is reproducible.
rf = RandomForestClassifier(n_estimators=200, bootstrap=True, max_depth=90, max_features='sqrt', min_samples_leaf=4, min_samples_split=5, random_state=10)
rf.fit(X_train, y_train)
prediksi_rf = rf.predict(X_test)
cm = confusion_matrix(y_test, prediksi_rf)
cr = classification_report(y_test, prediksi_rf)
print('Nilai akurasi Random Forest pada testing data adalah {:.3f}'.format(rf.score(X_test, y_test)))
print('Nilai F1 Score Random Forest pada testing data adalah {:.3f}'.format(f1_score(y_test, prediksi_rf, average='macro')))
print('Nilai Precision Score Random Forest pada testing data adalah {:.3f}'.format(precision_score(y_test, prediksi_rf, average='macro')))
print('Nilai Recall Score Random Forest pada testing data adalah {:.3f}'.format(recall_score(y_test, prediksi_rf, average='macro')))
print()
print(cr)
print(cm)

Algoritma Random Forest memberikan nilai akurasi tertinggi yaitu 0.733 untuk dataset parkinson dengan image size 128 dan image descriptor HOG 

In [None]:
# Annotate the first 25 test images with the label predicted by the Random Forest.
testing_paths = list(paths.list_images(testing_path))
images = []
for testingpath in testing_paths[:25]:
  image = cv.imread(testingpath)
  # Keep a resized colour copy for display; `image` itself is preprocessed for the model.
  output = image.copy()
  output = cv.resize(output, (128, 128))

  image = preprocess(image, image_size=resize_image_size_128)

  if extraction_method == 'hog':
    features = quantify_image_hog(image)
  elif extraction_method == 'lbp':
    features = quantify_image_lbp(image)

  # Predict on a single-sample batch and map the encoded class back to its name.
  preds = rf.predict([features])
  label = le.inverse_transform(preds)[0]

  # Green text for 'healthy', red (BGR order) for any other label.
  color = (0, 255, 0) if label == 'healthy' else (0, 0, 255)
  cv.putText(output, label, (3, 20), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
  images.append(output)

In [None]:
from imutils import build_montages

montage = build_montages(images, (128, 128), (5, 5))[0]

cv2_imshow(montage)

## **HOG dengan Image Size 300**

In [None]:
#HOG 300
resize_image_size_300 = 300
extraction_method = 'hog'

**Split Dataset**

In [None]:
(X_train, y_train) = load_split(training_path, image_size=resize_image_size_300, extraction_method=extraction_method)
(X_test, y_test) = load_split(testing_path, image_size=resize_image_size_300, extraction_method=extraction_method)

print('Data berhasil diupload!')

**Label Encoding**

In [None]:
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

In [None]:
y_train

In [None]:
y_test

**Machine Learning Model**

**1. Linear SVC**

In [None]:
# Candidate regularisation strengths for LinearSVC. C must be strictly positive,
# so the 0 produced by the first linspace point is dropped; all other candidates are unchanged.
C = [int(x) for x in np.linspace(0, 50, 25) if int(x) > 0]

In [None]:
linsvc_grid = {'C' : C}

print(linsvc_grid)

In [None]:
linsvc = LinearSVC()
linsvc_random = RandomizedSearchCV(estimator=linsvc, param_distributions=linsvc_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
linsvc_random.fit(X_train, y_train)

In [None]:
linsvc_random.best_params_

In [None]:
# Train the final LinearSVC and report its metrics on the test split.
# random_state is fixed (as in the LBP sections) so the reported numbers are
# reproducible; LinearSVC shuffles the data during dual coordinate descent.
lin_svc = LinearSVC(C=2, random_state=10)
lin_svc.fit(X_train, y_train)
prediksi_lin_svc_test = lin_svc.predict(X_test)
cm = confusion_matrix(y_test, prediksi_lin_svc_test)
cr = classification_report(y_test, prediksi_lin_svc_test)
print('Nilai akurasi LinearSVC pada testing data adalah {:.3f}'.format(lin_svc.score(X_test, y_test)))
print('Nilai F1 Score LinearSVC pada testing data adalah {:.3f}'.format(f1_score(y_test, prediksi_lin_svc_test, average='macro')))
print('Nilai Precision Score LinearSVC pada testing data adalah {:.3f}'.format(precision_score(y_test, prediksi_lin_svc_test, average='macro')))
print('Nilai Recall Score LinearSVC pada testing data adalah {:.3f}'.format(recall_score(y_test, prediksi_lin_svc_test, average='macro')))
print()
print(cr)
print(cm)
print('==================================================================================================================================================================')

**2. Logistic Regression**

In [None]:
logreg = LogisticRegression()
logreg.get_params()

In [None]:
penalty = ['l1', 'l2']
C = np.logspace(-4,4,20)

In [None]:
logreg_grid = {'penalty' : penalty,
               'C' : C}

print(logreg_grid)

In [None]:
# The grid offers both 'l1' and 'l2' penalties. The default 'lbfgs' solver supports
# only 'l2', so every 'l1' candidate would fail to fit; 'liblinear' supports both.
logreg = LogisticRegression(solver='liblinear')
log_random = RandomizedSearchCV(estimator=logreg, param_distributions=logreg_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
log_random.fit(X_train, y_train)

In [None]:
log_random.best_params_

In [None]:
logreg = LogisticRegression(C=1438.44988828766, penalty='l2')
logreg.fit(X_train, y_train)
prediksi_logreg = logreg.predict(X_test)
cm = confusion_matrix(y_test, prediksi_logreg)
cr = classification_report(y_test, prediksi_logreg)
print('Nilai akurasi Logistic Regression dengan pada testing data adalah {:.3f}'.format(logreg.score(X_test, y_test)))
print('Nilai F1 Score Logistic Regression dengan pada testing data adalah {:.3f}'.format(f1_score(y_test, prediksi_logreg, average='macro')))
print('Nilai Precision Score Logistic Regression dengan pada testing data adalah {:.3f}'.format(precision_score(y_test, prediksi_logreg, average='macro')))
print('Nilai Recall Score Logistic Regression dengan pada testing data adalah {:.3f}'.format(recall_score(y_test, prediksi_logreg, average='macro')))
print()
print(cr)
print(cm)

**3. Random Forest**

In [None]:
# Candidate hyper-parameter values for the Random Forest search.
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# 'auto' was an alias of 'sqrt' for classifiers and is rejected by current
# scikit-learn releases, so only 'sqrt' is kept (same effective search space).
max_features = ['sqrt']
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)  # None lets trees grow without a depth limit
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

In [None]:
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

print(random_grid)

In [None]:
rf = RandomForestClassifier()
rf_random = RandomizedSearchCV(estimator=rf, param_distributions=random_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
rf_random.fit(X_train, y_train)

In [None]:
rf_random.best_params_

In [None]:
rf = RandomForestClassifier(bootstrap=True, max_depth=50, max_features='sqrt', min_samples_leaf=1, min_samples_split=10, n_estimators=1200, random_state=10)
rf.fit(X_train, y_train)
prediksi_rf = rf.predict(X_test)
cm = confusion_matrix(y_test, prediksi_rf)
cr = classification_report(y_test, prediksi_rf)
print('Nilai akurasi Random Forest pada testing data adalah {:.3f}'.format(rf.score(X_test, y_test)))
print('Nilai F1 Score Random Forest pada testing data adalah {:.3f}'.format(f1_score(y_test, prediksi_rf, average='macro')))
print('Nilai Precision Score Random Forest pada testing data adalah {:.3f}'.format(precision_score(y_test, prediksi_rf, average='macro')))
print('Nilai Recall Score Random Forest pada testing data adalah {:.3f}'.format(recall_score(y_test, prediksi_rf, average='macro')))
print()
print(cr)
print(cm)

Model dengan algoritma LinearSVC dan Logistic Regression memberikan nilai akurasi terbaik yakni sebesar 0.733

In [None]:
testing_paths = list(paths.list_images(testing_path))
images = []
for testingpath in testing_paths[:25]:
  image = cv.imread(testingpath)
  output = image.copy()
  output = cv.resize(output, (300, 300))

  image = preprocess(image, image_size=resize_image_size_300)

  if extraction_method == 'hog':
    features = quantify_image_hog(image)
  elif extraction_method == 'lbp':
    features = quantify_image_lbp(image)

  preds = lin_svc.predict([features])
  label = le.inverse_transform(preds)[0]

  color = (0, 255, 0) if label == 'healthy' else (0, 0, 255)
  cv.putText(output, label, (3, 20), cv.FONT_HERSHEY_SIMPLEX, 1, color, 3)
  images.append(output)

In [None]:
from imutils import build_montages

montage = build_montages(images, (300, 300), (5, 5))[0]

cv2_imshow(montage)

## **LBP dengan Image Size 128**

In [None]:
#LBP 128
resize_image_size_lbp_128 = 128
extraction_method_lbp = 'lbp'

In [None]:
(X_train_lbp, y_train_lbp) = load_split(training_path, image_size=resize_image_size_lbp_128, extraction_method=extraction_method_lbp)
(X_test_lbp, y_test_lbp) = load_split(testing_path, image_size=resize_image_size_lbp_128, extraction_method=extraction_method_lbp)

print('Data sudah diupload!')

**Label Encoding**

In [None]:
le = LabelEncoder()
y_train_lbp = le.fit_transform(y_train_lbp)
y_test_lbp = le.transform(y_test_lbp)

In [None]:
y_train_lbp

In [None]:
y_test_lbp

**Machine Learning Model**

**1. LinearSVC**

In [None]:
lin_svc_lbp = LinearSVC()
lin_svc_lbp.get_params()

In [None]:
# Candidate regularisation strengths for LinearSVC. C must be strictly positive,
# so the 0 produced by the first linspace point is dropped; all other candidates are unchanged.
C = [int(x) for x in np.linspace(0, 50, 25) if int(x) > 0]

In [None]:
linsvc_grid = {'C' : C}

print(linsvc_grid)

In [None]:
linsvc_lbp = LinearSVC()
lin_random_lbp = RandomizedSearchCV(estimator=linsvc_lbp, param_distributions=linsvc_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
lin_random_lbp.fit(X_train_lbp, y_train_lbp)

In [None]:
lin_random_lbp.best_params_

In [None]:
# Train the final LinearSVC on the LBP features and report its test metrics.
lin_svc_lbp = LinearSVC(C=2, random_state=10)
lin_svc_lbp.fit(X_train_lbp, y_train_lbp)
prediksi_lin_svc_lbp = lin_svc_lbp.predict(X_test_lbp)
cm_lbp = confusion_matrix(y_test_lbp, prediksi_lin_svc_lbp)
# The report must use the LBP test labels; it previously read `y_test`, a leftover
# variable from the HOG section.
cr_lbp = classification_report(y_test_lbp, prediksi_lin_svc_lbp)
print('Nilai akurasi LinearSVC pada testing data adalah {:.3f}'.format(lin_svc_lbp.score(X_test_lbp, y_test_lbp)))
print('Nilai F1 Score LinearSVC pada testing data adalah {:.3f}'.format(f1_score(y_test_lbp, prediksi_lin_svc_lbp, average='macro')))
print('Nilai Precision Score LinearSVC pada testing data adalah {:.3f}'.format(precision_score(y_test_lbp, prediksi_lin_svc_lbp, average='macro')))
print('Nilai Recall Score LinearSVC pada testing data adalah {:.3f}'.format(recall_score(y_test_lbp, prediksi_lin_svc_lbp, average='macro')))
print()
print(cr_lbp)
print(cm_lbp)

**2. Logistic Regression**

In [None]:
logreg_lbp = LogisticRegression()
logreg_lbp.get_params()

In [None]:
penalty = ['l1', 'l2']
C = np.logspace(-4,4,20)

In [None]:
log_grid = {'penalty': penalty,
            'C': C}

print(log_grid)

In [None]:
# The grid offers both 'l1' and 'l2' penalties. The default 'lbfgs' solver supports
# only 'l2', so every 'l1' candidate would fail to fit; 'liblinear' supports both.
logreg_lbp = LogisticRegression(solver='liblinear')
log_random_lbp = RandomizedSearchCV(estimator=logreg_lbp, param_distributions=log_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
log_random_lbp.fit(X_train_lbp, y_train_lbp)

In [None]:
log_random_lbp.best_params_

In [None]:
logreg_lbp = LogisticRegression(solver='lbfgs', C=78.47599703514607, penalty='l2')
logreg_lbp.fit(X_train_lbp, y_train_lbp)
prediksi_logreg_lbp = logreg_lbp.predict(X_test_lbp)
cm_lbp = confusion_matrix(y_test_lbp, prediksi_logreg_lbp)
cr_lbp = classification_report(y_test_lbp, prediksi_logreg_lbp)
print('Nilai akurasi Logistic Regression pada testing data adalah {:.3f}'.format(logreg_lbp.score(X_test_lbp, y_test_lbp)))
print('Nilai F1 Score Logistic Regression pada testing data adalah {:.3f}'.format(f1_score(y_test_lbp, prediksi_logreg_lbp, average='macro')))
print('Nilai Precision Score Logistic Regression pada testing data adalah {:.3f}'.format(precision_score(y_test_lbp, prediksi_logreg_lbp, average='macro')))
print('Nilai Recall Score Logistic Regression pada testing data adalah {:.3f}'.format(recall_score(y_test_lbp, prediksi_logreg_lbp, average='macro')))
print()
print(cr_lbp)
print(cm_lbp)  

**3. Random Forest Classifier**

In [None]:
# Candidate hyper-parameter values for the Random Forest search.
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# 'auto' was an alias of 'sqrt' for classifiers and is rejected by current
# scikit-learn releases, so only 'sqrt' is kept (same effective search space).
max_features = ['sqrt']
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)  # None lets trees grow without a depth limit
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

In [None]:
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

print(random_grid)

In [None]:
rf_lbp = RandomForestClassifier()
rf_random_lbp = RandomizedSearchCV(estimator=rf_lbp, param_distributions=random_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
rf_random_lbp.fit(X_train_lbp, y_train_lbp)

In [None]:
rf_random_lbp.best_params_

In [None]:
# Train the final Random Forest on the LBP features and report its test metrics.
# max_features='sqrt' replaces the removed alias 'auto' (identical behaviour for classifiers).
rf_lbp = RandomForestClassifier(bootstrap=True, max_depth=50, max_features='sqrt', min_samples_leaf=2, min_samples_split=5, n_estimators=400, random_state=10)
rf_lbp.fit(X_train_lbp, y_train_lbp)
prediksi_rf_lbp = rf_lbp.predict(X_test_lbp)
cm_lbp = confusion_matrix(y_test_lbp, prediksi_rf_lbp)
cr_lbp = classification_report(y_test_lbp, prediksi_rf_lbp)
print('Nilai akurasi Random Forest pada testing data adalah {:.3f}'.format(rf_lbp.score(X_test_lbp, y_test_lbp)))
print('Nilai F1 Score Random Forest pada testing data adalah {:.3f}'.format(f1_score(y_test_lbp, prediksi_rf_lbp, average='macro')))
print('Nilai Precision Score Random Forest pada testing data adalah {:.3f}'.format(precision_score(y_test_lbp, prediksi_rf_lbp, average='macro')))
print('Nilai Recall Score Random Forest pada testing data adalah {:.3f}'.format(recall_score(y_test_lbp, prediksi_rf_lbp, average='macro')))
print()
print(cr_lbp)
print(cm_lbp)

Untuk ketiga algoritma memberikan nilai akurasi yang sama yakni 0.5, yang mana bukanlah hasil yang baik, akan tetapi algoritma Random Forest memberikan nilai f1, precision, dan recall yang lebih baik dibandingkan LinearSVC dan Logistic Regression sehingga algoritma Random Forest akan dijadikan sebagai algoritma terbaik

In [None]:
# Annotate test images with the label predicted by the LBP Random Forest.
testing_paths = list(paths.list_images(testing_path))
images = []
# The 5x5 montage displayed afterwards holds 25 images, so only 25 are prepared
# (consistent with the HOG sections); the extra 5 were never shown.
for testingpath in testing_paths[:25]:
  image = cv.imread(testingpath)
  # Keep a resized colour copy for display; `image` itself is preprocessed for the model.
  output = image.copy()
  output = cv.resize(output, (128, 128))

  image = preprocess(image, image_size=resize_image_size_lbp_128)

  if extraction_method_lbp == 'hog':
    features = quantify_image_hog(image)
  elif extraction_method_lbp == 'lbp':
    features = quantify_image_lbp(image)

  # Predict on a single-sample batch and map the encoded class back to its name.
  preds = rf_lbp.predict([features])
  label = le.inverse_transform(preds)[0]

  # Green text for 'healthy', red (BGR order) for any other label.
  color = (0, 255, 0) if label == 'healthy' else (0, 0, 255)
  cv.putText(output, label, (3, 20), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
  images.append(output)

In [None]:
from imutils import build_montages

montage = build_montages(images, (128, 128), (5, 5))[0]

cv2_imshow(montage)

## **LBP dengan Image Size 300**

In [None]:
#LBP 300
resize_image_size_lbp_300 = 300
extraction_method_lbp = 'lbp'

In [None]:
(X_train_lbp, y_train_lbp) = load_split(training_path, image_size=resize_image_size_lbp_300, extraction_method=extraction_method_lbp)
(X_test_lbp, y_test_lbp) = load_split(testing_path, image_size=resize_image_size_lbp_300, extraction_method=extraction_method_lbp)

print('Data berhasil diupload!')

**Label Encoding**

In [None]:
le = LabelEncoder()
y_train_lbp = le.fit_transform(y_train_lbp)
y_test_lbp = le.transform(y_test_lbp)

In [None]:
y_train_lbp

In [None]:
y_test_lbp

**1. LinearSVC**

In [None]:
linsvc_lbp = LinearSVC()
linsvc_lbp.get_params()

In [None]:
# Candidate regularisation strengths for LinearSVC. C must be strictly positive,
# so the 0 produced by the first linspace point is dropped; all other candidates are unchanged.
C = [int(x) for x in np.linspace(0, 50, 25) if int(x) > 0]

In [None]:
linsvc_grid = {'C' : C}

In [None]:
linsvc_lbp = LinearSVC()
lin_random_lbp = RandomizedSearchCV(estimator=linsvc_lbp, param_distributions=linsvc_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
lin_random_lbp.fit(X_train_lbp, y_train_lbp)

In [None]:
lin_random_lbp.best_params_

In [None]:
# Train the final LinearSVC on the LBP features and report its test metrics.
lin_svc_lbp = LinearSVC(C=43, random_state=10)
lin_svc_lbp.fit(X_train_lbp, y_train_lbp)
prediksi_lin_svc_lbp = lin_svc_lbp.predict(X_test_lbp)
cm_lbp = confusion_matrix(y_test_lbp, prediksi_lin_svc_lbp)
# The report must use the LBP test labels; it previously read `y_test`, a leftover
# variable from the HOG section.
cr_lbp = classification_report(y_test_lbp, prediksi_lin_svc_lbp)
print('Nilai akurasi LinearSVC pada testing data adalah {:.3f}'.format(lin_svc_lbp.score(X_test_lbp, y_test_lbp)))
print('Nilai F1 Score LinearSVC pada testing data adalah {:.3f}'.format(f1_score(y_test_lbp, prediksi_lin_svc_lbp, average='macro')))
print('Nilai Precision Score LinearSVC pada testing data adalah {:.3f}'.format(precision_score(y_test_lbp, prediksi_lin_svc_lbp, average='macro')))
print('Nilai Recall Score LinearSVC pada testing data adalah {:.3f}'.format(recall_score(y_test_lbp, prediksi_lin_svc_lbp, average='macro')))
print()
print(cr_lbp)
print(cm_lbp)

**2. Logistic Regression**

In [None]:
logreg_lbp = LogisticRegression()
logreg_lbp.get_params()

In [None]:
penalty = ['l1', 'l2']
C = np.logspace(-4,4,20)

In [None]:
log_grid = {'penalty': penalty, 'C': C}

In [None]:
# The grid offers both 'l1' and 'l2' penalties. The default 'lbfgs' solver supports
# only 'l2', so every 'l1' candidate would fail to fit; 'liblinear' supports both.
logreg_lbp = LogisticRegression(solver='liblinear')
log_random_lbp = RandomizedSearchCV(estimator=logreg_lbp, param_distributions=log_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
log_random_lbp.fit(X_train_lbp, y_train_lbp)

In [None]:
log_random_lbp.best_params_

In [None]:
logreg_lbp = LogisticRegression(C=3792.690190732246, penalty='l2')
logreg_lbp.fit(X_train_lbp, y_train_lbp)
prediksi_logreg_lbp = logreg_lbp.predict(X_test_lbp)
cm_lbp = confusion_matrix(y_test_lbp, prediksi_logreg_lbp)
cr_lbp = classification_report(y_test_lbp, prediksi_logreg_lbp)
print('Nilai akurasi Logistic Regression pada testing data adalah {:.3f}'.format(logreg_lbp.score(X_test_lbp, y_test_lbp)))
print('Nilai F1 Score Logistic Regression pada testing data adalah {:.3f}'.format(f1_score(y_test_lbp, prediksi_logreg_lbp, average='macro')))
print('Nilai Precision Score Logistic Regression pada testing data adalah {:.3f}'.format(precision_score(y_test_lbp, prediksi_logreg_lbp, average='macro')))
print('Nilai Recall Score Logistic Regression pada testing data adalah {:.3f}'.format(recall_score(y_test_lbp, prediksi_logreg_lbp, average='macro')))
print()
print(cr_lbp)
print(cm_lbp)  

**3. Random Forest Classifier**

In [None]:
# Candidate hyper-parameter values for the Random Forest search.
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# 'auto' was an alias of 'sqrt' for classifiers and is rejected by current
# scikit-learn releases, so only 'sqrt' is kept (same effective search space).
max_features = ['sqrt']
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)  # None lets trees grow without a depth limit
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

In [None]:
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

print(random_grid)

In [None]:
rf_lbp = RandomForestClassifier()
rf_random_lbp = RandomizedSearchCV(estimator=rf_lbp, param_distributions=random_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
rf_random_lbp.fit(X_train_lbp, y_train_lbp)

In [None]:
rf_random_lbp.best_params_

In [None]:
rf_lbp = RandomForestClassifier(bootstrap=False, max_depth=60, max_features='sqrt', min_samples_leaf=2, min_samples_split=5, n_estimators=1200, random_state=10)
rf_lbp.fit(X_train_lbp, y_train_lbp)
prediksi_rf_lbp = rf_lbp.predict(X_test_lbp)
cm_lbp = confusion_matrix(y_test_lbp, prediksi_rf_lbp)
cr_lbp = classification_report(y_test_lbp, prediksi_rf_lbp)
print('Nilai akurasi Random Forest pada testing data adalah {:.3f}'.format(rf_lbp.score(X_test_lbp, y_test_lbp)))
print('Nilai F1 Score Random Forest pada testing data adalah {:.3f}'.format(f1_score(y_test_lbp, prediksi_rf_lbp, average='macro')))
print('Nilai Precision Score Random Forest pada testing data adalah {:.3f}'.format(precision_score(y_test_lbp, prediksi_rf_lbp, average='macro')))
print('Nilai Recall Score Random Forest pada testing data adalah {:.3f}'.format(recall_score(y_test_lbp, prediksi_rf_lbp, average='macro')))
print()
print(cr_lbp)
print(cm_lbp)

Algoritma Logistic Regression dan Random Forest memberikan nilai akurasi terbaik, begitu pula dengan metric-metric lainnya pun algoritma Random Forest dan Logistic Regression yang hasilnya paling baik

In [None]:
# Annotate test images with the label predicted by the LBP Logistic Regression.
testing_paths = list(paths.list_images(testing_path))
images = []
# The 5x5 montage displayed afterwards holds 25 images, so only 25 are prepared
# (consistent with the HOG sections); the extra 5 were never shown.
for testingpath in testing_paths[:25]:
  image = cv.imread(testingpath)
  # Keep a resized colour copy for display; `image` itself is preprocessed for the model.
  output = image.copy()
  output = cv.resize(output, (300, 300))

  image = preprocess(image, image_size=resize_image_size_lbp_300)

  if extraction_method_lbp == 'hog':
    features = quantify_image_hog(image)
  elif extraction_method_lbp == 'lbp':
    features = quantify_image_lbp(image)

  # Predict on a single-sample batch and map the encoded class back to its name.
  preds = logreg_lbp.predict([features])
  label = le.inverse_transform(preds)[0]

  # Green text for 'healthy', red (BGR order) for any other label.
  color = (0, 255, 0) if label == 'healthy' else (0, 0, 255)
  cv.putText(output, label, (3, 20), cv.FONT_HERSHEY_SIMPLEX, 0.9, color, 3)
  images.append(output)

In [None]:
from imutils import build_montages

montage = build_montages(images, (300, 300), (5, 5))[0]

cv2_imshow(montage)

### **Kesimpulan**

1. HOG dengan Image size 128 -> Model terbaik adalah Random Forest dengan akurasi 0.733
2. HOG dengan Image size 300 -> Model terbaik adalah LinearSVC dan Logistic Regression dengan akurasi 0.733
3. LBP dengan Image size 128 -> Model Linear SVC, Logistic Regression, dan Random Forest memberikan hasil akurasi yang sama kurang baiknya yaitu 0.5
4. LBP dengan Image size 300 -> Model Logistic Regression dan Random Forest memberikan nilai akurasi terbaik yaitu 0.7

**Model terbaik adalah Random Forest dengan image descriptor HOG dan image size 128 serta LinearSVC dengan image descriptor HOG dan image size 300**

# **Image Classification Parkinson Wave dengan Tambahan Data**

In [None]:
dataset_dir = 'drive/My Drive/Colab Test (Bootcamp ML 2020)/parkinsons/wave'

training_path = os.path.join(dataset_dir, 'training_tambahan')
testing_path = os.path.join(dataset_dir, 'testing')

Akan digunakan Metode Preprocess dengan HOG dan image size 300 dengan algoritma Linear SVC dan Logistic Regression

## **HOG dan Image Size 300 dengan Algoritma LinearSVC dan Logistic Regression**

In [None]:
resize_image_size_300 = 300 
extraction_method = 'hog'

In [None]:
(X_train, y_train) = load_split(training_path, image_size=resize_image_size_300, extraction_method=extraction_method)
(X_test, y_test) = load_split(testing_path, image_size=resize_image_size_300, extraction_method=extraction_method)

print('Data berhasil diupload!')

**Label Encoder**

In [None]:
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

In [None]:
print(len(y_train))
y_train

In [None]:
print(len(y_test))
y_test

**Machine Learning Model**

**1. LinearSVC**

In [None]:
from sklearn.svm import LinearSVC

In [None]:
linsvc = LinearSVC()
linsvc.get_params()

In [None]:
# Candidate regularisation strengths for LinearSVC. C must be strictly positive,
# so the 0 produced by the first linspace point is dropped; all other candidates are unchanged.
C = [int(x) for x in np.linspace(0, 50, 25) if int(x) > 0]

In [None]:
linsvc_grid = {'C' : C}

print(linsvc_grid)

In [None]:
linsvc = LinearSVC()
linsvc_random = RandomizedSearchCV(estimator=linsvc, param_distributions=linsvc_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
linsvc_random.fit(X_train, y_train)

In [None]:
linsvc_random.best_params_

In [None]:
# Train the final LinearSVC on the enlarged training set and report its test metrics.
# random_state is fixed (as in the LBP sections) so the reported numbers are
# reproducible; LinearSVC shuffles the data during dual coordinate descent.
linsvc = LinearSVC(C=2, random_state=10)
linsvc.fit(X_train, y_train)
prediksi_linsvc = linsvc.predict(X_test)
cm = confusion_matrix(y_test, prediksi_linsvc)
cr = classification_report(y_test, prediksi_linsvc)
print('Nilai akurasi LinearSVC pada testing data adalah {:.3f}'.format(linsvc.score(X_test, y_test)))
print('Nilai F1 Score LinearSVC pada testing data adalah {:.3f}'.format(f1_score(y_test, prediksi_linsvc, average='macro')))
print('Nilai Precision Score LinearSVC pada testing data adalah {:.3f}'.format(precision_score(y_test, prediksi_linsvc, average='macro')))
print('Nilai Recall Score LinearSVC pada testing data adalah {:.3f}'.format(recall_score(y_test, prediksi_linsvc, average='macro')))
print()
print(cr)
print(cm)

In [None]:
testing_paths = list(paths.list_images(testing_path))
images = []
for testingpath in testing_paths[:25]:
  image = cv.imread(testingpath)
  output = image.copy()
  output = cv.resize(output, (300, 300))

  image = preprocess(image, image_size=resize_image_size_300)

  if extraction_method == 'hog':
    features = quantify_image_hog(image)
  elif extraction_method == 'lbp':
    features = quantify_image_lbp(image)

  preds = linsvc.predict([features])
  label = le.inverse_transform(preds)[0]

  color = (0, 255, 0) if label == 'healthy' else (0, 0, 255)
  cv.putText(output, label, (3, 20), cv.FONT_HERSHEY_SIMPLEX, 0.7, color, 3)
  images.append(output)

In [None]:
from imutils import build_montages

montage = build_montages(images, (300, 300), (5, 5))[0]

cv2_imshow(montage)

**2. Logistic Regression**

In [None]:
logreg = LogisticRegression()
logreg.get_params()

In [None]:
penalty = ['l1', 'l2']
C = np.logspace(-4,4,20)

In [None]:
log_grid = {'penalty': penalty, 'C': C}

In [None]:
# The grid offers both 'l1' and 'l2' penalties. The default 'lbfgs' solver supports
# only 'l2', so every 'l1' candidate would fail to fit; 'liblinear' supports both.
logreg = LogisticRegression(solver='liblinear')
log_random = RandomizedSearchCV(estimator=logreg, param_distributions=log_grid, n_iter=100, cv=3, verbose=2, random_state=10, n_jobs=-1)
log_random.fit(X_train, y_train)

In [None]:
log_random.best_params_

In [None]:
logreg = LogisticRegression(C=3792.690190732246, penalty='l2')
logreg.fit(X_train, y_train)
prediksi_logreg = logreg.predict(X_test)
cm = confusion_matrix(y_test, prediksi_logreg)
cr = classification_report(y_test, prediksi_logreg)
print('Nilai akurasi Logistic Regression pada testing data adalah {:.3f}'.format(logreg.score(X_test, y_test)))
print('Nilai F1 Score Logistic Regression pada testing data adalah {:.3f}'.format(f1_score(y_test, prediksi_logreg, average='macro')))
print('Nilai Precision Score Logistic Regression pada testing data adalah {:.3f}'.format(precision_score(y_test, prediksi_logreg, average='macro')))
print('Nilai Recall Score Logistic Regression pada testing data adalah {:.3f}'.format(recall_score(y_test, prediksi_logreg, average='macro')))
print()
print(cr)
print(cm)  

In [None]:
testing_paths = list(paths.list_images(testing_path))
images = []
for testingpath in testing_paths[:25]:
  image = cv.imread(testingpath)
  output = image.copy()
  output = cv.resize(output, (300, 300))

  image = preprocess(image, image_size=resize_image_size_300)

  if extraction_method == 'hog':
    features = quantify_image_hog(image)
  elif extraction_method == 'lbp':
    features = quantify_image_lbp(image)

  preds = logreg.predict([features])
  label = le.inverse_transform(preds)[0]

  color = (0, 255, 0) if label == 'healthy' else (0, 0, 255)
  cv.putText(output, label, (3, 20), cv.FONT_HERSHEY_SIMPLEX, 0.7, color, 3)
  images.append(output)

In [None]:
from imutils import build_montages

montage = build_montages(images, (300, 300), (5, 5))[0]

cv2_imshow(montage)

## **Kesimpulan**

**HOG dengan Image size 300** :
1. **LinearSVC** mengalami **peningkatan** akurasi dari **0.733** (sebelum penambahan data training) menjadi **0.767** (setelah penambahan data training)

2. **Logistic Regression** mengalami **peningkatan** akurasi dari **0.733** (sebelum penambahan data training) menjadi **0.767** (setelah penambahan data training)