In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from skimage.feature import canny
from sklearn.preprocessing import LabelEncoder

In [2]:
# EDGE BASED SEGMENTATION
def preprocess1(image):
  # resizing
  image = cv2.resize(image,(96,96))
  # gray scaling
  image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  # log transformation
  c=0
  if np.max(image) == 0:
        c=0
  else:
        c = 255 / np.log(1 + np.max(image)) 
  log_image = c * (np.log(image + 1)) 
  log_image = np.array(log_image, dtype = np.uint8)  
  # canny edge detection
  image = canny(log_image)
  return image

In [3]:
rootdir = 'massey_dataset'
images = []
labels = []

for img in os.listdir(rootdir):
    label=img.split('_')[1]
    img = os.path.join(rootdir, img)
    image = cv2.imread(img)
    image = preprocess1(image)
    images.append(image)
    labels.append(label)
    
encoder = LabelEncoder()
labels = encoder.fit_transform(labels)


In [4]:
import lightgbm as lgb
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [10]:
X = np.array(images)
y = np.array(labels) 
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=80)
X = sklearn_pca.fit_transform(X_std)

In [11]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 200, 'num_leaves': 5}
Best score:  0.8732954545454545
              precision    recall  f1-score   support

           0       0.83      0.96      0.89        25
           1       0.84      0.95      0.89        22
           2       0.95      1.00      0.98        20
           3       1.00      0.96      0.98        23
           4       0.89      0.81      0.85        21
           5       0.95      0.90      0.93        21
           6       0.80      0.95      0.87        21
           7       0.76      0.89      0.82        18
           8       0.88      0.79      0.83        19
           9       0.94      0.94      0.94        17
          10       0.76      0.93      0.84        14
          11       0.92      0.85      0.88        26
          12       0.94      0.88      0.91        17
          13       0.71      0.85      0.77        20
          14       1.00      0.95      0.98        21
       

In [12]:
X = np.array(images)
y = np.array(labels) 
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=100)
X = sklearn_pca.fit_transform(X_std)

In [13]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 200, 'num_leaves': 5}
Best score:  0.8693181818181819
              precision    recall  f1-score   support

           0       0.82      0.92      0.87        25
           1       0.95      0.95      0.95        22
           2       0.86      0.90      0.88        20
           3       1.00      0.91      0.95        23
           4       0.84      0.76      0.80        21
           5       0.90      0.90      0.90        21
           6       0.88      1.00      0.93        21
           7       0.80      0.89      0.84        18
           8       0.88      0.79      0.83        19
           9       0.94      0.88      0.91        17
          10       0.82      1.00      0.90        14
          11       0.96      0.88      0.92        26
          12       0.94      0.94      0.94        17
          13       0.74      0.70      0.72        20
          14       1.00      1.00      1.00        21
       

In [14]:
X = np.array(images)
y = np.array(labels) 
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=150)
X = sklearn_pca.fit_transform(X_std)

In [15]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 200, 'num_leaves': 5}
Best score:  0.8732954545454545
              precision    recall  f1-score   support

           0       0.91      0.84      0.87        25
           1       0.95      0.95      0.95        22
           2       0.83      0.95      0.88        20
           3       1.00      0.96      0.98        23
           4       0.90      0.86      0.88        21
           5       0.90      0.90      0.90        21
           6       0.83      0.90      0.86        21
           7       0.79      0.83      0.81        18
           8       0.71      0.79      0.75        19
           9       0.83      0.88      0.86        17
          10       0.88      1.00      0.93        14
          11       0.92      0.88      0.90        26
          12       0.94      0.88      0.91        17
          13       0.67      0.80      0.73        20
          14       1.00      1.00      1.00        21
       

In [16]:
X = np.array(images)
y = np.array(labels) 
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=200)
X = sklearn_pca.fit_transform(X_std)

In [17]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 200, 'num_leaves': 5}
Best score:  0.8744318181818181
              precision    recall  f1-score   support

           0       0.92      0.88      0.90        25
           1       0.95      0.95      0.95        22
           2       0.78      0.90      0.84        20
           3       1.00      0.96      0.98        23
           4       0.90      0.86      0.88        21
           5       0.95      0.90      0.93        21
           6       0.90      0.86      0.88        21
           7       0.83      0.83      0.83        18
           8       0.80      0.84      0.82        19
           9       0.89      0.94      0.91        17
          10       0.78      1.00      0.88        14
          11       0.92      0.88      0.90        26
          12       0.94      0.88      0.91        17
          13       0.76      0.80      0.78        20
          14       1.00      1.00      1.00        21
       

In [18]:
X = np.array(images)
y = np.array(labels) 
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=120)
X = sklearn_pca.fit_transform(X_std)

In [19]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 200, 'num_leaves': 5}
Best score:  0.8732954545454545
              precision    recall  f1-score   support

           0       0.84      0.84      0.84        25
           1       1.00      0.95      0.98        22
           2       0.90      0.90      0.90        20
           3       0.95      0.91      0.93        23
           4       0.81      0.81      0.81        21
           5       0.90      0.90      0.90        21
           6       0.95      0.90      0.93        21
           7       0.79      0.83      0.81        18
           8       0.88      0.79      0.83        19
           9       0.89      0.94      0.91        17
          10       0.93      0.93      0.93        14
          11       0.92      0.88      0.90        26
          12       0.89      0.94      0.91        17
          13       0.68      0.85      0.76        20
          14       1.00      1.00      1.00        21
       

In [20]:
X = np.array(images)
y = np.array(labels) 
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=110)
X = sklearn_pca.fit_transform(X_std)

In [21]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 200, 'num_leaves': 5}
Best score:  0.8761363636363637
              precision    recall  f1-score   support

           0       1.00      0.80      0.89        25
           1       0.95      0.95      0.95        22
           2       0.79      0.95      0.86        20
           3       1.00      0.96      0.98        23
           4       0.89      0.81      0.85        21
           5       0.90      0.90      0.90        21
           6       0.71      0.81      0.76        21
           7       0.82      0.78      0.80        18
           8       0.84      0.84      0.84        19
           9       0.84      0.94      0.89        17
          10       1.00      0.93      0.96        14
          11       0.92      0.88      0.90        26
          12       0.84      0.94      0.89        17
          13       0.71      0.85      0.77        20
          14       1.00      1.00      1.00        21
       

In [5]:
X = np.array(images)
y = np.array(labels) 
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=300)
X = sklearn_pca.fit_transform(X_std)

In [6]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 200, 'num_leaves': 5}
Best score:  0.8659090909090909
              precision    recall  f1-score   support

           0       0.92      0.96      0.94        25
           1       0.95      0.95      0.95        22
           2       0.81      0.85      0.83        20
           3       0.95      0.91      0.93        23
           4       0.89      0.76      0.82        21
           5       0.90      0.90      0.90        21
           6       0.78      0.86      0.82        21
           7       0.83      0.83      0.83        18
           8       0.76      0.84      0.80        19
           9       0.89      0.94      0.91        17
          10       0.88      1.00      0.93        14
          11       0.96      0.92      0.94        26
          12       0.94      0.88      0.91        17
          13       0.75      0.90      0.82        20
          14       0.95      0.95      0.95        21
       