In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from skimage.feature import canny
from sklearn.preprocessing import LabelEncoder

In [2]:
# EDGE BASED SEGMENTATION
def preprocess1(image):
  # resizing
  image = cv2.resize(image,(96,96))
  # gray scaling
  image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  # log transformation
  c=0
  if np.max(image) == 0:
        c=0
  else:
        c = 255 / np.log(1 + np.max(image)) 
  log_image = c * (np.log(image + 1)) 
  log_image = np.array(log_image, dtype = np.uint8)  
  # canny edge detection
  image = canny(log_image)
  return image

In [3]:
rootdir = 'asl_alphabet/asl_alphabet_train/asl_alphabet_train'
images = []
labels = []

for folder in os.listdir(rootdir):
    classes = os.path.join(rootdir,folder)
    label = folder
    for file in os.listdir(classes):
        img = os.path.join(classes, file)
        image = cv2.imread(img)
        image = preprocess1(image)
        images.append(image)
        labels.append(label)

encoder = LabelEncoder()
labels = encoder.fit_transform(labels)


  log_image = c * (np.log(image + 1))


In [4]:
X = np.array(images)
y = np.array(labels) 

In [1]:
import lightgbm as lgb
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [6]:
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=80)
X = sklearn_pca.fit_transform(X_std)

In [7]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 200, 'num_leaves': 20}
Best score:  0.9566338259441707
              precision    recall  f1-score   support

           0       0.97      0.95      0.96       886
           1       0.96      0.99      0.98       881
           2       0.97      0.98      0.98       897
           3       0.97      0.97      0.97       898
           4       0.96      0.95      0.96       951
           5       0.98      0.98      0.98       926
           6       0.98      0.98      0.98       895
           7       0.98      0.99      0.99       851
           8       0.98      0.97      0.97       883
           9       0.99      0.99      0.99       904
          10       0.98      0.98      0.98       907
          11       0.99      0.99      0.99       847
          12       0.96      0.95      0.96       873
          13       0.96      0.95      0.96       892
          14       0.94      0.95      0.95       878
      

In [8]:
# n=80, accuracy=96
# n=100, accuracy=96

In [9]:
X = np.array(images)
y = np.array(labels) 

In [10]:
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=100)
X = sklearn_pca.fit_transform(X_std)

In [11]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': 10, 'n_estimators': 200, 'num_leaves': 20}
Best score:  0.9570935960591133
              precision    recall  f1-score   support

           0       0.97      0.95      0.96       886
           1       0.96      0.99      0.97       881
           2       0.98      0.98      0.98       897
           3       0.97      0.98      0.97       898
           4       0.96      0.96      0.96       951
           5       0.99      0.98      0.98       926
           6       0.98      0.97      0.98       895
           7       0.98      0.99      0.98       851
           8       0.98      0.97      0.97       883
           9       0.99      0.99      0.99       904
          10       0.98      0.98      0.98       907
          11       0.99      0.99      0.99       847
          12       0.95      0.95      0.95       873
          13       0.96      0.95      0.95       892
          14       0.93      0.95      0.94       878
      

In [21]:
X = np.array(images)
y = np.array(labels) 

In [22]:
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=150)
X = sklearn_pca.fit_transform(X_std)

In [24]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 200, 'num_leaves': 20}
Best score:  0.9570771756978654
              precision    recall  f1-score   support

           0       0.97      0.95      0.96       886
           1       0.96      0.99      0.97       881
           2       0.98      0.98      0.98       897
           3       0.96      0.97      0.97       898
           4       0.95      0.95      0.95       951
           5       0.98      0.99      0.98       926
           6       0.98      0.97      0.98       895
           7       0.98      0.99      0.99       851
           8       0.98      0.97      0.97       883
           9       1.00      0.99      0.99       904
          10       0.98      0.98      0.98       907
          11       0.99      0.99      0.99       847
          12       0.96      0.95      0.95       873
          13       0.96      0.95      0.95       892
          14       0.93      0.95      0.94       878
      

In [25]:
X = np.array(images)
y = np.array(labels) 

In [26]:
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=180)
X = sklearn_pca.fit_transform(X_std)

In [27]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 200, 'num_leaves': 20}
Best score:  0.9558128078817735
              precision    recall  f1-score   support

           0       0.97      0.95      0.96       886
           1       0.97      0.99      0.98       881
           2       0.98      0.99      0.98       897
           3       0.96      0.97      0.97       898
           4       0.97      0.94      0.95       951
           5       0.98      0.99      0.99       926
           6       0.98      0.97      0.97       895
           7       0.98      0.98      0.98       851
           8       0.97      0.97      0.97       883
           9       0.99      0.99      0.99       904
          10       0.98      0.97      0.97       907
          11       0.99      0.99      0.99       847
          12       0.96      0.94      0.95       873
          13       0.95      0.95      0.95       892
          14       0.94      0.95      0.94       878
      

In [28]:
X = np.array(images)
y = np.array(labels) 

In [5]:
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=200)
X = sklearn_pca.fit_transform(X_std)

In [8]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 200, 'num_leaves': 20}
Best score:  0.9564696223316913
              precision    recall  f1-score   support

           0       0.97      0.93      0.95       886
           1       0.96      0.99      0.97       881
           2       0.98      0.98      0.98       897
           3       0.96      0.97      0.96       898
           4       0.95      0.94      0.94       951
           5       0.98      0.98      0.98       926
           6       0.98      0.97      0.97       895
           7       0.98      0.98      0.98       851
           8       0.98      0.96      0.97       883
           9       0.99      0.99      0.99       904
          10       0.98      0.98      0.98       907
          11       0.98      0.99      0.99       847
          12       0.96      0.95      0.95       873
          13       0.96      0.96      0.96       892
          14       0.93      0.94      0.94       878
      

In [9]:
X = np.array(images)
y = np.array(labels) 

In [10]:
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=300)
X = sklearn_pca.fit_transform(X_std)

In [11]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 200, 'num_leaves': 20}
Best score:  0.9536124794745484
              precision    recall  f1-score   support

           0       0.96      0.94      0.95       886
           1       0.96      0.99      0.97       881
           2       0.97      0.98      0.98       897
           3       0.96      0.96      0.96       898
           4       0.95      0.94      0.94       951
           5       0.98      0.98      0.98       926
           6       0.97      0.97      0.97       895
           7       0.98      0.98      0.98       851
           8       0.98      0.97      0.97       883
           9       1.00      0.99      0.99       904
          10       0.96      0.97      0.97       907
          11       0.98      0.99      0.98       847
          12       0.95      0.95      0.95       873
          13       0.95      0.95      0.95       892
          14       0.93      0.94      0.94       878
      

In [12]:
X = np.array(images)
y = np.array(labels) 

In [13]:
# Flatten the images
X_flat = X.reshape(X.shape[0], -1)

from sklearn.preprocessing import StandardScaler
X_std = StandardScaler().fit_transform(X_flat)

from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=120)
X = sklearn_pca.fit_transform(X_std)

In [14]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define LightGBM classifier and parameters to tune
lgb_clf = lgb.LGBMClassifier()
params = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [50, 100, 200],
    'max_depth': [-1, 10, 20]
}

# Perform grid search with cross-validation to find best parameters
grid_search = GridSearchCV(lgb_clf, params, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print best parameters and score
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Make predictions on testing set using the best model
y_pred = grid_search.predict(X_test)

# Print classification report
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))

Best parameters:  {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 200, 'num_leaves': 20}
Best score:  0.9574384236453202
              precision    recall  f1-score   support

           0       0.97      0.95      0.96       886
           1       0.96      0.99      0.98       881
           2       0.98      0.98      0.98       897
           3       0.97      0.97      0.97       898
           4       0.96      0.95      0.95       951
           5       0.98      0.99      0.99       926
           6       0.98      0.98      0.98       895
           7       0.98      0.98      0.98       851
           8       0.98      0.98      0.98       883
           9       0.99      0.99      0.99       904
          10       0.98      0.98      0.98       907
          11       0.99      0.99      0.99       847
          12       0.96      0.95      0.95       873
          13       0.96      0.96      0.96       892
          14       0.94      0.95      0.94       878
      