<a href="https://colab.research.google.com/github/asif-ethmoid/Lung-Cancer-Detection_Lab/blob/main/Lung_Cancer_Detection_Lab_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only: mount Google Drive at /content/drive/ so files stored there can be
# read through ordinary filesystem paths by the cells below.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
import cv2
from google.colab.patches import cv2_imshow
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from pathlib import Path
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score





In [None]:
import os
import cv2

# Class-folder name -> integer class label. The label is simply the position of
# the folder name in the tuple below, so the order here is significant.
label_mapping = {
    folder_name: class_index
    for class_index, folder_name in enumerate(('Bengin', 'Malignant', 'Normal'))
}

def load_images_from_folders(base_dir, debug=False, image_size=(128, 128)):
    """Load every readable image under the class sub-folders of ``base_dir``.

    One sub-folder per key of the module-level ``label_mapping`` is expected
    directly under ``base_dir``. Each image is read with OpenCV, converted to
    grayscale and resized to ``image_size``.

    Args:
        base_dir: Directory that contains one sub-folder per class name.
        debug: When True, print a message for every unreadable file and a
            progress line after every 100 loaded images.
        image_size: ``(width, height)`` passed to ``cv2.resize``. Defaults to
            ``(128, 128)``.

    Returns:
        A tuple ``(images, labels)`` of two parallel lists: the resized
        grayscale images and the integer label of the folder each came from.
        Missing class folders are reported and skipped; unreadable files and
        non-file entries are skipped.
    """
    images = []
    labels = []

    for class_name, class_label in label_mapping.items():
        class_dir = os.path.join(base_dir, class_name)

        if not os.path.isdir(class_dir):
            print(f"Directory {class_dir} not found, skipping...")
            continue

        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order (and therefore any seeded train/test split performed on
        # the result) reproducible across runs and machines.
        for filename in sorted(os.listdir(class_dir)):
            image_path = os.path.join(class_dir, filename)
            if not os.path.isfile(image_path):
                continue

            # cv2.imread signals failure by returning None instead of raising
            image = cv2.imread(image_path)
            if image is None:
                if debug:
                    print(f"Failed to read {image_path}. Skipping.")
                continue

            # Convert to single-channel grayscale, then bring to a common size
            gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            resized_image = cv2.resize(gray_image, image_size)

            images.append(resized_image)
            labels.append(class_label)

            if debug and len(images) % 100 == 0:
                print(f"Loaded {len(images)} images.")

    return images, labels



In [None]:
# Load images and labels

# Root folder of the dataset on the mounted Drive; the loader looks for one
# sub-folder per class name inside it.
base_dir = '/content/drive/MyDrive/Dataset/lung'

# `images` and `labels` are parallel lists (one label per image) used by the
# feature-extraction and training cells further down.
images, labels = load_images_from_folders(base_dir, debug=False)

In [None]:
# Sanity check: the two lists are built in lockstep, so the counts should match.
print(len(images))
print(len(labels))

1097
1097


In [None]:
# Shape of the first loaded image (the loader converts to grayscale and resizes).
print(images[0].shape)


(128, 128)


In [None]:
def extract_sobel_features(source_images=None):
  """Compute a flattened Sobel edge map for each grayscale image.

  Args:
    source_images: Iterable of single-channel images. When omitted, the
      notebook-level ``images`` list is used, which keeps the original
      zero-argument call working.

  Returns:
    A list with one 1-D array per input image: the 128x128 edge map
    (equal-weight blend of the absolute x and y Sobel gradients) flattened.
  """
  if source_images is None:
    # Backward-compatible fallback to the notebook-level list.
    source_images = images

  features = []
  for gray in source_images:
    # Horizontal and vertical gradients, computed in float64 so negative
    # responses are not clipped before the absolute value is taken.
    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    abs_grad_x = cv2.convertScaleAbs(sobelx)
    abs_grad_y = cv2.convertScaleAbs(sobely)
    edges = cv2.addWeighted(abs_grad_x, 0.5, abs_grad_y, 0.5, 0)
    # Force a common size so every feature vector has the same length.
    edges = cv2.resize(edges, (128, 128))
    features.append(np.ravel(edges))

  return features
sobel_features = extract_sobel_features(images)

In [None]:
from skimage.feature import hog
#Histogram of Oriented Gradients (HOG)
def extract_hog_features(images, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2)):
    """Return one HOG descriptor per image, stacked into a NumPy array.

    Every image is resized to 128x128 before the descriptor is computed with
    ``skimage.feature.hog`` (block normalisation 'L2-Hys', flattened output).

    Args:
        images: Iterable of grayscale images.
        orientations: Number of orientation bins passed to ``hog``.
        pixels_per_cell: Cell size passed to ``hog``.
        cells_per_block: Block size passed to ``hog``.

    Returns:
        ``np.array`` built from the per-image descriptors.
    """
    def _describe(picture):
        # Standardise the size first so the descriptor length is constant.
        standardised = cv2.resize(picture, (128, 128))
        return hog(
            standardised,
            orientations=orientations,
            pixels_per_cell=pixels_per_cell,
            cells_per_block=cells_per_block,
            block_norm='L2-Hys',
            feature_vector=True,
        )

    return np.array([_describe(picture) for picture in images])

hog_features = extract_hog_features(images)

In [None]:
from skimage.feature import local_binary_pattern
#Local Binary Patterns (LBP)
def extract_lbp_features(images, n_points=8, radius=1, method='uniform'):
    """Return a normalised Local Binary Pattern histogram for each image.

    Args:
        images: Iterable of grayscale images.
        n_points: Number of circularly symmetric neighbour points.
        radius: Radius of the sampling circle.
        method: LBP variant forwarded to ``skimage.feature.local_binary_pattern``.

    Returns:
        ``np.array`` with one row per image; each row is the histogram of LBP
        codes scaled to sum to (approximately) 1.
    """
    # The number of distinct LBP codes depends on the variant. Using the
    # 'uniform' bin count for every method would silently drop all codes
    # above n_points + 1 from the histogram.
    if method in ('default', 'ror'):
        n_bins = 2 ** n_points
    elif method == 'nri_uniform':
        n_bins = n_points * (n_points - 1) + 3
    else:
        # 'uniform' yields codes 0 .. n_points + 1.
        # NOTE(review): 'var' produces continuous values, so integer bins are
        # not meaningful for it; the original binning is kept for that case.
        n_bins = n_points + 2
    # Explicit integer edges 0, 1, ..., n_bins. np.histogram ignores `range`
    # when edges are given, so that argument is not passed.
    bin_edges = np.arange(0, n_bins + 1)

    features = []
    for gray in images:
        # Compute LBP
        lbp = local_binary_pattern(gray, n_points, radius, method)

        # Histogram of LBP codes, normalised; the epsilon guards against a
        # division by zero on an empty histogram.
        hist, _ = np.histogram(lbp.ravel(), bins=bin_edges)
        hist = hist.astype("float")
        hist /= (hist.sum() + 1e-7)

        features.append(hist)

    return np.array(features)

lbp_features = extract_lbp_features(images)

In [None]:
from sklearn.metrics import accuracy_score
def train_and_evaluate_classifiers(X, y, test_size=0.2, random_state=42):
  """Train four classifiers on one train/test split and score each of them.

  The classifiers are, in order: k-nearest neighbours (k=5), random forest
  (100 trees), RBF-kernel SVM (C=10) and a decision tree.

  Args:
    X: Feature vectors, one per sample.
    y: Class labels, parallel to ``X``.
    test_size: Fraction of the samples held out for evaluation.
    random_state: Seed used for the split and for the randomised estimators
      (random forest and decision tree) so that results are repeatable.

  Returns:
    A list of four lists in the order [KNN, Random Forest, SVM, Decision
    Tree]; each inner list is [accuracy, precision, recall, f1] with
    precision/recall/f1 computed using weighted averaging.
  """
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=test_size, random_state=random_state)

  # Order matters: callers index the result positionally.
  classifiers = [
      KNeighborsClassifier(n_neighbors=5),
      RandomForestClassifier(n_estimators=100, random_state=random_state),
      SVC(kernel='rbf', C=10),
      # Seeded so repeated runs give the same tree.
      DecisionTreeClassifier(random_state=random_state),
  ]

  results = []
  for clf in classifiers:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # zero_division=0 yields the same score as the default for a class that
    # is never predicted, but without emitting an undefined-metric warning.
    results.append([
        accuracy_score(y_test, y_pred),
        precision_score(y_test, y_pred, average='weighted', zero_division=0),
        recall_score(y_test, y_pred, average='weighted', zero_division=0),
        f1_score(y_test, y_pred, average='weighted', zero_division=0),
    ])

  return results

In [None]:
sobel_acc = train_and_evaluate_classifiers(sobel_features, labels)
# Each result row is (Accuracy, Precision, Recall, F1-Score); rows follow the
# order of the headings below.
sobel_headings = ("For KNN", "For Random Forest", "For SVM", "For Decision Tree")
for row_index, heading in enumerate(sobel_headings):
    print(heading)
    print(sobel_acc[row_index])

Here
For KNN
[0.9818181818181818, 0.9823407875546913, 0.9818181818181818, 0.9812776412029959]
For Random Forest
[0.9454545454545454, 0.9513685239491692, 0.9454545454545454, 0.9411749101961869]
For SVM
[0.9863636363636363, 0.9865144877028776, 0.9863636363636363, 0.9862529987654521]
For Decision Tree
[0.8454545454545455, 0.8453926744761591, 0.8454545454545455, 0.8445215916661619]


In [None]:
lbp_acc = train_and_evaluate_classifiers(lbp_features, labels)
# Each result row is (Accuracy, Precision, Recall, F1-Score); rows follow the
# order of the headings below.
lbp_headings = ("For KNN", "For Random Forest", "For SVM", "For Decision Tree")
for row_index, heading in enumerate(lbp_headings):
    print(heading)
    print(lbp_acc[row_index])

Here
For KNN
[0.9318181818181818, 0.9297977204603711, 0.9318181818181818, 0.9304545454545454]
For Random Forest
[0.9045454545454545, 0.8996486063582859, 0.9045454545454545, 0.8977908055113938]
For SVM
[0.8318181818181818, 0.732270319882038, 0.8318181818181818, 0.7768125872791458]
For Decision Tree
[0.8636363636363636, 0.8636363636363636, 0.8636363636363636, 0.8636363636363636]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
hog_acc = train_and_evaluate_classifiers(hog_features, labels)
# Each result row is (Accuracy, Precision, Recall, F1-Score); rows follow the
# order of the headings below.
hog_headings = ("For KNN", "For Random Forest", "For SVM", "For Decision Tree")
for row_index, heading in enumerate(hog_headings):
    print(heading)
    print(hog_acc[row_index])

Here
For KNN
[0.9590909090909091, 0.9614793771043771, 0.9590909090909091, 0.959957181015966]
For Random Forest
[0.9318181818181818, 0.9374068409466638, 0.9318181818181818, 0.9249836752148891]
For SVM
[0.9863636363636363, 0.9865144877028776, 0.9863636363636363, 0.9862529987654521]
For Decision Tree
[0.8227272727272728, 0.8207983678669888, 0.8227272727272728, 0.8213222242760165]
