In [3]:
import os
import cv2
import numpy as np
from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from joblib import dump, Parallel, delayed

# Define dataset path and categories
dataset_path = "lung_image_sets"
categories = ["lung_scc", "lung_n", "lung_aca"]

X, y = [], []

# Function to process a single image
def process_image(img_path, label):
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return None, None
    img = cv2.resize(img, (64, 64))
    features, _ = hog(img, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=True)
    return features, label

# Load and process all images using parallel processing
results = Parallel(n_jobs=-1)(
    delayed(process_image)(os.path.join(dataset_path, cat, fname), label)
    for label, cat in enumerate(categories)
    for fname in os.listdir(os.path.join(dataset_path, cat))
)

# Filter out failed loads
X, y = zip(*[r for r in results if r[0] is not None])
X = np.array(X)
y = np.array(y)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train SVM model
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)

# Evaluate model
y_pred = svm_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=categories))

# Save model as .pkl
dump(svm_model, "svm_model.pkl")
print("✅ SVM model saved as 'svm_model.pkl'")


Accuracy: 0.7636666666666667
              precision    recall  f1-score   support

    lung_scc       0.77      0.79      0.78      1037
      lung_n       0.86      0.86      0.86       970
    lung_aca       0.66      0.65      0.65       993

    accuracy                           0.76      3000
   macro avg       0.76      0.76      0.76      3000
weighted avg       0.76      0.76      0.76      3000

✅ SVM model saved as 'svm_model.pkl'
