# In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.decomposition import PCA
import zipfile

# Step 1: Unpack the dataset archive, but only when the target folder is absent
zip_path = 'archive.zip'  # Expected to sit in the project directory
extract_path = 'data'  # Destination folder for the unpacked dataset
if not os.path.exists(extract_path):
    # ZipFile opens in read mode by default; the context manager closes it.
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(path=extract_path)

# Step 2: Dataset location and the class-name -> integer-label mapping
data_dir = extract_path  # Root folder that is searched for class sub-folders
class_names = ['glioma', 'meningioma', 'notumor', 'pituitary']
# Each class is labelled with its position in class_names (0, 1, 2, 3).
label_map = dict(zip(class_names, range(len(class_names))))

# Step 3: Prepare images and labels
# Every readable image under data_dir/<class_name>/ becomes one 1-D feature
# vector: its HOG descriptor followed by the flattened pixel intensities of
# the preprocessed 256x256 grayscale image, scaled to [0, 1].
images = []
labels = []

for class_name in class_names:
    class_path = os.path.join(data_dir, class_name)
    # sorted(): os.listdir returns entries in arbitrary order, which would
    # make the seeded train/test split differ from one machine to another.
    for filename in sorted(os.listdir(class_path)):
        # Case-insensitive match so files such as 'scan.JPG' are not dropped.
        if not filename.lower().endswith(('.jpg', '.png')):
            continue

        img_path = os.path.join(class_path, filename)
        img = cv2.imread(img_path)
        if img is None:
            continue  # Skip unreadable images

        # Preprocessing
        img = cv2.resize(img, (256, 256))  # Common size for every sample
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.equalizeHist(img)  # Histogram equalization
        img = cv2.GaussianBlur(img, (3, 3), 0)  # Noise reduction

        # Extract HOG features. visualize=False: the rendered HOG image was
        # never used, so skip computing it for every sample.
        features = hog(img, orientations=11, pixels_per_cell=(16, 16),
                       cells_per_block=(2, 2), block_norm='L2-Hys',
                       visualize=False, feature_vector=True)

        # Add flattened pixel values
        pixel_features = img.flatten() / 255.0
        combined_features = np.concatenate([features, pixel_features])

        images.append(combined_features)
        labels.append(label_map[class_name])

# Fail with a clear message instead of an obscure error further down.
if not images:
    raise RuntimeError(
        f"No readable .jpg/.png images found under {data_dir!r}")

# Convert to NumPy arrays
X = np.array(images)
y = np.array(labels)

# Split data FIRST. Fitting the scaler/PCA on the full dataset would let
# statistics of the held-out samples leak into preprocessing and make the
# reported test accuracy optimistic.
# NOTE: X and y keep the raw (unscaled) features; only the split arrays are
# transformed below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42, stratify=y)

# Normalize features: mean/std are learned from the training split only and
# then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Dimensionality reduction: components are learned from the training split
# only; the test split is projected onto them.
pca = PCA(n_components=0.95)  # Keep 95% variance
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# Train SVM
model = SVC(C=10, kernel='rbf', gamma='scale', random_state=42)
model.fit(X_train, y_train)

# Evaluate
train_accuracy = accuracy_score(y_train, model.predict(X_train))
test_accuracy = accuracy_score(y_test, model.predict(X_test))

print("Training Accuracy:", round(train_accuracy * 100, 2), "%")
print("Testing Accuracy:", round(test_accuracy * 100, 2), "%")

# Recorded output of this cell:
#   Training Accuracy: 100.0 %
#   Testing Accuracy: 88.83 %
