<a href="https://colab.research.google.com/github/jobinpjoseph707/Interview-Helper/blob/master/Comparison_svm_ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:

cd /content/drive/MyDrive/datasetbrain


/content/drive/MyDrive/datasetbrain


In [7]:
!pip install PyWavelets
!pip install scikit-learn
!pip install scikit-image
!pip install imgaug
import os
import numpy as np
import cv2
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from skimage.feature import graycomatrix, graycoprops
from sklearn.feature_selection import RFE
from sklearn.ensemble import VotingClassifier
import imgaug.augmenters as iaa


def extract_features(image_path):
    im = cv2.imread(image_path, cv2.IMREAD_COLOR)
    im = cv2.resize(im, (150, 150), interpolation=cv2.INTER_AREA)
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

    # 1. Basic Shape and Color Features
    _, t_im = cv2.threshold(im_gray, 110, 255, cv2.THRESH_BINARY)
    kernel = np.ones((1, 1), np.uint8)
    t_im = cv2.morphologyEx(t_im, cv2.MORPH_OPEN, kernel)
    contours1, _ = cv2.findContours(t_im, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    if len(contours1) > 1:
        cnt1 = contours1[1]
        perimeter = cv2.arcLength(cnt1, True)
        num_contours = len(contours1)
    else:
        perimeter = 0
        num_contours = 0

    mean_r, mean_g, mean_b = np.mean(im, axis=(0, 1)).tolist()


    # Texture Features using Gray-Level Co-occurrence Matrix (GLCM)
    glcm = graycomatrix(im_gray, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], levels=256, symmetric=True, normed=True)
    contrast = graycoprops(glcm, 'contrast')[0, 0]
    dissimilarity = graycoprops(glcm, 'dissimilarity')[0, 0]
    homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
    energy = graycoprops(glcm, 'energy')[0, 0]
    correlation = graycoprops(glcm, 'correlation')[0, 0]

    # ... (Append all features to the list) ...
    return [perimeter, len(contours1), *np.mean(im, axis=(0, 1)).tolist(), contrast, dissimilarity, homogeneity, energy, correlation]



# Load Data
data = []
basepath = './dataset/'
for folder in os.scandir(basepath):
    if folder.is_dir():
        for file in os.scandir(folder.path):
            features = extract_features(file.path)
            data.append([*features, folder.name])

df = pd.DataFrame(data, columns=['perimeter', 'num_contours', 'mean_r', 'mean_g', 'mean_b', 'contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'class'])

# Preprocessing
X = df[['perimeter', 'num_contours', 'mean_r', 'mean_g', 'mean_b', 'contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']].values
y = df['class']
le = LabelEncoder()
y = le.fit_transform(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# --- Feature Selection using RFE ---
estimator = SVC(kernel="linear")  # Use a linear kernel for RFE
selector = RFE(estimator, n_features_to_select=5, step=1)  # Select top 5 features
selector = selector.fit(X_train, y_train)

# Get the selected features
selected_features = selector.support_
X_train = X_train[:, selected_features]
X_test = X_test[:, selected_features]

# --- PCA (Optional) ---
# You can still apply PCA after feature selection if desired
# pca = PCA(n_components=0.95)
# X_train = pca.fit_transform(X_train)
# X_test = pca.transform(X_test)

# --- Hyperparameter Tuning ---
param_grid = {
    'C': [0.1, 1, 10],  # Reduced options for C
    'kernel': ['linear', 'rbf'],  # Reduced kernel options
    'gamma': ['scale', 'auto'],  # Reduced gamma options
}

cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)  # Reduced CV folds

svm = SVC(random_state=42)
grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=cv, scoring='accuracy', verbose=2, n_jobs=-1)

# Fit with Error Handling and Verbosity
try:
    grid_search.fit(X_train, y_train)
except Exception as e:
    print(f"Error during GridSearchCV: {e}")

# Get the best model and evaluate
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test accuracy with best model: {accuracy}")
print(f"Best hyperparameters: {grid_search.best_params_}")

# --- Data Augmentation (using imgaug) ---
seq = iaa.Sequential([
    iaa.Fliplr(0.5),  # Horizontal flips
    iaa.Affine(rotate=(-10, 10)),  # Small rotations
    iaa.Multiply((0.8, 1.2)),  # Brightness adjustments
])

# Apply augmentation during training
augmented_X_train = []
augmented_y_train = []
for i in range(len(X_train)):
    # Reshape to (1, -1) for imgaug if necessary
    augmented_image = seq(image=X_train[i].reshape(1, -1).astype(np.float32))
    augmented_X_train.append(augmented_image.flatten())
    augmented_y_train.append(y_train[i])

X_train = np.vstack([X_train, augmented_X_train])
y_train = np.concatenate([y_train, augmented_y_train])

# --- Ensemble Method (using VotingClassifier) ---
svm1 = SVC(kernel='linear', probability=True)
svm2 = SVC(kernel='rbf', probability=True)
# ... (other models) ...

ensemble = VotingClassifier(estimators=[('svm1', svm1), ('svm2', svm2)], voting='soft')
ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test accuracy with ensemble: {accuracy}")

Fitting 3 folds for each of 12 candidates, totalling 36 fits
Test accuracy with best model: 0.8888888888888888
Best hyperparameters: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
Test accuracy with ensemble: 0.9629629629629629


In [3]:
!pip install PyWavelets
!pip install scikit-learn
!pip install scikit-image
import os
import numpy as np
import cv2
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV,StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from skimage.feature import graycomatrix, graycoprops  # Import for GLCM features
from sklearn.feature_selection import RFE  # Import RFE




data = []
basepath = './dataset/'

# Function to extract features from an image
# Function to extract features from an image (enhanced)
def extract_features(image_path):
    im = cv2.imread(image_path, cv2.IMREAD_COLOR)
    im = cv2.resize(im, (150, 150), interpolation=cv2.INTER_AREA)
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

      # 1. Basic Shape and Color Features
    _, t_im = cv2.threshold(im_gray, 110, 255, cv2.THRESH_BINARY)
    kernel = np.ones((1, 1), np.uint8)
    t_im = cv2.morphologyEx(t_im, cv2.MORPH_OPEN, kernel)
    contours1, _ = cv2.findContours(t_im, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    if len(contours1) > 1:
        cnt1 = contours1[1]
        perimeter = cv2.arcLength(cnt1, True)
        num_contours = len(contours1)
    else:
        perimeter = 0
        num_contours = 0

    mean_r, mean_g, mean_b = np.mean(im, axis=(0, 1)).tolist()


    # Texture Features using Gray-Level Co-occurrence Matrix (GLCM)
    glcm = graycomatrix(im_gray, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], levels=256, symmetric=True, normed=True)
    contrast = graycoprops(glcm, 'contrast')[0, 0]
    dissimilarity = graycoprops(glcm, 'dissimilarity')[0, 0]
    homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
    energy = graycoprops(glcm, 'energy')[0, 0]
    correlation = graycoprops(glcm, 'correlation')[0, 0]

    # ... (Append all features to the list) ...
    return [perimeter, len(contours1), *np.mean(im, axis=(0, 1)).tolist(), contrast, dissimilarity, homogeneity, energy, correlation]


# Iterate through folders and extract features
for folder in os.scandir(basepath):
    if folder.is_dir():
        for file in os.scandir(folder.path):
            features = extract_features(file.path)
            data.append([*features, folder.name])

# Create a DataFrame
df = pd.DataFrame(data, columns=['perimeter', 'num_contours', 'mean_r', 'mean_g', 'mean_b', 'contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'class'])

# Feature Selection and Engineering
X = df[['perimeter', 'num_contours', 'mean_r', 'mean_g', 'mean_b', 'contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']].values
y = df['class']

# Label Encoding
le = LabelEncoder()
y = le.fit_transform(y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

estimator = SVC(kernel="linear")  # Use a linear kernel for RFE
selector = RFE(estimator, n_features_to_select=5, step=1)  # Select top 5 features
selector = selector.fit(X_train, y_train)


selected_features = selector.support_
X_train = X_train[:, selected_features]
X_test = X_test[:, selected_features]

# Hyperparameter Tuning using GridSearchCV for SVM
param_grid = {
    'C': [0.1, 1, 10],  # Reduced options for C
    'kernel': ['linear', 'rbf'],  # Reduced kernel options
    'gamma': ['scale', 'auto'],  # Reduced gamma options
}

# Use StratifiedKFold for more robust cross-validation
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)


svm = SVC(random_state=42)
grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Get the best model
best_model = grid_search.best_estimator_

# Evaluate the best model
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test accuracy with best model: {accuracy}")
print(f"Best hyperparameters: {grid_search.best_params_}")

Test accuracy with best model: 0.8888888888888888
Best hyperparameters: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
