In [26]:
import os
import numpy as np
from PIL import Image
import torch
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn as nn
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from read import CustomImageFolder


In [28]:
# Image directories for the two training folders and the two test folders.
# NOTE(review): presumably "_1" / "_0" are the two class labels — confirm against CustomImageFolder.
train_1_dir, train_0_dir = "../train_1", "../train_0"
test_1_dir, test_0_dir = "../test_1", "../test_0"

# Both splits use the same preprocessing: resize to 64x64, then convert to a tensor.
image_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

# Build the training and testing datasets.
Dataset_tr = CustomImageFolder(train_1_dir, train_0_dir, transform=image_transform)
Dataset_te = CustomImageFolder(test_1_dir, test_0_dir, transform=image_transform)

In [None]:
#extract the features for svm
def extract_features(dataloader, model, device=None):
  """Run every batch of ``dataloader`` through ``model`` and collect the outputs.

  Args:
    dataloader: Iterable yielding ``(images, targets)`` batches.
    model: Module called on each image batch. It is switched to evaluation
      mode and left in that mode.
    device: Device the image batches are moved to before the forward pass.
      When ``None``, the device of the model's first parameter is used
      (CPU if the model has no parameters), so the function no longer
      depends on a notebook-level ``device`` variable.

  Returns:
    A ``(features, labels)`` tuple of NumPy arrays: the per-batch model
    outputs stacked with ``np.vstack`` and the per-batch targets joined
    with ``np.hstack``.

  Raises:
    ValueError: If ``dataloader`` yields no batches.
  """
  if device is None:
    # Inputs must live on the same device as the model's weights.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

  features = []
  labels = []

  model.eval()  # Set the model to evaluation mode
  with torch.no_grad():
    for images, targets in dataloader:
      # Forward pass through the model
      outputs = model(images.to(device))
      features.append(outputs.cpu().numpy())
      # Targets are only converted to NumPy, so they never need to visit the device.
      labels.append(torch.as_tensor(targets).cpu().numpy())

  if not features:
    raise ValueError("dataloader yielded no batches; nothing to extract")

  return np.vstack(features), np.hstack(labels)

In [None]:
# Train an SVM classifier
def train_svm(train_features, train_labels, test_features, test_labels):
    """Fit an RBF-kernel SVM on the training split and report test metrics.

    The classifier is fitted on ``train_features`` / ``train_labels``, then
    used to predict ``test_features``. The accuracy and the classification
    report against ``test_labels`` are printed.

    Returns:
        The fitted ``SVC`` instance.
    """
    classifier = SVC(
        kernel='rbf',
        C=10,
        gamma=0.001,
        class_weight='balanced',
        random_state=42,
    )

    print("Training SVM...")
    classifier.fit(train_features, train_labels)

    # Score the fitted classifier on the held-out split.
    predicted = classifier.predict(test_features)
    test_accuracy = accuracy_score(test_labels, predicted)

    print(f"Accuracy: {test_accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(test_labels, predicted))

    return classifier

In [38]:
# ResNet18 experiment. Results were not satisfactory: using a more advanced model led to worse results.

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; kept as-is because the installed version is not visible here.
# NOTE(review): torchvision documents that its ImageNet-pretrained models expect
# mean/std-normalised inputs; the dataset transform applies no Normalize step — confirm intended.
pretrained_model = models.resnet18(pretrained=True)

# Keep every child module except the last one (the final classification layer).
feature_extractor = nn.Sequential(*list(pretrained_model.children())[:-1]).to(device)

# Batches of 32 in dataset order (no shuffling) for both splits.
trainloader = DataLoader(Dataset_tr, batch_size=32, shuffle=False)
testloader = DataLoader(Dataset_te, batch_size=32, shuffle=False)

train_features, train_labels = extract_features(trainloader, feature_extractor)
test_features, test_labels = extract_features(testloader, feature_extractor)

# Flatten each sample's features into a single row vector.
train_features = train_features.reshape(len(train_features), -1)
test_features = test_features.reshape(len(test_features), -1)

# Fit and evaluate the SVM on the extracted features.
svm_model = train_svm(train_features, train_labels, test_features, test_labels)



Training SVM...
Accuracy: 0.6359
Classification Report:
              precision    recall  f1-score   support

           0       0.58      0.87      0.70       100
           1       0.77      0.42      0.54       106

    accuracy                           0.64       206
   macro avg       0.68      0.64      0.62       206
weighted avg       0.68      0.64      0.62       206



In [31]:
# Try PCA for dimensionality reduction.
def pca(train_features, test_features, n_components, random_state=42):
  """Reduce both feature sets with a PCA fitted on the training features only.

  Args:
    train_features: Training feature matrix; the PCA is fitted on this.
    test_features: Test feature matrix; only transformed, never fitted on.
    n_components: Forwarded to ``PCA(n_components=...)``.
    random_state: Seed forwarded to ``PCA``. It only matters when scikit-learn
      picks a stochastic solver (e.g. the randomized SVD that the default
      ``svd_solver='auto'`` may select for larger inputs); fixing it keeps
      re-runs reproducible.

  Returns:
    ``(train_features_pca, test_features_pca)``, the projected feature sets.
  """
  # Named `reducer` so the local no longer shadows this function's own name.
  reducer = PCA(n_components=n_components, random_state=random_state)
  train_features_pca = reducer.fit_transform(train_features)
  test_features_pca = reducer.transform(test_features)

  # Total fraction of the training variance retained by the kept components.
  explained_variance = np.sum(reducer.explained_variance_ratio_)
  print(f"Explained variance by {n_components} components: {explained_variance:.2f}")
  return train_features_pca, test_features_pca

In [32]:
# Number of principal components to keep.
n_components = 50

# Project both splits with a PCA fitted on the training features.
train_features_pca, test_features_pca = pca(
    train_features,
    test_features,
    n_components,
)

# Refit and evaluate the SVM on the reduced features.
svm_model_pca = train_svm(
    train_features_pca,
    train_labels,
    test_features_pca,
    test_labels,
)

# Improved, but only slightly.

Explained variance by 50 components: 0.92
Training SVM...
Accuracy: 0.6408
Classification Report:
              precision    recall  f1-score   support

           0       0.59      0.89      0.71       100
           1       0.80      0.41      0.54       106

    accuracy                           0.64       206
   macro avg       0.69      0.65      0.62       206
weighted avg       0.69      0.64      0.62       206



In [34]:
# Try a grid search over the SVM hyperparameters.
def grid_search_svm(train_features_pca, train_labels, test_features_pca, test_labels,
                    param_grid=None, cv=3, verbose=2):
    """Tune an ``SVC`` with ``GridSearchCV`` and evaluate the best estimator.

    Args:
        train_features_pca: Training features the grid search is fitted on.
        train_labels: Labels for ``train_features_pca``.
        test_features_pca: Features used for the final evaluation.
        test_labels: Labels for ``test_features_pca``.
        param_grid: Parameter grid forwarded to ``GridSearchCV``. When ``None``,
            a module-level ``param_grid`` variable is used if one exists. This
            keeps existing four-argument calls working.
        cv: Forwarded to ``GridSearchCV(cv=...)``.
        verbose: Forwarded to ``GridSearchCV(verbose=...)``. Lower it to avoid
            one log line per fit.

    Returns:
        ``grid.best_estimator_``, refitted on the full training data.

    Raises:
        ValueError: If no parameter grid is passed and none is defined at
            module level.
    """
    if param_grid is None:
        # Backward compatibility: this function used to read a notebook-level
        # `param_grid` directly, which failed with NameError on a fresh kernel.
        param_grid = globals().get("param_grid")
    if not param_grid:
        raise ValueError(
            "param_grid must be passed explicitly or defined at module level "
            "before calling grid_search_svm"
        )

    print("Starting GridSearchCV for SVM...")
    grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=verbose, cv=cv)
    grid.fit(train_features_pca, train_labels)

    print("Best parameters found:", grid.best_params_)

    # refit=True means best_estimator_ has already been retrained on all training data.
    best_model = grid.best_estimator_
    predictions = best_model.predict(test_features_pca)

    print("Classification Report:")
    print(classification_report(test_labels, predictions))
    accuracy = accuracy_score(test_labels, predictions)
    print(f"Accuracy: {accuracy:.4f}")

    return best_model

In [35]:
# Candidate values searched for the RBF-kernel SVM: 5 values of C x 5 values of gamma.
param_grid = dict(
    C=[0.1, 1, 10, 100, 1000],
    gamma=[1, 0.1, 0.01, 0.001, 0.0001],
    kernel=['rbf'],
)

# Search on the PCA-reduced training features, then evaluate on the reduced test features.
best_svm_model = grid_search_svm(
    train_features_pca,
    train_labels,
    test_features_pca,
    test_labels,
)

# Not much change; the SVM appears to have reached its limit.

Starting GridSearchCV for SVM...
Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=0.01, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=0.01, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=0.01, kernel=rbf; total time=   0.0s
[CV] END .....................C=0.1, gamma=0.001, kernel=rbf; total time=   0.0s
[CV] END .....................C=0.1, gamma=0.001, kernel=rbf; total time=   0.0s
[CV] END ......