In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
data_dir = '/content/drive/My Drive/hymenoptera_data'

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import numpy as np

In [4]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
#The mean (from the first list) is subtracted from every pixel.
#Then, the result is divided by the corresponding standard deviation (from the second list).
data_dir = '/content/drive/My Drive/hymenoptera_data'

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")



In [5]:
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 52.4MB/s]


In [6]:
modules = list(model_ft.children())[:-1]
model_ft = nn.Sequential(*modules)

# Move the model to GPU if available
model_ft = model_ft.to(device)

In [7]:
#Extract features from the last layer of the pretrained CNN architecture
def extract_features(dataloader, model, num_features):
    model.eval()  # Set the model to evaluation mode
    features = []
    labels = []

    with torch.no_grad():  # No need to track gradients for feature extraction
        for inputs, label in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            features.append(outputs.cpu().reshape(inputs.size(0), -1))
            labels.append(label)

    features = torch.cat(features, 0).numpy()
    labels = torch.cat(labels, 0).numpy()
    return features, labels

In [8]:
train_features, train_labels = extract_features(dataloaders['train'], model_ft, num_ftrs)
val_features, val_labels = extract_features(dataloaders['val'], model_ft, num_ftrs)

In [9]:
#Using Random Forest to perform classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV


# Define parameter grid
param_grid = {
    'max_depth': [10, 20, 30, 40],
    'max_features': [4, 8, 12, 16]
}

# Define classifier
rf = RandomForestClassifier(n_estimators=1000, random_state=42)

# Define grid search
grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(train_features,train_labels)

print('Best hyperparameters:', grid_search.best_params_)
print('Best accuracy:', grid_search.best_score_)


Best hyperparameters: {'max_depth': 10, 'max_features': 4}
Best accuracy: 0.9221938775510203


In [12]:
#Using logistic regression to perform classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score

param_grid = {'C': [0.01, 0.1, 1, 10, 100]}
logreg = LogisticRegression(penalty='l2', solver='lbfgs', max_iter=1000)
grid_search = GridSearchCV(logreg, param_grid, cv=5, scoring='accuracy')
grid_search.fit(train_features,train_labels )
y_pred = grid_search.predict(val_features)

# Calculate accuracy and F1 score
acc = accuracy_score(val_labels, y_pred)
f1 = f1_score(val_labels, y_pred, average='weighted')


In [13]:
print('Accuracy:', acc)
print('F1 score:', f1)


Accuracy: 0.9607843137254902
F1 score: 0.9608213604919811


In [15]:
#Using Support VEctor Machine to perform Classification
from sklearn.svm import SVC

# Train RBF kernel SVM using GridSearchCV
param_grid = {'C': [0.01, 0.1, 1, 10, 100], 'gamma': [0.01, 0.1, 1, 10, 100]}
svm = SVC(kernel='rbf')
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy')
grid_search.fit(train_features, train_labels)

# Predict labels using the final model
y_pred = grid_search.predict(val_features)

# Calculate accuracy and F1 score
acc = accuracy_score(val_labels, y_pred)
f1 = f1_score(val_labels, y_pred, average='weighted')

print('Accuracy:', acc)
print('F1 score:', f1)


Accuracy: 0.9477124183006536
F1 score: 0.9477931090131526
