SVM_Compare

In [None]:
!pip install torchmetrics
!pip install pytorch_lightning

In [20]:
from google.colab import drive
drive.mount('/content/drive')

import os
from PIL import Image
from sklearn import svm
from sklearn.model_selection import train_test_split
from torchvision import transforms

# image data augmentation
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# load data from the file
def load_data(data_dir):
    images = []
    labels = []
    for folder in os.listdir(data_dir):
        label = folder
        folder_path = os.path.join(data_dir, folder)
        for file in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file)
            image = Image.open(file_path)
            image = data_transforms(image)
            images.append(image)
            labels.append(label)
    return images, labels

# data file link
data_dir = '/content/drive/MyDrive/ML/isic2'
images, labels = load_data(data_dir)

# split train set and test set
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.3)

# transform the image daata into two dimension array
def flatten(images):
    return [image.view(-1).numpy() for image in images]

X_train = flatten(X_train)
X_test = flatten(X_test)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [28]:

# clf = svm.SVC(gamma=0.1, C=1., probability=True)
# clf = svm.SVC(gamma='auto', C=10.)
clf = svm.SVC()
clf.fit(X_train, y_train)


accuracy = clf.score(X_test, y_test)
print('Accuracy:', accuracy)

# 在SVC模型中,gamma参数的默认值为'auto',
# 且使用1/n_features作为gamma的值,
# 其中n_features是特征的数量。如果gamma参数的值为'scale',
# 则使用1 / (n_features * X.var())作为gamma的值,其中X.var()是输入数据的方差
# 当gamma参数的值为'auto'或'scale'时,SVC模型会根据输入数据的特性自动选择gamma的值



Accuracy: 0.7763157894736842


In [29]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc

y_pred = clf.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:.4f}".format(acc))

precision = precision_score(y_test, y_pred, pos_label='M')
# precision = precision_score(y_test, y_pred,average='weighted')
print("Precision: {:.4f}".format(precision))

recall = recall_score(y_test, y_pred, pos_label='M')
# recall = recall_score(y_test, y_pred,average='weighted')
print("Recall: {:.4f}".format(recall))

f1 = f1_score(y_test, y_pred, pos_label='M')
# f1 = f1_score(y_test, y_pred,average='weighted')
print("F1-score: {:.4f}".format(f1))

Accuracy: 0.7763
Precision: 0.8167
Recall: 0.8909
F1-score: 0.8522


In [23]:
from sklearn.model_selection import GridSearchCV
from sklearn import svm
# define svm prediction model
svm_model = svm.SVC(kernel='rbf')

# define the hperparameters
param_grid = {'C': [0.1, 1., 10., 100.], 'gamma': ['scale','auto', 0.01, 0.1, 1, 10]}

# GridSearchCV to do the cross validation
grid_search = GridSearchCV(svm_model, param_grid, cv=5)
grid_search.fit(X_train, y_train)
accuracy = grid_search.score(X_test, y_test)
print('Accuracy:', accuracy)
# output the scores
print('Best parameters:', grid_search.best_params_)
print('Best score:', grid_search.best_score_)


Accuracy: 0.7543859649122807
Best parameters: {'C': 10.0, 'gamma': 'auto'}
Best score: 0.7849056603773585


In [24]:
# data_dir = '/content/drive/MyDrive/ML/classification'
# img, l = load_data(data_dir)
# imgs = flatten(img)
# pred = clf.predict(imgs)
# print(pred)

In [25]:
import os
import cv2
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import torch
import torchvision
import torch.nn as nn
import torchmetrics
from torchvision import transforms

import torchvision.models as models
from torch.utils.data import DataLoader, Dataset,random_split
from pytorch_lightning import seed_everything, LightningModule, Trainer
from sklearn.metrics import classification_report



# path
data_dir = '/content/drive/MyDrive/ML/isic2'
categories = ['M', 'WM']

# transform the image data into feature vectors
def extract_features(img):
    # use OpenCV convert the image into grayscale，and resize
    gray = cv2.resize(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (50, 50))
    # reshape into 1-dimension
    features = np.reshape(gray, (1, -1))
    return features

# load dataset
data = []
labels = []
for category in categories:
    category_dir = os.path.join(data_dir, category)
    for filename in os.listdir(category_dir):
        img_path = os.path.join(category_dir, filename)
        img = cv2.imread(img_path)
        if img is not None:
            features = extract_features(img)
            data.append(features)
            labels.append(category)

# transform into numpy array
data = np.concatenate(data, axis=0)
labels = np.array(labels)

# split
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# use SVM to do the classification
clf = svm.SVC(C=1.0, kernel='linear', decision_function_shape='ovr')
clf.fit(X_train, y_train)

# predict and calculate the accuracy
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy * 100))


Accuracy: 65.79%


In [26]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc

y_pred = clf.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:.4f}".format(acc))

precision = precision_score(y_test, y_pred, pos_label='M')
# precision = precision_score(y_test, y_pred,average='weighted')
print("Precision: {:.4f}".format(precision))

recall = recall_score(y_test, y_pred, pos_label='M')
# recall = recall_score(y_test, y_pred,average='weighted')
print("Recall: {:.4f}".format(recall))

f1 = f1_score(y_test, y_pred, pos_label='M')
# f1 = f1_score(y_test, y_pred,average='weighted')
print("F1-score: {:.4f}".format(f1))

Accuracy: 0.6579
Precision: 0.7843
Recall: 0.7273
F1-score: 0.7547
