# Import Libraries

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import platform
import os
from sklearn.utils import class_weight
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

%matplotlib inline
%load_ext autoreload
%aimport utils
%aimport imc
%autoreload 1

Using TensorFlow backend.


# Settings

In [2]:
# Separator placed between path components when the dataset directory
# strings are assembled.
pdiv = "/"

### LABELS ###

# Integer class ids; `labels` lists them in ascending order.
normal_label, pneumonia_label = 0, 1
labels = sorted((normal_label, pneumonia_label))

### IMAGE SETTINGS ###

# NOTE(review): the second component is 0, which looks unusual if this is
# meant to be an image size - confirm the intended value before relying on it.
dimension = (64, 0)
# First two components of `dimension`.
resize_dim = dimension[:2]

# Training and Test data preparation

In [3]:
# Directory strings have the shape .<sep>chest_xray<sep><split><sep><CLASS><sep>
# (the trailing empty component yields the final separator). The nested
# iteration order is train/NORMAL, train/PNEUMONIA, test/NORMAL,
# test/PNEUMONIA, matching the unpacking targets on the left.
path_train_normal, path_train_pneumonia, path_test_normal, path_test_pneumonia = (
    pdiv.join((".", "chest_xray", split, category, ""))
    for split in ("train", "test")
    for category in ("NORMAL", "PNEUMONIA")
)

print("File name loaded")

# Build SIFT Vocabulary

In [5]:
from imc import build_vocabulary_from_dirs
import os.path as osp
import pickle

# File that caches the vocabulary; later cells pass this filename on to the
# histogram builder.
vocab_filename = 'vocab2.pkl'
vocab_size = 200

# Set to True to discard the cached file and rebuild, e.g. after changing
# `vocab_size` or the training images.
force_rebuild = False

if force_rebuild or not osp.isfile(vocab_filename):
    # Expensive step: only runs when no cached vocabulary is available
    # (or a rebuild was explicitly requested).
    vocab = build_vocabulary_from_dirs(path_train_normal, path_train_pneumonia, vocab_size)
    with open(vocab_filename, 'wb') as f:
        pickle.dump(vocab, f)
    print(f"Built {vocab_size}-word vocabulary from training dataset, saved to {vocab_filename}")
else:
    # The pickle was written by this notebook; never point `vocab_filename`
    # at an untrusted file, since unpickling can execute arbitrary code.
    with open(vocab_filename, 'rb') as f:
        vocab = pickle.load(f)
    print(f"Loaded cached vocabulary from {vocab_filename}")

vocab2.pkl saved
Built 200-word vocabulary from training dataset


# Generate Histogram

In [34]:
from imc import bags_of_sifts_from_dir

# One call per image directory, evaluated in the order listed, each using
# the vocabulary stored in `vocab_filename`.
# Presumably every call returns one histogram row per image - TODO confirm
# against imc.bags_of_sifts_from_dir.
data_train_normal, data_train_pneumonia, data_test_normal, data_test_pneumonia = (
    bags_of_sifts_from_dir(image_dir, vocab_filename)
    for image_dir in (
        path_train_normal,
        path_train_pneumonia,
        path_test_normal,
        path_test_pneumonia,
    )
)

print("Image histograms generated for training and test images")

Image histograms generated for training and test images


# Combine Datasets and Labels

In [35]:
# Build one label per feature row. The counts are taken from the feature
# matrices themselves (instead of a hard-coded 50) so labels and data cannot
# get out of step when a directory holds a different number of images.
label_train_normal = [normal_label] * len(data_train_normal)
label_train_pneumonia = [pneumonia_label] * len(data_train_pneumonia)
label_test_normal = [normal_label] * len(data_test_normal)
label_test_pneumonia = [pneumonia_label] * len(data_test_pneumonia)

# Combine training images and labels (normal rows first, then pneumonia)
data_train = np.concatenate((data_train_normal, data_train_pneumonia), axis=0)
label_train = np.asarray(label_train_normal + label_train_pneumonia)

# Combine test images and labels (same ordering as the training set)
data_test = np.concatenate((data_test_normal, data_test_pneumonia), axis=0)
label_test = np.asarray(label_test_normal + label_test_pneumonia)

print("Data concatenated")
print(f"Train data dimensions: {data_train.shape}")
print(f"Test data dimensions: {data_test.shape}")


Data concatenated
Train data dimensions: (100, 200)
Test data dimensions: (100, 200)


# Shuffle

In [36]:
# Seeded generator so that Restart & Run All reproduces the same ordering
# (the original used the unseeded global NumPy random state).
shuffle_rng = np.random.RandomState(42)

# Shuffle the training set; the same permutation is applied to rows and
# labels so they stay aligned.
rand_order = shuffle_rng.permutation(data_train.shape[0])
data_train = data_train[rand_order]
label_train = label_train[rand_order]

# Shuffle the test set the same way, with its own permutation.
rand_order = shuffle_rng.permutation(data_test.shape[0])
data_test = data_test[rand_order]
label_test = label_test[rand_order]

print("Datasets shuffled")

Datasets shuffled


# SVM - RBF kernel (gamma='scale')

In [39]:
# Train an SVC with gamma='scale' (kernel left at its default) on the
# shuffled training histograms; fit() returns the fitted estimator.
svclassifier = SVC(gamma='scale').fit(data_train, label_train)

# Predict the held-out test set.
label_pred = svclassifier.predict(data_test)

# Print the confusion matrix first, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(label_test, label_pred))

[[15 35]
 [ 9 41]]
              precision    recall  f1-score   support

           0       0.62      0.30      0.41        50
           1       0.54      0.82      0.65        50

    accuracy                           0.56       100
   macro avg       0.58      0.56      0.53       100
weighted avg       0.58      0.56      0.53       100



# SVM - polynomial kernel

In [38]:
# Polynomial-kernel SVC of degree 8, trained on the training histograms.
poly_params = {"kernel": 'poly', "degree": 8}
svclassifier = SVC(**poly_params)
svclassifier.fit(data_train, label_train)

# Predicted labels for the test histograms.
label_pred = svclassifier.predict(data_test)

# Confusion matrix followed by the precision/recall/F1 summary.
print(confusion_matrix(label_test, label_pred))
print(classification_report(label_test, label_pred))

[[20 30]
 [ 1 49]]
              precision    recall  f1-score   support

           0       0.95      0.40      0.56        50
           1       0.62      0.98      0.76        50

    accuracy                           0.69       100
   macro avg       0.79      0.69      0.66       100
weighted avg       0.79      0.69      0.66       100



# SVM - Gaussian kernel

In [40]:
# SVC with the kernel set explicitly to 'rbf' (Gaussian).
svclassifier = SVC(kernel='rbf').fit(data_train, label_train)

# Classify the test histograms and report the results.
label_pred = svclassifier.predict(data_test)

for metric in (confusion_matrix, classification_report):
    print(metric(label_test, label_pred))

[[15 35]
 [ 9 41]]
              precision    recall  f1-score   support

           0       0.62      0.30      0.41        50
           1       0.54      0.82      0.65        50

    accuracy                           0.56       100
   macro avg       0.58      0.56      0.53       100
weighted avg       0.58      0.56      0.53       100



# SVM - Sigmoid kernel

In [7]:
# Sigmoid-kernel SVC.
# NOTE(review): the saved output of this cell reports 54 test samples while
# the other SVM cells report 100 - it appears to come from an earlier run;
# re-execute after the cells above so the report reflects the current data.
sigmoid_svc = SVC(kernel='sigmoid')
svclassifier = sigmoid_svc.fit(data_train, label_train)

# Predictions for the test histograms.
label_pred = svclassifier.predict(data_test)

# Confusion matrix, then the per-class report.
print(confusion_matrix(label_test, label_pred))
print(classification_report(label_test, label_pred))



[[25  2]
 [ 6 21]]
              precision    recall  f1-score   support

           0       0.81      0.93      0.86        27
           1       0.91      0.78      0.84        27

    accuracy                           0.85        54
   macro avg       0.86      0.85      0.85        54
weighted avg       0.86      0.85      0.85        54

