# Import Libraries

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import platform
import os
from sklearn.utils import class_weight
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

%matplotlib inline
%load_ext autoreload
%aimport utils
%aimport imc
%autoreload 1

Using TensorFlow backend.


# Settings

In [2]:
# Separator inserted between the components of the dataset paths.
pdiv = "/"

# --- Class labels ---
normal_label, pneumonia_label = 0, 1
labels = [normal_label, pneumonia_label]
labels.sort()

# --- Image settings ---
# NOTE(review): the second entry is 0, which looks unintended for an image
# size — confirm the target dimensions before relying on resize_dim.
dimension = (64, 0)
resize_dim = dimension[0:2]

# Training and Test data preparation

In [66]:
# Dataset directories: ./chest_xray/<split>/<CLASS>/ assembled with the
# separator configured in the settings cell.
path_train_normal = ".{}chest_xray{}train{}NORMAL{}".format(pdiv, pdiv, pdiv, pdiv)
path_train_pneumonia = ".{}chest_xray{}train{}PNEUMONIA{}".format(pdiv, pdiv, pdiv, pdiv)

path_test_normal = ".{}chest_xray{}test{}NORMAL{}".format(pdiv, pdiv, pdiv, pdiv)
path_test_pneumonia = ".{}chest_xray{}test{}PNEUMONIA{}".format(pdiv, pdiv, pdiv, pdiv)

# Maximum number of images to be loaded from each directory
# (only applied when load_all_images is False).
train_images_limit = 200
test_images_limit = 200
load_all_images = False

paths_train_normal = utils.extract_image_paths(path_train_normal)
paths_train_pneumonia = utils.extract_image_paths(path_train_pneumonia)
paths_test_normal = utils.extract_image_paths(path_test_normal)
paths_test_pneumonia = utils.extract_image_paths(path_test_pneumonia)

if not load_all_images:
    # Slice from the start so exactly `limit` images are kept; the previous
    # `[1:limit]` dropped the first image and returned limit - 1 paths.
    paths_train_normal = paths_train_normal[:train_images_limit]
    paths_train_pneumonia = paths_train_pneumonia[:train_images_limit]
    paths_test_normal = paths_test_normal[:test_images_limit]
    # The test subset must come from the *test* pneumonia paths; slicing the
    # training paths here would leak training images into the evaluation set.
    paths_test_pneumonia = paths_test_pneumonia[:test_images_limit]

# Build Sift Vocabulary

In [43]:
import pickle
import os.path as osp

# Number of words requested for the vocabulary, and the pickle file it is
# written to (the filename is passed on to the histogram step).
vocab_size = 200
vocab_filename = 'vocab2.pkl'

# The vocabulary is built from the training image paths of both classes.
vocab = imc.build_vocabulary_from_dirs(
    paths_train_normal,
    paths_train_pneumonia,
    vocab_size,
)

# Serialize the vocabulary to disk.
with open(vocab_filename, mode='wb') as vocab_file:
    pickle.dump(vocab, vocab_file)

print(f"Built {vocab_size}-word vocabulary from training dataset, saved to {vocab_filename}")

Built 200-word vocabulary from training dataset, saved to vocab2.pkl


# Generate Image Histograms

In [44]:
from imc import bags_of_sifts


def _histograms(image_paths):
    """Run bags_of_sifts on `image_paths` against the saved vocabulary file."""
    return bags_of_sifts(image_paths, vocab_filename)


# Training features, one batch per class.
data_train_normal = _histograms(paths_train_normal)
data_train_pneumonia = _histograms(paths_train_pneumonia)

# Test features, one batch per class.
data_test_normal = _histograms(paths_test_normal)
data_test_pneumonia = _histograms(paths_test_pneumonia)

print("Image histograms generated for training and test images")


Image histograms generated for training and test images


# Combine Datasets

In [53]:
# Per-class label vectors, one entry per image path. The label values come
# from the constants defined in the settings cell rather than repeated literals.
label_train_normal = [normal_label] * len(paths_train_normal)
label_train_pneumonia = [pneumonia_label] * len(paths_train_pneumonia)
label_test_normal = [normal_label] * len(paths_test_normal)
label_test_pneumonia = [pneumonia_label] * len(paths_test_pneumonia)

# Combine training features and labels (normal first, then pneumonia)
data_train = np.concatenate((data_train_normal, data_train_pneumonia), axis=0)
label_train = np.asarray(label_train_normal + label_train_pneumonia)

# Combine test features and labels (same class order as above)
data_test = np.concatenate((data_test_normal, data_test_pneumonia), axis=0)
label_test = np.asarray(label_test_normal + label_test_pneumonia)

# Labels are derived from the path lists while features come from the
# histogram step; fail loudly here if the two ever disagree, because the
# shuffle cell would otherwise misalign or truncate them silently.
assert data_train.shape[0] == label_train.shape[0], "train features/labels row count mismatch"
assert data_test.shape[0] == label_test.shape[0], "test features/labels row count mismatch"

print("Data concatenated")
print(f"Train data dimensions: {data_train.shape}")
print(f"Test data dimensions: {data_test.shape}")


Data concatenated
Train data dimensions: (5216, 200)
Test data dimensions: (624, 200)


# Shuffle Data and Labels

In [54]:
# Use a dedicated, seeded generator so the shuffled order (and therefore every
# downstream result) is reproducible after Restart & Run All, without
# touching NumPy's global random state.
shuffle_seed = 42
shuffle_rng = np.random.RandomState(shuffle_seed)

# Shuffle the training set; the same permutation is applied to features and
# labels so each row keeps its label.
rand_order = shuffle_rng.permutation(data_train.shape[0])

data_train = data_train[rand_order]
label_train = label_train[rand_order]


# Shuffle the test set the same way, with its own permutation.
rand_order = shuffle_rng.permutation(data_test.shape[0])
data_test = data_test[rand_order]
label_test = label_test[rand_order]

print("Datasets shuffled")

Datasets shuffled


# SVM - Scale Gamma

In [55]:
# Fit an SVC with gamma='scale'; no kernel is passed, so the estimator's
# default kernel applies. fit() returns the estimator, so it is chained.
svclassifier = SVC(gamma='scale').fit(data_train, label_train)

# Predict labels for the test features
label_pred = svclassifier.predict(data_test)

# Print the confusion matrix first, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(label_test, label_pred))

[[  5 229]
 [  0 390]]
              precision    recall  f1-score   support

           0       1.00      0.02      0.04       234
           1       0.63      1.00      0.77       390

    accuracy                           0.63       624
   macro avg       0.82      0.51      0.41       624
weighted avg       0.77      0.63      0.50       624



# SVM - polynomial kernel

In [56]:
# Fit an SVC with a degree-8 polynomial kernel; fit() returns the estimator,
# so construction and training are chained.
svclassifier = SVC(kernel='poly', degree=8).fit(data_train, label_train)

# Predict labels for the test features
label_pred = svclassifier.predict(data_test)

# Print the confusion matrix first, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(label_test, label_pred))

[[  9 225]
 [  0 390]]
              precision    recall  f1-score   support

           0       1.00      0.04      0.07       234
           1       0.63      1.00      0.78       390

    accuracy                           0.64       624
   macro avg       0.82      0.52      0.43       624
weighted avg       0.77      0.64      0.51       624



# SVM - Gaussian kernel

In [57]:
# Fit an SVC with an explicit RBF (Gaussian) kernel; fit() returns the
# estimator, so construction and training are chained.
svclassifier = SVC(kernel='rbf').fit(data_train, label_train)

# Predict labels for the test features
label_pred = svclassifier.predict(data_test)

# Print the confusion matrix first, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(label_test, label_pred))

[[  5 229]
 [  0 390]]
              precision    recall  f1-score   support

           0       1.00      0.02      0.04       234
           1       0.63      1.00      0.77       390

    accuracy                           0.63       624
   macro avg       0.82      0.51      0.41       624
weighted avg       0.77      0.63      0.50       624



# SVM - Sigmoid kernel

In [58]:
# Fit an SVC with a sigmoid kernel; fit() returns the estimator, so
# construction and training are chained.
svclassifier = SVC(kernel='sigmoid').fit(data_train, label_train)

# Predict labels for the test features
label_pred = svclassifier.predict(data_test)

# Print the confusion matrix first, then the per-class report.
for metric in (confusion_matrix, classification_report):
    print(metric(label_test, label_pred))

[[102 132]
 [ 86 304]]
              precision    recall  f1-score   support

           0       0.54      0.44      0.48       234
           1       0.70      0.78      0.74       390

    accuracy                           0.65       624
   macro avg       0.62      0.61      0.61       624
weighted avg       0.64      0.65      0.64       624

