In [85]:
import numpy as np 
import matplotlib.pyplot as plt
import glob
import cv2
from sklearn.model_selection import train_test_split
import os
from keras.applications.densenet import DenseNet169, DenseNet121
from keras.applications.mobilenet import MobileNet
from sklearn import preprocessing
from google.colab import drive
import xgboost as xgb
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.metrics import classification_report
from bayes_opt import BayesianOptimization
from sklearn.metrics import mean_squared_error

## loading the dataset

In [4]:
SIZE = 224
images = []
images_labels = [] 

### importing each image and set the current directory name as its label 

In [5]:
ext = ['png', 'jpg', 'jpeg'] 

for directory_path in glob.glob("PATH‌ TO THE DATASET"):
    splited = directory_path.split("/")
    label = splited[-1]
    print(label)
    for e in ext:
      for img_path in glob.glob(os.path.join(directory_path, "*." + e)):
          #print(img_path)
           img = cv2.imread(img_path, cv2.IMREAD_COLOR)       
           img = cv2.resize(img, (SIZE, SIZE))
           images.append(img)
           images_labels.append(label)

Pneumonia
No_findings
Covid-19


In [6]:
images = np.array(images)
images_labels = np.array(images_labels)

In [7]:
images.shape

(1125, 224, 224, 3)

## pre-processing

### label encoding

In [8]:
le = preprocessing.LabelEncoder()
le.fit(images_labels)
labels_encoded = le.transform(images_labels)

### spliting the dataset into training set and test set

In [9]:
x_train, x_test, y_train, y_test = train_test_split(images, labels_encoded, test_size=0.2, random_state=42)

### scaling train set and test set

In [10]:
x_train, x_test = x_train / 255.0, x_test / 255.0

## feature Extraction

### DenseNet169

In [11]:
dense_model = DenseNet169(include_top=False, input_shape=(SIZE, SIZE, 3), pooling='avg')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5


In [12]:
dense_features = dense_model.predict(x_train)

In [13]:
features = dense_features.reshape(dense_features.shape[0], -1)

In [14]:
features.shape

(900, 1664)

### mobilenet

In [15]:
model_mobile = MobileNet(weights='imagenet',include_top=False, input_shape=(SIZE, SIZE, 3), pooling='avg')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf_no_top.h5


In [16]:
mobile_features = model_mobile.predict(x_train)

In [17]:
features_2 = mobile_features.reshape(mobile_features.shape[0], -1)

In [18]:
features_2.shape

(900, 1024)

## feature combination

In [20]:
combined_features = np.hstack((dense_features, mobile_features))

## parameters optimization
to get the best out of our classifier, we need to find the best parameters of xgbclassifier

In [21]:
classifier = xgb.XGBClassifier()
classifier.fit(combined_features, y_train) #For sklearn no one hot encoding

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='multi:softprob', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [22]:
train_prediction = classifier.predict(combined_features)

In [23]:
dtrain = xgb.DMatrix(combined_features, label=y_train)

In [26]:
print(classification_report(train_prediction, y_train))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        99
           1       1.00      1.00      1.00       410
           2       1.00      1.00      1.00       391

    accuracy                           1.00       900
   macro avg       1.00      1.00      1.00       900
weighted avg       1.00      1.00      1.00       900



In [24]:
cm = confusion_matrix(train_prediction, y_train)

In [25]:
accuracy = cm.diagonal().sum()/cm.sum()
print(accuracy)

1.0


In [73]:
def bo_tune_xgb(max_depth, gamma, n_estimators ,learning_rate):
  params = {'max_depth': int(max_depth),
            'gamma': gamma,
            'n_estimators': int(n_estimators),
            'learning_rate':learning_rate,
            'subsample': 0.8,
            'eta': 0.1,
            'eval_metric': 'rmse',
            }
  cv_result = xgb.cv(params, dtrain, num_boost_round=70, nfold=5)
  return -1.0 * cv_result['test-rmse-mean'].iloc[-1]

In [74]:
xgb_bo = BayesianOptimization(bo_tune_xgb, {'max_depth': (3, 10),
                                             'gamma': (0, 1),
                                             'learning_rate':(0,1),
                                             'n_estimators':(100,150)
                                            })

In [75]:
xgb_bo.maximize(n_iter=2, init_points=8, acq='ei')

|   iter    |  target   |   gamma   | learni... | max_depth | n_esti... |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m-0.4657  [0m | [0m 0.05113 [0m | [0m 0.1383  [0m | [0m 4.97    [0m | [0m 123.6   [0m |
| [0m 2       [0m | [0m-0.4691  [0m | [0m 0.5944  [0m | [0m 0.09867 [0m | [0m 6.53    [0m | [0m 143.1   [0m |
| [0m 3       [0m | [0m-0.5626  [0m | [0m 0.4564  [0m | [0m 0.5881  [0m | [0m 3.485   [0m | [0m 113.6   [0m |
| [0m 4       [0m | [0m-0.4887  [0m | [0m 0.03935 [0m | [0m 0.2692  [0m | [0m 4.192   [0m | [0m 121.6   [0m |
| [0m 5       [0m | [0m-0.4859  [0m | [0m 0.8666  [0m | [0m 0.2499  [0m | [0m 4.608   [0m | [0m 117.5   [0m |
| [0m 6       [0m | [0m-0.5825  [0m | [0m 0.2493  [0m | [0m 0.6346  [0m | [0m 3.183   [0m | [0m 113.0   [0m |
| [0m 7       [0m | [0m-0.5622  [0m | [0m 0.6027  [0m | [0m 0.582   [0m | [0m 7.621   [0m | [0m 124.6   [0m 

In [76]:
params = xgb_bo.max['params']
print(params)

{'gamma': 0.05113333243656282, 'learning_rate': 0.13829759883059534, 'max_depth': 4.970455351622159, 'n_estimators': 123.63582130972148}


In [77]:
params['max_depth']= int(params['max_depth'])
params['n_estimators']= int(params['n_estimators'])

In [66]:
print(params)

{'max_depth': 6, 'gamma': 0.45, 'n_estimators': 150, 'learning_rate': 0.6, 'subsample': 0.8, 'eta': 0.1, 'eval_metric': 'rmse', 'objective': 'multi softmax'}


## Training the model with optimized parameters

In [78]:
classifier = xgb.XGBClassifier(**params)
classifier.fit(combined_features, y_train) 

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0.05113333243656282,
              learning_rate=0.13829759883059534, max_delta_step=0, max_depth=4,
              min_child_weight=1, missing=None, n_estimators=123, n_jobs=1,
              nthread=None, objective='multi:softprob', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

## predict test set

In [68]:
X_test_dense_features = dense_model.predict(x_test)
X_test_dense_features = X_test_dense_features.reshape(X_test_dense_features.shape[0], -1)

In [69]:
X_test_mobile_features = model_mobile.predict(x_test)
X_test_mobile_features = X_test_mobile_features.reshape(X_test_mobile_features.shape[0], -1)

In [79]:
combined_test_features = np.hstack((X_test_dense_features, X_test_mobile_features))

In [89]:
prediction = classifier.predict(combined_test_features)
Inverse le transform to get original label back. 
prediction = le.inverse_transform(prediction)

## accuracy score

In [90]:
print ("Accuracy = ", metrics.accuracy_score(y_test, prediction))

Accuracy =  0.8711111111111111
