In [1]:
from keras import applications
from keras import models
from keras import layers
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense, Activation, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras import regularizers
import matplotlib.pyplot as plt
from keras.models import load_model
from keras.utils.np_utils import to_categorical 
from keras.callbacks import ModelCheckpoint
from keras import Model
from keras import initializers
from keras.callbacks import LearningRateScheduler
from keras.utils import layer_utils, np_utils
from keras.applications.inception_v3 import preprocess_input
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.datasets import make_classification
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
from sklearn.cross_validation import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.utils import class_weight
import seaborn as sn
from scipy import interp
from itertools import cycle
from sklearn.decomposition import TruncatedSVD
from sklearn.ensemble import RandomForestClassifier
from scipy.sparse import csr_matrix

Using TensorFlow backend.


In [2]:
# Input resolution fed to the network (square: 224 x 224).
image_width = image_height = 224

# Images per batch; lower this if the machine runs out of memory.
batch_size = 64

# Root folder of the training images (read by flow_from_directory later on).
train_dir = "C:/Users/hp/Desktop/Diabetic_retinopathy_dataset_kaggle/original_new/"

# ImageNet-pretrained InceptionV3 without its classification head.
# NOTE(review): Keras documents input_shape as (height, width, channels); the
# (width, height) order used here is harmless only while both are equal -
# confirm before switching to a non-square size.
inceptionv3_base = applications.InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(image_width, image_height, 3),
)

In [3]:
# Flatten the output of the last InceptionV3 layer into one vector per image
# and expose it as the output of a feature-extraction model.
flatten = Flatten()
feature_extraction_layer = flatten(
    inceptionv3_base.get_layer(index=-1).output
)
model = Model(
    inputs=inceptionv3_base.input,
    outputs=feature_extraction_layer,
)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 111, 111, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 111, 111, 32) 96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 111, 111, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (

In [4]:
# Run the InceptionV3 preprocessing function on every image that is loaded.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# class_mode=None: the generator yields image batches only, no labels.
# shuffle=False: keep the files in their original order.
# NOTE(review): Keras documents target_size as (height, width); the order used
# here only works because both values are equal - confirm before changing them.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(image_width, image_height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False,
)

nb_train_samples = len(train_generator.filenames)
num_classes = len(train_generator.class_indices)

# One-hot encode the class labels of the training data, in the original order.
train_labels = to_categorical(train_generator.classes, num_classes=num_classes)

Found 744 images belonging to 5 classes.


In [5]:
# # the predict_generator method returns the output of a model, given
# # a generator that yields batches of numpy data
# bottleneck_features_train = model.predict_generator(train_generator, nb_train_samples // batch_size + 1)
# # save the output as a Numpy array
# np.save(open('C:/Users/hp/Desktop/Diabetic_retinopathy_dataset_kaggle/models/inceptionV3/bottle_neck_features/bottleneck_features_train_without_gap_svd.npy', 'wb'), bottleneck_features_train)

In [6]:
# Load the cached bottleneck features. The path is passed straight to np.load
# so that NumPy opens and closes the file itself; wrapping it in open(...)
# would leave a file handle that is never closed.
train_data = np.load('C:/Users/hp/Desktop/Diabetic_retinopathy_dataset_kaggle/models/inceptionV3/bottle_neck_features/bottleneck_features_train_without_gap_svd.npy')
#test_data = np.load(open('D:/retinal_data_set_visioncare/models/inceptionV3/bottle_neck_features/bottleneck_features_test_without_aug.npy', 'rb'))

In [7]:
# Class index of every image, as reported by the directory generator.
train_data_labels = train_generator.classes

In [8]:
# Feature scaling: normalize the features with StandardScaler.
# NOTE(review): the scaler is fit on all of train_data here, before any
# cross-validation split, so held-out folds contribute to the scaling
# statistics - consider fitting it inside each fold instead.
scaler = StandardScaler()
X_train = scaler.fit_transform(train_data)

In [9]:
# Sanity check: number of rows in the scaled feature matrix.
len(X_train)

744

In [10]:
# Short aliases used by the cross-validation cells:
# X is the scaled feature matrix, Y the class index of each image.
train_data_labels = train_generator.classes
X, Y = X_train, train_data_labels

In [11]:
# Sanity check: number of labels (should match the number of feature rows).
len(Y)

744

In [12]:
# Distinct class indices present in the labels.
np.unique(train_data_labels)

array([0, 1, 2, 3, 4])

In [13]:
# Import the function under its own name. The result below is stored in
# `class_weight`, which rebinds the imported `sklearn.utils.class_weight`
# module name; looking the function up through that name would therefore fail
# with AttributeError as soon as this cell is run a second time.
from sklearn.utils.class_weight import compute_class_weight

# 'balanced' weights are n_samples / (n_classes * count(class)), so rare
# classes get larger weights. Arguments are passed by keyword because recent
# scikit-learn releases no longer accept them positionally.
class_weight = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_data_labels),
    y=train_data_labels,
)

In [14]:
# Preview the weights as {position: weight}; the positions coincide with the
# class indices because np.unique returned them sorted as 0..4.
dict(enumerate(class_weight))

{0: 0.3241830065359477,
 1: 1.488,
 2: 1.984,
 3: 3.381818181818182,
 4: 2.2545454545454544}

In [15]:
# Map each position in the weight array to its weight, for use as the
# classifier's class_weight argument.
class_weight_dic = {position: weight for position, weight in enumerate(class_weight)}

In [16]:
# Cross-validation harness: stratified 5-fold CV repeated 5 times, i.e. 25
# train/validation splits in total. The splitter is seeded for reproducibility.
kfold = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=25)

# Per-split accumulators filled by the evaluation loop.
cvscores = []
trainScores = []
f1Score = []
num_k_folds = 5
fold_counter = 0
val_conmats = []
val_precisions = []
val_recalls = []
val_f_scores = []

# Number of features per sample.
input_dim = X_train.shape[1]

# Shallow, strongly regularized forest with per-class weights for the imbalance.
# random_state is fixed so that repeated runs of the notebook give the same
# scores; without it every fit draws different bootstrap samples and feature
# subsets.
rf_classifier = RandomForestClassifier(
    n_estimators=500,
    max_features=20,
    max_depth=5,
    min_samples_leaf=40,
    criterion="gini",
    class_weight=class_weight_dic,
    n_jobs=-1,
    random_state=25,
)

In [17]:
# Evaluate the random forest on every split produced by `kfold`.
#
# Start from empty accumulators so that re-running this cell does not append a
# second set of folds onto results left over from a previous run (which would
# skew every average printed below).
cvscores = []
trainScores = []
val_conmats = []
val_precisions = []
val_recalls = []
val_f_scores = []
fold_counter = 0

# Fix the label set so every fold reports its metrics for the same classes in
# the same order. Otherwise a class that is absent from one fold changes the
# array shapes, and the confusion matrices can no longer be summed.
class_labels = np.unique(Y)

for train, test in kfold.split(X, Y):
    # Refit the model from scratch on this split's training indices.
    rf_classifier.fit(X[train], Y[train])

    y_train_pred = rf_classifier.predict(X[train])
    y_validation_pred = rf_classifier.predict(X[test])

    # Per-class precision / recall / F-score on the held-out fold.
    precision, recall, f_score, _ = precision_recall_fscore_support(
        Y[test], y_validation_pred, labels=class_labels)
    conmat = confusion_matrix(Y[test], y_validation_pred, labels=class_labels)

    val_precisions.append(precision)
    val_recalls.append(recall)
    val_f_scores.append(f_score)
    val_conmats.append(conmat)
    fold_counter = fold_counter + 1

    trainScores.append(accuracy_score(Y[train], y_train_pred))
    cvscores.append(accuracy_score(Y[test], y_validation_pred))

# Means and standard deviations are taken over all classes and all splits.
print("\nAveraging the 5-fold results:")
print("%s: %.2f%%" % ('AVG Train Acc ', np.mean(trainScores) * 100))
print("%s: %.2f%%" % ('AVG Validation Acc ', np.mean(cvscores) * 100))
print("Validation precision - mean: %f, stddev: %f" % (np.mean(val_precisions), np.std(val_precisions)))
print("Validation recall - mean: %f, stddev: %f" % (np.mean(val_recalls), np.std(val_recalls)))
print("Validation f-score - mean: %f, stddev: %f" % (np.mean(val_f_scores), np.std(val_f_scores)))
print("Confusion matrix:")
# Average confusion matrix per split (rows: true class, columns: predicted).
print (sum(val_conmats).astype(float) / fold_counter)


Averaging the 5-fold results:
AVG Train Acc : 84.54%
AVG Validation Acc : 61.96%
Validation precision - mean: 0.524987, stddev: 0.230918
Validation recall - mean: 0.505572, stddev: 0.247291
Validation f-score - mean: 0.486283, stddev: 0.221706
Confusion matrix:
[[66.84 20.36  1.12  0.32  3.16]
 [ 9.84  9.04  0.32  0.    0.8 ]
 [ 6.88  2.36  2.72  0.88  2.16]
 [ 0.28  0.64  1.4   3.52  2.96]
 [ 0.84  0.48  0.24  1.56 10.08]]
