In [36]:
import os
import os.path
import shutil
import glob
import time
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold                                                                                                                       
from sklearn.metrics import confusion_matrix,accuracy_score

import matplotlib.pyplot as plt
import matplotlib.cm as colormap
plt.rcParams['image.cmap'] = 'Pastel1'

import numpy as np
np.random.seed(1)

from keras.preprocessing.image import img_to_array
from keras.utils import np_utils
from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50
from keras.applications.imagenet_utils import preprocess_input
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Sequential,Model
from keras.layers import Input,Flatten,Dense,Dropout,GlobalAveragePooling2D,Conv2D,MaxPooling2D

In [13]:
imagedir = "retina-dataset"

In [14]:
cur_dir = os.getcwd()
os.chdir(imagedir)  # the parent folder with sub-folders

# Get number of samples per family
list_fams = sorted(os.listdir(os.getcwd()), key=str.lower)  # vector of strings with family names
no_imgs = []  # No. of samples per family
for i in range(len(list_fams)):
    os.chdir(list_fams[i])
    len1 = len(glob.glob('*.png'))  # assuming the images are stored as 'png'
    no_imgs.append(len1)
    os.chdir('..')
num_samples = np.sum(no_imgs)  # total number of all samples

# Compute the labels
y = np.zeros(num_samples)
pos = 0
label = 0
for i in no_imgs:
    print ("Label:%2d\tFamily: %15s\tNumber of images: %d" % (label, list_fams[label], i))
    for j in range(i):
        y[pos] = label
        pos += 1
    label += 1
num_classes = label

# Compute the features
width, height,channels = (224,224,3)
#X = np.zeros((num_samples, width, height, channels))
cnt = 0
list_paths = [] # List of image paths
print("Processing images ...")
for i in range(len(list_fams)):
    for img_file in glob.glob(list_fams[i]+'/*.png'):
        #print("[%d] Processing image: %s" % (cnt, img_file))
        list_paths.append(os.path.join(os.getcwd(),img_file))
        #img = image.load_img(img_file, target_size=(224, 224))
        #x = image.img_to_array(img)
        #x = np.expand_dims(x, axis=0)
        #x = preprocess_input(x)
        #X[cnt] = x
        cnt += 1
print("Images processed: %d" %(cnt))

os.chdir(cur_dir)

Label: 0	Family:         Exudate	Number of images: 67600
Label: 1	Family:     Non_Exudate	Number of images: 70043
Processing images ...
Images processed: 137643


In [21]:
# Create stratified k-fold subsets                                                                                                                                        
kfold = 10  # no. of folds                                                                 
skf = StratifiedKFold(kfold, shuffle=True,random_state=1)
skfind = [None] * kfold  # skfind[i][0] -> train indices, skfind[i][1] -> test indices
cnt = 0                                              
#for index in skf.split(X, y):    
for index in skf.split(resnet50features, y):         
    skfind[cnt] = index                                                 
    cnt += 1 

In [22]:
for i in range(kfold):
    print(skfind[i][1])

[    12     28     30 ..., 137586 137597 137613]
[    10     14     21 ..., 137633 137639 137641]
[    22     23     40 ..., 137616 137624 137629]
[     2      3     11 ..., 137609 137615 137622]
[     8     13     19 ..., 137600 137603 137610]
[    26     35     36 ..., 137606 137608 137621]
[     0      4      6 ..., 137623 137625 137626]
[     1      5     49 ..., 137637 137638 137642]
[    39     48     59 ..., 137619 137627 137635]
[     9     15     18 ..., 137634 137636 137640]


In [34]:
l = np.array(list_paths)
for i in range(kfold):
    print(l[skfind[i][1]])

['/home/edmar/GIT/Retina/retina-dataset/Exudate/Exudate__3_10_57.png'
 '/home/edmar/GIT/Retina/retina-dataset/Exudate/Exudate__4_15_176.png'
 '/home/edmar/GIT/Retina/retina-dataset/Exudate/Exudate__15_15_26.png' ...,
 '/home/edmar/GIT/Retina/retina-dataset/Non_Exudate/Non_Exudate_5_5_561.png'
 '/home/edmar/GIT/Retina/retina-dataset/Non_Exudate/Non_Exudate_7_8_235.png'
 '/home/edmar/GIT/Retina/retina-dataset/Non_Exudate/Non_Exudate_0_8_573.png']
['/home/edmar/GIT/Retina/retina-dataset/Exudate/Exudate__24_12_156.png'
 '/home/edmar/GIT/Retina/retina-dataset/Exudate/Exudate__4_17_2720.png'
 '/home/edmar/GIT/Retina/retina-dataset/Exudate/Exudate__5_13_379.png' ...,
 '/home/edmar/GIT/Retina/retina-dataset/Non_Exudate/Non_Exudate_4_1_307.png'
 '/home/edmar/GIT/Retina/retina-dataset/Non_Exudate/Non_Exudate_4_3_482.png'
 '/home/edmar/GIT/Retina/retina-dataset/Non_Exudate/Non_Exudate_4_3_574.png']
['/home/edmar/GIT/Retina/retina-dataset/Exudate/Exudate__19_9_51.png'
 '/home/edmar/GIT/Retina/reti

In [37]:
foldsdir = 'Folds' 
if not os.path.exists(foldsdir):
    os.makedirs(foldsdir)

l = np.array(list_paths)
for i in range(kfold):
    fdir = os.path.join(foldsdir,'Fold'+str(i))
    if not os.path.exists(fdir):
        os.makedirs(fdir)
    for fname in l[skfind[i][1]]:
        shutil.copy(fname,fdir)