In [1]:
import os
import cv2
import keras 
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras import regularizers
import pickle
import keras 
from keras import backend as K
from keras.layers import Dropout
from keras.models import Sequential
from keras.layers import Activation, MaxPooling2D
from keras.layers.core import Dense, Flatten
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.convolutional import *
from sklearn.metrics import confusion_matrix
from keras.constraints import max_norm
from keras import optimizers
from sklearn.metrics import precision_score, recall_score, f1_score
from keras.callbacks import ReduceLROnPlateau

In [2]:
# To mount drive, if you are at colab
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


# Import Data

In [3]:
data_dir     = '/content/gdrive/MyDrive/dataset anemia/'
india_folder = os.path.join(data_dir, 'India')
italy_folder = os.path.join(data_dir, 'Italy')

In [87]:
def should_use_img(img_name, img_type):
    if img_name.split(".")[1] in ["jpg", "png"]:
        img_name = img_name.split(".")[0]
        
        if img_type == "":
            img_types   = ["forniceal", "forniceal_palpebral", "palpebral"]
            for _type in img_types:
                if img_name.endswith(_type):
                    return False
            return True
        elif img_type == "palpebral":
            img_name = img_name.split(".")[0]
            if img_name.endswith("forniceal_palpebral"):
                    return False
            return True if img_name.endswith(img_type) else False
            
        else:
            return True if img_name.endswith(img_type)  else False
    
    else:
        return False
        

img_to_use  = ""
img_files   = {"India":[], "Italy":[]}

for folder in ['India', 'Italy']:
    img_folder = os.path.join(data_dir, folder)
    print("Looking into -> ", folder)
    for root, dirs, files in os.walk(img_folder):
        flag = False
        for file in files:
            if should_use_img(file, img_to_use):
                cls = root.split("\\")[-1]
                img_files[folder].append( ( os.path.join(root, file), cls) )
                flag = True
                break
        if flag is False:
            print(root, " not found -> ", files)


Looking into ->  India
/content/gdrive/MyDrive/dataset anemia/India  not found ->  ['.DS_Store', 'IndiaRd.xlsx']
Looking into ->  Italy
/content/gdrive/MyDrive/dataset anemia/Italy  not found ->  ['.DS_Store', 'Italyrd.xlsx']


In [88]:
for key, items in img_files.items():
    print(f"items for {key} -> {len(items)}")

items for India -> 95
items for Italy -> 123


In [89]:
img_files["India"][0]

('/content/gdrive/MyDrive/dataset anemia/India/95/20200318_130225.jpg',
 '/content/gdrive/MyDrive/dataset anemia/India/95')

In [90]:
data_dir = '/content/gdrive/MyDrive/dataset anemia/'
file1    = data_dir+'India/IndiaRd.xlsx'
file2    = data_dir+'Italy/Italyrd.xlsx'

In [91]:

labels = {}

d1 = pd.read_excel(file1)
d1["Anemia"] = d1["Note"]
d1 = d1[["Number", "Anemia" ]]
d1.replace("No anemia", "No Anemia", inplace=True)

d2 = pd.read_excel(file2)[["Number", "Anemia" ]]
d2.replace("No anemia", "No Anemia", inplace=True)

unique_values   = list(d1['Anemia'].unique())
enum            = {val:i for i, val in enumerate(unique_values)} 
print(enum)

d1['Anemia'].replace(enum, inplace=True)
d2['Anemia'].replace(enum, inplace=True)

labels["India"] = { row["Number"]:row["Anemia"] for index, row in d1.iterrows() }
labels["Italy"] = { row["Number"]:row["Anemia"] for index, row in d2.iterrows() }


{'No Anemia': 0, 'Anemia': 1}


# Read Images

In [92]:

X = []
y = []


for key, items in img_files.items():
    for item in items:
        img_path, cls = item
        cls           = cls.split("/")[-1]
        img           = cv2.imread(img_path)
        img           = cv2.resize(img, (64, 64))
        img           = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        kernel        = np.array([[-1,-1,-1], 
                                    [-1, 9,-1],
                                    [-1,-1,-1]])
        sharpened     = cv2.filter2D(img, -1, kernel)
        img           = cv2.GaussianBlur(sharpened, (5, 5), 0)
        img           = img.reshape(64, 64, 1)
        lbl           = labels[key][int(cls)]
        
        X.append(img)
        y.append(lbl)
        

In [95]:
print(f"len(X) -> {len(X)}")
print(f"len(y) -> {len(y)}")

len(X) -> 218
len(y) -> 218


In [96]:
y = [0 if i == 0 else 1 for i in y]

In [97]:
X = np.array(X)
X.shape

(218, 64, 64, 1)

In [98]:
set(y)

{0, 1}

In [175]:
number_classes = len(set(y))
number_classes

2

In [176]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, train_size=0.8, random_state=0)

# Augmentation

In [177]:
X_train.shape

(174, 64, 64, 1)

In [178]:
characters  = X_train
labels      = np.array(Y_train.copy()) 

In [179]:
datagen = ImageDataGenerator( width_shift_range=[-200,200],
    rotation_range=30,
    zoom_range=0.15,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest")



In [180]:
iterator = datagen.flow(characters, labels)

In [181]:
set(labels)

{0, 1}

In [182]:
label_counts = [(lbl, list(labels).count(lbl) ) for lbl in set(labels)]
label_counts

[(0, 102), (1, 72)]

In [183]:
augment_lbls = [0, 1]
NUM = 1500
sample_count = {item:0 for item in augment_lbls}
x_aug = []
y_aug = []
while True:
    x_tmp, y_tmp = iterator.next()
    for x, y in zip(x_tmp, y_tmp):
    if y in augment_lbls:
        if sample_count[y] < NUM:
            x_aug.append(x)
            y_aug.append(y)
            sample_count[y] += 1
    if sum([sample_count[key] >= NUM for key in sample_count.keys()]) == len(sample_count.keys()):
    break

In [184]:
set(y_aug)

{0, 1}

In [185]:
label_counts = [(lbl, y_aug.count(lbl) ) for lbl in set(y_aug)]
label_counts

[(0, 500), (1, 500)]

In [186]:
np.array(x_aug).shape

(1000, 64, 64, 1)

In [187]:
type(characters)

numpy.ndarray

In [188]:
characters.shape

(174, 64, 64, 1)

In [189]:
characters = list(characters)
characters.extend(x_aug)
characters = np.array(characters)
characters.shape

(1174, 64, 64, 1)

In [190]:
labels = list(labels)
labels.extend(y_aug)
len(labels)

1174

In [191]:
label_counts = [(lbl, labels.count(lbl) ) for lbl in set(labels)]
label_counts

[(0, 602), (1, 572)]

# Data Spliting

In [192]:
characters.shape

(1174, 64, 64, 1)

In [193]:
X = characters
Y = labels
number_classes = len(set(labels))

In [194]:

X_train, X_val, Y_train, Y_val= train_test_split(X, Y, train_size=0.8, random_state=0)

In [195]:
from keras.utils import np_utils

Y_train = keras.utils.np_utils.to_categorical(Y_train, number_classes)
Y_test = keras.utils.np_utils.to_categorical(Y_test, number_classes)
Y_val = keras.utils.np_utils.to_categorical(Y_val, number_classes)

#Y = keras.utils.to_categorical(Y, number_classes)

In [196]:
print(len(X_train))
print(len(X_val))
print(len(X_test))

939
235
44


# Create Model

In [123]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization

In [233]:

input_shape = X_test[0].shape
drop_rate= 0.30
k3=(3, 3)
k5=(5, 5)
k7= (7, 7)
model = Sequential(layers=[
    #1st Convolutional layer
    Conv2D(filters=256, kernel_size=k5, padding='same', activation='relu', input_shape=input_shape),  
    BatchNormalization(),
    MaxPooling2D(pool_size=(3, 3), strides=None, padding='valid', data_format=None),
    Dropout(rate=drop_rate),
    #2nd Convolutional layer
    Conv2D(filters=128, kernel_size=k3, padding='same', activation='relu'), 
    BatchNormalization(),
    Dropout(rate=drop_rate),
    #3rd Convolutional layer
    Conv2D(filters=128, kernel_size=k3, padding='same', activation='relu'), 
    BatchNormalization(),
    MaxPooling2D(pool_size=(3, 3), strides=None, padding='valid', data_format=None),
    Dropout(rate=drop_rate),
    
    #Flat layer
    Flatten(),
    
    #6th Layer
    Dense(4096, activation='relu'),
    BatchNormalization(),
    Dropout(drop_rate),
    
    Dense(number_classes, activation='softmax') 
])

In [234]:
model.compile(optimizer=Adam(learning_rate=.00001), loss='categorical_crossentropy', metrics=['accuracy'])

# Fit

In [244]:
!rm -r models
import os
os.makedirs('./models')

In [245]:
i =0
epochs = 10
train_loss = []
val_loss = []
val_acc = []
train_acc = []
while i <= epochs:
    print(i)
    hist = model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=1, shuffle=True, batch_size=32) #callbacks=[learning_rate_reduction]) 
    model.save_weights('./models/model'+str(i)+'.hd5')
    i = i+1
  #break

0
1
2
3
4
5
6
7
8
9
10


In [246]:

i = 0
precission, recall, f1_, test_acc, test_loss = ([] for i in range(5))
while i <= epochs:
   
    if i%1 == 0:
        model.load_weights('./models/model' + str(i) + '.hd5')
        predict_x=model.predict(X_test) 
        yp       =np.argmax(predict_x,axis=1) 
        #yp = model.predict_classes(X_test)
        ya = [np.where(p==1)[0][0] for p in Y_test]

        score = model.evaluate(X_test, Y_test)
        pre = precision_score(ya, yp, average='weighted')
        rec = recall_score(ya, yp, average='macro')
        f1 = f1_score(ya, yp, average='micro') 
        test_acc.append(score[1])
        test_loss.append(score[0])
        precission.append(pre)
        recall.append(recall)
        f1_.append(f1)
        print( str(i)+': Testing Score: '+str(score[1]*100) + ', Loss: ' + str(score[0]))
    i = i + 1

0: Testing Score: 75.0, Loss: 0.7206766605377197
1: Testing Score: 72.72727489471436, Loss: 0.8086675405502319
2: Testing Score: 70.45454382896423, Loss: 0.9012613892555237
3: Testing Score: 75.0, Loss: 0.7819459438323975
4: Testing Score: 72.72727489471436, Loss: 0.7795547842979431
5: Testing Score: 75.0, Loss: 0.7632985711097717
6: Testing Score: 75.0, Loss: 0.7388763427734375
7: Testing Score: 75.0, Loss: 0.8117219805717468
8: Testing Score: 75.0, Loss: 0.7850360870361328
9: Testing Score: 75.0, Loss: 0.7573883533477783
10: Testing Score: 70.45454382896423, Loss: 0.8849029541015625


In [219]:
print('Acc: ' + str(max(test_acc))+ ' at '+ str(np.argmax(test_acc)) )
print('loss: '+ str(min(test_loss))+ ' at '+ str(np.argmax(test_loss)) )

Acc: 0.8181818127632141 at 0
loss: 0.6473844647407532 at 10


In [210]:
model.load_weights('./models/model' + str(10) + '.hd5')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f200f75dcd0>

In [211]:
model.evaluate(X_test, Y_test)



[0.6131115555763245, 0.8409090638160706]

# Save Model For Later Use

In [212]:
model.save('model_palpebral_84_09.hd5')

INFO:tensorflow:Assets written to: model_palpebral_84_09.hd5/assets


In [None]:
print('Acc: ' + str(max(test_acc))+ ' at '+ str(np.argmax(test_acc)) )
print('loss: '+ str(min(test_loss))+ ' at '+ str(np.argmax(test_loss)) )

In [249]:
!zip -r model_org_84_09.zip model_org_84_09.hd5/

  adding: model_org_84_09.hd5/ (stored 0%)
  adding: model_org_84_09.hd5/keras_metadata.pb (deflated 93%)
  adding: model_org_84_09.hd5/variables/ (stored 0%)
  adding: model_org_84_09.hd5/variables/variables.data-00000-of-00001 (deflated 9%)
  adding: model_org_84_09.hd5/variables/variables.index (deflated 65%)
  adding: model_org_84_09.hd5/assets/ (stored 0%)
  adding: model_org_84_09.hd5/saved_model.pb (deflated 89%)


In [None]:
!mv model_org_84_09.zip /content/gdrive/MyDrive/

In [None]:
from tensorflow.keras.models import load_model

model1 = load_model('model_org_84_09.hd5')

In [None]:
model1.evaluate(X_test, Y_test)