In [1]:
import cv2
import random
import numpy as np
from os import listdir
from os.path import join
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow as imshow
from sklearn.model_selection import train_test_split as tts

from tensorflow.keras import Input
from tensorflow.keras.optimizers import Adam,SGD
from tensorflow.keras.layers import Flatten,Dense
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model,save_model
from tensorflow.keras.initializers import lecun_uniform

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!curl -o dataset.zip "ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip" && mv dataset.zip /content
!unzip -q -o dataset.zip
!mkdir /content/images/

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  337M  100  337M    0     0  32.8M      0  0:00:10  0:00:10 --:--:-- 40.2M
mv: 'dataset.zip' and '/content/dataset.zip' are the same file
mkdir: cannot create directory ‘/content/images/’: File exists


In [4]:
def isimage(source):
  exts=[".png",".jpg"]
  aux=source.lower()
  for ext in exts:
    if ext in aux:
      return True
  return False

def center_image(img,size):
  h, w, _ = img.shape
  back = np.zeros([size,size,3],'uint8')
  hh, ww, _ = back.shape
  yoff = round((hh-h)/2)
  xoff = round((ww-w)/2)
  back[yoff:yoff+h, xoff:xoff+w,:] = img
  return back

def create_image(source,max_size,channel):
  img = cv2.imread(source,cv2.COLOR_BGR2RGB)
  img=center_image(img,max_size)
  sums=np.sum(img,axis=2)
  x,y=np.shape(sums)
  for i in range(x):
    for j in range(y):
      if sums[i,j]==0:
        img[i,j,channel]=255
  cv2.destroyAllWindows
  return img

def load_X(files):
  X = [cv2.resize(cv2.imread(file,cv2.COLOR_BGR2RGB),(224,224)) for file in files]
  return np.asarray(X)/255

def load_y(y):
  return np.asarray(y)

def loader(X_train,y_train, batch_size):
    L = len(X_train)
    while True:
        batch_start = 0
        batch_end = batch_size
        while batch_start < L:
            limit = min(batch_end, L)
            X=load_X(X_train[batch_start:limit])
            y = load_y(y_train[batch_start:limit])

            yield (X,y) #a tuple with two numpy arrays with batch_size samples     

            batch_start += batch_size   
            batch_end += batch_size

def shuffled(X,y):
  data = list(zip(X, y))
  random.shuffle(data)
  return zip(*data)

def unbalanced(class_0,class_1,perc,total,test=2000):
  test=int(test/2)
  if perc>1:
    return None
  limit_0=int(perc*total)
  limit_1=total-limit_0
  if limit_0+test<=len(class_0) and limit_1+test<=len(class_1):
    X_train=class_0[:limit_0]+class_1[:limit_1]
    y_train=[0]*limit_0+[1]*limit_1
    X_train,y_train=shuffled(X_train,y_train)

    X_test=class_0[limit_0:limit_0+test]+class_1[limit_1:limit_1+test]
    y_test=[0]*test+[1]*test
    X_test,y_test=shuffled(X_test,y_test)

    return X_train,X_test,y_train,y_test
  else:
    return None

In [5]:
#0: uninfected
#1: parasitized
path_0="/content/cell_images/Uninfected"
path_1="/content/cell_images/Parasitized"

files=listdir(path_0)
imgs_0=[join(path_0,file) for file in files if isimage(file)]

files=listdir(path_1)
imgs_1=[join(path_1,file) for file in files if isimage(file)]

X_train, X_test, y_train, y_test=unbalanced(imgs_0,imgs_1,0.5,20000,2400)

#print(len(X_train),len(X_test),len(y_train),len(y_test))
#X_train, X_test, y_train, y_test = tts(X, y, test_size=0.1, random_state=42)

In [11]:
resnet =ResNet50(weights='imagenet')

output = resnet.layers[-3].output
initializer = lecun_uniform(seed=None)

x = Flatten()(output)
x = Dense(100, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)

model = Model(resnet.input, x)

opt = SGD(lr=0.001, momentum=0.9)
model.compile(optimizer=opt, loss='binary_crossentropy',metrics=['accuracy'])

In [12]:
number_of_samples=len(X_train)
batch_size=10
epochs=5
steps_per_epoch=int(number_of_samples/batch_size)

X_val=load_X(X_test)
y_val=load_y(y_test)

history=model.fit(
    loader(X_train,y_train,batch_size),
    steps_per_epoch=steps_per_epoch,
    validation_data=(X_val,y_val),
    epochs=epochs)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [8]:
save_model(model,'resnet.h5') #Guardamos el modelo