<a href="https://colab.research.google.com/github/arymaina/ML/blob/master/lungs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from tqdm import tqdm

In [0]:
# Root folder of the training images; it holds one sub-folder per entry in CATEGORIES.
DATADIR = "/content/drive/My Drive/train"
# Class names. The list index of a name is used as its integer label (0=PNEUMONIA, 1=NORMAL).
CATEGORIES = ["PNEUMONIA", "NORMAL"]

# Every image is resized to IMG_SIZE x IMG_SIZE pixels before being stored.
IMG_SIZE = 150

# Filled by create_training_data() with [image_array, class_num] pairs.
training_data = []

def create_training_data():
    """Load every training image into the module-level ``training_data`` list.

    For each class folder ``DATADIR/<category>`` the files are read as
    grayscale, resized to ``IMG_SIZE`` x ``IMG_SIZE`` and appended as
    ``[image_array, class_num]``, where ``class_num`` is the category's index
    in ``CATEGORIES`` (0=PNEUMONIA, 1=NORMAL).

    Loading stays best-effort: a file that cannot be decoded is skipped.
    The skips are counted and summarised at the end instead of being
    swallowed silently, so a systematically broken dataset is noticed.
    """
    skipped = []  # (file path, reason) for every file that was not loaded

    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(DATADIR, category)

        for img in tqdm(os.listdir(path)):  # iterate over each image of this class
            img_path = os.path.join(path, img)
            try:
                img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img_array is None:
                    # cv2.imread signals an unreadable / non-image file by
                    # returning None; handle it here instead of letting
                    # cv2.resize fail on it.
                    skipped.append((img_path, "unreadable image"))
                    continue
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
            except (cv2.error, OSError) as e:
                skipped.append((img_path, e))
                continue
            training_data.append([new_array, class_num])

    if skipped:
        print("Skipped {} file(s) that could not be loaded; first: {} ({})".format(
            len(skipped), skipped[0][0], skipped[0][1]))

# Fill training_data from the class folders under DATADIR.
create_training_data()

# Number of [image_array, class_num] pairs now held in training_data.
print(len(training_data))

100%|██████████| 3875/3875 [00:34<00:00, 111.26it/s]
100%|██████████| 1342/1342 [00:30<00:00, 44.72it/s]

5216





In [0]:
import random

# The loader appends one whole class after the other, so shuffle in place
# to interleave the two classes.
random.shuffle(training_data)

# Peek at the first ten labels after shuffling.
for entry in training_data[:10]:
    print(entry[1])

# Separate the [image_array, class_num] pairs into images (X) and labels (y).
X = [features for features, label in training_data]
y = [label for features, label in training_data]

print(X[0].reshape(-1, IMG_SIZE, IMG_SIZE, 1))

# Stack the images into one array with a trailing channel axis of size 1.
X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)

In [0]:
import pickle

# Persist the image array and the labels to the working directory.
# `with` closes each file even if dump/load raises; the read handles in
# particular used to be left open.
with open("X.pickle", "wb") as pickle_out:
    pickle.dump(X, pickle_out)

with open("y.pickle", "wb") as pickle_out:
    pickle.dump(y, pickle_out)

# Read both files back.
# NOTE: pickle.load executes arbitrary code from the file; only load
# pickles that this notebook wrote itself.
with open("X.pickle", "rb") as pickle_in:
    X = pickle.load(pickle_in)

with open("y.pickle", "rb") as pickle_in:
    y = pickle.load(pickle_in)

In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
# more info on callbakcs: https://keras.io/callbacks/ model saver is cool too.
from tensorflow.keras.callbacks import TensorBoard

import time

In [0]:
# Reload the arrays saved as X.pickle / y.pickle. `with` makes sure the
# file handles are closed again (they used to be left open).
# NOTE: pickle.load executes arbitrary code from the file; only load
# pickles that this notebook wrote itself.
with open("X.pickle", "rb") as pickle_in:
    X = pickle.load(pickle_in)

with open("y.pickle", "rb") as pickle_in:
    y = pickle.load(pickle_in)

# Scale the pixel values by 1/255.
X = X/255.0

X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)

# Labels as an array, as passed to model.fit below.
y = np.array(y)

# Number of hidden dense layers to try in the model grid.
dense_layers = [0]

In [0]:
layer_sizes = [64]
conv_layers = [3]

# Grid over (dense layers, layer width, conv blocks). Every combination is
# built, compiled and trained, and logs to TensorBoard under its own NAME.
for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:
            NAME = "{}-conv-{}-nodes-{}-dense-{}".format(conv_layer, layer_size, dense_layer, int(time.time()))
            print(NAME)

            model = Sequential()

            # Conv -> ReLU -> MaxPool blocks. One block is always built, and
            # only the first block declares the input shape.
            for block_idx in range(max(conv_layer, 1)):
                if block_idx == 0:
                    conv = Conv2D(layer_size, (3, 3), input_shape=X.shape[1:])
                else:
                    conv = Conv2D(layer_size, (3, 3))
                model.add(conv)
                model.add(Activation('relu'))
                model.add(MaxPooling2D(pool_size=(2, 2)))

            model.add(Flatten())

            # Hidden dense layers (none when dense_layer == 0).
            for _ in range(dense_layer):
                model.add(Dense(layer_size))
                model.add(Activation('relu'))

            # Single sigmoid unit for the binary label.
            model.add(Dense(1))
            model.add(Activation('sigmoid'))

            model.compile(
                loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'],
            )

            tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

            # 30% of the samples are held out for validation.
            model.fit(
                X,
                y,
                batch_size=32,
                epochs=25,
                validation_split=0.3,
                callbacks=[tensorboard],
            )

3-conv-64-nodes-0-dense-1587233258
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [0]:
# Root folder of the test images; it holds one sub-folder per entry in CATEGORIES.
TEST_DATADIR = "/content/drive/My Drive/test"
# Filled by create_test_data() with [image_array, class_num] pairs.
test_data = []
def create_test_data():
    """Load every test image into the module-level ``test_data`` list.

    For each class folder ``TEST_DATADIR/<category>`` the files are read as
    grayscale, resized to ``IMG_SIZE`` x ``IMG_SIZE`` and appended as
    ``[image_array, class_num]``, where ``class_num`` is the category's index
    in ``CATEGORIES`` (0=PNEUMONIA, 1=NORMAL).

    Loading stays best-effort: a file that cannot be decoded is skipped.
    The skips are counted and summarised at the end instead of being
    swallowed silently, so a systematically broken dataset is noticed.
    """
    skipped = []  # (file path, reason) for every file that was not loaded

    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(TEST_DATADIR, category)

        for img in tqdm(os.listdir(path)):  # iterate over each image of this class
            img_path = os.path.join(path, img)
            try:
                img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img_array is None:
                    # cv2.imread signals an unreadable / non-image file by
                    # returning None; handle it here instead of letting
                    # cv2.resize fail on it.
                    skipped.append((img_path, "unreadable image"))
                    continue
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
            except (cv2.error, OSError) as e:
                skipped.append((img_path, e))
                continue
            test_data.append([new_array, class_num])

    if skipped:
        print("Skipped {} file(s) that could not be loaded; first: {} ({})".format(
            len(skipped), skipped[0][0], skipped[0][1]))

# Fill test_data from the class folders under TEST_DATADIR.
create_test_data()

100%|██████████| 390/390 [00:02<00:00, 152.12it/s]
100%|██████████| 234/234 [00:03<00:00, 69.48it/s]


In [0]:
# Separate the [image_array, class_num] test pairs into images (Z_) and labels (W_).
Z_ = np.array([features for features, label in test_data])
W_ = np.array([label for features, label in test_data])

# Same preprocessing as for the training images: add the channel axis and
# scale the pixel values by 1/255.
Z_ = Z_.reshape(-1, IMG_SIZE, IMG_SIZE, 1) / 255.0

# Displays [loss, accuracy] on the test set.
model.evaluate(x=Z_, y=W_, batch_size=32)



[3.15144944190979, 0.7467948794364929]

In [0]:
 # Per-class folders of the test set.
 path = '/content/drive/My Drive/test/PNEUMONIA/'
 path2 = '/content/drive/My Drive/test/NORMAL/'
 # NOTE: this rebinds test_data. The labelled [image_array, class_num] list
 # built by create_test_data() is replaced by a list that will hold only the
 # arrays returned by prepare() for the PNEUMONIA folder.
 test_data = []
 # Arrays returned by prepare() for the NORMAL folder.
 test_data2 = []
 def prepare(filepath):
    """Read one image file and shape it as a single-sample batch.

    The file is loaded as grayscale, resized to 150x150 and returned with
    shape ``(1, 150, 150, 1)``.

    Raises:
        ValueError: if OpenCV cannot read ``filepath`` as an image.

    NOTE(review): the pixel values are returned unscaled, whereas the
    training and evaluation cells divide by 255.0 -- apply the same scaling
    before feeding the result to the model.
    """
    IMG_SIZE = 150

    img_array = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
    if img_array is None:
        # cv2.imread returns None for unreadable files; fail here with the
        # offending path rather than with an opaque error inside cv2.resize.
        raise ValueError("could not read image: {}".format(filepath))
    new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
    return new_array.reshape(-1, IMG_SIZE, IMG_SIZE, 1)

 # Run prepare() over every file of both test folders: PNEUMONIA results go
 # to test_data, NORMAL results to test_data2. One loop serves both folders.
 for folder, bucket in ((path, test_data), (path2, test_data2)):
     failed = []  # (file name, error) for files prepare() could not handle
     for img in tqdm(os.listdir(folder)):  # iterate over each test image
         try:
             bucket.append(prepare(os.path.join(folder, img)))
         except Exception as e:
             # Best effort: keep going, but remember what was dropped so it
             # can be reported instead of vanishing silently.
             failed.append((img, e))
     if failed:
         print("Skipped {} file(s) in {}; first: {} ({})".format(
             len(failed), folder, failed[0][0], failed[0][1]))

100%|██████████| 390/390 [00:02<00:00, 148.12it/s]
100%|██████████| 234/234 [00:03<00:00, 67.41it/s]


In [0]:
# Display the prepared PNEUMONIA arrays collected in the previous cell.
# NOTE(review): this dumps the entire list into the cell output; consider
# showing only its length or a short slice.
test_data