In [1]:
import pandas as pd 
from PIL import Image
import cv2                 
import numpy as np         
import os                  
from resizeimage import resizeimage
print(os.listdir("Documents/chest_xray"))

['.DS_Store', 'test', 'train', 'val']


In [2]:
print(os.listdir("Documents/chest_xray/train"))
print(os.listdir("Documents/chest_xray/test"))

['.DS_Store', 'NORMAL', 'PNEUMONIA']
['.DS_Store', 'NORMAL', 'PNEUMONIA']


In [3]:
TRAIN_DIR = "Documents/chest_xray/train/"
TEST_DIR = "Documents/chest_xray/test/"

In [4]:
def get_label(Dir):
    for nextdir in os.listdir(Dir):
        if not nextdir.startswith('.'):
            if nextdir in ['NORMAL']:
                label = 0
            elif nextdir in ['PNEUMONIA']:
                label = 1
            else:
                label = 2
    return nextdir, label

In [5]:
def preprocessing_data(Dir):
    X = []
    y = []
    
    for nextdir in os.listdir(Dir):
        nextdir, label = get_label(Dir)
        temp = Dir + nextdir
        
        for image_filename in tqdm(os.listdir(temp)):
            path = os.path.join(temp + '/' , image_filename)
            img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
           
            if img is not None:
                print(img.shape)
                img = skimage.transform.resize(img, (50, 50, 3))
                img = np.asarray(img)
                X.append(img)
                y.append(label)
            
    X = np.asarray(X)
    y = np.asarray(y)
    
    return X,y

In [6]:
#X_train, y_train = preprocessing_data(TRAIN_DIR)

In [20]:
def get_data(Dir):
    X = []
    y = []
    for nextDir in os.listdir(Dir):
        if not nextDir.startswith('.'):
            if nextDir in ['NORMAL']:
                label = 0
            elif nextDir in ['PNEUMONIA']:
                label = 1
            else:
                label = 2
                
            temp = Dir + nextDir
                
            for file in os.listdir(temp):
               # img = cv2.imread(temp + '/' + file)
               # if img is not None:
                if file != '.DS_Store':
                    with open(temp + '/' + file, 'r+b') as f:
                        with Image.open(f) as img:
                            if len(np.array(img).shape) == 2:
                                img = resizeimage.resize_cover(img, [100,100])
                                img = np.asarray(img)
                                X.append(img)
                                y.append(label)
                    
    X = np.asarray(X)
    y = np.asarray(y)
    return X,y

In [21]:
X_train, y_train = get_data(TRAIN_DIR)

In [22]:
X_test, y_test = get_data (TEST_DIR)

In [23]:
print(X_train.shape,'\n', X_test.shape)

(4933, 100, 100) 
 (624, 100, 100)


In [24]:
print(y_train.shape,'\n',y_test.shape)

(4933,) 
 (624,)


In [25]:
y_train = y_train.reshape(y_train.shape[0], 1)

y_test = y_test.reshape(y_test.shape[0], 1)

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1] * X_train.shape[2])

X_test = X_test.reshape(X_test.shape[0], X_test.shape[1] * X_test.shape[2])

In [26]:
print(y_train.shape,'\n',y_test.shape)
print(X_train.shape,'\n', X_test.shape)

(4933, 1) 
 (624, 1)
(4933, 10000) 
 (624, 10000)


In [27]:
X_train = X_train / 255
y_train = y_train / 255
X_test = X_test / 255
y_test = y_test / 255

In [28]:
#The activation function
def sigmoid (x):
    activation = 1 / (1 + np.exp(-x))
    
    return activation

In [29]:
#This initialize w and b
def initialize_parameters(dim):
    w = (np.random.rand(dim, 1) - 0.5) * 0.01 
    b = 0
    
    return w, b

In [30]:
#Forward Propogration
def forward_propogration(w, b, X, Y):
    Z = np.dot(X, w) + b
    A = sigmoid(Z)
        
    return Z, A

In [31]:
#Calculate the cost
def cost(A, Y, m):
    J = (-1/m) * np.sum((Y * np.log(A)) + (1-Y) * np.log(1-A))
    
    return J

In [32]:
#Backward Propogration
def backward_propogration(A, Y, X, m):
    dz = A - Y
    dw = (1/m) * np.dot(X.T, dz)
    db = (1/m) * np.sum(dz, axis = 0, keepdims = True)
    
    return dz, dw, db

In [33]:
#Gradient Descent
def grads (w, b, dw, db, learning_rate = 0.05):
    w_new = w - learning_rate * dw
    b_new = b - learning_rate * db
    
    return w_new, b_new

In [34]:
#Predictions
def predict(w, b, X, Y):
    y_prediction = np.zeros((X.shape[0], 1))
    A = sigmoid(np.dot(X, w) + b)
    
    for i in range(X.shape[0]):
        if A[i,0] < 0.5:
            y_prediction[i, 0] = 0
        else:
            y_prediction[i, 0] = 1
            
    return y_prediction

In [35]:
#Test Run of Neural Network
w,b = initialize_parameters(X_train.shape[1])
m = y_train.shape[0]
learning_rate = 0.00001

for i in range(500):
    Z, A = forward_propogration(w, b, X_train, y_train)

    J = cost(A, y_train, m)
    
    dz, dw, db = backward_propogration(A, y_train, X_train, m)
    
    w = w - (learning_rate * dw)
    b = b - (learning_rate * db)

    if i%10 == 0:
        print("Cost after iteration " + str(i) + ": " + str(J))
               
Y_prediction_test = predict(w, b, X_test, y_test)
Y_prediction_train = predict(w, b, X_train, y_train)

print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - y_train)) * 100))
print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - y_test)) * 100))

Cost after iteration 0: 0.6457844622312814
Cost after iteration 10: 0.5762609499518568
Cost after iteration 20: 0.5175734757746663
Cost after iteration 30: 0.4678365580399438
Cost after iteration 40: 0.42547206067046195
Cost after iteration 50: 0.38918143977731856
Cost after iteration 60: 0.357906736838542
Cost after iteration 70: 0.33079016002876016
Cost after iteration 80: 0.3071370996891244
Cost after iteration 90: 0.2863844174404284
Cost after iteration 100: 0.26807425307795163
Cost after iteration 110: 0.2518328599872865
Cost after iteration 120: 0.23735371978977368
Cost after iteration 130: 0.22438416189353555
Cost after iteration 140: 0.21271479045610547
Cost after iteration 150: 0.20217113125273958
Cost after iteration 160: 0.1926070216585242
Cost after iteration 170: 0.18389936508682486
Cost after iteration 180: 0.1759439529222776
Cost after iteration 190: 0.16865212266177884
Cost after iteration 200: 0.16194807270567396
Cost after iteration 210: 0.15576669450089065
Cost after