In [3]:
import h5py
import numpy as np
import random
from tqdm import tqdm
import cv2
import os
# --- Dataset locations -------------------------------------------------
# Root folder holding the image sub-folders, and the HDF5 file built from it.
imgdir = r'C:\Science Research\xraynet_data\images\images'
h5path = r'C:\Science Research\xraynet_data\images\images\dataset.h5'

# --- Input image dimensions (pixels) -----------------------------------
img_rows = 128
img_cols = 128

# --- Training hyperparameters ------------------------------------------
num_classes = 2   # number of output categories
epochs = 24       # number of training epochs
batch_size = 256  # samples per batch

In [2]:
# Build the HDF5 dataset once. The guard checks the same path the builder
# writes to (h5path), so the check does not depend on the kernel's current
# working directory.
if not os.path.exists(h5path):
    # Imported here because it is only needed when the dataset must be built.
    from tflearn.data_utils import build_hdf5_image_dataset  # creates data and labels from the given images

    # Delete the hidden .DS_Store file from each sub-folder of imgdir.
    # Each sub-folder is handled independently so one missing or locked file
    # does not stop the cleanup of the remaining folders.
    for sub in os.listdir(imgdir):
        ds_store = os.path.join(imgdir, sub, '.DS_Store')
        if not os.path.isfile(ds_store):
            continue
        try:
            os.remove(ds_store)
        except OSError as err:
            # Best effort: report the problem instead of silently hiding it.
            print('Could not remove {}: {}'.format(ds_store, err))

    build_hdf5_image_dataset(imgdir,  # create the dataset from the images in imgdir
                             image_shape=(128, 128),  # resize all images to a uniform size for training
                             mode='folder',  # images are organised in folders
                             output_path=h5path,  # save the data to this file
                             categorical_labels=True,  # categorical labels, one per category (normal / abnormal)
                             grayscale=True,  # convert images to grayscale to drop unnecessary colour information
                             files_extension=['.png'],  # only pick up PNG files
                             normalize=True)  # normalize the pixel values

    ## Only needs to run once; later runs take the else branch below ##
else:
    print('H5 already exists')

curses is not supported on this machine (please install/reinstall curses for an optimal experience)


In [39]:
##LOAD IMAGES##
import h5py
import numpy as np  # library for scientific computing
from tflearn.data_utils import shuffle

# Number of samples at the end of the HDF5 file that are left out.
# NOTE(review): presumably chosen so both classes have equal counts — confirm.
n_excluded = 8704

# Read the arrays inside a context manager so the file handle is always
# closed; slicing an h5py dataset copies the selected rows into memory, so
# `data` and `labels` stay valid after the file is closed.
with h5py.File(h5path, 'r') as h5f:  # open the dataset file read-only
    data = h5f['X'][:-n_excluded]  # images
    labels = h5f['Y'][:-n_excluded]  # labels

data, labels = shuffle(data, labels)  # shuffle data and labels in parallel

In [41]:
import collections  # imported here so this cell runs on a fresh kernel

# Count how often each value occurs in the first column of the label rows.
collections.Counter(label[0] for label in labels)

Counter({0.0: 51708, 1.0: 51708})

# [0,1] == Normal

# [1,0] == Abnormal

# [Abnormalness, Normalness]

In [None]:
##CONSTRUCTING THE NETWORK##

# Import the layers needed for a TFLearn CNN.
from tflearn.layers.core import input_data, fully_connected, dropout, reshape
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression

# Input layer, shaped like the images fed to the model (128x128, no channel axis).
# NOTE(review): assumes the grayscale dataset stores X as (N, 128, 128) — confirm.
net = input_data(shape=(None, 128, 128))

# conv_2d requires a 4-D input (batch, height, width, channels), so add an
# explicit single-channel axis before the first convolution.
net = reshape(net, [-1, 128, 128, 1])

# Five convolution + max-pool stages. Each convolution uses 3x3 filters with a
# rectified linear activation and L2 weight regularization; the number of
# filters doubles at every stage. Each 2x2 max-pool keeps the maximum value of
# the area covered by its window.
for n_filters in (32, 64, 128, 256, 512):
    net = conv_2d(net, n_filters, 3, activation='relu', regularizer='L2')
    net = max_pool_2d(net, 2)

net = fully_connected(net, 1024, activation='relu')  # dense layer with 1024 units
net = dropout(net, 0.8)  # keep 80% of this layer's outputs (0.8 is the keep probability)
# Dropout helps reduce overfitting (the network memorizing the dataset).

# Output layer: one unit per class (normal / abnormal). Softmax turns the two
# outputs into class probabilities.
net = fully_connected(net, 2, activation='softmax')

# Training configuration: Adam optimizer with a small learning rate, minimizing
# categorical cross-entropy between the softmax outputs and the one-hot labels.
net = regression(net, optimizer='adam', learning_rate=1e-4,
                 loss='categorical_crossentropy')

In [None]:
##SETUP THE TRAINING##

from tflearn.models import dnn  # module that provides the deep neural network model class

# Wrap the network graph in a model object and tell it where to store its
# TensorBoard logs and how verbose those logs should be.
model = dnn.DNN(
    net,
    tensorboard_dir='log',
    tensorboard_verbose=3,
)

In [None]:
##TRAIN##
# Train the model on the data and corresponding labels. The epoch count and
# batch size come from the notebook's configuration values instead of being
# hard-coded here (batch_size was previously declared but never passed, so
# the library default was used).
model.fit(data, labels,
          n_epoch=epochs,
          batch_size=batch_size,
          validation_set=.1,  # hold out 10% of the samples for validation
          # NOTE(review): which 10% is held out is decided by TFLearn — confirm.
          snapshot_step=200,  # snapshot interval, in training steps
          show_metric=True,  # show the metric while training
          run_id='italiaNet')  # name of this run