<a href="https://colab.research.google.com/github/balling/ml-hackpack/blob/master/Chest_X_Ray.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# A bit of setup

First, change your runtime to GPU so that the network is trained faster.

![dropdown](https://github.com/balling/ml-hackpack/raw/master/img/dropdown.png =400x) ![settings](https://github.com/balling/ml-hackpack/raw/master/img/settings.png =400x)

Then, make sure you run the two cells below!

In [0]:
#@title Import the necessary libraries
import os
import numpy as np
import pandas as pd
from skimage.io import imread 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
from keras.utils import plot_model
from numpy.random import seed
from tensorflow import set_random_seed

In [0]:
#@title Download the images
!git clone https://github.com/balling/ml-hackpack.git

# Explore the Chest X-Ray Dataset


![xray](https://github.com/balling/ml-hackpack/blob/master/data/train/00000001_000.png?raw=true =300x)

In [10]:
# Read one sample X-ray; the second positional argument is imread's
# as-gray flag, so the result is a single 2-D array of pixel intensities.
sample_path = 'ml-hackpack/data/train/00000001_000.png'
image = imread(sample_path, True)

# Show the array dimensions first, then the (NumPy-abbreviated) pixel values.
print(image.shape, image, sep='\n')

(1024, 1024)
[[202 199 195 ...   5   2   0]
 [199 196 195 ...   5   2   0]
 [196 194 193 ...   5   2   0]
 ...
 [255 255 255 ...   0   0   0]
 [255 255 254 ...   0   0   0]
 [255 255 255 ...   0   0   0]]


# Let's start building the model!

In [12]:
# Upper bound on how many images get_training_data loads per call
# (it is called once for the train folder and once for the test folder).
NUM_IMG = 25

# Seed both NumPy's and TensorFlow's random number generators with the
# same fixed value so that repeated runs start from the same random state.
seed(42)
set_random_seed(42)

def get_training_data(train_path, labels_path, num_img=None):
	"""Load grayscale PNG images and their binary finding / no-finding labels.

	Images are taken in sorted file-name order so that the selected subset
	is the same on every run and machine (``os.listdir`` alone gives no
	ordering guarantee).

	Args:
		train_path: Directory that contains the ``.png`` images.
		labels_path: CSV file with a "Finding Labels" column. If it also has
			an "Image Index" column that lists the image file names
			(presumably the NIH ChestX-ray metadata layout -- TODO confirm
			for this repository's CSVs), labels are looked up by file name.
			Otherwise row ``i`` is used for the ``i``-th file in sorted order.
		num_img: Maximum number of images to load. Defaults to the
			module-level ``NUM_IMG``.

	Returns:
		A tuple ``(images, labels)``. ``images`` is a float32 array with a
		trailing single-channel axis, scaled by 1/255. ``labels`` is a float
		array with one entry per loaded image: 0 for 'No Finding', 1 for
		anything else.

	Raises:
		ValueError: If no PNG image is found, or if the label file has fewer
			rows than there are loaded images.
	"""
	if num_img is None:
		num_img = NUM_IMG

	# Sort before truncating: the raw directory order is arbitrary, which
	# would make both the chosen subset and any positional label match random.
	filenames = sorted(
		name for name in os.listdir(train_path) if name.endswith(".png")
	)[:num_img]
	if not filenames:
		raise ValueError("no .png images found in %r" % (train_path,))

	# Second positional argument is imread's as-gray flag (passed positionally
	# because the keyword name differs between scikit-image releases).
	features = [
		np.asarray(imread(os.path.join(train_path, name), True))
		for name in filenames
	]

	labels_df = pd.read_csv(labels_path)
	if "Image Index" in labels_df.columns:
		finding_by_name = dict(zip(labels_df["Image Index"], labels_df["Finding Labels"]))
	else:
		finding_by_name = {}

	if all(name in finding_by_name for name in filenames):
		# Preferred: pair every image with the label row that names it.
		findings = [finding_by_name[name] for name in filenames]
	else:
		# Fallback: positional pairing against the sorted file list.
		findings = list(labels_df["Finding Labels"].iloc[:len(filenames)])
		if len(findings) < len(filenames):
			raise ValueError(
				"%r has %d label rows but %d images were loaded"
				% (labels_path, len(findings), len(filenames))
			)

	# 0 for no finding, 1 for finding; sized by the images actually loaded
	# so that the image and label arrays always have matching lengths.
	labels = np.array([0.0 if finding == 'No Finding' else 1.0 for finding in findings])

	images = np.expand_dims(np.array(features), axis=3).astype('float32') / 255 # adding single channel
	return images, labels
	
# Load the training and test subsets (images plus 0/1 labels).
X_train, y_train = get_training_data("ml-hackpack/data/train/", "ml-hackpack/data/train-labels.csv")
X_test, y_test = get_training_data("ml-hackpack/data/test/", "ml-hackpack/data/test-labels.csv")

# A small CNN: strided convolution -> max pooling -> strided convolution,
# then a dense hidden layer and a single sigmoid unit for the binary output.
model = Sequential()

model.add(Conv2D(4, (3, 3), strides=(2,2), activation='relu', input_shape=(1024, 1024, 1), data_format='channels_last'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(4, (3, 3), strides=(2,2), activation='relu'))
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# `epochs` is the Keras 2 name for this argument; the Keras 1 spelling
# `nb_epoch` is deprecated and rejected by later releases.
model.fit(X_train, y_train, batch_size=8, epochs=10, verbose=1)

# With metrics=['accuracy'], evaluate returns [loss, accuracy].
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss: %.4f, test accuracy: %.4f' % (score[0], score[1]))




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
