<a href="https://colab.research.google.com/github/ayandalab/Deep_Learning_Knee_X_ray/blob/main/Deep_Learning_Knee_X_ray.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [37]:
from google_drive_downloader import GoogleDriveDownloader as gdd

gdd.download_file_from_google_drive(file_id='1NdDqPK4NLn2aV8ZdF5ilux1sfG6IyebC',
dest_path='content/KneeXray.zip',
unzip=True)

In [38]:
import pandas as pd #
import numpy as np # 
import tensorflow as tf # 
import os # 
import cv2 # 
from sklearn.model_selection import train_test_split # 
from sklearn.metrics import f1_score

In [39]:
labels = pd.read_csv("/content/content/KneeXray/Train.csv") # loading the labels
labels.head() #

Unnamed: 0,filename,label
0,Image_1.jpg,0
1,Image_2.jpg,1
2,Image_3.jpg,0
3,Image_4.jpg,1
4,Image_5.jpg,2


In [40]:
labels.tail() #

Unnamed: 0,filename,label
7823,Image_7824.jpg,0
7824,Image_7825.jpg,4
7825,Image_7826.jpg,0
7826,Image_7827.jpg,2
7827,Image_7828.jpg,1


In [41]:
file_paths = [[fname, '/content/content/KneeXray/train/' + fname] for fname in labels['filename']]

In [42]:
# Confirm if number of images is same as number of labels given
if len(labels) == len(file_paths):
    print('Number of labels i.e. ', len(labels), 'matches the number of filenames i.e. ', len(file_paths))
else:
    print('Number of labels does not match the number of filenames')

Number of labels i.e.  7828 matches the number of filenames i.e.  7828


In [43]:
images = pd.DataFrame(file_paths, columns=['filename', 'filepaths'])
images.head()

Unnamed: 0,filename,filepaths
0,Image_1.jpg,/content/content/KneeXray/train/Image_1.jpg
1,Image_2.jpg,/content/content/KneeXray/train/Image_2.jpg
2,Image_3.jpg,/content/content/KneeXray/train/Image_3.jpg
3,Image_4.jpg,/content/content/KneeXray/train/Image_4.jpg
4,Image_5.jpg,/content/content/KneeXray/train/Image_5.jpg


In [44]:
train_data = pd.merge(images, labels, how = 'inner', on = 'filename')
train_data.head()

Unnamed: 0,filename,filepaths,label
0,Image_1.jpg,/content/content/KneeXray/train/Image_1.jpg,0
1,Image_2.jpg,/content/content/KneeXray/train/Image_2.jpg,1
2,Image_3.jpg,/content/content/KneeXray/train/Image_3.jpg,0
3,Image_4.jpg,/content/content/KneeXray/train/Image_4.jpg,1
4,Image_5.jpg,/content/content/KneeXray/train/Image_5.jpg,2


In [45]:
data = [] # initialize an empty numpy array
image_size = 100 # image size taken is 100 here. one can take other size too
for i in range(len(train_data)):

    img_array = cv2.imread(train_data['filepaths'][i], cv2.IMREAD_GRAYSCALE) # 

    new_img_array = cv2.resize(img_array, (image_size, image_size)) # 
    data.append([new_img_array, train_data['label'][i]])

In [46]:
# image pixels of a image
data[0]

[array([[114, 118, 125, ..., 113, 108, 102],
        [115, 119, 124, ..., 110, 108,  99],
        [113, 118, 122, ..., 109, 104, 100],
        ...,
        [ 82,  81,  83, ..., 111, 113, 110],
        [ 81,  80,  84, ..., 114, 114, 112],
        [ 82,  82,  85, ..., 119, 115, 114]], dtype=uint8), 0]

In [47]:
np.random.shuffle(data)

In [48]:
x = []
y = []
for image in data:
    x.append(image[0])
    y.append(image[1])

# converting x & y to numpy array as they are list
x = np.array(x)
y = np.array(y)

In [49]:
np.unique(y, return_counts=True)

(array([0, 1, 2, 3, 4]), array([3085, 1416, 2062, 1029,  236]))

In [50]:
x = x.reshape(-1, 100, 100, 1)

In [51]:
# split the data
X_train, X_val, y_train, y_val = train_test_split(x,y,test_size=0.3, random_state = 42)

In [52]:
cnn = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(100, 100, 1)),
tf.keras.layers.MaxPooling2D((2, 2)),

tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),

# tf.keras.layers.Flatten(input_shape=(100, 100, 1)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(64, activation='relu'),
tf.keras.layers.Dense(32, activation='relu'),
tf.keras.layers.Dense(16, activation='relu'),
tf.keras.layers.Dense(8, activation='relu'),
tf.keras.layers.Dense(5, activation='sigmoid')
])

In [53]:
cnn.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])

In [54]:
cnn.fit(X_train, y_train, epochs=15, batch_size=20)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7f2713ec7810>

In [55]:
cnn.evaluate(X_val, y_val)



[1.637315034866333, 0.3929331600666046]

In [56]:
# Loading the order of the image's name that has been provided
test_image_order = pd.read_csv("/content/content/KneeXray/Test.csv")
test_image_order.head()

Unnamed: 0,filename
0,Image_1.jpg
1,Image_2.jpg
2,Image_3.jpg
3,Image_4.jpg
4,Image_5.jpg


In [57]:
file_paths = [[fname, '/content/content/KneeXray/test/' + fname] for fname in test_image_order['filename']]

In [58]:
# Confirm if number of images is same as number of labels given
if len(test_image_order) == len(file_paths):
    print('Number of image names i.e. ', len(test_image_order), 'matches the number of file paths i.e. ', len(file_paths))
else:
    print('Number of image names does not match the number of filepaths')

Number of image names i.e.  1958 matches the number of file paths i.e.  1958


In [59]:
test_images = pd.DataFrame(file_paths, columns=['filename', 'filepaths'])
test_images.head()

Unnamed: 0,filename,filepaths
0,Image_1.jpg,/content/content/KneeXray/test/Image_1.jpg
1,Image_2.jpg,/content/content/KneeXray/test/Image_2.jpg
2,Image_3.jpg,/content/content/KneeXray/test/Image_3.jpg
3,Image_4.jpg,/content/content/KneeXray/test/Image_4.jpg
4,Image_5.jpg,/content/content/KneeXray/test/Image_5.jpg


In [60]:
test_pixel_data = [] # initialize an empty numpy array
image_size = 100 # image size taken is 100 here. one can take other size too
for i in range(len(test_images)):

    img_array = cv2.imread(test_images['filepaths'][i], cv2.IMREAD_GRAYSCALE) # converting the image to gray scale

    new_img_array = cv2.resize(img_array, (image_size, image_size)) # resizing the image array

    test_pixel_data.append(new_img_array)

In [61]:
test_pixel_data = np.array(test_pixel_data)

In [62]:
test_pixel_data = test_pixel_data.reshape(-1, 100, 100, 1)

In [63]:
pred = cnn.predict(test_pixel_data)

In [64]:
# The predicted values are the probabilities value
pred[0]

array([0.6728878 , 0.4839225 , 0.58772254, 0.40800232, 0.13510704],
      dtype=float32)

In [65]:
prediction = []
for value in pred:
    prediction.append(np.argmax(value))

In [66]:
prediction[0]

0

In [67]:
res = pd.DataFrame({'filename': test_images['filename'], 'label': prediction}) # prediction is nothing but the final predictions of your model on input features of your new unseen test data
res.to_csv("submission.csv", index = False)

# To download the csv file locally
from google.colab import files
files.download('submission.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>