<a href="https://colab.research.google.com/github/jimtete/pneumonia-detection-nov21/blob/main/ResNet-50.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### In this notebook we preprocess the data by padding each image to a square and resizing it to 224×224

In [None]:
# Mount Google Drive into the Colab runtime so the dataset folders are reachable.
from google.colab import drive
drive.mount('/content/gdrive')
# Make the project folder the working directory; the relative paths and %cd
# commands in the following cells are resolved against it.
%cd /content/gdrive/MyDrive/Machine\ Learning\ 2021

Mounted at /content/gdrive
/content/gdrive/MyDrive/Machine Learning 2021


In [None]:
import numpy as np
from PIL import Image, ImageDraw
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
import pandas as pd
import itertools
import os
import shutil
import random
import glob
import matplotlib.pyplot as plt
import warnings
import sys

np.set_printoptions(threshold=sys.maxsize)
warnings.simplefilter(action='ignore', category=FutureWarning)
%matplotlib inline



In [None]:
# List the GPUs TensorFlow can see and enable memory growth on each of them.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))
# Iterate instead of indexing [0]: on a CPU-only runtime the list is empty and
# physical_devices[0] would raise IndexError. With several GPUs, every device
# gets the same setting.
for gpu in physical_devices:
  tf.config.experimental.set_memory_growth(gpu, True)

Num GPUs Available:  1


# **Data preparation**

In [None]:
# Folders holding the raw images, relative to the project folder.
train_path, test_path = (
    "train_images/train_images",
    "test_images/test_images",
)


### **Choose your desired size**
For example: ```size = 256,256``` (the cells below use `224,224`, which matches the model's `(224,224,3)` input)

In [None]:
# Target (width, height) passed to Image.resize for every padded image.
size = (224, 224)


# **Adding padding to X-ray images and resizing them**

In [None]:
%cd $train_path

imageNamesList = []

k = 0;
for file_name in glob.glob("*.jpg"):
  img = Image.open(file_name)

  ##Adding padding to the x-ray
  width, height = img.size
  top,left = 0,0

  if width>height:
    newHeight,newWidth = width,width
    top = (int)((width-height)/2)
  else:
    newHeight,newWidth = height,height
    left = (int)((height-width)/2)

  result = Image.new(img.mode, (newWidth, newHeight))
  result.paste(img,(left,top))
  img = result.resize(size)


  img.save("../../Data/train_images/"+file_name)

  k = k+1;

  if(k % 50 ==0):
    print("Finished no {} out of 4672".format(k))
  


%cd ../..

/content/gdrive/My Drive/Machine Learning 2021/train_images/train_images


# **Save train images into x_train and y_train**

In [None]:
%cd Data
data = pd.read_csv("labels.csv")['file_name']
y_train = pd.read_csv("labels.csv")['class_id'].to_numpy()

%cd train_images
x_train = np.zeros(((4672,224,224,3)))
i=0;
for file_name in data:
  temp = Image.open(file_name).convert("RGB")
  x_train[i] = np.array(temp)
  print("%.2f" % round(((i/4672)*100), 2),"% done...")
  i+=1




/content/gdrive/MyDrive/Machine Learning 2021/Data
/content/gdrive/MyDrive/Machine Learning 2021/Data/train_images
0.00 % done...
0.02 % done...
0.04 % done...
0.06 % done...
0.09 % done...
0.11 % done...
0.13 % done...
0.15 % done...
0.17 % done...
0.19 % done...
0.21 % done...
0.24 % done...
0.26 % done...
0.28 % done...
0.30 % done...
0.32 % done...
0.34 % done...
0.36 % done...
0.39 % done...
0.41 % done...
0.43 % done...
0.45 % done...
0.47 % done...
0.49 % done...
0.51 % done...
0.54 % done...
0.56 % done...
0.58 % done...
0.60 % done...
0.62 % done...
0.64 % done...
0.66 % done...
0.68 % done...
0.71 % done...
0.73 % done...
0.75 % done...
0.77 % done...
0.79 % done...
0.81 % done...
0.83 % done...
0.86 % done...
0.88 % done...
0.90 % done...
0.92 % done...
0.94 % done...
0.96 % done...
0.98 % done...
1.01 % done...
1.03 % done...
1.05 % done...
1.07 % done...
1.09 % done...
1.11 % done...
1.13 % done...
1.16 % done...
1.18 % done...
1.20 % done...
1.22 % done...
1.24 % done...


In [None]:
%cd ../test_images

test_image_name_list = []
x_test = np.zeros(((1168,224,224,3)))
i = 0
for file_name in glob.glob("*.jpg"):
  test_image_name_list.append(file_name)
  temp = Image.open(file_name).convert("RGB")
  x_test[i] = np.array(temp)
  print("%.2f" % round(((i/1168)*100), 2),"% done...")
  i+=1

/content/gdrive/My Drive/Machine Learning 2021/Data/test_images
0.00 % done...
0.09 % done...
0.17 % done...
0.26 % done...
0.34 % done...
0.43 % done...
0.51 % done...
0.60 % done...
0.68 % done...
0.77 % done...
0.86 % done...
0.94 % done...
1.03 % done...
1.11 % done...
1.20 % done...
1.28 % done...
1.37 % done...
1.46 % done...
1.54 % done...
1.63 % done...
1.71 % done...
1.80 % done...
1.88 % done...
1.97 % done...
2.05 % done...
2.14 % done...
2.23 % done...
2.31 % done...
2.40 % done...
2.48 % done...
2.57 % done...
2.65 % done...
2.74 % done...
2.83 % done...
2.91 % done...
3.00 % done...
3.08 % done...
3.17 % done...
3.25 % done...
3.34 % done...
3.42 % done...
3.51 % done...
3.60 % done...
3.68 % done...
3.77 % done...
3.85 % done...
3.94 % done...
4.02 % done...
4.11 % done...
4.20 % done...
4.28 % done...
4.37 % done...
4.45 % done...
4.54 % done...
4.62 % done...
4.71 % done...
4.79 % done...
4.88 % done...
4.97 % done...
5.05 % done...
5.14 % done...
5.22 % done...
5.31 %

# **Check the shape of the arrays**

In [None]:
# One-hot encode the integer class ids (3 classes). The ndim guard keeps this cell
# re-runnable: encoding an already one-hot (N, 3) array a second time would
# produce an (N, 3, 3) array and break training.
if y_train.ndim == 1:
  y_train = tf.keras.utils.to_categorical(y_train, num_classes=3)

# Sanity-check that images, labels and file names line up.
print("x_train shape: {}".format(x_train.shape))
print("y_train shape: {}".format(y_train.shape))
print("x_test shape: {}".format(x_test.shape))
print("test image name list length: {}".format(len(test_image_name_list)))

x_train shape: (4672, 224, 224, 3)
y_train shape: (4672, 3)
x_test shape: (1168, 224, 224, 3)
test image name list length: 1168


In [None]:
import tensorflow.keras.backend as Ke
# Reset Keras' global state and drop any model left over from a previous run.
Ke.clear_session()
try:
    del model
    print('Model deleted')
except NameError:
    # `model` is not defined yet (e.g. fresh kernel). Only this case is expected;
    # a bare `except:` would also hide unrelated errors.
    print('No model to delete')

No model to delete


In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

In [None]:
import tensorflow.keras as K

# ResNet-50 backbone with ImageNet weights and no classification head,
# wired to a 224x224x3 input tensor.
input_t = K.Input(shape=(224, 224, 3))
res_model = K.applications.ResNet50(
    input_tensor=input_t,
    weights="imagenet",
    include_top=False,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
import tensorflow as tf

to_res = (224, 224)

# Classifier: resize to `to_res` -> ResNet-50 backbone -> flatten -> 3-way softmax.
# NOTE(review): preprocess_input is imported in an earlier cell but is not applied
# anywhere in the visible code — confirm whether the backbone should receive
# preprocessed inputs.
model = K.models.Sequential([
    K.layers.Lambda(lambda image: tf.image.resize(image, to_res)),
    res_model,
    K.layers.Flatten(),
    K.layers.Dense(3, activation='softmax'),
])

In [None]:
# Compile with RMSprop at a very small learning rate (4e-7); the loss is
# categorical cross-entropy and accuracy is tracked as the only metric.
model.compile(
    optimizer=K.optimizers.RMSprop(learning_rate=4e-7),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 22 epochs in batches of 32. validation_data is None, so no
# validation set is evaluated during training.
history = model.fit(
    x_train,
    y_train,
    epochs=22,
    batch_size=32,
    validation_data=None,
    verbose=1,
)

Epoch 1/22
Epoch 2/22
Epoch 3/22
Epoch 4/22
Epoch 5/22
Epoch 6/22
Epoch 7/22
Epoch 8/22
Epoch 9/22
Epoch 10/22
Epoch 11/22
Epoch 12/22
Epoch 13/22
Epoch 14/22
Epoch 15/22
Epoch 16/22
Epoch 17/22
Epoch 18/22
Epoch 19/22
Epoch 20/22
Epoch 21/22
Epoch 22/22


In [None]:
# Print the layer-by-layer architecture and the parameter counts of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lambda (Lambda)             (None, 224, 224, 3)       0         
                                                                 
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 flatten (Flatten)           (None, 100352)            0         
                                                                 
 dense (Dense)               (None, 3)                 301059    
                                                                 
Total params: 23,888,771
Trainable params: 23,835,651
Non-trainable params: 53,120
_________________________________________________________________


In [None]:
# Run the model on every test image; each output row holds the 3 softmax scores.
y_test_categorical = model.predict(x_test,verbose=1)
# Placeholder for the predicted class ids, sized from the predictions themselves
# instead of a hard-coded 1168.
y_test = np.zeros(len(y_test_categorical))




In [None]:
# Predicted class id = index of the largest softmax score in each row.
# np.argmax also resolves ties (the first maximum wins), whereas a chain of strict
# ">" comparisons matches no branch on a tie and silently leaves the placeholder 0.
# Cast to float64 so y_test keeps the dtype of the np.zeros placeholder.
y_test = np.argmax(y_test_categorical, axis=1).astype(np.float64)

# Show only the first few rows instead of printing every prediction.
for probs, label in zip(y_test_categorical[:5], y_test[:5]):
  print(probs)
  print(label)

[1.0000000e+00 5.0846499e-08 1.2217103e-09]
0.0
[4.1924462e-08 9.9949265e-01 5.0733885e-04]
1.0
[9.9999368e-01 3.1017480e-06 3.1672166e-06]
0.0
[9.9999952e-01 8.8382407e-08 4.0486208e-07]
0.0
[9.9999952e-01 1.8430667e-07 2.0234640e-07]
0.0
[9.9264994e-15 6.0010855e-08 9.9999988e-01]
2.0
[1.1264318e-11 8.2007086e-09 1.0000000e+00]
2.0
[2.750840e-03 9.968761e-01 3.730171e-04]
1.0
[1.0000000e+00 1.6923495e-14 6.1662511e-09]
0.0
[1.4004641e-05 5.1816070e-05 9.9993420e-01]
2.0
[9.9987435e-01 3.4630146e-05 9.0955233e-05]
0.0
[9.8609071e-08 7.4648488e-01 2.5351503e-01]
1.0
[2.4083220e-09 9.9999809e-01 1.9025407e-06]
1.0
[1.1014822e-09 9.9743491e-01 2.5651252e-03]
1.0
[9.9287194e-01 6.9268891e-03 2.0120971e-04]
0.0
[9.9002141e-01 2.1706783e-04 9.7615244e-03]
0.0
[9.8294199e-01 1.7056251e-02 1.8069984e-06]
0.0
[6.5642693e-05 2.2923870e-03 9.9764204e-01]
2.0
[1.7089836e-03 1.1788179e-04 9.9817312e-01]
2.0
[1.7034261e-05 9.9998271e-01 2.6678421e-07]
1.0
[8.2041924e-14 1.0000000e+00 9.4251362e-10]

In [None]:
# Build the export rows as [file_name, class_id], pairing each test file name
# with its predicted class id.
# Lengths come from the data rather than a hard-coded 1168; the assert guards
# against zip() silently truncating if the two sequences ever get out of sync.
assert len(test_image_name_list) == len(y_test), "file names and predictions differ in length"
rows, cols = (len(test_image_name_list), 2)
print(y_test.shape)
exported_predictions = [
    [file_name, int(class_id)]
    for file_name, class_id in zip(test_image_name_list, y_test)
]

# Preview a few rows rather than dumping the whole list into the output.
print(exported_predictions[:5])

(1168,)
[['img_943999834212424978.jpg', 0], ['img_9041102629474061625.jpg', 1], ['img_8910557226104159684.jpg', 0], ['img_9036007147622061008.jpg', 0], ['img_935944140711976007.jpg', 0], ['img_9056846072161814641.jpg', 2], ['img_89674115523456831.jpg', 2], ['img_9085138795147025469.jpg', 1], ['img_9055447089309368165.jpg', 0], ['img_9206820654982296001.jpg', 2], ['img_9059612294556593638.jpg', 0], ['img_1067627733181002215.jpg', 1], ['img_1162351550721440578.jpg', 1], ['img_1102208210595046337.jpg', 1], ['img_1260586497798188644.jpg', 0], ['img_1085919836874816265.jpg', 0], ['img_1037372824298815463.jpg', 0], ['img_1330693339587696783.jpg', 2], ['img_1123863876791783558.jpg', 2], ['img_1289148150235016680.jpg', 1], ['img_1037910048520457739.jpg', 1], ['img_1292614197918388809.jpg', 2], ['img_1082179150533558647.jpg', 0], ['img_1307931683460582151.jpg', 1], ['img_1207853828423997041.jpg', 1], ['img_1131369692869272887.jpg', 1], ['img_1219280710752744599.jpg', 1], ['img_11282973478458714

In [None]:
# NOTE(review): the test-loading cell leaves the working directory in
# Data/test_images, so this relative path only resolves when the cell is run from
# Data (as the recorded output suggests) — confirm the intended execution order.
%cd train_images

/content/gdrive/My Drive/Machine Learning 2021/Data/train_images


In [None]:
%cd ..
export = pd.DataFrame(exported_predictions)
export.columns=['file_name','class_id']
export.to_csv("predictions/DetNETTypeR50_135Epochs_17.csv",index = False)


/content/gdrive/My Drive/Machine Learning 2021/Data


In [None]:
# Display the exported predictions table as the cell output.
export

# **For tomorrow, 22/12: try exactly the same network, but for 100 epochs.**