<a href="https://colab.research.google.com/github/jimtete/pneumonia-detection-nov21/blob/main/Notebooks/LSTM_DetNET_TypeR_VTEC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **A recurrent (LSTM) approach to solving the problem**

In [None]:
# Mount Google Drive inside the Colab VM so files stored on Drive can be read
# and written by the notebook.
from google.colab import drive
drive.mount('/content/gdrive')
# Switch the working directory to the project folder on Drive; the spaces in
# the folder name are backslash-escaped for the %cd magic.
%cd /content/gdrive/MyDrive/Machine\ Learning\ 2021

Mounted at /content/gdrive
/content/gdrive/MyDrive/Machine Learning 2021


# **Importing the libraries**

In [None]:
import gc
import numpy as np
from PIL import Image, ImageDraw
from google.colab.patches import cv2_imshow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
import cv2
import pandas as pd
import os
import shutil
import random
import glob
import matplotlib.pyplot as plt
import warnings
import sys

np.set_printoptions(threshold=sys.maxsize)
warnings.simplefilter(action='ignore', category=FutureWarning)
%matplotlib inline



# **Custom-made helper functions**

In [None]:
'''Parameters
----------
image : ndarray
    Input image data. Will be converted to float.
mode : str
    One of the following strings, selecting the type of noise to add:

    'gauss'     Gaussian-distributed additive noise.
    'poisson'   Poisson-distributed noise generated from the data.
    's&p'       Replaces random pixels with 0 or 1.
    'speckle'   Multiplicative noise using out = image + n*image,where
                n is uniform noise with specified mean & variance.'''
def ResizeImage(x,y):
  """Derive four integer pixel values from an image's two dimensions.

  A fixed ratio of 0.95 is split into a margin ratio, (1 - 0.95) / 2, and an
  extent ratio, 0.95 plus that margin. Each dimension is multiplied by both
  ratios and truncated to an int.

  Parameters
  ----------
  x, y : number
      The two dimensions to scale.

  Returns
  -------
  tuple of int
      (new_x, new_y, new_width, new_height): x and y scaled by the margin
      ratio, followed by x and y scaled by the extent ratio.
      NOTE(review): the last two equal margin + 95% of the dimension, which
      reads like a far-edge coordinate rather than a width/height -- confirm
      against the caller.
  """
  keep_ratio = 0.95
  margin_ratio = (1-keep_ratio)/2
  extent_ratio = keep_ratio+margin_ratio

  # int() truncates toward zero, exactly like the C-style casts used before.
  offsets = (int(x*margin_ratio), int(y*margin_ratio))
  extents = (int(x*extent_ratio), int(y*extent_ratio))

  return offsets + extents




def noisyGauss(image):
  """Return `image` plus zero-mean Gaussian noise (variance 0.1).

  Works for arrays of any shape. The previous version unpacked
  `row, col, ch` and therefore raised on 2-D grayscale arrays such as the
  ones produced by `Image.convert("L")` elsewhere in this notebook.

  Parameters
  ----------
  image : ndarray
      Input pixel data; it is not modified.

  Returns
  -------
  ndarray
      Float array with the same shape as `image`.
  """
  mean = 0
  var = 0.1
  sigma = var**0.5
  # Sampling with image.shape directly makes the old redundant reshape
  # unnecessary and draws the same random stream for 3-D inputs.
  gauss = np.random.normal(mean,sigma,image.shape)
  return image + gauss

def noisySnP(image):
  """Return a copy of `image` with salt-and-pepper noise applied.

  About 4% of the elements are targeted: half are set to 255 (salt) and half
  to 0 (pepper). Coordinates are drawn with replacement, and pepper is
  applied after salt, so the number of changed pixels can be lower than the
  number of draws.

  Fixes over the previous version:
    * `randint(0, i - 1)` has an exclusive upper bound, so the last index of
      every axis was never hit and any axis of length 1 raised ValueError;
    * the coordinates are passed as a tuple, because indexing with a *list*
      of arrays is not interpreted as per-axis coordinates by current NumPy;
    * any array shape is accepted (the unused `row, col, ch` unpacking
      rejected 2-D grayscale images), and empty arrays are returned as-is.

  Parameters
  ----------
  image : ndarray
      Input pixel data; it is not modified.

  Returns
  -------
  ndarray
      Noisy copy with the same shape and dtype as `image`.
  """
  s_vs_p = 0.5
  amount = 0.04
  out = np.copy(image)
  if out.size == 0:
    # Nothing to corrupt; randint would reject the empty index range.
    return out

  def _random_coords(count):
    # One index array per axis; together they address `count` elements.
    return tuple(np.random.randint(0, dim, count) for dim in out.shape)

  # Salt mode
  num_salt = int(np.ceil(amount * out.size * s_vs_p))
  out[_random_coords(num_salt)] = 255

  # Pepper mode
  num_pepper = int(np.ceil(amount * out.size * (1. - s_vs_p)))
  out[_random_coords(num_pepper)] = 0
  return out

def noisyPoisson(image):
  """Return Poisson-distributed noise generated from `image`.

  The number of distinct values in `image` is rounded up to the next power
  of two; the image is multiplied by that scale, used as the Poisson rate,
  and the samples are divided by the same scale.

  Parameters
  ----------
  image : ndarray
      Input data, used (after scaling) as the Poisson rate.

  Returns
  -------
  ndarray
      Float array with the same shape as `image`.
  """
  distinct_count = len(np.unique(image))
  scale = 2 ** np.ceil(np.log2(distinct_count))
  sampled = np.random.poisson(image * scale)
  return sampled / float(scale)

def noisySpeckle(image):
  """Return `image` with multiplicative (speckle) noise.

  Computes `image + image * n / 6`, where `n` is standard-normal noise with
  the same shape as `image`.

  Works for arrays of any shape. The previous version unpacked
  `row, col, ch` and therefore raised on 2-D grayscale arrays.

  Parameters
  ----------
  image : ndarray
      Input pixel data; it is not modified.

  Returns
  -------
  ndarray
      Float array with the same shape as `image`.
  """
  # randn takes the dimensions as separate arguments; unpacking the shape
  # keeps the random stream identical to the old 3-D-only call.
  gauss = np.random.randn(*image.shape)
  noisy = image + image * gauss/6
  return noisy

def change_contrast(img, level):
  """Apply a contrast curve to `img` through its `point` method.

  Every value `c` is mapped to `128 + factor * (c - 128)`, where
  `factor = 259 * (level + 255) / (255 * (259 - level))`; a level of 0 gives
  a factor of 1 (identity mapping).

  Parameters
  ----------
  img : object with a `point(callable)` method
      Presumably a PIL Image, since PIL is imported by this notebook --
      confirm with the caller.
  level : number
      Contrast level. `level == 259` makes the denominator zero and raises
      ZeroDivisionError.

  Returns
  -------
  Whatever `img.point` returns for the mapping function.
  """
  factor = (259 * (level + 255)) / (255 * (259 - level))
  # Values pivot around the mid-grey 128, so 128 itself never moves.
  return img.point(lambda c: 128 + factor * (c-128))



## **Custom-made paths**

In [None]:
# Relative folders for the train and test images. They are not referenced by
# the cells visible in this notebook, which %cd into train_rnn / test_rnn.
train_path, test_path = (
    "train_images/train_images",
    "test_images/test_images",
)

# **Reading the image data**

## **Training set**

In [None]:
%cd /content/gdrive/My Drive/Machine Learning 2021/Data

data = pd.read_csv("train_labels.csv")['file_name']
y_train = (pd.read_csv("labels.csv")['class_id']).to_numpy()

%cd /content/gdrive/My Drive/Machine Learning 2021/Data/train_rnn

x_train = np.zeros((4672,69,69))

i = 0
a = 4000
for file_name in data:
  temp = Image.open(file_name).convert("L")
  x_train[i] = np.array(temp)

  i+=1
  print("%.2f" % round(((i/4672)*100), 2),"% done...")

gc.collect()
print(x_train.shape)

/content/gdrive/My Drive/Machine Learning 2021/Data
/content/gdrive/My Drive/Machine Learning 2021/Data/train_rnn
0.02 % done...
0.04 % done...
0.06 % done...
0.09 % done...
0.11 % done...
0.13 % done...
0.15 % done...
0.17 % done...
0.19 % done...
0.21 % done...
0.24 % done...
0.26 % done...
0.28 % done...
0.30 % done...
0.32 % done...
0.34 % done...
0.36 % done...
0.39 % done...
0.41 % done...
0.43 % done...
0.45 % done...
0.47 % done...
0.49 % done...
0.51 % done...
0.54 % done...
0.56 % done...
0.58 % done...
0.60 % done...
0.62 % done...
0.64 % done...
0.66 % done...
0.68 % done...
0.71 % done...
0.73 % done...
0.75 % done...
0.77 % done...
0.79 % done...
0.81 % done...
0.83 % done...
0.86 % done...
0.88 % done...
0.90 % done...
0.92 % done...
0.94 % done...
0.96 % done...
0.98 % done...
1.01 % done...
1.03 % done...
1.05 % done...
1.07 % done...
1.09 % done...
1.11 % done...
1.13 % done...
1.16 % done...
1.18 % done...
1.20 % done...
1.22 % done...
1.24 % done...
1.26 % done...
1

## **For validation data**

In [None]:
# Disabled cell: the validation-set loader below is wrapped in a string literal,
# so none of it executes. Because the string is the cell's last expression, the
# notebook echoes it back as the cell output.
# NOTE(review): the disabled code still targets 224x224 images under
# cropped_95_percento/val_images, unlike the 69x69 train_rnn / test_rnn data
# used by the active cells -- update it before re-enabling.
"""%cd /content/gdrive/My Drive/Machine Learning 2021/Data

data = pd.read_csv("val_labels.csv")['file_name']
y_val = (pd.read_csv("val_labels.csv")['class_id']).to_numpy()

%cd /content/gdrive/My Drive/Machine Learning 2021/Data/cropped_95_percento/val_images

x_val = np.zeros((672,224,224))

i = 0
for file_name in data:
  temp = Image.open(file_name).convert("L")
  x_val[i] = np.array(temp)

  i+=1
  print("%.2f" % round(((i/672)*100), 2),"% done...")

gc.collect()
print(x_val.shape)"""

'%cd /content/gdrive/My Drive/Machine Learning 2021/Data\n\ndata = pd.read_csv("val_labels.csv")[\'file_name\']\ny_val = (pd.read_csv("val_labels.csv")[\'class_id\']).to_numpy()\n\n%cd /content/gdrive/My Drive/Machine Learning 2021/Data/cropped_95_percento/val_images\n\nx_val = np.zeros((672,224,224))\n\ni = 0\nfor file_name in data:\n  temp = Image.open(file_name).convert("L")\n  x_val[i] = np.array(temp)\n\n  i+=1\n  print("%.2f" % round(((i/672)*100), 2),"% done...")\n\ngc.collect()\nprint(x_val.shape)'

## **For test images**

In [None]:
%cd /content/gdrive/My Drive/Machine Learning 2021/Data/test_rnn

test_image_name_list = []
x_test = np.zeros(((1168,69,69)))
i = 0
for file_name in glob.glob("*.jpg"):
  test_image_name_list.append(file_name)
  temp = Image.open(file_name).convert("L")
  x_test[i] = np.array(temp)
  print("%.2f" % round(((i/1168)*100), 2),"% done...")
  i+=1

print(x_test.shape)

/content/gdrive/My Drive/Machine Learning 2021/Data/test_rnn
0.00 % done...
0.09 % done...
0.17 % done...
0.26 % done...
0.34 % done...
0.43 % done...
0.51 % done...
0.60 % done...
0.68 % done...
0.77 % done...
0.86 % done...
0.94 % done...
1.03 % done...
1.11 % done...
1.20 % done...
1.28 % done...
1.37 % done...
1.46 % done...
1.54 % done...
1.63 % done...
1.71 % done...
1.80 % done...
1.88 % done...
1.97 % done...
2.05 % done...
2.14 % done...
2.23 % done...
2.31 % done...
2.40 % done...
2.48 % done...
2.57 % done...
2.65 % done...
2.74 % done...
2.83 % done...
2.91 % done...
3.00 % done...
3.08 % done...
3.17 % done...
3.25 % done...
3.34 % done...
3.42 % done...
3.51 % done...
3.60 % done...
3.68 % done...
3.77 % done...
3.85 % done...
3.94 % done...
4.02 % done...
4.11 % done...
4.20 % done...
4.28 % done...
4.37 % done...
4.45 % done...
4.54 % done...
4.62 % done...
4.71 % done...
4.79 % done...
4.88 % done...
4.97 % done...
5.05 % done...
5.14 % done...
5.22 % done...
5.31 % do

# **Checking the shape of the arrays and pre-processing the target data**

In [None]:
# One-hot encode the integer class ids into 3 columns. The ndim guard keeps
# the cell idempotent: without it, re-running the cell would one-hot encode
# the already encoded (N, 3) matrix again and produce an (N, 3, 3) array.
if y_train.ndim == 1:
  y_train = tf.keras.utils.to_categorical(y_train, num_classes=3)
#y_val = tf.keras.utils.to_categorical(y_val, num_classes=3)

# Sanity check: sample counts of inputs and targets should line up.
print("x_train shape: {}".format(x_train.shape))
print("y_train shape: {}".format(y_train.shape))
#print("x_val shape: {}".format(x_val.shape))
#print("y_val shape: {}".format(y_val.shape))
print("x_test shape: {}".format(x_test.shape))
print("test image name list length: {}".format(len(test_image_name_list)))

x_train shape: (4672, 69, 69)
y_train shape: (4672, 3)
x_test shape: (1168, 69, 69)
test image name list length: 1168


In [None]:
# Flatten every image into one row of pixels. The sample count is taken from
# the array itself and -1 lets NumPy infer the pixel count, so the cell no
# longer depends on the hard-coded 4672 / 1168 / 69*69 values. Re-running the
# cell on already flattened arrays leaves them unchanged.
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)

print("x_train shape: {}".format(x_train.shape))
print("y_train shape: {}".format(y_train.shape))
#print("x_val shape: {}".format(x_val.shape))
#print("y_val shape: {}".format(y_val.shape))
print("x_test shape: {}".format(x_test.shape))
print("test image name list length: {}".format(len(test_image_name_list)))

x_train shape: (4672, 4761)
y_train shape: (4672, 3)
x_test shape: (1168, 4761)
test image name list length: 1168


# **Start training**

In [None]:
# Release Python garbage and reset Keras' global state before building a new
# model, so repeated runs of the cells below start from a clean session.
gc.collect()
import tensorflow.keras.backend as Ke
Ke.clear_session()
# NOTE(review): the ResNet50 names below are not used by any cell visible in
# this notebook -- confirm whether they are still needed.
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
try:
    del model
    print('Model deleted')
except NameError:
    # Only "model was never defined" is expected here; the previous bare
    # `except:` would also have hidden unrelated errors.
    print('No model to delete')

Model deleted


In [None]:
# Stack of Embedding -> LSTM pairs with shrinking width (256, 128, 64, 32),
# followed by a 3-way classification head.
# NOTE(review): Embedding layers look up rows by integer index. Here the first
# one is fed the flattened pixel values and the later ones are fed the float
# outputs of the preceding LSTM, which does not look like the intended use of
# an embedding -- confirm the architecture before relying on its predictions.
model = keras.Sequential()

model.add(layers.Embedding(input_dim=(69*69), output_dim=256))
model.add(layers.LSTM(256,input_shape=(1,69*69)))
model.add(layers.Embedding(input_dim=(256), output_dim=128))
model.add(layers.LSTM(128,input_shape=(1,128)))
model.add(layers.Embedding(input_dim=(128), output_dim=64))
model.add(layers.LSTM(64,input_shape=(1,64)))
model.add(layers.Embedding(input_dim=(64), output_dim=32))
model.add(layers.LSTM(32,input_shape=(1,32)))

#model.add(layers.LSTM(64,input_shape=(1,224*224)))
#model.add(layers.LSTM(32,input_shape=(1,224*224)))
# The model is compiled with loss='categorical_crossentropy', which expects
# class probabilities by default (from_logits=False). Without an activation the
# head emitted unbounded scores, so softmax is applied here. The predicted class
# (largest output) is unaffected by softmax, so downstream decoding still works.
model.add(layers.Dense(3, activation="softmax"))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 256)         1218816   
                                                                 
 lstm (LSTM)                 (None, 256)               525312    
                                                                 
 embedding_1 (Embedding)     (None, 256, 128)          32768     
                                                                 
 lstm_1 (LSTM)               (None, 128)               131584    
                                                                 
 embedding_2 (Embedding)     (None, 128, 64)           8192      
                                                                 
 lstm_2 (LSTM)               (None, 64)                33024     
                                                                 
 embedding_3 (Embedding)     (None, 64, 32)            2

In [None]:
# Plain SGD on categorical cross-entropy (targets are one-hot encoded),
# reporting accuracy during training.
model.compile(optimizer="sgd", loss='categorical_crossentropy', metrics=["accuracy"])

In [None]:
gc.collect()
%cd /content/gdrive/My Drive/Machine Learning 2021/Data

history = model.fit(
    x_train,y_train,
    batch_size=40,
    epochs=150
)

/content/gdrive/My Drive/Machine Learning 2021/Data
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150

In [None]:
# Per-class scores for every test image: one row per image, 3 columns.
y_test_categorical = model.predict(x_test,verbose=1)

# Pick the class with the highest score for each row. This replaces the
# hand-written a/b/c comparison chain, which depended on a hard-coded 1168
# rows and silently left class 0 whenever two scores tied for the maximum
# (argmax resolves ties to the first of the tied classes instead).
# The float dtype matches the np.zeros-based array the cell produced before.
y_test = np.argmax(y_test_categorical, axis=1).astype(np.float64)

# Show a small sample and the class distribution instead of printing two
# lines per image.
print(y_test_categorical[:5])
print(y_test[:5])
print("predicted class counts:", np.bincount(y_test.astype(int), minlength=3))

In [None]:
# Using above second method to create a
# 2D array
rows, cols = (1168, 2)
exported_predictions=[]
print(y_test.shape)
for i in range(rows):
    col = []
    for j in range(cols):
        col.append(0)
    exported_predictions.append(col)

for i in range(1168):
  exported_predictions[i][0] = test_image_name_list[i]
  exported_predictions[i][1] = int(y_test[i])

print(exported_predictions)

%cd /content/gdrive/My Drive/Machine Learning 2021/Data
export = pd.DataFrame(exported_predictions)
export.columns=['file_name','class_id']
export.to_csv("predictions/DetNET-TypeR-VTEC-02.csv",index = False)

