# Usando a ResNet50 para treinar sem data augmentation e sem windowing

In [None]:
import keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Flatten, Dense, Dropout, Rescaling
from tensorflow.keras import optimizers
from tensorflow.keras.utils import image_dataset_from_directory

!pip install pydicom
import numpy as np
import pydicom
from PIL import Image
import os

import cv2

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pydicom
  Downloading pydicom-2.3.0-py3-none-any.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 7.3 MB/s 
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-2.3.0


In [None]:
!mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json

# Ajustando o DS

## Baixando o DS

In [None]:
# Download the dataset archive from Kaggle into the current working directory
# (requires the API token installed by the previous cell).
!kaggle datasets download -d pedroamaro/smaller-rsna-ds-train-test

Downloading smaller-rsna-ds-train-test.zip to /content
100% 7.25G/7.25G [01:40<00:00, 109MB/s] 
100% 7.25G/7.25G [01:40<00:00, 77.8MB/s]


In [None]:
# Extract the archive into the current working directory. The awk filter replaces
# unzip's per-file listing with one "." for every 1000 lines of output, so the
# cell output stays short.
!unzip /content/smaller-rsna-ds-train-test.zip | awk 'BEGIN {ORS=" "} {if(NR%1000==0)print "."}'
# Delete the archive once extracted to free disk space.
!rm /content/smaller-rsna-ds-train-test.zip

. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 

In [None]:
# Sanity check: number of extracted training files per class folder.
print('Numero de imagens: NO')
!ls /content/small_rsna_ds_train/no | wc -l
print('Numero de imagens: YES')
!ls /content/small_rsna_ds_train/yes | wc -l

Numero de imagens: NO
20047
Numero de imagens: YES
3287


## Salvando como JPG com full windowing (faixa completa de intensidades, sem janela específica)

In [None]:
!mkdir small_rsna_ds_as_jpg
!mkdir /content/small_rsna_ds_as_jpg/no
!mkdir /content/small_rsna_ds_as_jpg/yes

!mkdir test_as_jpg
!mkdir test_as_jpg/no
!mkdir test_as_jpg/yes

In [None]:
def save_W_as_jpg(path, id, dest_folder):
  """Read a DICOM file and save it as an 8-bit JPG scaled over its full range.

  The stored pixel values are first mapped with the file's linear rescale
  (``pixel * RescaleSlope + RescaleIntercept``). Negative results are clipped
  to 0 and the remaining range ``[0, max]`` is stretched to ``[0, 255]``.

  Args:
    path: Path of the DICOM file to read.
    id: File name (without extension) for the output image. The name shadows
      the ``id`` builtin but is kept so existing callers keep working.
    dest_folder: Directory that receives ``<id>.jpg``; a trailing slash is
      optional.

  Returns:
    None. The image is written to disk.
  """
  dcm = pydicom.dcmread(path)

  # Apply the DICOM linear rescale in float32.
  im = dcm.pixel_array.astype("float32") * dcm.RescaleSlope + dcm.RescaleIntercept

  # Clip negatives, then normalise by the largest remaining value.
  clipped = np.maximum(im, 0)
  peak = clipped.max()
  if peak > 0:
    rescaled_im = (clipped / peak) * 255.0
  else:
    # No pixel is above zero: dividing by the maximum would be 0/0 (NaN) and
    # the uint8 cast of NaN is undefined, so emit an all-black image instead.
    rescaled_im = np.zeros_like(clipped)
  final_im = np.uint8(rescaled_im)

  final_im = Image.fromarray(final_im)
  # os.path.join yields the same path as plain concatenation when dest_folder
  # ends with a separator, and still a valid one when it does not.
  final_im.save(os.path.join(dest_folder, id + '.jpg'))

### Para o conjunto Train

In [None]:
# Convert every DICOM of the training "yes" class to JPG.
train_path = "/content/small_rsna_ds_train/yes/"
dest_folder = "/content/small_rsna_ds_as_jpg/yes/"
folder = os.listdir(train_path)

for id_ in folder:
  # The first 12 characters of the file name are used as the image id.
  img_id = id_[:12]
  save_W_as_jpg(train_path + id_, img_id, dest_folder)

In [None]:
# Convert every DICOM of the training "no" class to JPG.
train_path = "/content/small_rsna_ds_train/no/"
dest_folder = "/content/small_rsna_ds_as_jpg/no/"
folder = os.listdir(train_path)

for id_ in folder:
  # The first 12 characters of the file name are used as the image id.
  img_id = id_[:12]
  save_W_as_jpg(train_path + id_, img_id, dest_folder)

In [None]:
# Sanity check: the number of converted JPGs per class should match the number
# of source DICOM files counted earlier.
print('Numero de imagens: NO')
!ls /content/small_rsna_ds_as_jpg/no/ | wc -l
print('Numero de imagens: YES')
!ls /content/small_rsna_ds_as_jpg/yes/ | wc -l

Numero de imagens: NO
20047
Numero de imagens: YES
3287


### Para o conjunto Test

In [None]:
# Convert every DICOM of the test "yes" class to JPG.
# (`train_path` keeps its name from the training cells but points at the test set.)
train_path = "/content/small_rsna_ds_test/yes/"
dest_folder = "/content/test_as_jpg/yes/"
folder = os.listdir(train_path)

for id_ in folder:
  # The first 12 characters of the file name are used as the image id.
  img_id = id_[:12]
  save_W_as_jpg(train_path + id_, img_id, dest_folder)

In [None]:
# NOTE(review): `sys` is not used anywhere in the visible notebook. The import
# used to sit inside the loop body (re-executed for every file); it is hoisted
# here rather than dropped in case a later cell relies on it.
import sys

# Convert every DICOM of the test "no" class to JPG.
# (`train_path` keeps its name from the training cells but points at the test set.)
train_path = "/content/small_rsna_ds_test/no/"
dest_folder = "/content/test_as_jpg/no/"
folder = os.listdir(train_path)

for id_ in folder:
  # The first 12 characters of the file name are used as the image id.
  img_id = id_[:12]
  save_W_as_jpg(train_path + id_, img_id, dest_folder)


In [None]:
# Sanity check: number of converted test JPGs per class folder.
print('Numero de imagens: NO')
!ls /content/test_as_jpg/no/ | wc -l
print('Numero de imagens: YES')
!ls /content/test_as_jpg/yes/ | wc -l

Numero de imagens: NO
10022
Numero de imagens: YES
1643


# Carregando o dataset

In [None]:
# Training split: the "training" subset of a 1/3 validation split over the
# converted JPG folder. The seed must match the one used for the validation
# subset so that the two subsets do not overlap. Image size is left at the
# Keras default.
train_data = image_dataset_from_directory(
    "/content/small_rsna_ds_as_jpg",
    labels='inferred',
    color_mode="rgb",
    batch_size=64,
    validation_split=1/3,
    subset="training",
    seed=123,
)

Found 23334 files belonging to 2 classes.
Using 15556 files for training.


In [None]:
# Validation split: the "validation" subset of the same 1/3 split (same folder
# and same seed as the training subset, so the two are complementary).
val_data = image_dataset_from_directory(
    "/content/small_rsna_ds_as_jpg",
    labels='inferred',
    color_mode="rgb",
    batch_size=64,
    validation_split=1/3,
    subset="validation",
    seed=123,
)

Found 23334 files belonging to 2 classes.
Using 7778 files for validation.


In [None]:
# Held-out test set: every JPG under test_as_jpg, with integer labels inferred
# from the class sub-folders. No split is applied here.
test_data = image_dataset_from_directory(
    "/content/test_as_jpg",
    labels='inferred',
    label_mode="int",
    color_mode="rgb",
    batch_size=64,
)

Found 11665 files belonging to 2 classes.


In [None]:
# Class names as exposed by the training dataset; presumably the sub-folder
# names in label-index order ("no", "yes") -- confirm against the Keras docs.
class_names = train_data.class_names

## Normalizando e pegando o tamanho de entrada

In [None]:
# Multiply every pixel by 1/255 before feeding the network.
# NOTE(review): the ResNet50 ImageNet weights are documented as expecting
# `resnet50.preprocess_input` rather than a plain 1/255 rescale -- confirm that
# this choice is intentional.
normalization_layer = Rescaling(1./255)


def _rescale_images(x, y):
  """Rescale the image batch `x`; pass the labels `y` through unchanged."""
  return normalization_layer(x), y


# One batch is pulled from each split; `image_batch` is used later to derive
# the model's input shape.
normalized_data = train_data.map(_rescale_images)
image_batch, labels_batch = next(iter(normalized_data))

normalized_val_data = val_data.map(_rescale_images)
val_image_batch, val_labels_batch = next(iter(normalized_val_data))

normalized_test_data = test_data.map(_rescale_images)
test_image_batch, test_labels_batch = next(iter(normalized_test_data))

# ResNet

In [None]:
# Reset Keras' global state before building the model, so that re-running the
# cells below starts from a clean slate.
keras.backend.clear_session()

In [None]:
# ImageNet-pretrained ResNet50 backbone without its classification head.
# `pooling='avg'` makes the backbone output one feature vector per image.
# The former `classes=2` argument was dropped: Keras only uses `classes` when
# `include_top=True`, so it had no effect here and wrongly suggested a
# two-unit output.
resnet = tf.keras.applications.resnet50.ResNet50(
    include_top=False,
    input_shape=image_batch.shape[1:],  # (height, width, channels) of one batch
    pooling='avg',
    weights='imagenet',
)

# Binary classification head on top of the backbone.
resnet_model = Sequential([
    resnet,
    # Output is already flat after global average pooling; Flatten is kept so
    # the layer stack (and layer indices) stay the same as before.
    Flatten(),
    Dense(512, activation="relu"),
    Dropout(0.5),
    # Single sigmoid unit: probability of class 1.
    Dense(1, activation="sigmoid"),
])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Per-class image counts of the converted training folder (class 0 = "no",
# class 1 = "yes"), copied from the `ls | wc -l` output earlier in the notebook.
neg, pos = 20047, 3287
total = pos + neg

# Balanced class weights: each class ends up with a summed weight of total / 2,
# so the rarer class gets the larger per-sample weight.
half_total = total / 2.0
w0 = (1 / neg) * half_total
w1 = (1 / pos) * half_total

print('Peso para a classe 0: {:.2f}'.format(w0))
print('Peso para a classe 1: {:.2f}'.format(w1))

Peso para a classe 0: 0.58
Peso para a classe 1: 3.55


In [None]:
# Binary cross-entropy with a small learning rate for fine-tuning.
# The AUC metric is named explicitly: with an auto-generated name, re-running
# this cell would produce "auc_1", "auc_2", ... and the "val_auc" key monitored
# by the early-stopping callback would no longer exist.
resnet_model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.Adam(learning_rate=0.00001),
    metrics=[keras.metrics.AUC(name='auc'), 'accuracy'],
)

In [None]:
# Train for up to 20 epochs and stop early once validation AUC has not improved
# for 5 consecutive epochs, restoring the weights of the best epoch.
run_hist = resnet_model.fit(
    normalized_data,
    validation_data=normalized_val_data,
    epochs=20,
    # Weight the loss per class to compensate for the class imbalance.
    class_weight={0: w0, 1: w1},
    callbacks=[
        keras.callbacks.EarlyStopping(
            monitor="val_auc",
            min_delta=0,
            patience=5,
            verbose=1,
            # AUC is higher-is-better; state the direction explicitly instead
            # of relying on the name-based guess made by mode="auto".
            mode="max",
            baseline=None,
            restore_best_weights=True,
        )
    ],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 13: early stopping


In [None]:
# Evaluate on the held-out test set. `results` is a list: the loss followed by
# the metrics in the order they were passed to compile() (AUC, then accuracy).
results = resnet_model.evaluate(normalized_test_data)
print(results)

[0.5784299969673157, 0.8448062539100647, 0.8555507659912109]
