In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for current_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(current_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
class DataGenerator(Sequence):
  """Keras ``Sequence`` yielding batches of grayscale images with 4-value labels.

  Each CSV row holds an image file name in its first column; the remaining
  columns are read as the numeric label (noted in the code as (x, y, h, w)).

  NOTE(review): ``Sequence`` and ``cv2`` are imported in later cells of this
  notebook; those imports must have run before this cell is executed.
  """

  def __init__(self, csv_file, base_dir, output_size, shuffle=False, batch_size=10):
    """
    Initializes a data generator object
      :param csv_file: file in which image names and numeric labels are stored
      :param base_dir: the directory in which all images are stored
      :param output_size: image output size after preprocessing, as (height, width)
      :param shuffle: shuffle the data after each epoch
      :param batch_size: The size of each batch returned by __getitem__
    """
    self.df = pd.read_csv(csv_file)
    self.base_dir = base_dir
    self.output_size = output_size
    self.shuffle = shuffle
    self.batch_size = batch_size
    # Build the initial (optionally shuffled) index order.
    self.on_epoch_end()

  def on_epoch_end(self):
    """Rebuilds the row-index order, reshuffling it when ``shuffle`` is set."""
    self.indices = np.arange(len(self.df))
    if self.shuffle:
      np.random.shuffle(self.indices)

  def __len__(self):
    """Returns the number of full batches; a trailing partial batch is dropped."""
    return len(self.df) // self.batch_size

  def __getitem__(self, idx):
    """
    Builds batch number ``idx``
      :param idx: zero-based batch index
      :return: tuple ``(X, y)`` where ``X`` has shape
               (batch, height, width, 1) and ``y`` has shape (batch, 4, 1)
      :raises FileNotFoundError: if an image listed in the CSV cannot be read
    """
    # get the indices of the requested batch
    indices = self.indices[idx*self.batch_size:(idx+1)*self.batch_size]

    ## Initializing Batch
    #  the trailing 1 in the shape is the single (grayscale) channel;
    #  for colored images this would have to be 3
    X = np.empty((len(indices), *self.output_size, 1))
    # (x, y, h, w)
    y = np.empty((len(indices), 4, 1))

    height, width = self.output_size

    for i, data_index in enumerate(indices):
      img_path = os.path.join(self.base_dir,
                  self.df.iloc[data_index, 0])

      # Load directly as a single-channel image; cv2.imread returns None
      # instead of raising when the file is missing or unreadable.
      img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
      if img is None:
        raise FileNotFoundError("Could not read image: {}".format(img_path))

      # cv2.resize expects the target size as (width, height)
      img = cv2.resize(img, (width, height))

      label = self.df.iloc[data_index, 1:].to_numpy(dtype=float)
      ## if you have any preprocessing for
      ## the labels too do it here

      # add the channel / trailing axis explicitly so the shapes match the
      # pre-allocated batch arrays
      X[i] = img[..., np.newaxis]
      y[i] = label.reshape(4, 1)

    return X, y


## Defining and training the model
# NOTE(review): this is template scaffolding. The architecture list is empty,
# no compile step is present, and "data.csv" / "data" are placeholder paths.
# `Sequential` is imported in a later cell of this notebook.

model = Sequential([
  ## define the model's architecture
])

train_gen = DataGenerator("data.csv", "data", (244, 244), batch_size=20, shuffle=True)

## compile the model first of course

# now let's train the model
# (add further keyword arguments such as callbacks or validation_data here;
#  a bare `...` placeholder after a keyword argument is a SyntaxError)
model.fit(train_gen, epochs=5)

In [None]:
#Define Features and Label
#features = ['StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID', 'pe_present_on_image', 'negative_exam_for_pe',
           # 'qa_motion', 'flow_artifact', 'rv_lv_ratio_gte_1', 'leftsided_pe', 'chronic_pe', 'true_filling_defect_not_pe', 'rightsided_pe', 
           # 'acute_and_chronic_pe', 'central_pe', 'indeterminate'] 

#x=train[features].values
#y=train[features].values

#Considering y variable holds numpy array
#y_tensor = tf.convert_to_tensor(y, dtype=tf.int64) 


#Train Test Split
#x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state = 2)

In [None]:
import os
import gc
import vtk
import cv2
import time
import pydicom
import numpy as np 
import pandas as pd 
import scipy.ndimage
import seaborn as sns


from glob import glob
from skimage import measure
from tensorflow import keras
from plotly import __version__
from plotly.graph_objs import*
from skimage import morphology
from vtk.util import numpy_support
from sklearn.cluster import KMeans
from skimage.transform import resize
from IPython.display import clear_output
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from plotly.tools import FigureFactory as FF
from tensorflow.keras.models import load_model
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from tensorflow.keras.callbacks import ModelCheckpoint as MC
from tensorflow.keras.layers import Input, Dense, Dropout, Conv2D
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from sklearn.model_selection import train_test_split

In [None]:
from tensorflow import keras
#import tensor as tf
import warnings
warnings.filterwarnings("ignore")
from tensorflow.keras.models import Sequential, Model,load_model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint
from tensorflow.keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D,MaxPool2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.preprocessing import image
from tensorflow.keras.initializers import glorot_uniform
import numpy as np
# Seed NumPy's global RNG.
# NOTE(review): this alone may not make Keras weight initialisation
# reproducible — confirm whether a TensorFlow seed is wanted as well.
np.random.seed(1000)

# AlexNet-style network: five convolution blocks followed by two 4096-unit
# fully connected blocks and a 17-way softmax output.
# Spatial sizes in the comments follow from padding='same' (output = ceil(input / stride)).
model = Sequential()
input_shape = (224, 224, 3)

# 1st conv block: 224x224x3 -> 56x56x96 -> (pool) 28x28x96
model.add(Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), input_shape=input_shape, padding='same'))
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same'))
model.add(BatchNormalization())

# 2nd conv block: 28x28x256 -> (pool) 14x14x256
model.add(Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1), padding='same'))
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same'))
model.add(BatchNormalization())

# 3rd conv block: 14x14x384 (no pooling)
model.add(Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())

# 4th conv block: 14x14x384 (no pooling)
model.add(Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())

# 5th conv block: 14x14x256 -> (pool) 7x7x256
model.add(Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same'))
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same'))
model.add(BatchNormalization())

# Fully connected head. The Dense layer infers its input size (7*7*256) from
# the Flatten output, so no input_shape is given here: only the first layer
# of a Sequential model needs one.
model.add(Flatten())
model.add(Dense(4096))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(BatchNormalization())

model.add(Dense(4096))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(BatchNormalization())

# Output layer: 17 units with softmax.
# NOTE(review): 17 must equal the number of target classes — confirm against the data.
model.add(Dense(17))
model.add(Activation('softmax'))

model.summary()







In [None]:
# let's visualize layer names and layer indices to see how many layers
# we should freeze:
from tensorflow.keras import layers
# Print every layer's position in the model next to its name.
layer_names = [layer.name for layer in model.layers]
for position, layer_name in enumerate(layer_names):
    print(position, layer_name)

In [None]:
# Freeze the first NUM_FROZEN_LAYERS layers (indices 0-19) so their weights
# are not updated during training. For the AlexNet-style model built in this
# notebook that covers the five convolution blocks, the Flatten layer and the
# first Dense(4096) layer; every later layer stays trainable.
# NOTE(review): the model is randomly initialised (no pretrained weights are
# loaded in the visible cells), so freezing keeps these layers at their
# initial random weights — confirm this is intended.
NUM_FROZEN_LAYERS = 20

print("Freezed layers:")
for i, layer in enumerate(model.layers[:NUM_FROZEN_LAYERS]):
    print(i, layer.name)
    layer.trainable = False

In [None]:
# Print the model summary again: after some layers have been frozen, the
# trainable parameter count should be lower than in the first summary.
model.summary()

In [None]:
# Configure training: categorical cross-entropy loss, Adam optimizer, and
# accuracy reported as the metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# image preprocessing
from tensorflow.keras.preprocessing.image import ImageDataGenerator

batch_size = 128
#base_dir = "../input/rsna-str-pulmonary-embolism-detection"
root_path = '../input/rsna-str-pulmonary-embolism-detection'

# Training generator: scales pixel values by 1/255 and applies random shear,
# zoom and width/height shifts, filling new pixels with the nearest value.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest',
)

# Test generator: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both directory iterators.
flow_options = dict(
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
)

# `x` iterates over <root_path>/train and `y` over <root_path>/test.
x = train_datagen.flow_from_directory(root_path+'/train', **flow_options)

y = test_datagen.flow_from_directory(root_path+'/test', **flow_options)

In [None]:
# Mapping from class name to class index, taken from the training directory
# iterator. In this notebook that iterator is named `x`; no `training_set`
# variable is defined in any visible cell.
class_dict = x.class_indices
print(class_dict)

In [None]:
# Collect the keys of `class_dict` into a list, in the dict's own order.
li = [label for label in class_dict]
print(li)

In [None]:
# Number of samples reported by the training (`x`) and test (`y`) iterators.
train_num, test_num = x.samples, y.samples

In [None]:
# Splitting the dataset for training and testing.
def is_test(x, _):
    """Return True when index ``x`` is a multiple of 4.

    The second argument is accepted but ignored.
    """
    remainder = x % 4
    return remainder == 0


def is_train(x, y):
    """Return the logical negation of ``is_test(x, y)``."""
    held_out = is_test(x, y)
    return not held_out


def recover(x, y):
    """Return the second argument unchanged; the first argument is ignored."""
    return y



In [None]:
from tensorflow.keras.utils import Sequence 
from skimage.io import imread
from skimage.transform import resize
import numpy as np
import math
# Here, `x_set` is list of path to the images
# and `y_set` are the associated classes.
class rsna_str_pulmonary_embolism_detectionSequence(Sequence):
    """Keras ``Sequence`` that loads images from paths and resizes them per batch.

    :param x_set: sequence of image paths (must support ``len`` and slicing)
    :param y_set: sequence of the associated classes, sliced in step with ``x_set``
    :param batch_size: number of samples per batch
    :param target_size: size passed to ``skimage.transform.resize`` for every
        image; defaults to (200, 200). Pass e.g. (224, 224) when the consuming
        model expects that input size.
    """

    def __init__(self, x_set, y_set, batch_size, target_size=(200, 200)):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.target_size = target_size

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        """Return ``(images, labels)`` for batch ``idx`` as two NumPy arrays."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.x[start:stop]
        batch_y = self.y[start:stop]
        images = [resize(imread(image_path), self.target_size)
                  for image_path in batch_x]
        return np.array(images), np.array(batch_y)

# NOTE(review): the sequence is built from empty path/label collections, so it
# contains no batches; supply real image paths and classes before training.
# This assignment also replaces the training directory iterator previously
# bound to `x`.
x = rsna_str_pulmonary_embolism_detectionSequence((), (), 100)

# NOTE(review): `tensorflow.python.*` is a private TensorFlow module path and
# may not exist in every TensorFlow version; the adapter is only printed here.
from tensorflow.python.keras.engine import data_adapter
adapter = data_adapter.KerasSequenceAdapter(x, y=None)
print(adapter)

# A Sequence yields (inputs, targets) batches and fixes the batch size itself,
# so neither `y` nor `batch_size` may be passed to fit() alongside it.
model.fit(x, epochs=5, verbose=1, shuffle=True)

# Evaluate on `y` alone.
# NOTE(review): `y` is presumably still the test directory iterator created in
# the image preprocessing cell — confirm this is the intended evaluation set.
score = model.evaluate(y)
print('Test Loss:', score[0])
print('Test accuracy:', score[1])

In [None]:
# Configure training: categorical cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# NOTE(review): x_train / y_train are not defined in any visible cell (the
# train_test_split call is commented out) — they must be created before this runs.
# The builtin `int` is used because the `np.int` alias has been removed from NumPy.
x_train = np.asarray(x_train).astype(int)
y_train = np.asarray(y_train).astype(int)

# Train for 5 epochs, holding out the last 20% of the arrays for validation.
model.fit(x_train, y_train, batch_size=10, epochs=5, verbose=1, validation_split=0.2, shuffle=True)

# NOTE(review): the score is computed on the training arrays although it is
# printed as "Test" — confirm whether a held-out set was intended.
score = model.evaluate(x_train, y_train)
print('Test Loss:', score[0])
print('Test accuracy:', score[1])

# Save the trained model in HDF5 format ('.h5' is the extension Keras recognises).
model.save('pe_detection_model.h5')