In [None]:
import os
import cv2
import pydicom
import pandas as pd
import numpy as np 
import tensorflow as tf
from tensorflow.keras.layers import (
    Dense, Input, BatchNormalization, Add, Conv2D, GlobalAvgPool2D, Concatenate, ReLU, MaxPool2D
)
from tensorflow.keras import Model
from tensorflow.keras.utils import Sequence
from sklearn.model_selection import train_test_split
import random

In [None]:
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all
# up front, and open a TF1-compat session configured that way.
gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
session = tf.compat.v1.Session(config=config)

In [None]:
# Load the training table and preview its first rows.
data = pd.read_csv(
    '../input/osic-pulmonary-fibrosis-progression/train.csv'
)
data.head()

In [None]:
# One-hot encode SmokingStatus into three 0/1 columns (same order as before).
smoking_status = data['SmokingStatus']
for status in ("Ex-smoker", "Never smoked", "Currently smokes"):
    data[status] = (smoking_status == status).astype('int64')

# Drop the raw categorical column and the unused 'Percent' column.
del data['SmokingStatus'], data['Percent']

# Encode Sex as 1 for "Male", 0 for anything else.
data['Sex'] = (data['Sex'] == "Male").astype('int64')

data.head()

In [None]:
# `data` still contains the string column 'Patient'; restrict the reduction to
# numeric columns so this works on pandas >= 2.0 (where non-numeric columns
# are no longer silently dropped).
data.std(numeric_only=True)

In [None]:
# `data` still contains the string column 'Patient'; restrict the reduction to
# numeric columns so this works on pandas >= 2.0 (where non-numeric columns
# are no longer silently dropped).
data.mean(numeric_only=True)

In [None]:
# Standardise Weeks and Age with hard-coded offsets/scales.
# NOTE(review): 32/23 and 67/7 look like rounded mean/std values from the
# summary cells above -- confirm if the dataset changes.
data['Weeks'] = data['Weeks'].sub(32).div(23)
data["Age"] = data["Age"].sub(67).div(7)
data.head()

In [None]:
def get_img(path):
    """Load one DICOM file and return it as a 128x128 array.

    The pixel data read by ``pydicom`` is divided by ``2**11`` and then
    resized with ``cv2.resize`` to ``(128, 128)``.

    Args:
        path: location of the DICOM file to read.

    Returns:
        The scaled, resized pixel array.
    """
    dicom = pydicom.dcmread(path)
    scaled_pixels = dicom.pixel_array / 2**11
    return cv2.resize(scaled_pixels, (128, 128))

In [None]:
def build_model(input_shape=(128, 128, 1)):
    """Build the two-input regression network.

    The image input goes through a 7x7 stem, a max-pool and four stages of
    bottleneck residual blocks (3, 4, 6 and 3 blocks with 64, 128, 256 and 512
    base filters), followed by global average pooling. A second input of 6
    values goes through a ``Dense(6)`` + ReLU branch. Both are concatenated
    and reduced through ``Dense(8)`` + ReLU to a single linear output.

    Args:
        input_shape: shape of the image input.

    Returns:
        A ``Model`` taking ``[image, features]`` and producing one value per
        sample.
    """
    def conv_bn_relu(t, filters, kernel=1, stride=1, pad='same'):
        # Convolution -> batch norm -> ReLU.
        t = Conv2D(filters, kernel, strides=stride, padding=pad)(t)
        t = BatchNormalization()(t)
        return ReLU()(t)

    def bottleneck_path(t, filters, stride=1):
        # Main residual path: 1x1 -> 3x3 (optionally strided) -> 1x1 expanding
        # to 4*filters, ending in batch norm without activation.
        t = conv_bn_relu(t, filters)
        t = conv_bn_relu(t, filters, 3, stride)
        t = Conv2D(4 * filters, 1)(t)
        return BatchNormalization()(t)

    def identity_unit(t, filters):
        # Residual block whose shortcut is the unchanged input.
        main = bottleneck_path(t, filters)
        merged = Add()([main, t])
        return ReLU()(merged)

    def projection_unit(t, filters, stride):
        # Residual block whose shortcut is a strided 1x1 conv + batch norm.
        # The main path is created first so layer creation order is unchanged.
        main = bottleneck_path(t, filters, stride)
        skip = Conv2D(4 * filters, 1, strides=stride)(t)
        skip = BatchNormalization()(skip)
        merged = Add()([main, skip])
        return ReLU()(merged)

    def residual_stage(t, filters, repeats, stride=2):
        # One projection block followed by (repeats - 1) identity blocks.
        t = projection_unit(t, filters, stride)
        for _ in range(repeats - 1):
            t = identity_unit(t, filters)
        return t

    image_in = Input(input_shape)

    features = conv_bn_relu(image_in, 64, 7, 2)
    features = MaxPool2D(3, strides=2, padding='same')(features)

    # (base filters, number of blocks, stride of the first block)
    stage_specs = ((64, 3, 1), (128, 4, 2), (256, 6, 2), (512, 3, 2))
    for filters, repeats, stride in stage_specs:
        features = residual_stage(features, filters, repeats, stride)

    features = GlobalAvgPool2D()(features)

    tabular_in = Input(shape=(6,))
    tabular = Dense(6)(tabular_in)
    tabular = ReLU()(tabular)

    merged = Concatenate()([features, tabular])
    merged = Dense(8)(merged)
    merged = ReLU()(merged)
    prediction = Dense(1)(merged)

    return Model([image_in, tabular_in], prediction)
# Instantiate the network and render its layer graph.
model = build_model()
tf.keras.utils.plot_model(model)

In [None]:
# Patient-level split: every row of a patient lands on the same side.
excluded_patients = (
    'ID00011637202177653955184',
    'ID00052637202186188008618',
)
patient = data["Patient"].unique().tolist()
for excluded_id in excluded_patients:
    # list.remove raises ValueError if the ID is unexpectedly absent.
    patient.remove(excluded_id)

train, test = train_test_split(patient, test_size=0.1, random_state=31)

In [None]:
# Materialise independent copies of the train / test rows so later column
# assignments do not trigger pandas' SettingWithCopy warnings.
tr = data[data["Patient"].isin(train)].copy()
te = data[data["Patient"].isin(test)].copy()

# Shuffle the training rows with a fixed seed (same value as the patient
# split) so that Restart & Run All reproduces the same row order.
tr = tr.sample(frac=1, random_state=31)

# reset_index() keeps the original row labels in an 'index' column, exactly as
# the in-place variant did; downstream cells select columns by name.
tr = tr.reset_index()
te = te.reset_index()
tr.head()

In [None]:
# Column layouts shared by the train and test splits. 'Patient' comes first in
# both so downstream code can read the ID at position 0.
feature_cols = ['Patient', 'Weeks', 'Age', 'Sex', 'Ex-smoker', 'Never smoked', 'Currently smokes']
target_cols = ['Patient', 'Weeks', 'FVC']

tr_x, tr_y = tr[feature_cols].values.tolist(), tr[target_cols].values.tolist()
te_x, te_y = te[feature_cols].values.tolist(), te[target_cols].values.tolist()

In [None]:
class IGenerator(Sequence):
    """Keras ``Sequence`` yielding ``([images, tabular], target)`` batches.

    For every row of a batch, one file is drawn at random from that patient's
    image folder and loaded with ``get_img``.

    Args:
        x: list of rows ``[patient_id, feature_1, ..., feature_k]``.
        y: list of rows, aligned with ``x``, whose element at index 2 is the
            regression target.
        batch_size: number of rows per batch (defaults to the previously
            hard-coded 8). Trailing rows that do not fill a batch are dropped.
    """

    IMG_ROOT = '../input/osic-pulmonary-fibrosis-progression/train'

    def __init__(self, x, y, batch_size=8):
        self.seed = 1
        self.x = x
        self.y = y
        self.batch_size = batch_size
        # Retained for backward compatibility only; batching is now driven by
        # the ``idx`` argument of ``__getitem__``.
        self.step = 0
        self.train_data = {}
        for row in x:
            patient_id = row[0]
            # List each patient's folder once, not once per row. Sorting makes
            # the seeded choice below independent of the filesystem's
            # (arbitrary) listing order.
            if patient_id not in self.train_data:
                self.train_data[patient_id] = sorted(
                    os.listdir(f'{self.IMG_ROOT}/{patient_id}/')
                )

    def __len__(self):
        """Number of full batches available."""
        return len(self.x) // self.batch_size

    def __getitem__(self, idx):
        """Return batch number ``idx``.

        Raises:
            IndexError: if ``idx`` is outside ``range(len(self))``.
        """
        n_batches = len(self)
        if idx < 0:
            idx += n_batches
        if not 0 <= idx < n_batches:
            raise IndexError(f'batch index {idx} out of range for {n_batches} batches')

        # Honour the Sequence contract: the batch is a pure function of idx,
        # so Keras may shuffle, prefetch or parallelise freely.
        start = idx * self.batch_size
        rows = range(start, start + self.batch_size)
        b_csv = [self.x[j][1:] for j in rows]
        b_p = [self.x[j][0] for j in rows]
        b_fvc = [self.y[j][2] for j in rows]

        imgs = []
        for patient_id in b_p:
            # A private RNG gives the same draw as seeding the global
            # ``random`` module would, without clobbering global state.
            rng = random.Random(self.seed)
            self.seed += 1
            file_name = rng.choice(self.train_data[patient_id])
            imgs.append(get_img(f'{self.IMG_ROOT}/{patient_id}/{file_name}'))

        imgs, b_fvc, b_csv = np.array(imgs), np.array(b_fvc), np.array(b_csv)
        # Add the trailing channel axis expected by the image input.
        imgs = np.expand_dims(imgs, axis=-1)
        return [imgs, b_csv], b_fvc

In [None]:
epochs_ = 50

# Keep only the weights with the lowest validation loss. The file name must
# match the one loaded by the inference cell.
cpt = tf.keras.callbacks.ModelCheckpoint(
    filepath=f'effnet_{epochs_}.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='auto'
)

# Halve the learning rate after 5 epochs without val_loss improvement.
rlp = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    verbose=1,
    min_lr=10**(-8)
)

train_gen = IGenerator(x=tr_x, y=tr_y)
val_gen = IGenerator(x=te_x, y=te_y)

model = build_model()
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss="mae")

# Model.fit accepts keras.utils.Sequence objects directly; fit_generator is
# deprecated and has been removed from recent TensorFlow releases.
# Step counts come from the generators instead of a duplicated `// 8`.
history = model.fit(
    train_gen,
    steps_per_epoch=len(train_gen),
    validation_data=val_gen,
    validation_steps=len(val_gen),
    callbacks=[cpt, rlp],
    epochs=epochs_,
)

In [None]:
# Test-time prediction: average the model output over N_VIEWS images taken at
# evenly spaced positions in each patient's (sorted) file list.
epochs_ = 50
N_VIEWS = 5
IMG_ROOT = '../input/osic-pulmonary-fibrosis-progression/train'

model = build_model()
# The loss chosen here has no influence on predict(); compile is kept so the
# model remains usable for evaluate() afterwards.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss="mse")
model.load_weights(f'effnet_{epochs_}.h5')

# Use a dedicated name: rebinding `data` (the training DataFrame) to a dict
# would break any earlier cell that is re-run afterwards.
slice_files = {}
for row in te_x:
    patient_id = row[0]
    if patient_id not in slice_files:
        # os.listdir order is arbitrary; sort so the picks are deterministic.
        # NOTE(review): this is lexicographic order, which may differ from
        # anatomical slice order -- confirm the file naming scheme.
        slice_files[patient_id] = sorted(os.listdir(f'{IMG_ROOT}/{patient_id}/'))

b_p = [row[0] for row in te_x]
b_csv = np.array([row[1:] for row in te_x])
b_fvc = np.array([row[2] for row in te_y])

# Load the N_VIEWS images once per patient rather than once per table row.
patient_views = {}
for patient_id, files in slice_files.items():
    last = len(files) - 1
    patient_views[patient_id] = [
        get_img(f'{IMG_ROOT}/{patient_id}/{files[a * last // N_VIEWS]}')
        for a in range(1, N_VIEWS + 1)
    ]

view_predictions = []
for view in range(N_VIEWS):
    imgIn = np.array([patient_views[patient_id][view] for patient_id in b_p])
    imgIn = np.expand_dims(imgIn, axis=-1)  # add channel axis
    view_predictions.append(model.predict([imgIn, b_csv]))

# Mean over views; shape stays (n_rows, 1) as returned by predict().
yCap = sum(view_predictions) / N_VIEWS
print(yCap)
print(b_fvc)

In [None]:
# Attach the predictions as a flat 1-D column. assign() returns a new frame,
# so this neither mutates a filtered slice of `data` in place nor stores an
# (n, 1) array in a single column.
te = te.assign(**{"FVC predicted": np.ravel(yCap)})
te