## Medical

In [1]:
from math import *
import pandas as pd
import numpy as np
import tensorflow as tf
import warnings
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
from matplotlib import gridspec
from scipy import stats
import cv2
import keras
from keras.models import Sequential, model_from_yaml, Model
from keras.layers import Dense, Dropout, BatchNormalization, Activation, Convolution2D, Flatten, MaxPooling2D,Input
from keras.optimizers import Adam, RMSprop
from keras import backend as K
from tensorflow.python.client import device_lib
from tensorflow.python.ops import array_ops
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, TensorBoard
from keras.applications.densenet import DenseNet169
import albumentations
import seaborn as sns

sns.set_style("whitegrid")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)
warnings.filterwarnings('ignore')

print(device_lib.list_local_devices())
config = tf.ConfigProto(device_count={"CPU": 1, "GPU" : 1})
session = tf.Session(config=config)
K.set_session(session)

Using TensorFlow backend.


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 12341127001846435675
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 4793146554
locality {
  bus_id: 1
  links {
  }
}
incarnation: 11796126582323103706
physical_device_desc: "device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:08:00.0, compute capability: 6.1"
]


In [2]:
def preprocess_np(image):
    return (image.astype(np.uint8))


In [3]:
files = pd.read_csv(r"c:/users/ajaln/train_labels.csv", dtype={"label": np.str, "id": np.str})
patches = pd.read_csv(r"c:/users/ajaln/patch_id_wsi.csv")
wsi = patches["wsi"].unique()
np.random.shuffle(wsi)
wsi_split = pd.DataFrame(data=wsi, columns=["wsi"])
wsi_split["is_val"] = False

val_length = wsi.shape[0] * 0.33
wsi_split.loc[0:val_length, "is_val"] = True

patches = pd.merge(patches, wsi_split, on="wsi")
files = pd.merge(files, patches, on = "id")
files["filename"] = files["id"]+".tif"
files = shuffle(files)

In [4]:
files.head()

Unnamed: 0,id,label,wsi,is_val,filename
177428,aa0b9af4e71c4dc83530c282ff6c72edf3868284,0,camelyon16_train_normal_120,True,aa0b9af4e71c4dc83530c282ff6c72edf3868284.tif
173752,f559ea6c31b740b59b638aa8e7d7c1c377948b97,1,camelyon16_train_tumor_061,True,f559ea6c31b740b59b638aa8e7d7c1c377948b97.tif
81623,c33134b3923cb067f9024a7acdf3115bec872a10,1,camelyon16_train_tumor_088,False,c33134b3923cb067f9024a7acdf3115bec872a10.tif
30453,4a21c92a5a5637be52a6f1ecd9902a09fbc00e1e,0,camelyon16_train_tumor_008,False,4a21c92a5a5637be52a6f1ecd9902a09fbc00e1e.tif
127766,2ab23c9e6dd2343293f2c7195deace63e7c20ec9,1,camelyon16_train_tumor_016,False,2ab23c9e6dd2343293f2c7195deace63e7c20ec9.tif


In [5]:

datagen_np = ImageDataGenerator(preprocessing_function=preprocess_np)


In [6]:
np_generator = datagen_np.flow_from_dataframe(dataframe=files[files["is_val"]==False],
                                                    directory=r"c:/users/ajaln/trn/",
                                                    x_col='filename',
                                                    y_col='label',
                                                    has_ext=True,
                                                    target_size = (96, 96),
                                                    batch_size = 3200,
                                                    color_mode="rgb", 
                                                    class_mode = "binary")

np_val_generator = datagen_np.flow_from_dataframe(dataframe=files[files["is_val"]==True],
                                                    directory=r"c:/users/ajaln/trn/",
                                                    x_col='filename',
                                                    y_col='label',
                                                    has_ext=True,
                                                    target_size = (96, 96),
                                                    batch_size = 3200,
                                                    color_mode="rgb", 
                                                    class_mode = "binary")

np_test_generator = datagen_np.flow_from_directory(directory=r"c:/users/ajaln/test/",
                                                    target_size=(96, 96),
                                                    color_mode="rgb", batch_size=3200,
                                                    class_mode="binary", shuffle=False)

Found 124576 images belonging to 2 classes.
Found 68176 images belonging to 2 classes.
Found 57458 images belonging to 1 classes.


In [8]:
indep = np.empty([0,96,96,3])
dep = np.empty([0])

for i, d in np_generator:
    indep = np.append(indep, i, axis=0)
    dep = np.append(dep, d, axis=0)
    print (indep.shape)
    if (indep.shape[0]>96000):
        break

(3200, 96, 96, 3)
(6400, 96, 96, 3)
(9600, 96, 96, 3)
(12800, 96, 96, 3)
(16000, 96, 96, 3)
(19200, 96, 96, 3)
(22400, 96, 96, 3)
(25600, 96, 96, 3)
(28800, 96, 96, 3)
(32000, 96, 96, 3)
(35200, 96, 96, 3)
(38400, 96, 96, 3)
(41600, 96, 96, 3)
(44800, 96, 96, 3)
(48000, 96, 96, 3)
(51200, 96, 96, 3)
(54400, 96, 96, 3)
(57600, 96, 96, 3)
(60800, 96, 96, 3)
(64000, 96, 96, 3)
(67200, 96, 96, 3)
(70400, 96, 96, 3)
(73600, 96, 96, 3)
(76800, 96, 96, 3)
(80000, 96, 96, 3)
(83200, 96, 96, 3)
(86400, 96, 96, 3)
(89600, 96, 96, 3)
(92800, 96, 96, 3)
(96000, 96, 96, 3)
(99200, 96, 96, 3)


In [9]:
indep = indep[:96000,:,:,:]        
dep = dep[:96000]        
np.save("indep.npy", indep)
np.save("dep.npy", dep)

In [10]:
indep_val = np.empty([0,96,96,3])
dep_val = np.empty([0])

for i, d in np_val_generator:
    indep_val = np.append(indep_val, i, axis=0)
    dep_val = np.append(dep_val, d, axis=0)
    print (indep_val.shape)
    if (indep_val.shape[0]>36000):
        break

(3200, 96, 96, 3)
(6400, 96, 96, 3)
(9600, 96, 96, 3)
(12800, 96, 96, 3)
(16000, 96, 96, 3)
(19200, 96, 96, 3)
(22400, 96, 96, 3)
(25600, 96, 96, 3)
(28800, 96, 96, 3)
(32000, 96, 96, 3)
(35200, 96, 96, 3)
(38400, 96, 96, 3)


In [11]:
indep_val = indep_val[:36000,:,:,:]        
dep_val = dep_val[:36000]        
np.save("indep_val.npy", indep_val)
np.save("dep_val.npy", dep_val)

In [None]:
res = np.empty([0,96,96,3])
samples = pd.read_csv(r"c:/users/ajaln/sample_submission.csv", usecols=["id"])
n = samples.shape[0]
print(n)
for i, d in np_test_generator:
    res = np.append(res, i, axis=0)
    print (res.shape)
    if (res.shape[0]>n):
        break
res = res[:n]
np.save("res.npy", res)
print (res.shape)