## Medical

In [1]:
from math import *
import pandas as pd
import numpy as np
import tensorflow as tf
import warnings
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
from matplotlib import gridspec
from scipy import stats
import cv2
import keras
from keras.models import Sequential, model_from_yaml, Model
from keras.layers import Dense, Dropout, BatchNormalization, Activation, Convolution2D, Flatten, MaxPooling2D,Input
from keras.optimizers import Adam, RMSprop
from keras import backend as K
from tensorflow.python.client import device_lib
from tensorflow.python.ops import array_ops
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, TensorBoard
from keras.applications.densenet import DenseNet169
import albumentations
import seaborn as sns

sns.set_style("whitegrid")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)
warnings.filterwarnings('ignore')

print(device_lib.list_local_devices())
config = tf.ConfigProto(device_count={"CPU": 1, "GPU" : 1})
session = tf.Session(config=config)
K.set_session(session)

Using TensorFlow backend.


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 17562200555054932196
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 4864552140
locality {
  bus_id: 1
  links {
  }
}
incarnation: 5968941611066319613
physical_device_desc: "device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:08:00.0, compute capability: 6.1"
]


In [2]:
def preprocess_np(image):
    return (image.astype(np.uint8))


In [3]:
files = pd.read_csv(r"c:/users/ajaln/train_labels.csv", dtype={"label": np.str, "id": np.str})
patches = pd.read_csv(r"c:/users/ajaln/patch_id_wsi.csv")
wsi = patches["wsi"].unique()
np.random.shuffle(wsi)
wsi_split = pd.DataFrame(data=wsi, columns=["wsi"])
fold_size = int((wsi_split.shape[0]/20))

for i in range(0, 20):
    wsi_split.loc[i*fold_size:(i+1)*fold_size, "fold"] = i

patches = pd.merge(patches, wsi_split, on="wsi")
files = pd.merge(files, patches, on = "id")
files["filename"] = files["id"]+".tif"
files = shuffle(files)

In [4]:
files.head()

Unnamed: 0,id,label,wsi,fold,filename
0,f38a6374c348f90b587e046aac6079959adf3835,0,camelyon16_train_normal_033,3.0,f38a6374c348f90b587e046aac6079959adf3835.tif
1,c18f2d887b7ae4f6742ee445113fa1aef383ed77,1,camelyon16_train_tumor_054,0.0,c18f2d887b7ae4f6742ee445113fa1aef383ed77.tif
2,755db6279dae599ebb4d39a9123cce439965282d,0,camelyon16_train_tumor_008,2.0,755db6279dae599ebb4d39a9123cce439965282d.tif
3,bc3f0c64fb968ff4a8bd33af6971ecae77c75e08,0,camelyon16_train_tumor_077,3.0,bc3f0c64fb968ff4a8bd33af6971ecae77c75e08.tif
4,acfe80838488fae3c89bd21ade75be5c34e66be7,0,camelyon16_train_tumor_036,5.0,acfe80838488fae3c89bd21ade75be5c34e66be7.tif


In [5]:

datagen_np = ImageDataGenerator(preprocessing_function=preprocess_np)


In [7]:
for i in range(0,20):
    np_generator = datagen_np.flow_from_dataframe(dataframe=files[files["fold"]==i],
                                                    directory=r"c:/users/ajaln/trn/",
                                                    x_col='filename',
                                                    y_col='label',
                                                    has_ext=True,
                                                    target_size = (96, 96),
                                                    batch_size = 3200,
                                                    color_mode="rgb", 
                                                    class_mode = "binary")
    indep = np.empty([0,96,96,3])
    dep = np.empty([0])

    for j, d in np_generator:
        indep = np.append(indep, j, axis=0)
        dep = np.append(dep, d, axis=0)
        print (indep.shape)
        if (indep.shape[0]>np_generator.n):
            indep = indep[0:np_generator.n]
            dep = dep[0:np_generator.n]
            break
    np.save("indep_{0}.npy".format(i), indep)
    np.save("dep_{0}.npy".format(i), dep)
        

Found 7926 images belonging to 2 classes.
(3200, 96, 96, 3)
(6400, 96, 96, 3)
(7926, 96, 96, 3)
(11126, 96, 96, 3)
Found 9990 images belonging to 2 classes.
(3200, 96, 96, 3)
(6400, 96, 96, 3)
(9600, 96, 96, 3)
(9990, 96, 96, 3)
(13190, 96, 96, 3)
Found 8746 images belonging to 2 classes.
(3200, 96, 96, 3)
(6400, 96, 96, 3)
(8746, 96, 96, 3)
(11946, 96, 96, 3)
Found 7146 images belonging to 2 classes.
(3200, 96, 96, 3)
(6400, 96, 96, 3)
(7146, 96, 96, 3)
(10346, 96, 96, 3)
Found 10084 images belonging to 2 classes.
(3200, 96, 96, 3)
(6400, 96, 96, 3)
(9600, 96, 96, 3)
(10084, 96, 96, 3)
(13284, 96, 96, 3)
Found 8294 images belonging to 2 classes.
(3200, 96, 96, 3)
(6400, 96, 96, 3)
(8294, 96, 96, 3)
(11494, 96, 96, 3)
Found 9250 images belonging to 2 classes.
(3200, 96, 96, 3)
(6400, 96, 96, 3)
(9250, 96, 96, 3)
(12450, 96, 96, 3)
Found 6961 images belonging to 2 classes.
(3200, 96, 96, 3)
(6400, 96, 96, 3)
(6961, 96, 96, 3)
(10161, 96, 96, 3)
Found 7468 images belonging to 2 classes.


In [None]:
np_test_generator = datagen_np.flow_from_directory(directory=r"c:/users/ajaln/test/",
                                                    target_size=(96, 96),
                                                    color_mode="rgb", batch_size=3200,
                                                    class_mode="binary", shuffle=False)

res = np.empty([0,96,96,3])
samples = pd.read_csv(r"c:/users/ajaln/sample_submission.csv", usecols=["id"])
n = samples.shape[0]
print(n)
for i, d in np_test_generator:
    res = np.append(res, i, axis=0)
    print (res.shape)
    if (res.shape[0]>n):
        break
res = res[:n]
np.save("res.npy", res)
print (res.shape)