In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import pandas as pd
import tensorflow as tf
import scanpy as sc

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

import tensorflow_hub as hub

In [2]:
!pip install tensorflow_hub



In [2]:
from tensorflow.python.client import device_lib

device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 3956013210888104972,
 name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 6219080011094119183
 physical_device_desc: "device: XLA_CPU device",
 name: "/device:XLA_GPU:0"
 device_type: "XLA_GPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 12731547205585100115
 physical_device_desc: "device: XLA_GPU device",
 name: "/device:XLA_GPU:1"
 device_type: "XLA_GPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 11890204870399654755
 physical_device_desc: "device: XLA_GPU device",
 name: "/device:XLA_GPU:2"
 device_type: "XLA_GPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 2928258574088036564
 physical_device_desc: "device: XLA_GPU device",
 name: "/device:XLA_GPU:3"
 device_type: "XLA_GPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 17231559295860033636
 physical_device_desc: "device: XLA_GPU device"]

In [3]:
# imgs = np.load('/data/breast/model_inputs/he_resized.npy')
X = np.load('/data/breast/model_inputs/he_resized.npy').astype(np.uint8)
adata = sc.read_h5ad('/data/breast/model_inputs/adata.h5ad')
trans_adata = sc.read_h5ad('/data/breast/model_inputs/sctransform_adata.h5ad')
X.shape, adata.shape, trans_adata.shape

((53217, 224, 224, 3), (53217, 36601), (53217, 3000))

In [4]:
# IMAGE_SHAPE = (224, 224, 3)
# from skimage.transform import resize
# xs = [resize(imgs[i], IMAGE_SHAPE) * 255. for i in range(imgs.shape[0])]
# X = np.asarray(xs)

In [5]:
# np.save('/data/breast/model_inputs/he_resized.npy', X)

In [6]:
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder

In [7]:
from collections import Counter
Counter(adata.obs['predicted_cell_type']).most_common()

[('LumB-Tumor', 16903),
 ('Plasma', 12422),
 ('CAF', 8185),
 ('Basal-Tumor', 5371),
 ('Endothelial', 3592),
 ('Mono-Macro', 2783),
 ('Normal-duct', 2545),
 ('CD4-T', 760),
 ('DC', 337),
 ('B', 165),
 ('LumA-Tumor', 120),
 ('Her2-Tumor', 17),
 ('Mast', 12),
 ('Treg', 4),
 ('CD8-T', 1)]

In [8]:
adata.obs['label'] = adata.obs['predicted_cell_type'].to_list()

# getting rid of connective
mask = [True if x not in ['Mast', 'Treg', 'CD8-T'] else False for x in adata.obs['label']]
adata = adata[mask]
trans_adata = trans_adata[mask]
X = X[mask]

  res = method(*args, **kwargs)


In [9]:
encoder = OneHotEncoder()
labels = encoder.fit_transform(adata.obs['label'].to_numpy().reshape(-1, 1)).astype(np.int64).toarray()
labels[:5], encoder.categories_[0]

(array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]]),
 array(['B', 'Basal-Tumor', 'CAF', 'CD4-T', 'DC', 'Endothelial',
        'Her2-Tumor', 'LumA-Tumor', 'LumB-Tumor', 'Mono-Macro',
        'Normal-duct', 'Plasma'], dtype=object))

In [10]:
idxs = np.arange(X.shape[0])
train_idxs = np.random.choice(idxs, size=int(.7 * X.shape[0]), replace=False)
val_idxs = np.asarray([i for i in idxs if i not in train_idxs])

X_train = X[train_idxs]
X_val = X[val_idxs]
X_train_trans = trans_adata.X[train_idxs].toarray()
X_val_trans = trans_adata.X[val_idxs].toarray()
y_train = labels[train_idxs]
y_val = labels[val_idxs]
X_train.shape, X_val.shape

((37240, 224, 224, 3), (15960, 224, 224, 3))

In [11]:
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))

BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 100

train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
test_dataset = test_dataset.batch(BATCH_SIZE)

In [13]:
def augment(image_label, seed):
    image, label = image_label
    # Make a new seed
    new_seed = tf.random.experimental.stateless_split(seed, num=1)[0, :]
    image = tf.image.random_flip_left_right(image)
#     image = tf.keras.preprocessing.image.
    image = tf.keras.preprocessing.image.random_rotation(image, .2)
#     # Random crop back to the original size
#     image = tf.image.stateless_random_crop(
#       image, size=[IMG_SIZE, IMG_SIZE, 3], seed=seed)
#     # Random brightness
#     image = tf.image.stateless_random_brightness(
#       image, max_delta=0.5, seed=new_seed)
#     image = tf.clip_by_value(image, 0, 1)
    return image, label

In [17]:
# Create counter and zip together with train dataset
counter = tf.data.experimental.Counter()
train_dataset = tf.data.Dataset.zip((train_dataset, (counter, counter)))

In [20]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [21]:
train_dataset = (
    train_dataset
#     .shuffle(1000)
    .map(augment, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

AttributeError: in converted code:

    <ipython-input-13-5da3612d2fa4>:4 augment  *
        new_seed = tf.random.experimental.stateless_split(seed, num=1)[0, :]

    AttributeError: module 'tensorflow_core._api.v2.random.experimental' has no attribute 'stateless_split'


In [12]:
data_augmentation = tf.keras.Sequential([
  tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal'),
  tf.keras.layers.experimental.preprocessing.RandomFlip('vertical'),
  tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
])

AttributeError: module 'tensorflow_core.keras.layers.experimental.preprocessing' has no attribute 'RandomFlip'

In [None]:
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

In [None]:
rescale = tf.keras.layers.experimental.preprocessing.Rescaling(1./127.5, offset= -1)

In [None]:
for image, _ in train_dataset.take(1):
  plt.figure(figsize=(10, 10))
  first_image = image[0]
  for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    augmented_image = data_augmentation(tf.expand_dims(first_image, 0))
    plt.imshow(augmented_image[0] / 255)
    plt.axis('off')

In [None]:
# Create the base model from the pre-trained model MobileNet V2
IMG_SHAPE = (224, 224, 3)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

In [None]:
image_batch, label_batch = next(iter(train_dataset))
feature_batch = base_model(image_batch)
print(feature_batch.shape)

In [None]:
base_model.trainable = False
base_model.summary()

In [None]:
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)

In [None]:
# out_layer = tf.keras.layers.Dense(len(encoder.categories_[0]))
# prediction_layer = tf.keras.layers.Softmax()
prediction_layer = tf.keras.Sequential()
prediction_layer.add(tf.keras.layers.Dense(len(encoder.categories_[0])))
prediction_layer.add(tf.keras.layers.Softmax())
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape)

In [None]:
inputs = tf.keras.Input(shape=IMG_SHAPE)
x = data_augmentation(inputs)
x = preprocess_input(x)
x = base_model(x, training=False)
x = global_average_layer(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Dense(1280)(x)
outputs = prediction_layer(x)
model = tf.keras.Model(inputs, outputs)

In [None]:
# sklearn.utils.class_weight.compute_class_weight(class_weight, *, classes, y)
from sklearn.utils.class_weight import compute_class_weight
weights = compute_class_weight('balanced', np.arange(labels.shape[1]), np.argmax(labels, axis=1).flatten())
weights = {i:x for i, x in enumerate(weights)}
weights

In [None]:
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(lr=base_learning_rate),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'],)

In [None]:
model.summary()

In [None]:
initial_epochs = 10

loss0, accuracy0 = model.evaluate(test_dataset)

In [None]:
history = model.fit(train_dataset,
                    epochs=initial_epochs,
                    validation_data=test_dataset,
                    class_weight=weights)

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
# plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
# plt.ylim([0,1.0])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()