In [1]:
gpu_info = !nvidia-smi
gpu_info = "\n".join(gpu_info)
if gpu_info.find("failed") >= 0:
    print("Not connected to a GPU")
else:
    print(gpu_info)

Fri Oct  6 13:15:56 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    54W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
from psutil import virtual_memory

ram_gb = virtual_memory().total / 1e9
print("Your runtime has {:.1f} gigabytes of available RAM\n".format(ram_gb))

if ram_gb < 20:
    print("Not using a high-RAM runtime")
else:
    print("You are using a high-RAM runtime!")

Your runtime has 89.6 gigabytes of available RAM

You are using a high-RAM runtime!


In [3]:
!pip install wandb -qU
!pip install -U tensorflow
!pip install python-dotenv



In [4]:
from google.colab import drive

# Mount the Google Drive to access the files
drive.mount("/content/gdrive/")
work_directory = "/content/gdrive/MyDrive/wsi_code"

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [5]:
import os
import sys
from datetime import datetime
import numpy as np
import logging

# Add the path to your project root directory
if work_directory not in sys.path:
    sys.path.append(work_directory)

# my utility functions
from utils.general import create_directory
from utils.dataloader import select_case_data

# TensorFlow and Keras imports
import tensorflow as tf
from keras.layers import (
    Dense,
    GlobalAveragePooling2D,
)

from keras.optimizers import Adam
from keras.callbacks import (
    ReduceLROnPlateau,
    TensorBoard,
)
import wandb

# load env variables
from dotenv import load_dotenv

load_dotenv(os.path.join(work_directory, ".env"))

True

In [6]:
# Define data directories
DATASETS_PATH = os.path.join(work_directory, "datasets")
PROCESSED_PATH = os.path.join(DATASETS_PATH, "processed")
hdf5_file = os.path.join(PROCESSED_PATH, "patchs_384_40k.hdf5")
run_dir = os.path.join(work_directory, "runs", "40k")

# Create directories with datetime
model_dir = os.path.join(run_dir, "inception")

# Create the directories
create_directory(model_dir)

# Get the current datetime
current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

log_path = os.path.join(model_dir, f"{current_datetime}.log")

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    filename=log_path,  # Specify the file name and path
    filemode="w",  # 'w' for write mode, use 'a' to append to an existing file
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

Directory already exists: /content/gdrive/MyDrive/wsi_code/runs/40k/inception


In [7]:
# Load and preprocess the data
logger.info("Loading and preprocessing data...")
validation_images, validation_labels, train_images, train_labels = select_case_data(
    hdf5_file, selected_cases=[4]
)

# Define a normalization layer
normalization_layer = tf.keras.layers.Rescaling(1.0 / 255)

# One-hot encode the labels
num_classes = np.unique(train_labels).shape[
    0
]  # Replace with the actual number of classes
train_labels = tf.keras.utils.to_categorical(train_labels, num_classes)
validation_labels = tf.keras.utils.to_categorical(validation_labels, num_classes)


def preprocess_data(images, labels):
    images = normalization_layer(images)
    return images, labels


def create_and_preprocess_dataset(
    images, labels, batch_size, augment=False, shuffle_buffer_size=1000
):
    # Create a dataset from the input images and labels
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))

    # Shuffle the dataset for randomness
    dataset = dataset.shuffle(shuffle_buffer_size)

    if augment:
        # Apply data augmentation within the dataset pipeline
        dataset = dataset.map(lambda x, y: (tf.image.random_flip_left_right(x), y))
        dataset = dataset.map(lambda x, y: (tf.image.random_flip_up_down(x), y))
        dataset = dataset.map(
            lambda x, y: (tf.image.random_brightness(x, max_delta=0.05), y)
        )
        dataset = dataset.map(
            lambda x, y: (tf.image.random_contrast(x, lower=0.9, upper=1.1), y)
        )

    # Normalize the images
    dataset = dataset.map(preprocess_data)

    # Batch the dataset
    dataset = dataset.batch(batch_size)

    # Prefetch for efficient loading during training
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)

    return dataset


# Create TensorFlow datasets and apply normalization
logger.info("Creating datasets and applying normalization...")

batch_size = 16  # You can adjust this based on your available memory
num_parallel_calls = tf.data.AUTOTUNE

normalized_validation_ds = create_and_preprocess_dataset(
    validation_images, validation_labels, batch_size=batch_size
)
normalized_train_ds = create_and_preprocess_dataset(
    train_images, train_labels, augment=False, batch_size=batch_size
)

# Delete unused variables to free up memory
del validation_images, validation_labels, train_images, train_labels

logger.info("Data loading and preprocessing complete.")

In [8]:
# Create InceptionV3 base model
inception = tf.keras.applications.InceptionV3(
    input_shape=(384, 384, 3), weights="imagenet", include_top=False
)

# Freeze layers in the base model
for layer in inception.layers:
    layer.trainable = False

x = GlobalAveragePooling2D()(inception.output)
x = Dense(256, activation="relu")(x)
outputs = Dense(num_classes, activation="softmax")(x)

model = tf.keras.Model(inception.inputs, outputs, name="InceptionV3")

model.compile(
    optimizer=Adam(learning_rate=5e-5),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ],
)

# Display model summary
# model.summary()

In [9]:
# Define early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="loss", patience=10, restore_best_weights=True
)

# Define ReduceLROnPlateau callback
reduce_lr_on_plateau = ReduceLROnPlateau(
    monitor="loss", factor=0.5, patience=2, min_lr=1e-7
)

# Define TensorBoard callback
tensorboard_callback = TensorBoard(
    log_dir=model_dir,
    histogram_freq=1,  # Enable histogram computation
    write_graph=True,  # Write model graph to file
    write_images=True,  # Write model weights to file
    update_freq="epoch",
)

In [10]:
wandb_api_key = os.getenv("WANDB_API_KEY")

if wandb_api_key:
    wandb.login(key=wandb_api_key)
else:
    print("WANDB_API_KEY not found in the .env file.")

# Before wandb.init, call wandb.tensorboard.patch
wandb.tensorboard.patch(
    root_logdir=model_dir
)  # Replace model_dir with your log directory
wandb.init(
    project="wsi-classification-40k",
    sync_tensorboard=True,
    entity="hacettepe-cerrahpasa-sts",
    notes="inception_cross_4_final",
    tags=["inception", "v3", "cross_4", "final"],
)
# Initialize wandb callback
wandb_callback = wandb.keras.WandbCallback()

[34m[1mwandb[0m: Currently logged in as: [33maemreusta[0m ([33mhacettepe-cerrahpasa-sts[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc




In [11]:
# Train the model with callbacks
history = model.fit(
    normalized_train_ds,
    validation_data=normalized_validation_ds,
    epochs=100,
    workers=-1,
    use_multiprocessing=True,
    callbacks=[
        wandb_callback,
        early_stopping,
        reduce_lr_on_plateau,
        tensorboard_callback,
    ],
)

Epoch 1/100
   5/2000 [..............................] - ETA: 55s - loss: 0.7207 - categorical_accuracy: 1.0000 - precision: 1.0000 - recall: 0.4500





  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 2/100

  saving_api.save_model(
[34m[1mwandb[0m: Network error resolved after 0:00:11.534469, resuming normal operation.
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 3/100
Epoch 4/100

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 5/100
Epoch 6/100

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 7/100

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 8/100
Epoch 9/100

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 14/100
Epoch 15/100
Epoch 16/100

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 17/100
Epoch 18/100

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 19/100

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 20/100

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 21/100
Epoch 22/100

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 23/100

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 24/100

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s


Epoch 25/100

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231006_132116-se3c1v0a/files/model-best)... Done. 0.3s




In [12]:
wandb.finish()

VBox(children=(Label(value='1635.898 MB of 1635.898 MB uploaded (6.453 MB deduped)\r'), FloatProgress(value=1.…

0,1
categorical_accuracy,▃▅▅▆▆▇▇▇▇▇█████████▇█▆▆▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
global_step,▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,▆▇▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▂▂▃▃▇█
precision,▃▄▅▆▆▇▇▇▇▇▇████████▇█▆▆▁▁
recall,▄▅▆▆▇▇▇▇▇▇█████████▇█▆▆▁▁
train/epoch_categorical_accuracy,▃▅▅▆▆▇▇▇▇▇█████████▇█▆▆▁▁
train/epoch_loss,▆▇▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▂▂▃▃▇█
train/epoch_lr,█████████████████▄▄▂▂▁▁▁▁
train/epoch_precision,▃▄▅▆▆▇▇▇▇▇▇████████▇█▆▆▁▁

0,1
best_epoch,24.0
best_val_loss,1.96068
categorical_accuracy,0.96394
epoch,24.0
global_step,50000.0
loss,0.11247
precision,0.96879
recall,0.96028
train/epoch_categorical_accuracy,0.96394
train/epoch_loss,0.11247


In [13]:
# runtime.unassign()