In [6]:
# Installs the hypertune client that pneumonia/trainer/model.py imports to
# report the hyperparameter tuning metric.
!pip3 install cloudml-hypertune



In [5]:
import os

In [7]:
%%bash
# Look up the project currently configured for gcloud and print it.
# The expansions are quoted so the value is passed through exactly as gcloud
# reported it (no word splitting or glob expansion).
export PROJECT="$(gcloud config list project --format "value(core.project)")"
echo "Your current GCP Project Name is: ${PROJECT}"

Your current GCP Project Name is: qwiklabs-gcp-04-014eea1283f0


In [3]:
# TODO: Change these to try this notebook out
PROJECT = "qwiklabs-gcp-04-014eea1283f0"  # Replace with your PROJECT
BUCKET = "chest-xray-us-central"   # Replace with your BUCKET (set explicitly here; it is not derived from PROJECT)
REGION = "us-central1"  # Replace with your REGION

In [8]:
# Publish the notebook configuration as environment variables; the %%bash
# cells further down read them as ${PROJECT}, ${BUCKET}, ${REGION},
# ${TFVERSION} and ${PYTHONVERSION}.
os.environ.update(
    {
        "PROJECT": PROJECT,
        "BUCKET": BUCKET,
        "REGION": REGION,
        "TFVERSION": "2.1",
        "PYTHONVERSION": "3.7",
    }
)

In [9]:
%%bash
# Point the gcloud CLI at the project and region taken from the PROJECT and
# REGION environment variables.
gcloud config set project ${PROJECT}
gcloud config set compute/region ${REGION}

Updated property [core/project].
Updated property [compute/region].


In [32]:
%%bash
# Create the bucket only when it does not exist yet.
# `gsutil ls -b` queries this exact bucket, so another bucket whose name merely
# starts with or contains ${BUCKET} can no longer be mistaken for it (the
# previous `gsutil ls | grep` check was a substring match over all buckets).
if ! gsutil ls -b "gs://${BUCKET}" > /dev/null 2>&1; then
    gsutil mb -l "${REGION}" "gs://${BUCKET}"
fi

In [33]:
%%bash
# List the chest_xray folder of the bucket to check which data is present.
gsutil ls gs://${BUCKET}/chest_xray/

gs://chest-xray-us-central/chest_xray/chest_xray_labels.csv
gs://chest-xray-us-central/chest_xray/test/
gs://chest-xray-us-central/chest_xray/train/
gs://chest-xray-us-central/chest_xray/val/


In [19]:
%%bash
# Create the directory for the trainer package; the empty __init__.py lets
# Python import it as `trainer` (task.py does `from trainer import model`).
mkdir -p pneumonia/trainer
touch pneumonia/trainer/__init__.py

In [18]:
%%writefile pneumonia/trainer/task.py
import argparse
import json
import os

from trainer import model

import tensorflow as tf

if __name__ == "__main__":
    # Command-line entry point of the trainer package: parse the options,
    # normalise a few of them and hand the resulting dict to the model code.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--job-dir",
        default="junk",
        help="this model ignores this field, but it is required by gcloud"
    )
    cli.add_argument(
        "--train_data_path",
        required=True,
        help="GCS location of training data"
    )
    cli.add_argument(
        "--eval_data_path",
        required=True,
        help="GCS location of evaluation data"
    )
    cli.add_argument(
        "--output_dir",
        required=True,
        help="GCS location to write checkpoints and export models"
    )
    cli.add_argument(
        "--batch_size",
        type=int,
        default=32,
        help="Number of examples to compute gradient over."
    )
    cli.add_argument(
        "--num_epochs",
        type=int,
        default=10,
        help="Number of epochs to train the model."
    )
    cli.add_argument(
        "--train_examples",
        type=int,
        default=5000,
        help="""Number of examples (in thousands) to run the training job over.
        If this is more than actual # of examples available, it cycles through
        them. So specifying 1000 here when you have only 100k examples makes
        this 10 epochs."""
    )
    cli.add_argument(
        "--eval_steps",
        type=int,
        default=None,
        help="""Positive number of steps for which to evaluate model. Default
        to None, which means to evaluate until input_fn raises an end-of-input
        exception"""
    )

    # vars() returns the namespace's own attribute dict, so the edits below
    # operate on the parsed values directly.
    hparams = vars(cli.parse_args())

    # Drop the arguments supplied by the service that the model does not use.
    for unused_key in ("job_dir", "job-dir"):
        hparams.pop(unused_key, None)

    # --train_examples is given in thousands on the command line.
    hparams["train_examples"] = hparams["train_examples"] * 1000

    # During hyperparameter tuning TF_CONFIG carries the trial id; append it
    # to the output path (an empty string is appended when there is none).
    # This can be removed if hyperparameter tuning is not used.
    tf_config = json.loads(os.environ.get("TF_CONFIG", "{}"))
    trial_id = tf_config.get("task", {}).get("trial", "")
    hparams["output_dir"] = os.path.join(hparams["output_dir"], trial_id)

    # Run the training job
    model.train_and_evaluate(hparams)

Overwriting pneumonia/trainer/task.py


In [20]:
%%writefile pneumonia/trainer/model.py
import os
import shutil
import datetime

import hypertune

import numpy as np
import pandas as pd
import pathlib

#import cv2

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.layers import Dense, Flatten, Softmax

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D,GlobalAveragePooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

import warnings
# Suppress every Python warning raised from here on in this process.
warnings.filterwarnings('ignore')

import logging
# Restrict TensorFlow's logger to records of level ERROR and above.
logger = tf.get_logger()
logger.setLevel(logging.ERROR)


def load_data(train_path, val_path, batch_size):
    """Build the training and validation image datasets.

    Each file matched by a pattern is read, decoded as a 3-channel JPEG,
    converted to float32 with ``tf.image.convert_image_dtype`` and resized to
    127x127. The label is taken from the name of the file's parent directory:
    0 when it is ``NORMAL``, 1 otherwise.

    Args:
        train_path: File pattern of the training images, passed to
            ``tf.data.Dataset.list_files``.
        val_path: File pattern of the validation images, passed to
            ``tf.data.Dataset.list_files``.
        batch_size: Number of examples per batch.

    Returns:
        A ``(train_ds, val_ds)`` tuple of shuffled, batched and prefetched
        datasets yielding ``(image, label)`` pairs.
    """
    image_size = [127, 127]
    # TF 2.1 (the runtime configured in this notebook) exposes AUTOTUNE under
    # tf.data.experimental.
    autotune = tf.data.experimental.AUTOTUNE

    def parse_image(file_path):
        # Compute the label with tensor ops instead of a Python conditional so
        # it does not rely on autograph rewriting when traced by Dataset.map.
        parent_dir = tf.strings.split(file_path, os.path.sep)[-2]
        label = tf.cast(tf.not_equal(parent_dir, 'NORMAL'), tf.int32)

        image = tf.io.read_file(file_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, image_size, method='area')
        return image, label

    def reader_image(path_file, batch_size):
        list_ds = tf.data.Dataset.list_files(path_file)
        # Read and decode several files concurrently; a sequential map is the
        # bottleneck when the images are fetched one by one from storage.
        labeled_ds = list_ds.map(parse_image, num_parallel_calls=autotune)
        return labeled_ds.shuffle(100).batch(batch_size).prefetch(1)

    train_ds = reader_image(train_path, batch_size)
    val_ds = reader_image(val_path, batch_size)

    print(type(train_ds))

    # The spatial shape is fixed by the resize above, so report it directly
    # rather than pulling a whole batch through the pipeline just to print it.
    print(f'Outoupt : \n image shape: {tuple(image_size)}')

    return train_ds, val_ds

def train_and_evaluate(args):
    """Train the DenseNet169-based classifier, export it and report the tuning metric.

    A frozen, ImageNet-initialised DenseNet169 base is topped with a small
    dense head ending in a single sigmoid unit. The model is trained on the
    datasets returned by ``load_data``, exported as a SavedModel into a
    timestamped sub-directory of ``output_dir``, and one validation accuracy
    value is reported through ``hypertune`` under the tag ``accuracy``.

    Args:
        args: Dictionary with the keys
            ``train_data_path`` / ``eval_data_path``: file patterns for
                ``load_data``;
            ``batch_size``: batch size for both datasets;
            ``num_epochs``: number of epochs passed to ``fit``;
            ``output_dir``: where checkpoints and the export are written;
            ``eval_steps``: 1-based epoch whose validation accuracy is
                reported. When it is None, missing, or outside
                ``1..epochs run``, the last completed epoch is reported.

    Returns:
        The ``History`` object returned by ``fit``.
    """
    from tensorflow.keras.applications.densenet import DenseNet169

    # Reset Keras' global state before any layer is created, so the reset
    # cannot affect the freshly built base network.
    tf.keras.backend.clear_session()

    base = DenseNet169(weights='imagenet', include_top=False, input_shape=(127, 127, 3))

    # Freeze the pretrained feature extractor; only the head below is trained.
    for layer in base.layers:
        layer.trainable = False

    densenet_model = Sequential()
    densenet_model.add(base)
    densenet_model.add(GlobalAveragePooling2D())
    densenet_model.add(BatchNormalization())
    densenet_model.add(Dense(256, activation='relu'))
    densenet_model.add(Dropout(0.5))
    densenet_model.add(BatchNormalization())
    densenet_model.add(Dense(128, activation='relu'))
    densenet_model.add(Dropout(0.5))
    densenet_model.add(Dense(1, activation='sigmoid'))

    densenet_model.summary()

    eval_steps = args.get("eval_steps")

    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    optm = Adam(learning_rate=0.0001)
    densenet_model.compile(loss='binary_crossentropy', optimizer=optm,
                           metrics=['accuracy'])

    checkpoint_path = os.path.join(args["output_dir"], "checkpoints/pneumonia")
    cp_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path, verbose=1, save_weights_only=True)

    train_ds, val_ds = load_data(args["train_data_path"], args["eval_data_path"], args["batch_size"])

    # The checkpoint callback has to be handed to fit(); without it no
    # checkpoint is ever written.
    dense_history = densenet_model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=args["num_epochs"],
        callbacks=[cp_callback])
    print("cheking the model run")

    EXPORT_PATH = os.path.join(
        args["output_dir"], datetime.datetime.now().strftime("%Y%m%d%H%M%S"))
    tf.saved_model.save(
        obj=densenet_model, export_dir=EXPORT_PATH)

    print("Exported trained model to {}".format(EXPORT_PATH))

    val_accuracy = dense_history.history.get('val_accuracy', [])
    epochs_run = len(val_accuracy)
    if epochs_run == 0:
        # Nothing was trained/validated (e.g. num_epochs=0): there is no
        # metric to report.
        print("No validation accuracy recorded; skipping hyperparameter metric report")
        return dense_history

    # eval_steps used to index the history unconditionally, which raised
    # TypeError for the default None and IndexError when it exceeded the
    # number of epochs. Fall back to the last completed epoch in those cases.
    if eval_steps is not None and 1 <= eval_steps <= epochs_run:
        report_epoch = eval_steps
    else:
        report_epoch = epochs_run
    hp_metric = float(val_accuracy[report_epoch - 1])

    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag='accuracy',
        metric_value=hp_metric,
        global_step=report_epoch
    )
    return dense_history
    


Overwriting pneumonia/trainer/model.py


In [21]:
%%bash
# Run the trainer package locally for a single small epoch as a smoke test.
OUTDIR=pneumonia_trained
rm -rf "${OUTDIR}"
export PYTHONPATH="${PYTHONPATH}:${PWD}/pneumonia"
# The data patterns are quoted so the shell hands the wildcards to the trainer
# untouched; they are resolved later by tf.data.Dataset.list_files.
python3 -m trainer.task \
    --job-dir=./tmp \
    --train_data_path="gs://${BUCKET}/chest_xray/train/*/*.jpeg" \
    --eval_data_path="gs://${BUCKET}/chest_xray/test/*/*.jpeg" \
    --output_dir="${OUTDIR}" \
    --batch_size=4 \
    --num_epochs=1 \
    --train_examples=1 \
    --eval_steps=1

Process is terminated.


In [65]:
%%bash

# Submit a single training job to AI Platform.
OUTDIR="gs://${BUCKET}/pneumonia/trained_model"
JOBID="pneumonia_$(date -u +%y%m%d_%H%M%S)"

# Notes on the trainer arguments (after the bare `--`):
#  * the data patterns are quoted so the shell passes the wildcards through
#    unchanged;
#  * --eval_steps is passed explicitly: trainer/model.py uses it to pick the
#    epoch whose validation accuracy is reported, and with --num_epochs=1 the
#    only valid value is 1;
#  * the last argument carries no trailing backslash, so the command ends here.
gcloud ai-platform jobs submit training "${JOBID}" \
    --region="${REGION}" \
    --module-name=trainer.task \
    --package-path="$(pwd)/pneumonia/trainer" \
    --job-dir="${OUTDIR}" \
    --staging-bucket="gs://${BUCKET}" \
    --master-machine-type=n1-standard-8 \
    --scale-tier=CUSTOM \
    --runtime-version="${TFVERSION}" \
    --python-version="${PYTHONVERSION}" \
    -- \
    --train_data_path="gs://${BUCKET}/chest_xray/train/*/*.jpeg" \
    --eval_data_path="gs://${BUCKET}/chest_xray/test/*/*.jpeg" \
    --output_dir="${OUTDIR}" \
    --num_epochs=1 \
    --batch_size=32 \
    --eval_steps=1


jobId: pneumonia_210624_041649
state: QUEUED


Job [pneumonia_210624_041649] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ai-platform jobs describe pneumonia_210624_041649

or continue streaming the logs with the command

  $ gcloud ai-platform jobs stream-logs pneumonia_210624_041649


In [1]:
%%writefile hyperparam.yaml
# Hyperparameter tuning configuration, passed to
# `gcloud ai-platform jobs submit training` through --config=hyperparam.yaml.
trainingInput:
    # NOTE(review): the submit command in this notebook also passes
    # --scale-tier=CUSTOM with a master machine type; confirm which tier is
    # actually intended.
    scaleTier: STANDARD_1
    hyperparameters:
        # Same tag that trainer/model.py reports through hypertune.
        hyperparameterMetricTag: accuracy
        goal: MAXIMIZE
        maxTrials: 20
        maxParallelTrials: 5
        enableTrialEarlyStopping: True
        params:
        # Each trial receives its value as the trainer's --batch_size argument.
        - parameterName: batch_size
          type: INTEGER
          minValue: 8
          maxValue: 512
          scaleType: UNIT_LOG_SCALE

Overwriting hyperparam.yaml


In [24]:
%%bash
# Submit the hyperparameter tuning job described by hyperparam.yaml.
# Abort before the recursive delete below if BUCKET is unset or empty, so the
# rm can never run against a malformed gs:// path.
: "${BUCKET:?BUCKET must be set}"

OUTDIR="gs://${BUCKET}/pneumonia/hyperparam"
JOBNAME="pneumonia_$(date -u +%y%m%d_%H%M%S)"
echo "${OUTDIR}" "${REGION}" "${JOBNAME}"
# Remove the output of previous tuning runs.
gsutil -m rm -rf "${OUTDIR}"

# The data patterns are quoted so the shell passes the wildcards through
# unchanged. --batch_size is deliberately absent: it is the tuned parameter.
gcloud ai-platform jobs submit training "${JOBNAME}" \
    --staging-bucket="gs://${BUCKET}" \
    --region="${REGION}" \
    --module-name=trainer.task \
    --package-path="$(pwd)/pneumonia/trainer" \
    --master-machine-type=n1-standard-8 \
    --runtime-version="${TFVERSION}" \
    --python-version="${PYTHONVERSION}" \
    --scale-tier=CUSTOM \
    --config=hyperparam.yaml \
    -- \
    --train_data_path="gs://${BUCKET}/chest_xray/train/*/*.jpeg" \
    --eval_data_path="gs://${BUCKET}/chest_xray/test/*/*.jpeg" \
    --output_dir="${OUTDIR}" \
    --num_epochs=20 \
    --eval_steps=5

gs://chest-xray-us-central/pneumonia/hyperparam us-central1 pneumonia_210624_143333
jobId: pneumonia_210624_143333
state: QUEUED


Removing gs://chest-xray-us-central/pneumonia/hyperparam/#1624544624546399...
Removing gs://chest-xray-us-central/pneumonia/hyperparam/1/#1624544624738746...
Removing gs://chest-xray-us-central/pneumonia/hyperparam/1/20210624142241/#1624544624958532...
Removing gs://chest-xray-us-central/pneumonia/hyperparam/1/20210624142241/assets/#1624544627952308...
Removing gs://chest-xray-us-central/pneumonia/hyperparam/1/20210624142241/saved_model.pb#1624544632145147...
Removing gs://chest-xray-us-central/pneumonia/hyperparam/1/20210624142241/variables/#1624544625148010...
Removing gs://chest-xray-us-central/pneumonia/hyperparam/1/20210624142241/variables/variables.data-00000-of-00001#1624544627251267...
Removing gs://chest-xray-us-central/pneumonia/hyperparam/1/20210624142241/variables/variables.index#1624544627435226...
/ [8/8 objects] 100% Done                                                       
Operation completed over 8 objects.                                              
Job [pneumonia