In [None]:
# Notebook-wide configuration. Every resource name below is derived from the
# project id so that changing PROJECT_ID keeps the bucket and image names in sync.
PROJECT_ID = "vtxdemos"
# Bucket the Vertex AI SDK uses for staging artifacts.
STAGING_FOLDER_URI = f"gs://{PROJECT_ID}-staging"
# Tag of the custom training image that is built and pushed further down.
IMAGE_URI = f"gcr.io/{PROJECT_ID}/tensorflow-gpu-nlp:v1"
# Base output directory handed to the custom job.
MODEL_URI = f"gs://{PROJECT_ID}-models/nlp"

In [None]:
from google.cloud import aiplatform as aip

In [None]:
# Recreate an empty build-context directory for the training image.
!rm -rf training; mkdir training

In [None]:
%%writefile training/train.py
#%%
import os
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from google.cloud import bigquery
import tensorflow_datasets as tfds
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)
print("GPU is", "available" if tf.config.list_physical_devices('GPU') else "NOT AVAILABLE")

client = bigquery.Client(project="vtxdemos")

## Loading testing dataset from bigquery
sql = "select * from `public.train_nlp`"
train_df = client.query(sql).to_dataframe()
train_examples = np.array([i.encode('utf-8') for i in train_df['text']], dtype="object")
train_labels = train_df['labels'].to_numpy(dtype=int)

## Loading testing dataset from bigquery
sql = "select * from `vtxdemos.public.train_nlp`"
test_df = client.query(sql).to_dataframe()
test_examples = np.array([i.encode('utf-8') for i in test_df['text']], dtype="object")
test_labels = test_df['labels'].to_numpy(dtype=int)

## Load pre-trained model (BERT)
model = "https://tfhub.dev/google/nnlm-en-dim50/2"
hub_layer = hub.KerasLayer(model, input_shape=[], dtype=tf.string, trainable=True)

## Splitting datasets
x_val = train_examples[:10000]
partial_x_train = train_examples[10000:]

y_val = train_labels[:10000]
partial_y_train = train_labels[10000:]

## Create new nn layers
model = tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(1))

model.compile(optimizer='adam',
              loss=tf.losses.BinaryCrossentropy(from_logits=True),
              metrics=[tf.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')])

#%%
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val),
                    verbose=1,
                    callbacks=[callback])
model.save(os.getenv('AIP_MODEL_DIR'))

In [None]:
%%writefile training/requirements.txt
tensorflow==2.11.0
tensorflow_hub
tensorflow-datasets
# TensorFlow 2.11 declares no upper bound on NumPy but its wheels are built
# against NumPy 1.x, so keep pip from resolving to NumPy 2.
numpy>=1.20,<2
pandas
google-cloud-bigquery
db-dtypes

In [None]:
%%writefile training/Dockerfile
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
ARG DEBIAN_FRONTEND=noninteractive

# Refresh the package index and install in the same layer so a cached, stale
# index is never reused by the install step; drop the lists afterwards to keep
# the image smaller.
RUN apt-get update -y && \
    apt-get install -y python3.10 python3-pip && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the training script so that
# editing train.py does not invalidate the (slow) dependency layer.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

COPY train.py train.py

CMD ["python3", "train.py"]

In [None]:
# Build the training image and publish it. `&&` skips the push when the build
# fails, so a stale local image is never pushed under this tag.
!docker build -t $IMAGE_URI training/. && docker push $IMAGE_URI

In [None]:
# Point the Vertex AI SDK at the project and the staging bucket.
aip.init(project=PROJECT_ID, staging_bucket=STAGING_FOLDER_URI)

# One worker pool with a single replica: an n1-standard-8 VM with one T4 GPU
# running the custom training image.
worker_pool_specs = [
    {
        "machine_spec": {
            "machine_type": "n1-standard-8",
            "accelerator_type": "NVIDIA_TESLA_T4",
            "accelerator_count": 1,
        },
        # replica_count is an integer field, so pass an int, not a string.
        "replica_count": 1,
        "container_spec": {
            "image_uri": IMAGE_URI,
        },
    }
]

# base_output_dir is the Cloud Storage location for the job's output; Vertex AI
# derives the AIP_MODEL_DIR environment variable seen by the container from it.
job = aip.CustomJob(
    display_name="tensorflow-gpu-nlp",
    worker_pool_specs=worker_pool_specs,
    base_output_dir=MODEL_URI,
)

# Submit the job (with the default arguments this call waits for completion).
job.run()