In [None]:
# Colab setup cell: request TensorFlow 2.x and install the Python dependencies.
%tensorflow_version 2.x
!pip3 install --upgrade pip
!pip install -U numpy==1.24
!pip install -U t5==0.9.2
# NOTE(review): flax, jax and jaxlib are installed without a version pin, so the
# resolved versions can change between runs -- consider pinning them.
!pip install -U flax
!pip install -U jax jaxlib
# Restart the runtime after the installs in this cell have finished.
# Upload adc.json and operative_config.json to the working folder.
# NOTE(review): a later cell reads '/content/operative_config.gin' -- confirm
# whether ".json" above should read ".gin".

# These fix a dependency issue for tensorflow_gcs_config:
!pip uninstall -y tensorflow
!pip install tensorflow==2.12.0
!pip install -U tensorflow-text==2.12.0

In [None]:
# This import was moved here from below to ensure the dependency is fixed before going forward:
import tensorflow_gcs_config

In [None]:
import functools
import sys
import os
import time
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

import tensorflow.compat.v1 as tf
import tensorflow_datasets as tfds

import t5

# Base directory (Google Cloud Storage bucket) for this run.
# It must be a valid GCS bucket that contains the datasets.
BASE_DIR = "gs://tse_extension"  #@param { type: "string" }

if not BASE_DIR or BASE_DIR == "gs://":
    raise ValueError("You must enter a BASE_DIR.")
ON_CLOUD = True

if ON_CLOUD:
    from google.colab import auth
    # Set credentials for GCS reading/writing from Colab and TPU.
    TPU_TOPOLOGY = "2x2"
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
        TPU_ADDRESS = tpu.get_master()
        print('Running on TPU:', TPU_ADDRESS)
    except ValueError as err:
        # Raise a regular Exception subclass (BaseException is reserved for
        # interpreter-level exits) and chain the resolver error so the
        # underlying cause stays visible in the traceback.
        raise RuntimeError('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!') from err
    auth.authenticate_user()
    tf.config.experimental_connect_to_host(TPU_ADDRESS)
    tensorflow_gcs_config.configure_gcs_from_colab_auth()

# The rest of the notebook uses the TF1-style (graph mode) API.
tf.disable_v2_behavior()

# Improve logging.
from contextlib import contextmanager
import logging as py_logging

if ON_CLOUD:
    # Stop the TensorFlow logger from handing its records to the root logger,
    # and set the root logger's level to INFO.
    tf.get_logger().propagate = False
    py_logging.root.setLevel('INFO')

@contextmanager
def tf_verbosity_level(level):
    """Temporarily set the TensorFlow logging verbosity to ``level``.

    The verbosity that was active on entry is restored on exit, including when
    the body of the ``with`` block raises.

    Args:
        level: Verbosity value passed to ``tf.logging.set_verbosity``.

    Yields:
        None.
    """
    og_level = tf.logging.get_verbosity()
    tf.logging.set_verbosity(level)
    try:
        yield
    finally:
        # Without try/finally an exception raised inside the ``with`` body
        # would skip the restore and leave the temporary level in place.
        tf.logging.set_verbosity(og_level)

Running on TPU: grpc://10.13.250.178:8470


Instructions for updating:
non-resource variables are not supported in the long term


In [None]:
# For every dataset: the GCS location of the train/validation TSV files and the
# number of examples in each split (passed to the task as num_input_examples).

# Bug-fix, small.
tsv_path_bf_small = dict(
    train='gs://amh_t5_test/bfs_train_stacked.tsv',
    validation='gs://amh_t5_test/bf_s_eval.tsv',
)
examples_bf_small = {"train": 425580, "validation": 5835}

# Bug-fix, medium.
tsv_path_bf_medium = dict(
    train='gs://amh_t5_test/bfm_train_stacked.tsv',
    validation='gs://amh_t5_test/bf_m_eval.tsv',
)
examples_bf_medium = {"train": 522760, "validation": 6546}

# Mutant generation.
# NOTE(review): the validation count equals the bug-fix small one (5835) --
# confirm it was not copied over by accident.
tsv_path_mg = dict(
    train='gs://amh_t5_test/mg_train_stacked.tsv',
    validation='gs://amh_t5_test/mg_eval.tsv',
)
examples_mg = {"train": 924410, "validation": 5835}

# Bug-fix, small, baseline training file (validated on the bug-fix small eval file).
tsv_path_bf_small_baseline = dict(
    train='gs://amh_t5_test/baseline/bfs_baseline_training.tsv',
    validation='gs://amh_t5_test/bf_s_eval.tsv',
)
examples_bf_small_baseline = {"train": 46680, "validation": 5835}

# Assert generation.
# NOTE(review): both counts equal the bug-fix small baseline ones -- confirm
# they describe the assert-generation files.
tsv_path_ag = dict(
    train='gs://amh_t5_test/ag_training_original_shuffled.tsv',
    validation='gs://amh_t5_test/ag_eval.tsv',
)
examples_ag = {"train": 46680, "validation": 5835}

In [None]:
from t5.data import postprocessors as t5_postprocessors
from t5.seqio import Feature,SentencePieceVocabulary


# Path of the SentencePiece model file used to build the vocabulary.
# It must be the same one that was used in the pre-training phase.
vocab_model_path = 'gs://amh_t5_test/dl4se_vocab.model' #@param { type: "string" }

# Short aliases for the t5.data task registry and TFDS task class.
TaskRegistry = t5.data.TaskRegistry
TfdsTask = t5.data.TfdsTask


def get_default_vocabulary():
    """Build a SentencePiece vocabulary from ``vocab_model_path``.

    A new ``SentencePieceVocabulary`` object is constructed on every call.
    """
    # Forwarded as the second positional argument; presumably the number of
    # extra (sentinel) ids -- TODO confirm against the seqio signature.
    num_extra_ids = 100
    vocabulary = SentencePieceVocabulary(vocab_model_path, num_extra_ids)
    return vocabulary

# Feature spec passed as output_features to every task registered in this
# notebook. Both features set add_eos=True; "inputs" is additionally marked
# required=False. get_default_vocabulary() is called once per feature, so each
# Feature receives its own vocabulary object.
DEFAULT_OUTPUT_FEATURES = {
    "inputs": Feature(
        vocabulary=get_default_vocabulary(), add_eos=True, required=False),

    "targets": Feature(
        vocabulary=get_default_vocabulary(), add_eos=True)
}

In [None]:
def nq_dataset_bfp_small_single(split, shuffle_files=False):
    """Read the TSV listed in ``tsv_path_bf_small`` for ``split``.

    Args:
        split: Key into ``tsv_path_bf_small`` ("train" or "validation").
        shuffle_files: Accepted and discarded; it has no effect here.

    Returns:
        A dataset of dicts holding the two tab-separated columns of every
        line under the keys "X" and "Y".
    """
    del shuffle_files

    # Parser for one text line: two string columns separated by tabs, with
    # quote characters treated as ordinary text (use_quote_delim=False).
    # NOTE(review): record_defaults supplies per-column default values, so an
    # empty field would be read as the literal text "string" -- confirm that
    # this is intended.
    parse_line = functools.partial(
        tf.io.decode_csv,
        record_defaults=["string","string"],
        field_delim="\t",
        use_quote_delim=False,
    )

    def as_example(*columns):
        # Pair the parsed columns with their keys, in order.
        return dict(zip(["X", "Y"], columns))

    lines = tf.data.TextLineDataset(tsv_path_bf_small[split])
    rows = lines.map(
        parse_line, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return rows.map(as_example)


# Sanity check: print the first five raw validation examples as numpy values.
print("A few raw valid examples...")
for ex in tfds.as_numpy(nq_dataset_bfp_small_single("validation").take(5)):
    print(ex)

def bfp_preprocessing_small_single(ds):
    """Convert raw ``{"X", "Y"}`` examples into ``{"inputs", "targets"}`` pairs.

    "inputs" is ``ex["X"]`` with the prompt 'generate small patch: ' placed in
    front of it; "targets" is ``ex["Y"]``.

    Args:
        ds: Dataset of dicts with string entries "X" and "Y".

    Returns:
        The mapped dataset.
    """
    task_prefix = 'generate small patch: '

    def to_inputs_and_targets(ex):
        # Joining prefix and text with the default (empty) separator yields
        # the same string as concatenating them.
        prompt = tf.strings.join([task_prefix, ex['X']])
        expected = tf.strings.join([ex['Y']], separator=' ')
        return {'inputs': prompt, 'targets': expected}

    return ds.map(
        to_inputs_and_targets,
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )

# Aliases for the t5.data registry classes (not used by the statements below).
TaskRegistry = t5.data.TaskRegistry
TfdsTask = t5.data.TfdsTask

# Remove any task already registered under this name, then register it again
# (presumably so the cell can be re-run -- confirm).
t5.data.TaskRegistry.remove('bfp_small_single')
t5.data.TaskRegistry.add(
    "bfp_small_single",
    dataset_fn=nq_dataset_bfp_small_single,
    splits=["train", "validation"],
    text_preprocessor=[bfp_preprocessing_small_single],
    output_features = DEFAULT_OUTPUT_FEATURES,
    metric_fns=[t5.evaluation.metrics.accuracy],
    num_input_examples = examples_bf_small
)

def nq_dataset_bfp_medium_single(split, shuffle_files=False):
    """Read the TSV listed in ``tsv_path_bf_medium`` for ``split``.

    Args:
        split: Key into ``tsv_path_bf_medium`` ("train" or "validation").
        shuffle_files: Accepted and discarded; it has no effect here.

    Returns:
        A dataset of dicts holding the two tab-separated columns of every
        line under the keys "X" and "Y".
    """
    del shuffle_files

    # Parser for one text line: two string columns separated by tabs, with
    # quote characters treated as ordinary text (use_quote_delim=False).
    # NOTE(review): record_defaults supplies per-column default values, so an
    # empty field would be read as the literal text "string" -- confirm that
    # this is intended.
    parse_line = functools.partial(
        tf.io.decode_csv,
        record_defaults=["string","string"],
        field_delim="\t",
        use_quote_delim=False,
    )

    def as_example(*columns):
        # Pair the parsed columns with their keys, in order.
        return dict(zip(["X", "Y"], columns))

    lines = tf.data.TextLineDataset(tsv_path_bf_medium[split])
    rows = lines.map(
        parse_line, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return rows.map(as_example)


# Sanity check: print the first five raw validation examples as numpy values.
print("A few raw valid examples...")
for ex in tfds.as_numpy(nq_dataset_bfp_medium_single("validation").take(5)):
    print(ex)

def bfp_preprocessing_medium_single(ds):
    """Convert raw ``{"X", "Y"}`` examples into ``{"inputs", "targets"}`` pairs.

    "inputs" is ``ex["X"]`` with the prompt 'generate medium patch: ' placed in
    front of it; "targets" is ``ex["Y"]``.

    Args:
        ds: Dataset of dicts with string entries "X" and "Y".

    Returns:
        The mapped dataset.
    """
    task_prefix = 'generate medium patch: '

    def to_inputs_and_targets(ex):
        # Joining prefix and text with the default (empty) separator yields
        # the same string as concatenating them.
        prompt = tf.strings.join([task_prefix, ex['X']])
        expected = tf.strings.join([ex['Y']], separator=' ')
        return {'inputs': prompt, 'targets': expected}

    return ds.map(
        to_inputs_and_targets,
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )

# Aliases for the t5.data registry classes (not used by the statements below).
TaskRegistry = t5.data.TaskRegistry
TfdsTask = t5.data.TfdsTask

# Remove any task already registered under this name, then register it again
# (presumably so the cell can be re-run -- confirm).
t5.data.TaskRegistry.remove('bfp_medium_single')
t5.data.TaskRegistry.add(
    "bfp_medium_single",
    dataset_fn=nq_dataset_bfp_medium_single,
    splits=["train", "validation"],
    text_preprocessor=[bfp_preprocessing_medium_single],
    output_features = DEFAULT_OUTPUT_FEATURES,
    metric_fns=[t5.evaluation.metrics.accuracy],
    num_input_examples = examples_bf_medium
)

def nq_dataset_mg_single(split, shuffle_files=False):
    """Read the TSV listed in ``tsv_path_mg`` for ``split``.

    Args:
        split: Key into ``tsv_path_mg`` ("train" or "validation").
        shuffle_files: Accepted and discarded; it has no effect here.

    Returns:
        A dataset of dicts holding the two tab-separated columns of every
        line under the keys "X" and "Y".
    """
    del shuffle_files

    # Parser for one text line: two string columns separated by tabs, with
    # quote characters treated as ordinary text (use_quote_delim=False).
    # NOTE(review): record_defaults supplies per-column default values, so an
    # empty field would be read as the literal text "string" -- confirm that
    # this is intended.
    parse_line = functools.partial(
        tf.io.decode_csv,
        record_defaults=["string","string"],
        field_delim="\t",
        use_quote_delim=False,
    )

    def as_example(*columns):
        # Pair the parsed columns with their keys, in order.
        return dict(zip(["X", "Y"], columns))

    lines = tf.data.TextLineDataset(tsv_path_mg[split])
    rows = lines.map(
        parse_line, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return rows.map(as_example)


# Sanity check: print the first five raw validation examples as numpy values.
print("A few raw valid examples...")
for ex in tfds.as_numpy(nq_dataset_mg_single("validation").take(5)):
    print(ex)

def mg_preprocessing_single(ds):
    """Convert raw ``{"X", "Y"}`` examples into ``{"inputs", "targets"}`` pairs.

    "inputs" is ``ex["X"]`` with the prompt 'generate mutant: ' placed in front
    of it; "targets" is ``ex["Y"]``.

    Args:
        ds: Dataset of dicts with string entries "X" and "Y".

    Returns:
        The mapped dataset.
    """
    task_prefix = 'generate mutant: '

    def to_inputs_and_targets(ex):
        # Joining prefix and text with the default (empty) separator yields
        # the same string as concatenating them.
        prompt = tf.strings.join([task_prefix, ex['X']])
        expected = tf.strings.join([ex['Y']], separator=' ')
        return {'inputs': prompt, 'targets': expected}

    return ds.map(
        to_inputs_and_targets,
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )

# Aliases for the t5.data registry classes (not used by the statements below).
TaskRegistry = t5.data.TaskRegistry
TfdsTask = t5.data.TfdsTask

# Remove any task already registered under this name, then register it again
# (presumably so the cell can be re-run -- confirm).
t5.data.TaskRegistry.remove('mg_single')
t5.data.TaskRegistry.add(
    "mg_single",
    dataset_fn=nq_dataset_mg_single,
    splits=["train", "validation"],
    text_preprocessor=[mg_preprocessing_single],
    output_features = DEFAULT_OUTPUT_FEATURES,
    metric_fns=[t5.evaluation.metrics.accuracy],
    num_input_examples = examples_mg
)


def nq_dataset_bfp_small_baseline(split, shuffle_files=False):
    """Read the TSV listed in ``tsv_path_bf_small_baseline`` for ``split``.

    Args:
        split: Key into ``tsv_path_bf_small_baseline`` ("train" or
            "validation").
        shuffle_files: Accepted and discarded; it has no effect here.

    Returns:
        A dataset of dicts holding the two tab-separated columns of every
        line under the keys "X" and "Y".
    """
    del shuffle_files

    # Parser for one text line: two string columns separated by tabs, with
    # quote characters treated as ordinary text (use_quote_delim=False).
    # NOTE(review): record_defaults supplies per-column default values, so an
    # empty field would be read as the literal text "string" -- confirm that
    # this is intended.
    parse_line = functools.partial(
        tf.io.decode_csv,
        record_defaults=["string","string"],
        field_delim="\t",
        use_quote_delim=False,
    )

    def as_example(*columns):
        # Pair the parsed columns with their keys, in order.
        return dict(zip(["X", "Y"], columns))

    lines = tf.data.TextLineDataset(tsv_path_bf_small_baseline[split])
    rows = lines.map(
        parse_line, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return rows.map(as_example)


# Sanity check: print the first five raw validation examples as numpy values.
print("A few raw valid examples...")
for ex in tfds.as_numpy(nq_dataset_bfp_small_baseline("validation").take(5)):
    print(ex)

def bfp_preprocessing_small_baseline(ds):
    """Convert raw ``{"X", "Y"}`` examples into ``{"inputs", "targets"}`` pairs.

    "inputs" is ``ex["X"]`` with the prompt 'generate small patch: ' placed in
    front of it; "targets" is ``ex["Y"]``.

    Args:
        ds: Dataset of dicts with string entries "X" and "Y".

    Returns:
        The mapped dataset.
    """
    task_prefix = 'generate small patch: '

    def to_inputs_and_targets(ex):
        # Joining prefix and text with the default (empty) separator yields
        # the same string as concatenating them.
        prompt = tf.strings.join([task_prefix, ex['X']])
        expected = tf.strings.join([ex['Y']], separator=' ')
        return {'inputs': prompt, 'targets': expected}

    return ds.map(
        to_inputs_and_targets,
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )

# Aliases for the t5.data registry classes (not used by the statements below).
TaskRegistry = t5.data.TaskRegistry
TfdsTask = t5.data.TfdsTask

# Remove any task already registered under this name, then register it again
# (presumably so the cell can be re-run -- confirm).
t5.data.TaskRegistry.remove('bfp_small_baseline')
t5.data.TaskRegistry.add(
    "bfp_small_baseline",
    dataset_fn=nq_dataset_bfp_small_baseline,
    splits=["train", "validation"],
    text_preprocessor=[bfp_preprocessing_small_baseline],
    output_features = DEFAULT_OUTPUT_FEATURES,
    metric_fns=[t5.evaluation.metrics.accuracy],
    num_input_examples = examples_bf_small_baseline
)

# Asserts
def nq_dataset_ag_single(split, shuffle_files=False):
    """Read the TSV listed in ``tsv_path_ag`` for ``split``.

    Args:
        split: Key into ``tsv_path_ag`` ("train" or "validation").
        shuffle_files: Accepted and discarded; it has no effect here.

    Returns:
        A dataset of dicts holding the two tab-separated columns of every
        line under the keys "X" and "Y".
    """
    del shuffle_files

    # Parser for one text line: two string columns separated by tabs, with
    # quote characters treated as ordinary text (use_quote_delim=False).
    # NOTE(review): record_defaults supplies per-column default values, so an
    # empty field would be read as the literal text "string" -- confirm that
    # this is intended.
    parse_line = functools.partial(
        tf.io.decode_csv,
        record_defaults=["string","string"],
        field_delim="\t",
        use_quote_delim=False,
    )

    def as_example(*columns):
        # Pair the parsed columns with their keys, in order.
        return dict(zip(["X", "Y"], columns))

    lines = tf.data.TextLineDataset(tsv_path_ag[split])
    rows = lines.map(
        parse_line, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return rows.map(as_example)


# Sanity check: print the first five raw validation examples as numpy values.
print("A few raw valid examples...")
for ex in tfds.as_numpy(nq_dataset_ag_single("validation").take(5)):
    print(ex)

def ag_preprocessing_single(ds):
    """Convert raw ``{"X", "Y"}`` examples into ``{"inputs", "targets"}`` pairs.

    "inputs" is ``ex["X"]`` with the prompt 'generate assert: ' placed in front
    of it; "targets" is ``ex["Y"]``.

    Args:
        ds: Dataset of dicts with string entries "X" and "Y".

    Returns:
        The mapped dataset.
    """
    task_prefix = 'generate assert: '

    def to_inputs_and_targets(ex):
        # Joining prefix and text with the default (empty) separator yields
        # the same string as concatenating them.
        prompt = tf.strings.join([task_prefix, ex['X']])
        expected = tf.strings.join([ex['Y']], separator=' ')
        return {'inputs': prompt, 'targets': expected}

    return ds.map(
        to_inputs_and_targets,
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )

# Aliases for the t5.data registry classes (not used by the statements below).
TaskRegistry = t5.data.TaskRegistry
TfdsTask = t5.data.TfdsTask

# Remove any task already registered under this name, then register it again
# (presumably so the cell can be re-run -- confirm).
t5.data.TaskRegistry.remove('ag_single')
t5.data.TaskRegistry.add(
    "ag_single",
    dataset_fn=nq_dataset_ag_single,
    splits=["train", "validation"],
    text_preprocessor=[ag_preprocessing_single],
    output_features = DEFAULT_OUTPUT_FEATURES,
    metric_fns=[t5.evaluation.metrics.accuracy],
    num_input_examples = examples_ag
)

A few raw valid examples...
{'X': b'"public java.util.List < TYPE_1 > METHOD_1 ( ) { java.util.ArrayList < TYPE_1 > VAR_1 = new java.util.ArrayList < TYPE_1 > ( ) ; for ( TYPE_2 VAR_2 : VAR_3 ) { VAR_1 . METHOD_2 ( VAR_2 . METHOD_1 ( ) ) ; } return VAR_1 ; }"', 'Y': b'"public java.util.List < TYPE_1 > METHOD_1 ( ) { return VAR_1 ; }"'}
{'X': b'"public TYPE_1 < TYPE_2 > METHOD_1 ( TYPE_3 VAR_1 , java.lang.String VAR_2 ) { return METHOD_1 ( VAR_1 . toString ( ) , VAR_2 ) ; }"', 'Y': b'"public TYPE_1 < TYPE_2 > METHOD_1 ( TYPE_3 VAR_1 , java.lang.String VAR_2 , java.util.HashMap < java.lang.String , java.lang.String > parameters ) { return METHOD_1 ( VAR_1 . toString ( ) , VAR_2 , parameters ) ; }"'}
{'X': b'"public static void main ( java.lang.String [ ] args ) throws java.lang.Exception { TYPE_1 VAR_1 = new TYPE_1 ( ) ; VAR_1 . METHOD_1 ( ) ; VAR_1 . add ( VAR_2 ) ; VAR_1 . METHOD_2 ( true ) ; VAR_1 . init ( STRING_1 ) ; }"', 'Y': b'"public static void main ( java.lang.String [ ] args )

<t5.data.dataset_providers.FunctionTask at 0x7c12b6a3d570>

In [None]:

#Uncomment the following for the proportional sampling

# def _rate_num_input_examples(task):
#   if "train" in task.splits:
#     return float(task.num_input_examples("train"))
#   elif "validation" in task.splits:
#     return float(task.num_input_examples("validation"))
#   else:
#     raise ValueError("Task %s does not have a train or validation split." % (task.name))

# Balanced training strategy: every task in the mixture gets the same rate.
# Remove any earlier registration first so re-running this cell does not fail
# on a duplicate mixture name (same remove-then-add pattern as the task cells).
t5.data.MixtureRegistry.remove("all_tasks")
t5.data.MixtureRegistry.add(
    "all_tasks",
    ["bfp_small_single","bfp_medium_single","mg_single","ag_single"],
     default_rate=1.0
)

<seqio.dataset_providers.Mixture at 0x7c12a4c379a0>

In [None]:
# MODEL_DIR is where to store the new model.
# PRETRAINED_MODEL is where the existing model is.
# The pretrained model dir must have the operative_config.gin.

import t5.models
from mesh_tensorflow.transformer.learning_rate_schedules import truncated_rsqrt

# Key into the size table below; selects model parallelism, training batch
# size and the number of checkpoints to keep.
MODEL_SIZE = "small"
# Directory where the fine-tuned model is written.
MODEL_DIR = 'gs://amh_t5_test/CURRENT/baseline_ag_wpt_200_contrastive_fib_1-10'
#MODEL_DIR = 'gs://amh_t5_test/CURRENT/pretrained_bfs_25ep_temp1/' #@param { type: "string" }
# Directory of the existing model that fine-tuning starts from.
PRETRAINED_MODEL = 'gs://amh_t5_test/baseline/BASE_baseline_ag_w_pretrain/'
#PRETRAINED_MODEL = 'gs://amh_t5_test/pretrain_250k/'
#PRETRAINED_MODEL = 'gs://amh_t5_test/baseline/pretrain/' #@param { type: "string" }
# NOTE(review): NUM_EPOCH is only referenced by the commented-out per-epoch
# loop in the fine-tuning cell; the active finetune call uses a fixed step count.
NUM_EPOCH = 20
# Number of training rows; the fine-tuning cell divides it by the batch size to
# get STEPS_PER_EPOCH. The same number is hard-coded in CustomContrastiveLoss.
DATASET_SIZE = 184792

# (model_parallelism, train_batch_size, keep_checkpoint_max) per model size.
model_parallelism, train_batch_size, keep_checkpoint_max = {
    "small": (1, 256, 200),
    "base": (2, 128, 8),
    "large": (8, 64, 4),
    "3B": (8, 16, 1),
    "11B": (8, 16, 1)}[MODEL_SIZE]

# Make sure the output directory exists before the model is created.
tf.io.gfile.makedirs(MODEL_DIR)

model = t5.models.MtfModel(
    model_dir=MODEL_DIR,
    tpu=TPU_ADDRESS,
    # TPU_TOPOLOGY is defined in the setup cell but deliberately not passed here.
    #tpu_topology=TPU_TOPOLOGY,
    model_parallelism=model_parallelism,
    batch_size=train_batch_size,
    learning_rate_schedule = truncated_rsqrt,
    sequence_length={"inputs": 512, "targets": 512},
    save_checkpoints_steps=10000,
    # The per-size checkpoint limit is only applied when ON_CLOUD is true.
    keep_checkpoint_max=keep_checkpoint_max if ON_CLOUD else None,
    iterations_per_loop=100,
)

In [None]:
# Workaround for the "f flag" error seen on Colab: the kernel launcher's own
# command-line flags stay in sys.argv. Only needed when that error shows up,
# but harmless to run on Colab otherwise.
import sys

# Keep the original argument list around and show it before trimming.
old_args = sys.argv
print("ARGS:", old_args)

# Leave only the program path (first entry) in sys.argv.
sys.argv = [old_args[0]]


ARGS: ['/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py', '-f', '/root/.local/share/jupyter/runtime/kernel-09ad1838-067f-4b46-a4c6-f6a331e00bd6.json']


In [None]:
import gin
import numpy as np
import mesh_tensorflow as mtf
from mesh_tensorflow import placement_mesh_impl
import math

def fibonacci_rounded(number):
    """Return the Fibonacci number at index ``int(number)``.

    The sequence is 0-indexed and starts 0, 1, 1, 2, 3, 5, ..., so
    ``fibonacci_rounded(2) == 1`` and ``fibonacci_rounded(10) == 55``.

    Args:
        number: Non-negative int or float; it is truncated toward zero with
            ``int()`` to obtain the index.

    Returns:
        The Fibonacci number (an int) at the truncated index.

    Raises:
        ValueError: If the truncated index is negative. Previously a negative
            index silently wrapped around the internal list (``-1`` gave 1).
    """
    index = int(number)  # Truncate toward zero.
    if index < 0:
        raise ValueError(
            "fibonacci_rounded expects a non-negative number, got %r" % (number,))

    # Walk the sequence with two running values instead of building a list.
    previous, current = 0, 1
    for _ in range(index):
        previous, current = current, previous + current
    return previous

def fibonacci_unrounded(number):
    """Return the Fibonacci number at ``int(number)`` plus the fractional part.

    The sequence is 0-indexed and starts 0, 1, 1, 2, 3, 5, ... For example
    ``fibonacci_unrounded(5.5)`` is ``5 + 0.5``.

    Args:
        number: Non-negative int or float.

    Returns:
        ``fib(int(number)) + (number - int(number))``.

    Raises:
        ValueError: If the truncated index is negative.
    """
    index = int(number)  # Truncate toward zero.
    if index < 0:
        raise ValueError(
            "fibonacci_unrounded expects a non-negative number, got %r" % (number,))

    # Walk the sequence with two running values instead of building a list.
    previous, current = 0, 1
    for _ in range(index):
        previous, current = current, previous + current

    # Fractional part dropped by the truncation. The original line read
    # ``remainder = number = rounded_number`` (a chained assignment), which
    # added the whole index to the result instead of the fraction.
    remainder = number - index
    return previous + remainder

def linear_rounded(number):
    """Return ``number`` truncated toward zero as an int."""
    return int(number)

gin.enter_interactive_mode() # This allows us to re-assign the CustomContrastiveLoss during development.
@gin.configurable(module='tf.losses')
class CustomContrastiveLoss:
    """Gin-configurable callable loss whose divisor follows a Fibonacci schedule.

    Every call advances ``self.temp`` by ``self.rate`` and divides the absolute
    target/logit difference by ``fibonacci_rounded(self.temp)``.

    NOTE(review): ``self.temp`` is ordinary Python state that changes only when
    ``__call__`` executes. If the framework calls the loss once while building
    the graph rather than once per training step, the divisor never moves past
    its first value -- confirm how often ``__call__`` actually runs.
    """

    def __init__(self, param=0.5):
        """Set up the schedule constants and print them.

        Args:
            param: Stored on the instance; not read by the temperature-based
                ``__call__`` below.
        """
        # Param is margin for margin-based and temperature for temperature-based implementations.
        self.param = param
        # NOTE(review): epochs, datasize and batchsize are hard-coded here and
        # are not taken from NUM_EPOCH / DATASET_SIZE / train_batch_size set in
        # the model configuration cell -- keep them in sync by hand.
        self.epochs = 200
        self.datasize = 184792 # replace with length of training dataset
        self.batchsize = 256
        # Total number of scheduled steps: floor(datasize / batchsize * epochs).
        self.steps = (self.datasize / self.batchsize) * self.epochs
        self.steps = math.floor(self.steps)
        self.minval = 2 # Start at index 2 of 0, 1, 1, 2, ... so the repeated 1 is skipped.
        self.maxval = 10 # Indices 2..10 give the Fibonacci values 1, 2, 3, 5, 8, 13, 21, 34, 55.
        # Amount added to the schedule index on every call, so that it goes
        # from minval to maxval over self.steps calls.
        self.rate = ((self.maxval - self.minval) / self.steps)
        self.temp = self.minval
        # This print statement exists so that we can confirm the custom
        # loss function is being called at runtime.
        print("Using Contrastive Learning with Schedule=", self.minval, self.maxval)
        print("Steps: ", self.steps)
        print("Rate: ", self.rate)
        print(self.temp)
    # Temperature-based approach
    def __call__(self, transformer, context, logits, targets, output_vocab_dim, temperature):
        """Temperature-based contrastive loss function.

        Args:
            transformer: Not used in this method.
            context: Not used in this method.
            logits: A mtf.Tensor; summed over ``output_vocab_dim`` before use
                (presumably [outer_batch, batch, length, vocab] -- TODO confirm).
            targets: A mtf.Tensor; cast to the dtype of ``logits``.
            output_vocab_dim: The dimension over which the logits are reduced.
            temperature: Not used in this method; the divisor comes from the
                internal Fibonacci schedule instead.
                NOTE(review): this is a required argument -- confirm that the
                caller actually supplies it.

        Returns:
            The mean, over all elements, of the negative log of the softmax
            scores computed below.
        """
        # Advance the schedule index and look up the matching Fibonacci value.
        self.temp += self.rate
        #self.temp -= self.rate
        fib = fibonacci_rounded(self.temp)
        targets = mtf.cast(targets, logits.dtype)
        reduced_logits = mtf.reduce_sum(logits, reduced_dim=output_vocab_dim)

        # Absolute element-wise difference between targets and summed logits.
        pairwise_diff = mtf.sub(targets, reduced_logits)
        abs_diff = mtf.abs(pairwise_diff)

        scaled_logits = mtf.divide(abs_diff, fib)  # Scale logits by temperature
        # NOTE(review): output_vocab_dim was already summed out by reduce_sum
        # above, yet it is used again as the softmax dimension -- confirm this
        # is valid in mesh_tensorflow.
        softmax_scores = mtf.softmax(-scaled_logits, dim=output_vocab_dim)  # Apply softmax operation
        log_probs = mtf.log(softmax_scores)

        loss = -log_probs
        mean_loss = mtf.reduce_mean(loss)

        return mean_loss

    # # Margin-based approach
    # def __call__(self, transformer, context, logits, targets, output_vocab_dim):
    #     """Contrastive loss function.

    #     Args:
    #         logits: A mtf.Tensor of shape [outer_batch, batch, length].
    #         targets: A mtf.Tensor of shape [outer_batch, batch, length].
    #         margin: A float value representing the margin for contrastive loss.

    #     Returns:
    #         A mtf.Tensor of shape [outer_batch, batch] representing the contrastive loss.

    #     Raises:
    #         ValueError: If the shapes of logits and targets do not match.
    #     """
    #     targets = mtf.cast(targets, logits.dtype)
    #     reduced_logits = mtf.reduce_sum(logits, reduced_dim=output_vocab_dim)

    #     pairwise_diff = mtf.sub(targets, reduced_logits)
    #     abs_diff = mtf.abs(pairwise_diff)
    #     modified_abs_diff = mtf.sub(self.param, abs_diff)
    #     loss = mtf.maximum(modified_abs_diff, 0)
    #     mean_loss = mtf.reduce_mean(loss)

    #     return mean_loss

In [None]:
import gin
from tqdm import tqdm

# Gin config file parsed right before fine-tuning starts.
PATH_GIN_FILE = '/content/operative_config.gin'
# Whole training steps per pass over DATASET_SIZE rows (fraction truncated).
# Within this cell it is only referenced by the commented-out per-epoch loop.
STEPS_PER_EPOCH = int(DATASET_SIZE/train_batch_size)
# Tasks for the evaluation loop, which is currently commented out at the end of this cell.
task_list = ["ag_single"]

# def set_dynamic_task(selection):
#     t5.data.TaskRegistry.remove('bfp_small')
#     t5.data.TaskRegistry.add(
#         "bfp_small",
#         dataset_fn=lambda split, shuffle_files: nq_dataset_bfp_small(selection, split, shuffle_files),
#         splits=["train", "validation"],
#         text_preprocessor=[bfp_preprocessing_small],
#         output_features = DEFAULT_OUTPUT_FEATURES,
#         metric_fns=[t5.evaluation.metrics.accuracy],
#         num_input_examples = examples_bf_small
#     )

# for epoch in range(NUM_EPOCH):
#     print("EPOCH: ", epoch)
#     dataset_selection = epoch % 10
#     set_dynamic_task(dataset_selection)

#     model.batch_size = 128
#     with gin.unlock_config():
#         gin.parse_config_file(PATH_GIN_FILE)
#         # MtfModel determines how much to train based on (PRETRAINED_STEPS) + (finetune_steps)
#         # As we keep the same pretrained model, we have to increase the number of finetune_steps
#         # in each epoch to provide the new final target.
#         # If this isn't done, after the first epoch you will see epochs pass without any processing.
#         # This is due to it starting the training, realizing it is at the number of steps (from the first epoch),
#         # and stopping.
#         model.finetune('bfp_small',
#                       finetune_steps=STEPS_PER_EPOCH * (epoch + 1),
#                       pretrained_model_dir=PRETRAINED_MODEL,
#         )

with gin.unlock_config():
  gin.parse_config_file(PATH_GIN_FILE)
  # MtfModel determines how much to train based on (PRETRAINED_STEPS) + (finetune_steps).
  # The commented-out per-epoch loop above keeps the same pretrained model, so it has to
  # raise finetune_steps in every epoch to provide the new final target.
  # If this isn't done, after the first epoch you will see epochs pass without any processing:
  # training starts, finds it is already at the target number of steps (from the first epoch),
  # and stops.
  # The active call below trains once, with a single fixed step count.
  # NOTE(review): the inline note on finetune_steps (400 epochs, 52861 rows, batch 128) does not
  # work out to 144360; CustomContrastiveLoss prints 144368 steps for 200 epochs of 184792 rows
  # at batch size 256 -- confirm which figure is intended.
  model.finetune('ag_single',
                finetune_steps=144360, #400 epochs at 52861 rows, 128 batch size
                pretrained_model_dir=PRETRAINED_MODEL,
  )
    # model.batch_size = 64
    # for task in task_list:

    #     model.eval(
    #       mixture_or_task_name=task,
    #       checkpoint_steps=-1
    #       )

INFO:root:system_path_file_exists:gs://amh_t5_test/baseline/BASE_baseline_ag_w_pretrain/operative_config.gin
ERROR:root:Path not found: gs://amh_t5_test/baseline/BASE_baseline_ag_w_pretrain/operative_config.gin
From /usr/local/lib/python3.10/dist-packages/mesh_tensorflow/transformer/utils.py:2043: TPUConfig.__new__ (from tensorflow_estimator.python.estimator.tpu.tpu_config) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.keras instead.
From /usr/local/lib/python3.10/dist-packages/mesh_tensorflow/transformer/utils.py:2059: RunConfig.__init__ (from tensorflow_estimator.python.estimator.tpu.tpu_config) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.keras instead.
From /usr/local/lib/python3.10/dist-packages/tensorflow_estimator/python/estimator/tpu/tpu_config.py:268: RunConfig.__init__ (from tensorflow_estimator.python.estimator.run_config) is deprecated and will be removed in a future version.
Instructions

Using Contrastive Learning with Schedule= 2 10
Steps:  144368
Rate:  5.54139421478444e-05
2


From /usr/local/lib/python3.10/dist-packages/tensorflow/python/training/training_util.py:396: Variable.initialized_value (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
From /usr/local/lib/python3.10/dist-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py:2371: StepCounterHook.__init__ (from tensorflow.python.training.basic_session_run_hooks) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.keras instead.
From /usr/local/lib/python3.10/dist-packages/tensorflow/python/training/basic_session_run_hooks.py:686: SecondOrStepTimer.__init__ (from tensorflow.python.training.basic_session_run_hooks) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.keras instead.
SimdMeshImpl ignoring devices ['', '', '', '

In [None]:
# # Original baseline fine tuning.

# PATH_GIN_FILE = 'operative_config.gin'
# STEP = 37371 # Roughly 90 epochs for bf_s, (51350/128)*90

# with gin.unlock_config():
#     gin.parse_config_file(PATH_GIN_FILE)
#     model.finetune('bfp_small',
#                    finetune_steps=STEP,
#                    pretrained_model_dir=PRETRAINED_MODEL,
#     )


In [None]:
# # With eval: WIP

# import t5.models
# from mesh_tensorflow.transformer.learning_rate_schedules import truncated_rsqrt
# import tensorflow_addons as tfa

# MODEL_SIZE = "small"
# MODEL_DIR = 'gs://amh_t5_test/bf_s_categorized/categorical_model/' #@param { type: "string" }
# PRETRAINED_MODEL = 'gs://amh_t5_test/baseline/bf_nopretrain/' #@param { type: "string" }

# # MODEL_DIR = 'gs://amh_t5_test/no_pt/bf_s' #@param { type: "string" }
# # PRETRAINED_MODEL = 'gs://amh_t5_test/no_pt' #@param { type: "string" }


# model_parallelism, train_batch_size, keep_checkpoint_max = {
#     "small": (1, 128, 200),
#     "base": (2, 128, 8),
#     "large": (8, 64, 4),
#     "3B": (8, 16, 1),
#     "11B": (8, 16, 1)}[MODEL_SIZE]

# tf.io.gfile.makedirs(MODEL_DIR)
# def set_dynamic_task(selection):
#     t5.data.TaskRegistry.remove('bfp_small')
#     t5.data.TaskRegistry.add(
#         "bfp_small",
#         dataset_fn=lambda split, shuffle_files: nq_dataset_bfp_small(selection, split, shuffle_files),
#         splits=["train", "validation"],
#         text_preprocessor=[bfp_preprocessing_small],
#         output_features = DEFAULT_OUTPUT_FEATURES,
#         metric_fns=[t5.evaluation.metrics.accuracy],
#         num_input_examples = examples_bf_small
#     )
# model = t5.models.MtfModel(
#     model_dir=MODEL_DIR,
#     tpu=TPU_ADDRESS,
#     #tpu_topology=TPU_TOPOLOGY,
#     model_parallelism=model_parallelism,
#     batch_size=64,
#     learning_rate_schedule = truncated_rsqrt,
#     sequence_length={"inputs": 512, "targets": 512},
#     save_checkpoints_steps=10000,
#     keep_checkpoint_max=keep_checkpoint_max if ON_CLOUD else None,
#     iterations_per_loop=100,
# )
# set_dynamic_task(0)
# # Use a larger batch size for evaluation, which requires less memory.
# # For Mutant Generation we rely on TF's predictions with beam size K=1

# PATH_GIN_FILE = 'operative_config.gin'
# import gin

# with gin.unlock_config():
#     gin.external_configurable(tfa.losses.ContrastiveLoss, module='tf.losses')
#     gin.parse_config_file(PATH_GIN_FILE)
#     task_list = ["bfp_small"]
#     model.batch_size = 64
#     for task in task_list:

#         model.eval(
#           mixture_or_task_name=task,
#           checkpoint_steps=-1
#           )
