DeepCT is written for TensorFlow v1. To make this notebook work, the DeepCT code has to be upgraded to v2 with the TensorFlow upgrade script (`tf_upgrade_v2`).

Furthermore, the following lines have to be changed manually:


*   run_deepct.py
  * Line 33: flags = tf.compat.v1.flags
  * Line 112: tf.compat.v1.flags.DEFINE_string
  * Line 118: tf.compat.v1.flags.DEFINE_string
  * Line 124: tf.compat.v1.flags.DEFINE_string
  * Line 130: tf.compat.v1.flags.DEFINE_string
  * Line 795: hidden_size = bert_output_layer.shape[-1]
  * Line 796: seq_length = bert_output_layer.shape[-2]
* modeling.py
  * Line 365: layer = tf.keras.layers.LayerNormalization(name=name,axis=-1,epsilon=1e-12,dtype=tf.float32)

    return layer(input_tensor)



In [None]:
# Fetch the DeepCT sources from GitHub.
!git clone https://github.com/AdeDZY/DeepCT.git

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
import tensorflow

# Run TensorFlow's v1 -> v2 upgrade script over the DeepCT tree.
# --intree and --outtree name the same directory, so the sources are rewritten in place.
!tf_upgrade_v2 --intree /content/DeepCT --outtree /content/DeepCT

# Convert the HuggingFace safetensors checkpoint to a TensorFlow ckpt file

In [4]:
# (pattern, replacement) pairs that turn a PyTorch state-dict key into a
# TensorFlow variable name. to_tf_var_name applies them with str.replace one
# after another in this exact order, so the order is significant.
var_map = (
        # "encoder.layer.0" -> "encoder.layer_0"; must run before "." becomes "/".
        ("layer.", "layer_"),
        # Embedding tables lose their ".weight" suffix.
        ("word_embeddings.weight", "word_embeddings"),
        ("position_embeddings.weight", "position_embeddings"),
        ("token_type_embeddings.weight", "token_type_embeddings"),
        # Remaining dots become path separators.
        (".", "/"),
        # These patterns contain "/" and therefore only match after the step above.
        ("LayerNorm/weight", "LayerNorm/gamma"),
        ("LayerNorm/bias", "LayerNorm/beta"),
        # Any "weight" still left at this point is renamed to "kernel".
        ("weight", "kernel"),
)

In [None]:
# Import the TF1 compatibility API as `tf` and switch off v2 behavior; the
# conversion code in this notebook uses tf.Session and tf.get_variable.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

In [None]:
import os
import numpy as np
from transformers import BertForSequenceClassification, BertModel


def to_tf_var_name(name: str):
    """Translate a PyTorch state-dict key into a TensorFlow variable name.

    Each ``(pattern, replacement)`` pair of the module-level ``var_map`` is
    applied with ``str.replace`` in the order the pairs are listed, so later
    pairs operate on the output of earlier ones.
    """
    tf_name = name
    for old, new in var_map:
        tf_name = tf_name.replace(old, new)
    return tf_name

def create_tf_var(tensor: np.ndarray, name: str, session: tf.Session):
    """Create and initialise a zero-filled TF variable shaped like ``tensor``.

    Args:
        tensor: array whose ``shape`` and ``dtype`` define the new variable.
        name: name handed to ``tf.get_variable``.
        session: session used to run the variable's initializer.

    Returns:
        The new variable; it holds zeros when this function returns.
    """
    variable = tf.get_variable(
        name=name,
        shape=tensor.shape,
        dtype=tf.dtypes.as_dtype(tensor.dtype),
        initializer=tf.zeros_initializer(),
    )
    init_op = tf.variables_initializer([variable])
    session.run(init_op)
    # Evaluate the variable once after initialisation; the fetched value is discarded.
    session.run(variable)
    return variable


def convert_pytorch_checkpoint_to_tf(model: BertModel, ckpt_dir: str, model_name: str):
    """Write the weights of a PyTorch BERT model to a TensorFlow checkpoint.

    Every entry of ``model.state_dict()`` except ``classifier.weight`` and
    ``classifier.bias`` is renamed with ``to_tf_var_name``, transposed when its
    PyTorch name contains one of the ``tensors_to_transpose`` substrings,
    copied into a newly created TF variable, and finally saved with
    ``tf.train.Saver``.

    Args:
        model: PyTorch model instance whose ``state_dict()`` is converted.
        ckpt_dir: directory the checkpoint is written to; created if missing.
        model_name: base name of the checkpoint file; ``-`` is replaced by
            ``_`` and ``.ckpt`` is appended.

    Variable names are derived from the state-dict keys alone; no prefix is
    added here.  NOTE(review): the earlier docstring listed only ``BertModel``
    as supported, while ``main`` in this notebook passes a
    ``BertForSequenceClassification`` -- confirm which key layout the consumer
    of the checkpoint expects.
    """

    # Weights whose PyTorch name contains one of these substrings are
    # transposed before being stored.
    tensors_to_transpose = ("dense.weight", "attention.self.query", "attention.self.key", "attention.self.value")

    # The classification head is left out of the checkpoint, so no
    # output_weights / output_bias variables are written.
    skipped_vars = {'classifier.bias', 'classifier.weight'}

    os.makedirs(ckpt_dir, exist_ok=True)

    state_dict = model.state_dict()

    print("preprocess... ")
    # Start from an empty graph so that re-running the cell does not collide
    # with variables created by a previous run.
    tf.reset_default_graph()
    with tf.Session() as session:
        for var_name in state_dict:
            print(var_name)
            if var_name in skipped_vars:
                print(f'skippin {var_name}')
                continue
            print("var_name", var_name)
            tf_name = to_tf_var_name(var_name)
            print(tf_name)

            # detach()/cpu() make the conversion work for tensors that live on
            # an accelerator or still track gradients.
            torch_tensor = state_dict[var_name].detach().cpu().numpy()
            if any(x in var_name for x in tensors_to_transpose):
                torch_tensor = torch_tensor.T
            tf_var = create_tf_var(tensor=torch_tensor, name=tf_name, session=session)
            tf.keras.backend.set_value(tf_var, torch_tensor)
            # Read the value back to confirm the assignment took effect.
            tf_weight = session.run(tf_var)
            print("Successfully created {}: {}".format(tf_name, np.allclose(tf_weight, torch_tensor)))

        # save tensorflow checkpoint file
        saver = tf.train.Saver(tf.trainable_variables())
        saver.save(session, os.path.join(ckpt_dir, model_name.replace("-", "_") + ".ckpt"))


def main(raw_args=None):
    """Load the ``merged-model`` checkpoint and convert it to TensorFlow.

    ``raw_args`` is accepted but not used.
    """
    hf_model = BertForSequenceClassification.from_pretrained('merged-model')
    # Alternative source: BertForSequenceClassification.from_pretrained('bert-base-uncased')

    # List every state-dict key before converting.
    for key in hf_model.state_dict():
        print(key)

    convert_pytorch_checkpoint_to_tf(
        model=hf_model,
        ckpt_dir="merged-model",
        model_name="convertedModel",
    )


# Run the conversion when this code is executed as the main module.
if __name__ == "__main__":
    main()

# Training

The data file can be downloaded from https://boston.lti.cs.cmu.edu/appendices/arXiv2019-DeepCT-Zhuyun-Dai/data/.

If a BERT model is trained, the vocab and config files can be the ones from Google's bert-base-uncased model.

The init checkpoint is the path to the converted TensorFlow checkpoint.

In [None]:
# NOTE(review): project_path is assigned but the command below does not reference it.
project_path = ""

# Launch DeepCT training. The path_to_* values appear to be placeholders to be
# replaced with real paths.
# NOTE(review): the upgrade cell earlier operates on /content/DeepCT, whereas this
# command runs /DeepCT/run_deepct.py -- confirm the script path.
!python /DeepCT/run_deepct.py  \
  --task_name=marcodoc \
  --do_train=true \
  --do_eval=false \
  --do_predict=false \
  --data_dir=path_to_data \
  --vocab_file=path_to_vocab \
  --bert_config_file=path_to_config \
  --init_checkpoint=path_to_converted_model \
  --max_seq_length=128 \
  --train_batch_size=16 \
  --learning_rate=2e-5 \
  --num_train_epochs=1.0 \
  --recall_field=title \
  --save_checkpoints_steps=10000 \
  --output_dir=DeepCT_Trained