## Fine-Tuning a Summarization Model

In [1]:
import tensorflow as tf 
import pandas as pd 
import numpy as np 
from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer, create_optimizer

from datasets import Dataset, load_from_disk

In [2]:
physical_devices = tf.config.list_physical_devices('GPU')
try:
    for device in physical_devices:
        tf.config.experimental.set_memory_growth(device, True)
    print("Memory growth enabled")
except RuntimeError as e:
    print(f"Error enabling memory growth: {e}")


Memory growth enabled


In [3]:
drug_dataset = load_from_disk('./data/drug-reviews/') # This is the processed drugs dataset we processed in datasets.ipynb notebook

In [4]:
drug_dataset

DatasetDict({
    train: Dataset({
        features: ['patient_id', 'drugName', 'condition', 'review', 'rating', 'date', 'usefulCount', 'review_length'],
        num_rows: 110811
    })
    validation: Dataset({
        features: ['patient_id', 'drugName', 'condition', 'review', 'rating', 'date', 'usefulCount', 'review_length'],
        num_rows: 27703
    })
    test: Dataset({
        features: ['patient_id', 'drugName', 'condition', 'review', 'rating', 'date', 'usefulCount', 'review_length'],
        num_rows: 46108
    })
})

In [5]:
drug_dataset['train'][0]

{'patient_id': 89879,
 'drugName': 'Cyclosporine',
 'condition': 'keratoconjunctivitis sicca',
 'review': '"I have used Restasis for about a year now and have seen almost no progress.  For most of my life I\'ve had red and bothersome eyes. After trying various eye drops, my doctor recommended Restasis.  He said it typically takes 3 to 6 months for it to really kick in but it never did kick in.  When I put the drops in it burns my eyes for the first 30 - 40 minutes.  I\'ve talked with my doctor about this and he said it is normal but should go away after some time, but it hasn\'t. Every year around spring time my eyes get terrible irritated  and this year has been the same (maybe even worse than other years) even though I\'ve been using Restasis for a year now. The only difference I notice was for the first couple weeks, but now I\'m ready to move on."',
 'rating': 2.0,
 'date': 'April 20, 2013',
 'usefulCount': 69,
 'review_length': 147}

In [6]:
model_name = 'facebook/bart-base'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)

2024-12-19 19:09:44.142873: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-12-19 19:09:44.142975: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-12-19 19:09:44.142998: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-12-19 19:09:44.143290: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-12-19 19:09:44.143557: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
All PyTorch model weights were used when initializing TFBartForConditionalGeneration.

All the weights of TFBartForConditionalGeneration were initialized from the PyTorch model.
If your t

In [7]:
def create_summaries(example): 
    """ 
    Purpose: Create a summary combining drug name, condition, and rating
    """
    example['summary'] = f"Patient review of {example['drugName']} for {example['condition']}. Rating: {example['rating']}/5"
    return example

In [8]:
def prepare_data(examples): 
    """ 
    Purpose: Tokenize inputs and targets
    """
    inputs = examples['review']
    targets = examples['summary'] 

    model_inputs = tokenizer(
        inputs, 
        max_length=512, 
        padding='max_length', 
        truncation=True,
        return_tensors='tf'
    )

    with tokenizer.as_target_tokenizer(): 
        labels = tokenizer(
        targets, 
        max_length=128, 
        padding='max_length', 
        truncation=True,
        return_tensors='tf'
        )

    return {
        "input_ids": model_inputs['input_ids'], 
        "attention_mask": model_inputs['attention_mask'], 
        "labels": labels['input_ids']
    }
    
    

In [50]:
def create_tf_dataset(dataset, batch_size=16): 
    """
    Purpose: Convert Hugging Face dataset to TF dataset 
    """
    dataset = dataset.map(create_summaries) # This add summary field to the dataset 

    processed_dataset = dataset.map(prepare_data, remove_columns=dataset.column_names, batched=True, batch_size=100) # tokenize data for model to make sense 

    tf_dataset = tf.data.Dataset.from_tensor_slices({
        'input_ids': processed_dataset['input_ids'], 
        'attention_mask': processed_dataset['attention_mask'], 
        'labels': processed_dataset['labels'], 
    })

    return tf_dataset.map(
        lambda x: (
            {
                "input_ids": x["input_ids"], 
                "attention_mask": x["attention_mask"]
            }, 
            x['labels']
        )
    )

In [51]:
batch_size = 16 
train_dataset = create_tf_dataset(drug_dataset['train'], batch_size)
val_dataset = create_tf_dataset(drug_dataset['validation'], batch_size)
test_dataset = create_tf_dataset(drug_dataset['test'], batch_size)

Map:   0%|          | 0/110811 [00:00<?, ? examples/s]



In [65]:
sample = train_dataset.take(1)
for example in sample:  
  print(example)

({'input_ids': <tf.Tensor: shape=(512,), dtype=int32, numpy=
array([    0,   113,   100,    33,   341,  8426, 17048,    13,    59,
          10,    76,   122,     8,    33,   450,   818,   117,  2017,
           4,  1437,   286,   144,     9,   127,   301,    38,   348,
          56,  1275,     8, 15304, 12465,  2473,     4,   572,   667,
        1337,  2295,  9305,     6,   127,  3299,  5131,  8426, 17048,
           4,  1437,    91,    26,    24,  3700,  1239,   155,     7,
         231,   377,    13,    24,     7,   269,  3151,    11,    53,
          24,   393,   222,  3151,    11,     4,  1437,   520,    38,
         342,     5,  9305,    11,    24, 14827,   127,  2473,    13,
           5,    78,   389,   111,   843,   728,     4,  1437,    38,
         348,  3244,    19,   127,  3299,    59,    42,     8,    37,
          26,    24,    16,  2340,    53,   197,   213,   409,    71,
         103,    86,     6,    53,    24,  2282,    75,     4,  4337,
          76,   198,  2428,  

In [66]:
# Set up training parameters

num_train_steps = (len(drug_dataset['train']) // batch_size) * 3
initial_learning_rate = 2e-5
num_warmup_steps = num_train_steps // 10 # got to warmup those muscles buddy

In [67]:
# Create Learning rate schedule with warmp 
lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate, 
    num_train_steps - num_warmup_steps, 
    end_learning_rate=0.0, 
    power=1.0
)

In [68]:
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=initial_learning_rate)

In [69]:
model.compile(optimizer=optimizer, loss=None, metrics=['accuracy'])

In [70]:
logdir = 'logs'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

In [71]:
history = model.fit(
    train_dataset, 
    validation_data=val_dataset, 
    epochs=3,
    callbacks=[
        tensorboard_callback,
    ]
)

Epoch 1/3


ValueError: in user code:

    File "/opt/anaconda3/envs/transformers/lib/python3.9/site-packages/keras/src/engine/training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "/opt/anaconda3/envs/transformers/lib/python3.9/site-packages/keras/src/engine/training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/anaconda3/envs/transformers/lib/python3.9/site-packages/keras/src/engine/training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "/opt/anaconda3/envs/transformers/lib/python3.9/site-packages/transformers/modeling_tf_utils.py", line 1672, in train_step
        y_pred = self(x, training=True)
    File "/opt/anaconda3/envs/transformers/lib/python3.9/site-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/var/folders/qj/_xr448610yg3dn_k91tss4s80000gn/T/__autograph_generated_fileo2tlt_c4.py", line 37, in tf__run_call_with_unpacked_inputs
        retval_ = ag__.converted_call(ag__.ld(func), (ag__.ld(self),), dict(**ag__.ld(unpacked_inputs)), fscope)
    File "/var/folders/qj/_xr448610yg3dn_k91tss4s80000gn/T/__autograph_generated_fileqkmxk_y2.py", line 53, in tf__call
        outputs = ag__.converted_call(ag__.ld(self).model, (ag__.ld(input_ids),), dict(attention_mask=ag__.ld(attention_mask), decoder_input_ids=ag__.ld(decoder_input_ids), encoder_outputs=ag__.ld(encoder_outputs), decoder_attention_mask=ag__.ld(decoder_attention_mask), decoder_position_ids=ag__.ld(decoder_position_ids), head_mask=ag__.ld(head_mask), decoder_head_mask=ag__.ld(decoder_head_mask), cross_attn_head_mask=ag__.ld(cross_attn_head_mask), past_key_values=ag__.ld(past_key_values), inputs_embeds=ag__.ld(inputs_embeds), decoder_inputs_embeds=ag__.ld(decoder_inputs_embeds), use_cache=ag__.ld(use_cache), output_attentions=ag__.ld(output_attentions), output_hidden_states=ag__.ld(output_hidden_states), return_dict=ag__.ld(return_dict), training=ag__.ld(training)), fscope)
    File "/var/folders/qj/_xr448610yg3dn_k91tss4s80000gn/T/__autograph_generated_fileo2tlt_c4.py", line 37, in tf__run_call_with_unpacked_inputs
        retval_ = ag__.converted_call(ag__.ld(func), (ag__.ld(self),), dict(**ag__.ld(unpacked_inputs)), fscope)
    File "/var/folders/qj/_xr448610yg3dn_k91tss4s80000gn/T/__autograph_generated_filex6z9qu7j.py", line 39, in tf__call
        ag__.if_stmt(ag__.and_(lambda : ag__.ld(decoder_input_ids) is None, lambda : ag__.ld(decoder_inputs_embeds) is None), if_body_1, else_body_1, get_state_1, set_state_1, ('decoder_input_ids',), 1)
    File "/var/folders/qj/_xr448610yg3dn_k91tss4s80000gn/T/__autograph_generated_filex6z9qu7j.py", line 34, in if_body_1
        decoder_input_ids = ag__.converted_call(ag__.ld(shift_tokens_right), (ag__.ld(input_ids), ag__.ld(self).config.pad_token_id, ag__.ld(self).config.decoder_start_token_id), None, fscope)
    File "/var/folders/qj/_xr448610yg3dn_k91tss4s80000gn/T/__autograph_generated_filelo5bpi1q.py", line 14, in tf__shift_tokens_right
        shifted_input_ids = ag__.converted_call(ag__.ld(tf).concat, ([ag__.ld(start_tokens), ag__.ld(input_ids)[:, :-1]], -1), None, fscope)

    ValueError: Exception encountered when calling layer 'tf_bart_for_conditional_generation' (type TFBartForConditionalGeneration).
    
    in user code:
    
        File "/opt/anaconda3/envs/transformers/lib/python3.9/site-packages/transformers/modeling_tf_utils.py", line 1435, in run_call_with_unpacked_inputs  *
            return func(self, **unpacked_inputs)
        File "/opt/anaconda3/envs/transformers/lib/python3.9/site-packages/transformers/models/bart/modeling_tf_bart.py", line 1465, in call  *
            outputs = self.model(
        File "/opt/anaconda3/envs/transformers/lib/python3.9/site-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "/var/folders/qj/_xr448610yg3dn_k91tss4s80000gn/T/__autograph_generated_fileo2tlt_c4.py", line 37, in tf__run_call_with_unpacked_inputs
            retval_ = ag__.converted_call(ag__.ld(func), (ag__.ld(self),), dict(**ag__.ld(unpacked_inputs)), fscope)
        File "/var/folders/qj/_xr448610yg3dn_k91tss4s80000gn/T/__autograph_generated_filex6z9qu7j.py", line 39, in tf__call
            ag__.if_stmt(ag__.and_(lambda : ag__.ld(decoder_input_ids) is None, lambda : ag__.ld(decoder_inputs_embeds) is None), if_body_1, else_body_1, get_state_1, set_state_1, ('decoder_input_ids',), 1)
        File "/var/folders/qj/_xr448610yg3dn_k91tss4s80000gn/T/__autograph_generated_filex6z9qu7j.py", line 34, in if_body_1
            decoder_input_ids = ag__.converted_call(ag__.ld(shift_tokens_right), (ag__.ld(input_ids), ag__.ld(self).config.pad_token_id, ag__.ld(self).config.decoder_start_token_id), None, fscope)
        File "/var/folders/qj/_xr448610yg3dn_k91tss4s80000gn/T/__autograph_generated_filelo5bpi1q.py", line 14, in tf__shift_tokens_right
            shifted_input_ids = ag__.converted_call(ag__.ld(tf).concat, ([ag__.ld(start_tokens), ag__.ld(input_ids)[:, :-1]], -1), None, fscope)
    
        ValueError: Exception encountered when calling layer 'model' (type TFBartMainLayer).
        
        in user code:
        
            File "/opt/anaconda3/envs/transformers/lib/python3.9/site-packages/transformers/modeling_tf_utils.py", line 1435, in run_call_with_unpacked_inputs  *
                return func(self, **unpacked_inputs)
            File "/opt/anaconda3/envs/transformers/lib/python3.9/site-packages/transformers/models/bart/modeling_tf_bart.py", line 1191, in call  *
                decoder_input_ids = shift_tokens_right(
            File "/opt/anaconda3/envs/transformers/lib/python3.9/site-packages/transformers/models/bart/modeling_tf_bart.py", line 71, in shift_tokens_right  *
                shifted_input_ids = tf.concat([start_tokens, input_ids[:, :-1]], -1)
        
            ValueError: Index out of range using input dim 1; input has only 1 dims for '{{node tf_bart_for_conditional_generation/model/strided_slice}} = StridedSlice[Index=DT_INT32, T=DT_INT32, begin_mask=3, ellipsis_mask=0, end_mask=1, new_axis_mask=0, shrink_axis_mask=0](IteratorGetNext:1, tf_bart_for_conditional_generation/model/strided_slice/stack, tf_bart_for_conditional_generation/model/strided_slice/stack_1, tf_bart_for_conditional_generation/model/strided_slice/stack_2)' with input shapes: [512], [2], [2], [2] and with computed input tensors: input[3] = <1 1>.
        
        
        Call arguments received by layer 'model' (type TFBartMainLayer):
          • input_ids=tf.Tensor(shape=(512,), dtype=int32)
          • attention_mask=tf.Tensor(shape=(512,), dtype=int32)
          • decoder_input_ids=None
          • decoder_attention_mask=None
          • decoder_position_ids=None
          • head_mask=None
          • decoder_head_mask=None
          • cross_attn_head_mask=None
          • encoder_outputs=None
          • past_key_values=None
          • inputs_embeds=None
          • decoder_inputs_embeds=None
          • use_cache=True
          • output_attentions=False
          • output_hidden_states=False
          • return_dict=True
          • training=True
          • kwargs=<class 'inspect._empty'>
    
    
    Call arguments received by layer 'tf_bart_for_conditional_generation' (type TFBartForConditionalGeneration):
      • input_ids={'input_ids': 'tf.Tensor(shape=(512,), dtype=int32)', 'attention_mask': 'tf.Tensor(shape=(512,), dtype=int32)'}
      • attention_mask=None
      • decoder_input_ids=None
      • decoder_attention_mask=None
      • decoder_position_ids=None
      • head_mask=None
      • decoder_head_mask=None
      • cross_attn_head_mask=None
      • encoder_outputs=None
      • past_key_values=None
      • inputs_embeds=None
      • decoder_inputs_embeds=None
      • use_cache=None
      • output_attentions=None
      • output_hidden_states=None
      • return_dict=None
      • labels=None
      • training=True
