In [56]:
%pip install -r ../requirements.txt

Collecting matplotlib>=3.5.0 (from -r ../requirements.txt (line 8))
  Downloading matplotlib-3.10.1-cp310-cp310-win_amd64.whl.metadata (11 kB)
Collecting seaborn>=0.12.0 (from -r ../requirements.txt (line 9))
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting contourpy>=1.0.1 (from matplotlib>=3.5.0->-r ../requirements.txt (line 8))
  Downloading contourpy-1.3.2-cp310-cp310-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib>=3.5.0->-r ../requirements.txt (line 8))
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib>=3.5.0->-r ../requirements.txt (line 8))
  Downloading fonttools-4.57.0-cp310-cp310-win_amd64.whl.metadata (104 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib>=3.5.0->-r ../requirements.txt (line 8))
  Downloading kiwisolver-1.4.8-cp310-cp310-win_amd64.whl.metadata (6.3 kB)
Collecting pillow>=8 (from matplotlib>=3.5.0->-r ../requirements.txt (line 8))
  Downloading pillo

In [61]:
import pandas as pd
import numpy as np
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification, AutoConfig
from transformers.models.distilbert import TFDistilBertForSequenceClassification
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import os
from tqdm.auto import tqdm

In [62]:
print(f'TensorFlow version: {tf.__version__}'); print(f'Transformers version: {transformers.__version__}')

TensorFlow version: 2.12.0
Transformers version: 4.30.2


In [63]:
# Set environment variable to enable progress bar
os.environ["TQDM_NOTEBOOK"] = "true"

In [65]:
# Custom callback with enhanced progress bar
class TqdmProgressCallback(tf.keras.callbacks.Callback):
    def __init__(self, epochs, metrics=None, overall_bar=True):
        super(TqdmProgressCallback, self).__init__()
        self.epochs = epochs
        self.metrics = metrics or []
        self.overall_bar = overall_bar
        self.epoch_start_time = None
        
    def on_train_begin(self, logs=None):
        if self.overall_bar:
            self.overall_progress = tqdm(total=self.epochs, desc='Training Progress', position=0)
        
    def on_train_end(self, logs=None):
        if self.overall_bar:
            self.overall_progress.close()
    
    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_progress = tqdm(
            desc=f'Epoch {epoch+1}/{self.epochs}',
            position=1,
            leave=True
        )
        self.current_step = 0
        self.steps = self.params['steps']
        self.epoch_start_time = time.time()
        
    def on_train_batch_end(self, batch, logs=None):
        self.current_step += 1
        self.epoch_progress.update(1)
        self.epoch_progress.total = self.steps
        
        # Update metrics in description
        metrics_str = ' - '.join(f'{m}: {logs.get(m, 0):.4f}' for m in self.metrics if m in logs)
        self.epoch_progress.set_description(
            f'Epoch {self.epoch + 1}/{self.epochs} - {metrics_str}'
        )
        
    def on_epoch_end(self, epoch, logs=None):
        self.epoch_progress.close()
        
        # Collect all available metrics
        metrics_str = ' - '.join(f'{k}: {v:.4f}' for k, v in logs.items())
        epoch_time = time.time() - self.epoch_start_time
        
        # Print a summary for the epoch including time taken
        print(f"Epoch {epoch+1}/{self.epochs} completed in {epoch_time:.2f}s - {metrics_str}")
        
        if self.overall_bar:
            self.overall_progress.update(1)
            # Update overall progress bar with key metrics (loss and accuracy)
            val_acc = logs.get('val_accuracy', 0)
            train_acc = logs.get('accuracy', 0)
            self.overall_progress.set_description(
                f'Training Progress - Loss: {logs.get("loss", 0):.4f} - Acc: {train_acc:.4f} - Val Acc: {val_acc:.4f}'
            )

In [89]:
# Learning Rate Finder class
class LRFinder:
    def __init__(self, model, min_lr=1e-7, max_lr=1e-2, steps=30):
        self.model = model
        self.min_lr = min_lr
        self.max_lr = max_lr
        self.steps = steps
        self.history = {"lr": [], "loss": []}
        
    def find(self, dataset, batch_size=16, beta=0.98):
        print("Starting Learning Rate Finder...")
        # Save original weights
        original_weights = self.model.get_weights()
        
        # Calculate step factor
        step_factor = (self.max_lr / self.min_lr) ** (1 / self.steps)
        lr = self.min_lr
        
        # Prepare dataset
        batched_dataset = dataset.batch(batch_size)
        
        # Initialize optimizer with minimum learning rate
        self.model.optimizer.lr.assign(lr)
        
        # Training loop with progress bar
        avg_loss = 0
        progress_bar = tqdm(total=self.steps, desc=f"Finding optimal learning rate", position=0)
        
        for step, (x, y) in enumerate(batched_dataset):
            if step >= self.steps:
                break
                
            # Update learning rate for this batch
            lr = self.min_lr * (step_factor ** step)
            self.model.optimizer.lr.assign(lr)
            
            # Compute loss
            with tf.GradientTape() as tape:
                logits = self.model(x, training=True)
                loss = self.model.compiled_loss(y, logits)
                
            # Apply gradients
            grads = tape.gradient(loss, self.model.trainable_variables)
            self.model.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
            
            # Track loss and lr
            loss_value = float(loss)
            avg_loss = beta * avg_loss + (1 - beta) * loss_value
            smoothed_loss = avg_loss / (1 - beta ** (step + 1))
            
            self.history["lr"].append(lr)
            self.history["loss"].append(smoothed_loss)
            
            # Update progress bar with current learning rate and loss
            progress_bar.set_description(
                f"Finding optimal learning rate - LR: {lr:.8f} - Loss: {smoothed_loss:.4f}"
            )
            progress_bar.update(1)
            
            # Stop if loss explodes
            if step > 0 and smoothed_loss > 4 * self.history["loss"][0]:
                progress_bar.set_description(
                    f"Stopping search - Loss exploded at LR: {lr:.8f}"
                )
                break
        
        progress_bar.close()
                
        # Restore original weights
        self.model.set_weights(original_weights)
        
    def plot(self, skip_start=10, skip_end=5):
        plt.figure(figsize=(12, 6))
        plt.plot(self.history["lr"][skip_start:-skip_end], 
                 self.history["loss"][skip_start:-skip_end])
        plt.xscale('log')
        plt.xlabel('Learning Rate')
        plt.ylabel('Loss')
        plt.title('Learning Rate Finder')
        plt.savefig('lr_finder_plot.png')
        plt.close()
        
        # Find the learning rate with the steepest negative gradient
        losses = self.history["loss"][skip_start:-skip_end]
        lrs = self.history["lr"][skip_start:-skip_end]
        min_grad_idx = np.argmin(np.gradient(losses))
        suggested_lr = lrs[min_grad_idx]
        
        # Find the point with minimum loss
        min_loss_idx = np.argmin(losses)
        min_loss_lr = lrs[min_loss_idx]
        
        print(f"Suggested learning rate (steepest slope): {suggested_lr:.6f}")
        print(f"Learning rate with minimum loss: {min_loss_lr:.6f}")
        return suggested_lr

In [90]:
print (suggested_lr)
# print (2e-5)

8.128305161640991e-07


In [68]:
# Load preprocessed data
input_ids = np.load('input_ids.npy')
attention_mask = np.load('attention_mask.npy')
labels = np.load('labels.npy')
label_classes = np.load('intent_encoder.npy', allow_pickle=True)

# Create mappings
id2label = {idx: label for idx, label in enumerate(label_classes)}
label2id = {label: idx for idx, label in id2label.items()}

In [69]:
# Split the data into train and validation sets
indices = np.arange(len(labels))
np.random.seed(42)
np.random.shuffle(indices)

In [70]:
train_idx = indices[:int(0.8 * len(indices))]
val_idx = indices[int(0.8 * len(indices)):]

train_input_ids = input_ids[train_idx]
train_attention_mask = attention_mask[train_idx]
train_labels = labels[train_idx]

val_input_ids = input_ids[val_idx]
val_attention_mask = attention_mask[val_idx]
val_labels = labels[val_idx]


In [71]:
 # Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices(({
        "input_ids": train_input_ids,
        "attention_mask": train_attention_mask
    }, train_labels)).shuffle(1000).batch(16)
    
val_dataset = tf.data.Dataset.from_tensor_slices(({
        "input_ids": val_input_ids,
        "attention_mask": val_attention_mask
    }, val_labels)).batch(16)

In [72]:
# Create the tokenizer first
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")



In [73]:
# Initialize config and model
config = AutoConfig.from_pretrained(
    "distilbert-base-uncased",
    num_labels=len(label_classes),
    id2label=id2label,
    label2id=label2id
)

In [74]:
model = TFDistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    config=config
)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFDistilBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']
You should 

In [75]:
# Use Keras native training instead of TFTrainer (which is deprecated)
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = [
    tf.keras.metrics.SparseCategoricalAccuracy('accuracy'),
    tf.keras.metrics.SparseCategoricalCrossentropy(name='cross_entropy', from_logits=True)
]

In [76]:
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [77]:
unbatched_train_dataset = tf.data.Dataset.from_tensor_slices(({
        "input_ids": train_input_ids,
        "attention_mask": train_attention_mask
    }, train_labels)).shuffle(1000)
    
    # Run learning rate finder
print("Running Learning Rate Finder...")
lr_finder = LRFinder(model, min_lr=1e-7, max_lr=1, steps=100)
lr_finder.find(unbatched_train_dataset)
suggested_lr = lr_finder.plot()
    
    # You can use the suggested learning rate or keep the default
print(f"Using learning rate: {suggested_lr}")
model.optimizer.lr.assign(suggested_lr)
    

Running Learning Rate Finder...
Starting Learning Rate Finder...


Finding optimal learning rate - LR: 0.00000479 - Loss: 1.6103:  25%|██▌       | 25/100 [02:30<07:30,  6.01s/it]


Suggested learning rate (steepest slope): 0.000001
Learning rate with minimum loss: 0.000002
Using learning rate: 8.128305161640991e-07


<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=8.1283054e-07>

In [82]:
epochs = 10

In [81]:
# Train the model
print("Starting training...")
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=5,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2),
        tf.keras.callbacks.TensorBoard(log_dir='./logs'),
        tf.keras.callbacks.ModelCheckpoint(
            filepath='./checkpoints/model_{epoch}',
            save_best_only=True,
            monitor='val_loss'
        )
    ]
)

Starting training...
Epoch 1/5
























INFO:tensorflow:Assets written to: ./checkpoints\model_1\assets


INFO:tensorflow:Assets written to: ./checkpoints\model_1\assets


Epoch 2/5
























INFO:tensorflow:Assets written to: ./checkpoints\model_2\assets


INFO:tensorflow:Assets written to: ./checkpoints\model_2\assets


Epoch 3/5
























INFO:tensorflow:Assets written to: ./checkpoints\model_3\assets


INFO:tensorflow:Assets written to: ./checkpoints\model_3\assets


Epoch 4/5
























INFO:tensorflow:Assets written to: ./checkpoints\model_4\assets


INFO:tensorflow:Assets written to: ./checkpoints\model_4\assets


Epoch 5/5
























INFO:tensorflow:Assets written to: ./checkpoints\model_5\assets


INFO:tensorflow:Assets written to: ./checkpoints\model_5\assets




In [50]:
# Save the model
model.save_pretrained("./intent_classifier")
tokenizer.save_pretrained("./intent_classifier")
    

('./intent_classifier\\tokenizer_config.json',
 './intent_classifier\\special_tokens_map.json',
 './intent_classifier\\vocab.txt',
 './intent_classifier\\added_tokens.json',
 './intent_classifier\\tokenizer.json')

In [51]:
# Evaluate the model
print("Evaluating final model...")
results = model.evaluate(val_dataset)
print(f"Validation loss: {results[0]}")
print(f"Validation accuracy: {results[1]}")

Evaluating final model...
Validation loss: 0.38803669810295105
Validation accuracy: 0.8100000023841858


In [52]:
 # Save the label mappings
np.save('./intent_classifier/label_classes.npy', label_classes)
with open('./intent_classifier/label_mapping.txt', 'w') as f:
    for label, idx in label2id.items():
        f.write(f"{label}: {idx}\n")

In [12]:
# Define compute metrics function
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_score(labels, predictions)
    report = classification_report(labels, predictions, target_names=list(label2id.keys()), output_dict=True)
    
    # Extract metrics from the classification report
    result = {
        "accuracy": accuracy,
    }
    
    # Add precision, recall, and f1 for each class
    for label, idx in label2id.items():
        if label in report:
            result[f"{label}_precision"] = report[label]["precision"]
            result[f"{label}_recall"] = report[label]["recall"]
            result[f"{label}_f1"] = report[label]["f1-score"]
    
    return result

In [13]:
training_args = TFTrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    save_strategy="epoch",
    load_best_model_at_end=True,
    logging_dir="./logs",
    logging_steps=10,
    report_to=["tensorboard"],
    disable_tqdm=False,  # Enable progress bar
)

In [20]:
# Initialize trainer
trainer = TFTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

In [28]:
# Train the model
print("Starting training...")
trainer.train()

Starting training...


ValueError: in user code:

    File "c:\Users\gdaob\anaconda3\envs\intent-env\lib\site-packages\transformers\trainer_tf.py", line 710, in distributed_training_steps  *
        self.args.strategy.run(self.apply_gradients, inputs)
    File "c:\Users\gdaob\anaconda3\envs\intent-env\lib\site-packages\transformers\trainer_tf.py", line 653, in apply_gradients  *
        gradients = self.training_step(features, labels, nb_instances_in_global_batch)
    File "c:\Users\gdaob\anaconda3\envs\intent-env\lib\site-packages\transformers\trainer_tf.py", line 636, in training_step  *
        per_example_loss, _ = self.run_model(features, labels, True)
    File "c:\Users\gdaob\anaconda3\envs\intent-env\lib\site-packages\transformers\trainer_tf.py", line 755, in run_model  *
        outputs = self.model(features, labels=labels, training=training)[:2]
    File "c:\Users\gdaob\anaconda3\envs\intent-env\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler  **
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\gdaob\AppData\Local\Temp\__autograph_generated_filevrq1svx0.py", line 37, in tf__run_call_with_unpacked_inputs
        retval_ = ag__.converted_call(ag__.ld(func), (ag__.ld(self),), dict(**ag__.ld(unpacked_inputs)), fscope)
    File "C:\Users\gdaob\AppData\Local\Temp\__autograph_generated_filezmtbaml_.py", line 17, in tf__call
        distilbert_output = ag__.converted_call(ag__.ld(self).distilbert, (), dict(input_ids=ag__.ld(input_ids), attention_mask=ag__.ld(attention_mask), head_mask=ag__.ld(head_mask), inputs_embeds=ag__.ld(inputs_embeds), output_attentions=ag__.ld(output_attentions), output_hidden_states=ag__.ld(output_hidden_states), return_dict=ag__.ld(return_dict), training=ag__.ld(training)), fscope)
    File "C:\Users\gdaob\AppData\Local\Temp\__autograph_generated_filevrq1svx0.py", line 37, in tf__run_call_with_unpacked_inputs
        retval_ = ag__.converted_call(ag__.ld(func), (ag__.ld(self),), dict(**ag__.ld(unpacked_inputs)), fscope)
    File "C:\Users\gdaob\AppData\Local\Temp\__autograph_generated_fileq1kb6s64.py", line 93, in tf__call
        embedding_output = ag__.converted_call(ag__.ld(self).embeddings, (ag__.ld(input_ids),), dict(inputs_embeds=ag__.ld(inputs_embeds)), fscope)
    File "C:\Users\gdaob\AppData\Local\Temp\__autograph_generated_fileepzlanco.py", line 54, in tf__call
        final_embeddings = ag__.converted_call(ag__.ld(self).LayerNorm, (), dict(inputs=ag__.ld(final_embeddings)), fscope)

    ValueError: Exception encountered when calling layer 'tf_distil_bert_for_sequence_classification' (type TFDistilBertForSequenceClassification).
    
    in user code:
    
        File "c:\Users\gdaob\anaconda3\envs\intent-env\lib\site-packages\transformers\modeling_tf_utils.py", line 712, in run_call_with_unpacked_inputs  *
            return func(self, **unpacked_inputs)
        File "c:\Users\gdaob\anaconda3\envs\intent-env\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py", line 720, in call  *
            distilbert_output = self.distilbert(
        File "c:\Users\gdaob\anaconda3\envs\intent-env\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "C:\Users\gdaob\AppData\Local\Temp\__autograph_generated_filevrq1svx0.py", line 37, in tf__run_call_with_unpacked_inputs
            retval_ = ag__.converted_call(ag__.ld(func), (ag__.ld(self),), dict(**ag__.ld(unpacked_inputs)), fscope)
        File "C:\Users\gdaob\AppData\Local\Temp\__autograph_generated_fileq1kb6s64.py", line 93, in tf__call
            embedding_output = ag__.converted_call(ag__.ld(self).embeddings, (ag__.ld(input_ids),), dict(inputs_embeds=ag__.ld(inputs_embeds)), fscope)
        File "C:\Users\gdaob\AppData\Local\Temp\__autograph_generated_fileepzlanco.py", line 54, in tf__call
            final_embeddings = ag__.converted_call(ag__.ld(self).LayerNorm, (), dict(inputs=ag__.ld(final_embeddings)), fscope)
    
        ValueError: Exception encountered when calling layer 'distilbert' (type TFDistilBertMainLayer).
        
        in user code:
        
            File "c:\Users\gdaob\anaconda3\envs\intent-env\lib\site-packages\transformers\modeling_tf_utils.py", line 712, in run_call_with_unpacked_inputs  *
                return func(self, **unpacked_inputs)
            File "c:\Users\gdaob\anaconda3\envs\intent-env\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py", line 402, in call  *
                embedding_output = self.embeddings(input_ids, inputs_embeds=inputs_embeds)  # (bs, seq_length, dim)
            File "c:\Users\gdaob\anaconda3\envs\intent-env\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler  **
                raise e.with_traceback(filtered_tb) from None
            File "C:\Users\gdaob\AppData\Local\Temp\__autograph_generated_fileepzlanco.py", line 54, in tf__call
                final_embeddings = ag__.converted_call(ag__.ld(self).LayerNorm, (), dict(inputs=ag__.ld(final_embeddings)), fscope)
        
            ValueError: Exception encountered when calling layer 'embeddings' (type TFEmbeddings).
            
            in user code:
            
                File "c:\Users\gdaob\anaconda3\envs\intent-env\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py", line 124, in call  *
                    final_embeddings = self.LayerNorm(inputs=final_embeddings)
                File "c:\Users\gdaob\anaconda3\envs\intent-env\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler  **
                    raise e.with_traceback(filtered_tb) from None
            
                ValueError: Exception encountered when calling layer 'LayerNorm' (type LayerNormalization).
                
                Cannot reshape a tensor with 768 elements to shape [1,1,128,1] (128 elements) for '{{node tf_distil_bert_for_sequence_classification/distilbert/embeddings/LayerNorm/Reshape}} = Reshape[T=DT_FLOAT, Tshape=DT_INT32](tf_distil_bert_for_sequence_classification/distilbert/embeddings/LayerNorm/Reshape/ReadVariableOp, tf_distil_bert_for_sequence_classification/distilbert/embeddings/LayerNorm/Reshape/shape)' with input shapes: [768], [4] and with input tensors computed as partial shapes: input[1] = [1,1,128,1].
                
                Call arguments received by layer 'LayerNorm' (type LayerNormalization):
                  • inputs=tf.Tensor(shape=(16, 16, 128, 768), dtype=float32)
            
            
            Call arguments received by layer 'embeddings' (type TFEmbeddings):
              • input_ids=tf.Tensor(shape=(16, 16, 128), dtype=int32)
              • position_ids=None
              • inputs_embeds=None
              • training=True
        
        
        Call arguments received by layer 'distilbert' (type TFDistilBertMainLayer):
          • input_ids=tf.Tensor(shape=(16, 16, 128), dtype=int32)
          • attention_mask=tf.Tensor(shape=(16, 16, 128), dtype=int32)
          • head_mask=None
          • inputs_embeds=None
          • output_attentions=False
          • output_hidden_states=False
          • return_dict=True
          • training=True
    
    
    Call arguments received by layer 'tf_distil_bert_for_sequence_classification' (type TFDistilBertForSequenceClassification):
      • input_ids={'input_ids': 'tf.Tensor(shape=(16, 16, 128), dtype=int32)', 'attention_mask': 'tf.Tensor(shape=(16, 16, 128), dtype=int32)'}
      • attention_mask=None
      • head_mask=None
      • inputs_embeds=None
      • output_attentions=None
      • output_hidden_states=None
      • return_dict=None
      • labels=tf.Tensor(shape=(16, 16), dtype=int32)
      • training=True
