# Fine-Tuning and Evaluating a T5 Model (Text-to-Text Transfer Transformer)  for Multi-Class Classification

Importing Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments
from transformers import T5Tokenizer, T5Model
model = T5Model.from_pretrained("t5-base")
import torch
from sklearn.metrics import accuracy_score, f1_score, classification_report

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

# Loading and Combining Datasets


In [2]:
# Load all datasets
datasets = [
    'pitsA.csv',
    'pitsB.csv',
    'pitsC.csv',
    'pitsD.csv',
    'pitsE.csv',
    'pitsF.csv'
]
dfs = [pd.read_csv(dataset) for dataset in datasets]
df_combined = pd.concat(dfs, ignore_index=True)



# Handling Missing Values and Preparing Text


In [3]:
# Handle missing values
df_combined = df_combined.dropna(subset=['Severity', 'Subject', 'Description'])

# Convert Severity to strings for T5
df_combined['Severity'] = df_combined['Severity'].astype(str)

# Combine Subject and Description for input
df_combined['text'] = df_combined['Subject'] + " " + df_combined['Description']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_combined['Severity'] = df_combined['Severity'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_combined['text'] = df_combined['Subject'] + " " + df_combined['Description']


# Splitting Data into Training and Test Sets


In [4]:
# Split into training and test sets
train_data, test_data = train_test_split(
    df_combined, test_size=0.2, random_state=42, stratify=df_combined['Severity']
)


# Tokenizer and Model Setup


In [5]:
# Tokenizer and model setup
tokenizer = AutoTokenizer.from_pretrained("t5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

# Preprocessing the Data


In [6]:
def preprocess_data(data, tokenizer, max_length=128):
    inputs = data['text'].apply(lambda x: f"Classify: {x}")
    labels = data['Severity']
    encodings = tokenizer(
        inputs.tolist(),
        truncation=True,
        padding='max_length',
        max_length=max_length,
        return_tensors="pt"
    )
    label_encodings = tokenizer(
        labels.tolist(),
        truncation=True,
        padding='max_length',
        max_length=2,
        return_tensors="pt"
    )
    encodings["labels"] = label_encodings["input_ids"]
    return encodings

# Tokenize datasets
train_encodings = preprocess_data(train_data, tokenizer)
test_encodings = preprocess_data(test_data, tokenizer)



# Custom Dataset Class


In [7]:
# Custom dataset class
class T5Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        return item

    def __len__(self):
        return len(self.encodings["input_ids"])

# Create datasets
train_dataset = T5Dataset(train_encodings)
test_dataset = T5Dataset(test_encodings)


# Defining Evaluation Metrics


In [8]:
# Evaluation metrics
def compute_metrics(pred):

    logits = pred.predictions[0] if isinstance(pred.predictions, tuple) else pred.predictions
    preds = logits.argmax(-1)

    decoded_preds = [tokenizer.decode(pred_id, skip_special_tokens=True) for pred_id in preds]
    decoded_labels = [tokenizer.decode(label_id, skip_special_tokens=True) for label_id in pred.label_ids]

    accuracy = accuracy_score(decoded_labels, decoded_preds)
    f1 = f1_score(decoded_labels, decoded_preds, average='weighted')

    print("\nClassification Report:")
    print(classification_report(decoded_labels, decoded_preds))

    return {
        'accuracy': accuracy,
        'f1': f1
    }




# Setting Training Arguments


In [9]:
# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="none",
    greater_is_better=True
)




# Trainer Setup and Training


In [10]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

# Train the model
trainer.train()


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.0075,0.352556,0.679503,0.672024
2,0.3597,0.36298,0.714286,0.701017
3,0.3493,0.349415,0.722981,0.71533



Classification Report:
              precision    recall  f1-score   support

                   0.63      0.51      0.57       291
         2.0       0.87      0.54      0.67        76
          3.       0.68      0.82      0.74       438

    accuracy                           0.68       805
   macro avg       0.73      0.62      0.66       805
weighted avg       0.68      0.68      0.67       805


Classification Report:
              precision    recall  f1-score   support

                   0.73      0.47      0.57       291
         2.0       0.88      0.64      0.74        76
          3.       0.69      0.89      0.78       438

    accuracy                           0.71       805
   macro avg       0.77      0.67      0.70       805
weighted avg       0.72      0.71      0.70       805


Classification Report:
              precision    recall  f1-score   support

                   0.71      0.54      0.61       291
         2.0       0.91      0.63      0.74        76
   

There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].


TrainOutput(global_step=2415, training_loss=0.4861537262026074, metrics={'train_runtime': 756.2341, 'train_samples_per_second': 12.77, 'train_steps_per_second': 3.193, 'total_flos': 1470176587284480.0, 'train_loss': 0.4861537262026074, 'epoch': 3.0})

# Evaluating the Model


In [11]:
# Evaluate the model
print("\nFinal Evaluation on Test Set:")
test_results = trainer.evaluate(eval_dataset=test_dataset)
print("Test Results:", test_results)



Final Evaluation on Test Set:



Classification Report:
              precision    recall  f1-score   support

                   0.71      0.54      0.61       291
         2.0       0.91      0.63      0.74        76
          3.       0.71      0.86      0.78       438

    accuracy                           0.72       805
   macro avg       0.78      0.68      0.71       805
weighted avg       0.73      0.72      0.72       805

Test Results: {'eval_loss': 0.349414587020874, 'eval_accuracy': 0.7229813664596273, 'eval_f1': 0.7153295909890485, 'eval_runtime': 9.6773, 'eval_samples_per_second': 83.185, 'eval_steps_per_second': 20.874, 'epoch': 3.0}


# Making Predictions and Saving Results


In [13]:
# Make predictions
predictions = trainer.predict(test_dataset)

# Extract logits if they are in a tuple
logits = predictions.predictions
if isinstance(logits, tuple):
    logits = logits[0]

pred_ids = logits.argmax(-1)
pred_labels = [tokenizer.decode(pred_id, skip_special_tokens=True) for pred_id in pred_ids]

# Add predictions to test data
test_data['Predicted_Severity'] = pred_labels

# Save predictions to CSV
test_data[['Subject', 'Description', 'Severity', 'Predicted_Severity']].to_csv('test_predictions.csv', index=False)




Classification Report:
              precision    recall  f1-score   support

                   0.71      0.54      0.61       291
         2.0       0.91      0.63      0.74        76
          3.       0.71      0.86      0.78       438

    accuracy                           0.72       805
   macro avg       0.78      0.68      0.71       805
weighted avg       0.73      0.72      0.72       805

