In [1]:
!nvidia-smi


Thu Jun  6 00:09:57 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.171.04             Driver Version: 535.171.04   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4060 ...    Off | 00000000:01:00.0 Off |                  N/A |
| N/A   43C    P3              17W /  60W |      8MiB /  8188MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
import torch

def check_cuda():
    if torch.cuda.is_available():
        print("CUDA is available!")
        print(f"CUDA Version: {torch.version.cuda}")
        print(f"PyTorch CUDA support: {torch.backends.cuda.is_built()}")
        print(f"Number of CUDA devices: {torch.cuda.device_count()}")
        print(f"CUDA device name: {torch.cuda.get_device_name(0)}")
    else:
        print("CUDA is not available.")

if __name__ == "__main__":
    check_cuda()


CUDA is available!
CUDA Version: 12.1
PyTorch CUDA support: True
Number of CUDA devices: 1
CUDA device name: NVIDIA GeForce RTX 4060 Laptop GPU


In [3]:
import torch
torch.cuda.empty_cache()


In [4]:
import torch
from datasets import load_from_disk, load_metric
from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments
import pandas as pd

# Load the dataset
dataset_samsum = load_from_disk('samsum_dataset')
print(dataset_samsum)

# Load the model and tokenizer
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Preprocess the data
def preprocess_function(examples):
    inputs = ["summarize: " + dialogue for dialogue in examples["dialogue"]]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")

    with tokenizer.as_target_tokenizer():
        labels = tokenizer(examples["summary"], max_length=150, truncation=True, padding="max_length")

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_samsum = dataset_samsum.map(preprocess_function, batched=True)



2024-06-06 00:10:09.234258: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-06 00:10:09.268527: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 14732
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 819
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary'],
        num_rows: 818
    })
})


You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
# Initialize Trainer
# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="no",  # No evaluation during training
    learning_rate=1.5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=3,
    logging_dir='./logs',
    do_eval=False
)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Rouge expects a newline after each sentence
    decoded_preds = ["\n".join(pred.strip() for pred in decoded_pred.split()) for decoded_pred in decoded_preds]
    decoded_labels = ["\n".join(label.strip() for label in decoded_label.split()) for decoded_label in decoded_labels]

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    return {key: value.mid.fmeasure * 100 for key, value in result.items()}

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_samsum["train"],
    eval_dataset=tokenized_samsum["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# Train the model
trainer.train()

  0%|          | 0/22098 [00:00<?, ?it/s]

{'loss': 1.5612, 'grad_norm': 2.371866226196289, 'learning_rate': 1.4660602769481401e-05, 'epoch': 0.07}
{'loss': 0.4789, 'grad_norm': 1.6517077684402466, 'learning_rate': 1.4321205538962802e-05, 'epoch': 0.14}
{'loss': 0.4445, 'grad_norm': 1.1639117002487183, 'learning_rate': 1.3981808308444204e-05, 'epoch': 0.2}
{'loss': 0.4267, 'grad_norm': 3.335114002227783, 'learning_rate': 1.3642411077925605e-05, 'epoch': 0.27}
{'loss': 0.4342, 'grad_norm': 0.723832905292511, 'learning_rate': 1.3303013847407006e-05, 'epoch': 0.34}
{'loss': 0.4095, 'grad_norm': 1.1829761266708374, 'learning_rate': 1.2963616616888406e-05, 'epoch': 0.41}
{'loss': 0.4326, 'grad_norm': 1.2201462984085083, 'learning_rate': 1.2624219386369809e-05, 'epoch': 0.48}
{'loss': 0.4064, 'grad_norm': 1.0795049667358398, 'learning_rate': 1.228482215585121e-05, 'epoch': 0.54}
{'loss': 0.4077, 'grad_norm': 1.0733861923217773, 'learning_rate': 1.194542492533261e-05, 'epoch': 0.61}
{'loss': 0.4209, 'grad_norm': 1.1338129043579102, 'l

KeyboardInterrupt: 

In [None]:
# Define compute metrics
rouge = load_metric("rouge")



# Evaluate the model
metrics = trainer.evaluate(tokenized_samsum["test"])
print(metrics)

# Save the model
model.save_pretrained("./saved_model")
tokenizer.save_pretrained("./saved_model")

In [17]:
import torch
torch.cuda.empty_cache()