In [1]:
!pip install transformers

!pip install datasets




!pip install accelerate -U
!pip install transformers[torch]

In [2]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer, Trainer, TrainingArguments
from datasets import load_dataset

# Define custom BART model with loss calculation
class CustomBartForConditionalGeneration(BartForConditionalGeneration):
    def forward(self, input_ids, attention_mask=None, decoder_input_ids=None, labels=None, **kwargs):
        # Forward pass through the BART model
        outputs = super().forward(
            input_ids=input_ids,
            attention_mask=attention_mask,
            decoder_input_ids=decoder_input_ids,
            labels=labels,  # Pass labels directly to the model
            **kwargs
        )

        # Compute loss during training
        if labels is not None:
            loss_fct = torch.nn.CrossEntropyLoss(ignore_index=self.config.pad_token_id)
            logits = outputs.logits
            loss = loss_fct(logits.view(-1, logits.shape[-1]), labels.view(-1))
            outputs["loss"] = loss

        return outputs

# Load the dataset
dataset = load_dataset("xsum")
small_train_dataset = dataset["train"].shuffle(seed=42).select(range(200))
small_test_dataset = dataset["test"].shuffle(seed=42).select(range(100))
small_val_dataset = dataset["validation"].shuffle(seed=42).select(range(50))

# Load BART tokenizer and model
model_name = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = CustomBartForConditionalGeneration.from_pretrained(model_name)

# Tokenize the dataset and prepare it for training
def tokenize_batch(batch):
    inputs = tokenizer(batch["document"], truncation=True, padding=True)
    inputs["labels"] = tokenizer(batch["summary"], truncation=True, padding=True)["input_ids"]
    return inputs

train_dataset = small_train_dataset.map(tokenize_batch, batched=True)
val_dataset = small_val_dataset.map(tokenize_batch, batched=True)
test_dataset = small_test_dataset.map(tokenize_batch, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
)

# Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Train the model
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate(eval_dataset=val_dataset)
print(eval_results)

# Test the model
test_results = trainer.predict(test_dataset)
print(test_results)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss
10,2.6824
20,2.5355
30,2.1496
40,2.5341
50,2.1581
60,2.0961
70,2.05
80,2.0269
90,2.0249
100,1.954


{'eval_loss': 2.2220358848571777, 'eval_runtime': 6.8972, 'eval_samples_per_second': 7.249, 'eval_steps_per_second': 3.625, 'epoch': 3.0}
PredictionOutput(predictions=(array([[[ 17.599861  ,   0.7349492 ,   3.569334  , ...,   0.30250362,
          -0.17086725,   0.49684408],
        [ 17.599861  ,   0.7349489 ,   3.5693355 , ...,   0.3025031 ,
          -0.17086767,   0.49684414],
        [ -9.064835  ,   0.08656605,   2.6819916 , ...,   0.69516224,
           0.29531044,   1.1283927 ],
        ...,
        [-16.581528  ,   1.3321382 ,  11.510843  , ...,   2.5146434 ,
           2.3318543 ,   2.6981962 ],
        [-16.116644  ,   1.272443  ,  11.312265  , ...,   2.4851575 ,
           2.2216747 ,   2.555202  ],
        [-15.808429  ,   1.2446619 ,  11.506066  , ...,   2.3486362 ,
           2.1746538 ,   2.461396  ]],

       [[ 17.081324  ,   0.8650696 ,   4.2715154 , ...,   0.40510252,
          -0.04545025,   0.17601967],
        [ 17.081326  ,   0.8650698 ,   4.2715144 , ...,   0.4

In [3]:
import torch
import numpy as np

def generate_summary_from_logits(prediction_output):
    # Get the logits from the PredictionOutput
    logits = torch.from_numpy(prediction_output.predictions[0])  # Convert NumPy array to PyTorch tensor

    # Convert logits to token IDs
    predicted_ids = torch.argmax(logits, dim=-1)

    # Decode token IDs to text
    summary = tokenizer.decode(predicted_ids[4], skip_special_tokens=True)
    return summary

# Example usage with test_results
summary_text = generate_summary_from_logits(test_results)
print("Generated Summary Text:")
print(summary_text)


Generated Summary Text:
pool have signed formerournemouth midfielder Phil Taylorsop on former Liamum Cooke on Premierbrough. undisclosed-long loan.,,,,,,,,,,,igigg g g gg g g g g g g g g g g g g g ggggg


In [4]:
test_dataset[15]


{'document': 'But that requires a very patient man or woman to select the most important periods of play.\nNow scientists in Spain are trying to make that work easier by getting a computer to do it.\nThe technology is being designed to automatically edit a whole game down to the key moments.\nThe people behind it at the Polytechnic University of Catalonia have been working on it for a few years and have just published 18 pages of exactly how it works.\nArnau Raventos is the lead professor on the project.\nHe tells Newsbeat they have "been working with the local television companies in Spain in order to try to make the job easier for the person who manually does the summaries currently."\nWithout having to read the full report (we did that for you) - in short - it analyses what are known as key-frames.\nSo, every part of the match is looked over by the computer to spot tell-tale signs of when something interesting is happening.\nThat might be lots of players grouped together, lots of zo

In [5]:


# Define training arguments
training_args2 = TrainingArguments(
    output_dir="./results",
    learning_rate= 5e-5,
    adam_epsilon = 1e-9,
    lr_scheduler_type = "polynomial",
    warmup_ratio= 0.2,
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
)

# Define Trainer
trainer2 = Trainer(
    model=model,
    args=training_args2,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Train the model
trainer2.train()

# Evaluate the model
eval_results2 = trainer2.evaluate(eval_dataset=val_dataset)
print(eval_results2)

# Test the model
test_results2 = trainer2.predict(test_dataset)
print(test_results2)


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss
10,0.6712
20,0.5625
30,0.4768
40,0.5344
50,0.3915
60,0.3934
70,0.3481
80,0.2681
90,0.29
100,0.2277


{'eval_loss': 3.1940813064575195, 'eval_runtime': 6.8678, 'eval_samples_per_second': 7.28, 'eval_steps_per_second': 3.64, 'epoch': 3.0}
PredictionOutput(predictions=(array([[[ 3.3580940e+01,  3.8971615e-01,  7.1289372e+00, ...,
         -3.9055368e-01, -4.1649750e-01,  3.5014629e-01],
        [ 3.3580917e+01,  3.8971728e-01,  7.1289415e+00, ...,
         -3.9055303e-01, -4.1649538e-01,  3.5014740e-01],
        [-1.3206020e+01,  4.8510978e-01,  1.7523783e+00, ...,
          1.2009377e+00,  1.0955153e+00,  1.8236066e+00],
        ...,
        [-2.2442335e+01,  1.1274173e+00,  2.0118492e+01, ...,
          2.4176302e+00,  2.0303113e+00,  3.1951075e+00],
        [-2.2191017e+01,  1.1209409e+00,  1.9583580e+01, ...,
          2.4266706e+00,  1.9130787e+00,  3.0944159e+00],
        [-2.1667599e+01,  1.1148508e+00,  1.9421982e+01, ...,
          2.3436470e+00,  1.8989933e+00,  2.9414771e+00]],

       [[ 3.2173805e+01,  6.3868141e-01,  6.8323894e+00, ...,
         -4.3816186e-02, -2.1767700e-

In [6]:
import torch
import numpy as np

def generate_summary_from_logits(prediction_output):
    # Get the logits from the PredictionOutput
    logits = torch.from_numpy(prediction_output.predictions[0])  # Convert NumPy array to PyTorch tensor

    # Convert logits to token IDs
    predicted_ids = torch.argmax(logits, dim=-1)

    # Decode token IDs to text
    summary = tokenizer.decode(predicted_ids[4], skip_special_tokens=True)
    return summary

# Example usage with test_results
summary_text2 = generate_summary_from_logits(test_results2)
print("Generated Summary Text:")
print(summary_text2)


Generated Summary Text:
pool defender signed Premierournemouth defender Jon Allsop and Cre Liamum Cooke on Premierbrough. undisclosed-long loan. All Clarke Clarke Clarke,,,, in


In [7]:
test_dataset[4]

{'document': 'Allsop, 25, made his Premier League debut in November 2015 and has spent time on loan at Coventry and Wycombe.\nCooke, 20, won the European Championship with England Under-17s in 2014 and scored four goals while on loan at Crewe last season.\nLeague One Blackpool have now signed nine players this summer.\nFind all the latest football transfers on our dedicated page.',
 'summary': 'Blackpool have signed Bournemouth goalkeeper Ryan Allsop and midfielder Callum Cooke from Middlesbrough on season-long loans.',
 'id': '40648389',
 'input_ids': [0,
  3684,
  29,
  1517,
  6,
  564,
  6,
  156,
  39,
  2275,
  815,
  2453,
  11,
  759,
  570,
  8,
  34,
  1240,
  86,
  15,
  2541,
  23,
  19150,
  12595,
  8,
  12449,
  24264,
  4,
  50118,
  8739,
  5361,
  6,
  291,
  6,
  351,
  5,
  796,
  3261,
  19,
  1156,
  2096,
  12,
  1360,
  29,
  11,
  777,
  8,
  1008,
  237,
  1175,
  150,
  15,
  2541,
  23,
  12022,
  1694,
  94,
  191,
  4,
  50118,
  17608,
  509,
  1378,
  10

In [None]:


# Define training arguments
training_args3 = TrainingArguments(
    output_dir="./results",
    learning_rate= 1e-6,
    adam_epsilon = 1e-7,
    lr_scheduler_type = "cosine",
    warmup_ratio= 0.1,
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
)

# Define Trainer
trainer3 = Trainer(
    model=model,
    args=training_args3,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Train the model
trainer3.train()

# Evaluate the model
eval_results3 = trainer3.evaluate(eval_dataset=val_dataset)
print(eval_results3)

# Test the model
test_results3 = trainer3.predict(test_dataset)
print(test_results3)


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss
10,0.2576
20,0.2135


In [None]:
import torch
import numpy as np

def generate_summary_from_logits(prediction_output):
    # Get the logits from the PredictionOutput
    logits = torch.from_numpy(prediction_output.predictions[0])  # Convert NumPy array to PyTorch tensor

    # Convert logits to token IDs
    predicted_ids = torch.argmax(logits, dim=-1)

    # Decode token IDs to text
    summary = tokenizer.decode(predicted_ids[4], skip_special_tokens=True)
    return summary

# Example usage with test_results
summary_text3 = generate_summary_from_logits(test_results3)
print("Generated Summary Text:")
print(summary_text3)


In [None]:
test_dataset[4]

In [None]:


# Define training arguments
training_args4 = TrainingArguments(
    output_dir="./results",
    learning_rate= 1e-5,
    adam_epsilon = 1e-8,
    lr_scheduler_type = "inverse_sqrt",
    warmup_ratio= 0.1,
    gradient_accumulation_steps = 2,
    fp16  = "amp",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
)

# Define Trainer
trainer4 = Trainer(
    model=model,
    args=training_args4,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Train the model
trainer4.train()

# Evaluate the model
eval_results4 = trainer4.evaluate(eval_dataset=val_dataset)
print(eval_results4)

# Test the model
test_results4 = trainer4.predict(test_dataset)
print(test_results4)


In [None]:
import torch
import numpy as np

def generate_summary_from_logits(prediction_output):
    # Get the logits from the PredictionOutput
    logits = torch.from_numpy(prediction_output.predictions[0])  # Convert NumPy array to PyTorch tensor

    # Convert logits to token IDs
    predicted_ids = torch.argmax(logits, dim=-1)

    # Decode token IDs to text
    summary = tokenizer.decode(predicted_ids[4], skip_special_tokens=True)
    return summary

# Example usage with test_results
summary_text4 = generate_summary_from_logits(test_results4)
print("Generated Summary Text:")
print(summary_text4)
