In [1]:
# Mount Google Drive inside the Colab runtime.
# Later cells load the saved model/tokenizer directories from paths under this
# mount point, so this cell has to run before them.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
import torch
import os
import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [5]:
# Locate the saved checkpoints on the mounted Google Drive.
# The paths are anchored at the absolute mount point used by drive.mount()
# instead of being relative to the current working directory, so loading keeps
# working even after a `%cd` / os.chdir() elsewhere in the session.
drive_mount_point = '/content/gdrive'
prototype_root = os.path.join(drive_mount_point, 'My Drive', 'prototype')

# Load the T5 model
drive_path_to_load_t5_model = os.path.join(prototype_root, 'SummarizationModels', 't5-base-model')
t5_model = AutoModelForSeq2SeqLM.from_pretrained(drive_path_to_load_t5_model)

# Load the T5 tokenizer
drive_path_to_load_t5_tokenizer = os.path.join(prototype_root, 'SummarizationModels', 't5-base-tokenizer')
t5_tokenizer = AutoTokenizer.from_pretrained(drive_path_to_load_t5_tokenizer)

# Load the BART model (note: stored under 'SummarizationModel2', not 'SummarizationModels')
drive_path_to_load_bart_model = os.path.join(prototype_root, 'SummarizationModel2', 'facebook', 'bart-base-model')
bart_model = AutoModelForSeq2SeqLM.from_pretrained(drive_path_to_load_bart_model)

# Load the BART tokenizer
drive_path_to_load_bart_tokenizer = os.path.join(prototype_root, 'SummarizationModel2', 'facebook', 'bart-base-tokenizer')
bart_tokenizer = AutoTokenizer.from_pretrained(drive_path_to_load_bart_tokenizer)

In [25]:
# Define a function to capitalize sentences in the summary
def capitalize_sentences(summary):
    """Tidy up the casing and terminal punctuation of a generated summary.

    Steps, in order:
      1. Upper-case the first word character that follows a sentence
         terminator (``.``, ``!`` or ``?``) and one or more whitespace chars.
      2. Strip leading punctuation/whitespace and trailing whitespace.
         Leading digits and non-ASCII letters are kept.
      3. Upper-case the first character.
      4. Append a full stop unless the text already ends with ``.``, ``!``
         or ``?``.

    Args:
        summary (str): Raw summary text.

    Returns:
        str: The cleaned summary. An empty string is returned when nothing
        is left after stripping (e.g. empty or punctuation-only input).
    """
    # Capitalize the first letter of each sentence. A capturing group is used
    # instead of a fixed-width lookbehind so any amount of whitespace after
    # the terminator is accepted.
    summary = re.sub(
        r'([.!?]\s+)(\w)',
        lambda match: match.group(1) + match.group(2).upper(),
        summary,
    )

    # Remove leading punctuation/whitespace (non-word characters and
    # underscores only, so a summary starting with a digit stays intact)
    # and drop trailing whitespace so the end-of-text check below is reliable.
    summary = re.sub(r'^[\W_]+', '', summary).rstrip()

    # Nothing left to format: avoid indexing into an empty string.
    if not summary:
        return summary

    # Ensure the first character is uppercase
    summary = summary[0].upper() + summary[1:]

    # Add a full stop at the end only if no sentence terminator is present
    if not summary.endswith(('.', '!', '?')):
        summary += '.'

    return summary

# Define a function to generate summaries using a given model and tokenizer
def generate_summary(document, model, tokenizer, max_input_length=512, **generation_overrides):
    """Summarize ``document`` with a seq2seq model and return formatted text.

    The document is tokenized (truncated to ``max_input_length`` tokens), fed
    to ``model.generate`` with beam search, decoded, and passed through
    ``capitalize_sentences``.

    Args:
        document (str): Text to summarize.
        model: Seq2seq model exposing ``to``, ``eval`` and ``generate``.
        tokenizer: Matching tokenizer; called on the text and used to decode.
        max_input_length (int): Token limit applied when encoding the input.
        **generation_overrides: Keyword arguments forwarded to
            ``model.generate``; they override the defaults below.

    Returns:
        str: The formatted summary, or ``""`` when ``document`` is empty or
        whitespace-only.
    """
    # With no input text the min_length constraint would force the model to
    # invent at least 80 tokens, so bail out early.
    if not document or not document.strip():
        return ""

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()  # make sure dropout is disabled for generation

    # Call the tokenizer (rather than .encode) so the attention mask is
    # available and can be passed to generate() explicitly.
    encoded = tokenizer(document, return_tensors='pt', max_length=max_input_length, truncation=True)
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(device)

    # Default decoding settings; callers may override any of them.
    generation_args = {
        "max_length": 250,
        "min_length": 80,
        "length_penalty": 1.5,
        "num_beams": 5,
        "early_stopping": True,
        "no_repeat_ngram_size": 2,
    }
    generation_args.update(generation_overrides)

    with torch.no_grad():
        outputs = model.generate(input_ids, attention_mask=attention_mask, **generation_args)

    # Only the first returned sequence is used.
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return capitalize_sentences(summary)

In [26]:
# Document to be summarized (a single paragraph held in one string literal)
document = """DA is a widely accepted fruitful method to avoid overfitting issues and enhancing the performance of CNNs. However, currently, DA policies have been designed manually, and the optimal DA policies are dataset specific (Cubuk et al., 2019a; Khalifa et al., 2022). As a result, to design optimal DA policies manually for a given dataset requires a considerable amount of expertise in the DA domain, powerful computational resources, and a lot of time. So far, a significant focus of the researchers has been on refining the architectures of the CNNs to avoid data related issues. Less attention has been put into improving DA technologies, solving difficulties in traditional DA and automatically identifying optimal DA policies based on the dataset and task type (Cubuk et al., 2019a). The advancements in automated machine learning (AutoML) have shown promise in developing AutoDA systems to enhance CNN performance by automatically learning optimal DA policies (Yang et al., 2022a). However, limitations in existing AutoDA works highlight the need for further research to address the important problem of automating the DA process (Yang et al., 2022a)."""

# Generate summaries using both models: the same document is passed to
# generate_summary once per model/tokenizer pair so the outputs can be compared.
# NOTE(review): T5 checkpoints are commonly prompted with a "summarize: " prefix;
# no prefix is added here — confirm whether the saved T5 checkpoint expects one.
summary_t5 = generate_summary(document, t5_model, t5_tokenizer)
summary_bart = generate_summary(document, bart_model, bart_tokenizer)


In [27]:
# Show each model's summary under its own heading; sep="\n" puts the heading
# and the summary text on separate lines.
print("Summary from T5 Model :", summary_t5, sep="\n")
print("\nSummary from BART Model :", summary_bart, sep="\n")

Summary from T5 Model :
The advancements in automated machine learning (AutoML) have shown promise in developing AutoDA systems to enhance CNN performance by automatically learning optimal DA policies (Yang et al., 2022a) however, limitations in existing autoDA works highlight the need for further research to address the important problem of automating the DB process (yang-etal. (2020a). As a result, to design optimal.

Summary from BART Model :
A widely accepted fruitful method to avoid overfitting issues and enhancing the performance of CNNs. However, currently, DA policies have been designed manually, and the optimal editor policies are dataset specific (Cubuk et al., 2019a; Khalifa etal., 2022). As a result, to design optimal DA policy manually for a given dataset requires a considerable amount of expertise in the DA domain, powerful computational resources, the lot of time. So far, a significant focus of the researchers has been on refining the architectures of wells to avoids dat