In [1]:
!pip install transformers datasets torch




DEPRECATION: textract 1.6.5 has a non-standard dependency specifier extract-msg<=0.29.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of textract or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063


In [2]:
!pip install transformers



DEPRECATION: textract 1.6.5 has a non-standard dependency specifier extract-msg<=0.29.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of textract or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063


In [3]:
# from transformers import PegasusTokenizer
# from datasets import load_dataset

# # Load dataset
# dataset = load_dataset("xsum")

# # Initialize tokenizer
# tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")

# # Tokenize data
# def preprocess_data(examples):
#     inputs = tokenizer(examples['document'], max_length=1024, truncation=True, padding="max_length", return_tensors="pt")
#     targets = tokenizer(examples['summary'], max_length=128, truncation=True, padding="max_length", return_tensors="pt")
#     inputs["labels"] = targets["input_ids"]
#     return inputs

# tokenized_datasets = dataset.map(preprocess_data, batched=True)
# tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])


from transformers import PegasusTokenizer
from datasets import load_dataset

# Load dataset
dataset = load_dataset("xsum")

# Take a small subset for training and validation
small_train_dataset = dataset['train'].shuffle(seed=42).select(range(1000))  # 1000 examples from training data
small_val_dataset = dataset['validation'].shuffle(seed=42).select(range(100))  # 100 examples from validation data

# Initialize tokenizer
tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")

# Tokenize data
def preprocess_data(examples):
    inputs = tokenizer(examples['document'], max_length=1024, truncation=True, padding="max_length", return_tensors="pt")
    targets = tokenizer(examples['summary'], max_length=128, truncation=True, padding="max_length", return_tensors="pt")
    inputs["labels"] = targets["input_ids"]
    return inputs

# Apply the preprocessing to the smaller datasets
small_train_dataset = small_train_dataset.map(preprocess_data, batched=True)
small_val_dataset = small_val_dataset.map(preprocess_data, batched=True)

# Set format to torch
small_train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
small_val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])


  from .autonotebook import tqdm as notebook_tqdm
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [4]:
from torch.utils.data import DataLoader

# DataLoader for the small training and validation datasets
train_dataloader = DataLoader(small_train_dataset, batch_size=4, shuffle=True)
val_dataloader = DataLoader(small_val_dataset, batch_size=4)


In [5]:
import torch
from torch import nn
from transformers import PegasusConfig, PegasusModel

class MiniPegasus(nn.Module):
    def __init__(self, config):
        super(MiniPegasus, self).__init__()
        self.model = PegasusModel(config)
        self.linear = nn.Linear(config.d_model, config.vocab_size)

    def forward(self, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask,
                             decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask)
        sequence_output = outputs[0]
        logits = self.linear(sequence_output)
        return logits
    
    def generate(self, input_ids, attention_mask, max_length=1000, num_beams=4, early_stopping=True):
        # Initialize decoder input
        decoder_input_ids = torch.tensor([[self.model.config.decoder_start_token_id]]).to(input_ids.device)
        generated_ids = []

        for _ in range(max_length):
            outputs = self.forward(input_ids=input_ids, attention_mask=attention_mask,
                                   decoder_input_ids=decoder_input_ids, decoder_attention_mask=None)
            next_token_logits = outputs[:, -1, :]
            next_token = torch.argmax(next_token_logits, dim=-1)
            decoder_input_ids = torch.cat([decoder_input_ids, next_token.unsqueeze(-1)], dim=-1)
            generated_ids.append(next_token)

            if next_token == self.model.config.eos_token_id:
                break

        return torch.cat(generated_ids, dim=-1)

# Define configuration
config = PegasusConfig(
    vocab_size=tokenizer.vocab_size,
    d_model=512,  # Model dimension
    encoder_layers=6,
    decoder_layers=6,
    encoder_attention_heads=8,
    decoder_attention_heads=8,
    encoder_ffn_dim=2048,
    decoder_ffn_dim=2048,
)

# Initialize model
model = MiniPegasus(config)


In [6]:
# from torch.optim import AdamW
# from torch.utils.data import DataLoader

# # DataLoader
# train_dataloader = DataLoader(tokenized_datasets['train'], batch_size=4, shuffle=True)

# # Optimizer
# optimizer = AdamW(model.parameters(), lr=5e-5)

# # Training loop
# model.train()
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

# for epoch in range(3):  # Train for 3 epochs
#     for batch in train_dataloader:
#         optimizer.zero_grad()
#         input_ids = batch['input_ids'].to(device)
#         attention_mask = batch['attention_mask'].to(device)
#         labels = batch['labels'].to(device)
#         decoder_input_ids = labels[:, :-1]
#         decoder_attention_mask = (decoder_input_ids != tokenizer.pad_token_id).float().to(device)

#         outputs = model(input_ids=input_ids, attention_mask=attention_mask,
#                         decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask)

#         loss = nn.CrossEntropyLoss()(outputs.view(-1, config.vocab_size), labels[:, 1:].reshape(-1))
#         loss.backward()
#         optimizer.step()
#         print(f"Epoch: {epoch}, Loss: {loss.item()}")


from torch.optim import AdamW
import torch
from torch import nn

# Optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Training loop
model.train()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(3):  # Train for 3 epochs
    for batch in train_dataloader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        decoder_input_ids = labels[:, :-1]
        decoder_attention_mask = (decoder_input_ids != tokenizer.pad_token_id).float().to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask,
                        decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask)

        loss = nn.CrossEntropyLoss()(outputs.view(-1, config.vocab_size), labels[:, 1:].reshape(-1))
        loss.backward()
        optimizer.step()
        print(f"Epoch: {epoch}, Loss: {loss.item()}")


Epoch: 0, Loss: 11.529243469238281
Epoch: 0, Loss: 7.78811502456665
Epoch: 0, Loss: 5.6966352462768555
Epoch: 0, Loss: 4.502650260925293
Epoch: 0, Loss: 3.6428630352020264
Epoch: 0, Loss: 2.998122215270996
Epoch: 0, Loss: 2.584482431411743
Epoch: 0, Loss: 2.6550326347351074
Epoch: 0, Loss: 2.5681264400482178
Epoch: 0, Loss: 2.4914042949676514
Epoch: 0, Loss: 2.750763416290283
Epoch: 0, Loss: 2.6718311309814453
Epoch: 0, Loss: 2.2279255390167236
Epoch: 0, Loss: 3.0959670543670654
Epoch: 0, Loss: 2.097642421722412
Epoch: 0, Loss: 2.166555643081665
Epoch: 0, Loss: 1.9731032848358154
Epoch: 0, Loss: 2.359503746032715
Epoch: 0, Loss: 2.573507070541382
Epoch: 0, Loss: 2.348625421524048
Epoch: 0, Loss: 2.681722640991211
Epoch: 0, Loss: 2.9402735233306885
Epoch: 0, Loss: 2.647303581237793
Epoch: 0, Loss: 3.3758463859558105
Epoch: 0, Loss: 2.6084582805633545
Epoch: 0, Loss: 2.7187609672546387
Epoch: 0, Loss: 2.5115859508514404
Epoch: 0, Loss: 2.4553260803222656
Epoch: 0, Loss: 2.293655633926391

In [7]:
!pip install rouge_score



DEPRECATION: textract 1.6.5 has a non-standard dependency specifier extract-msg<=0.29.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of textract or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063


In [8]:
from datasets import load_metric

# Load ROUGE metric
rouge = load_metric('rouge')

def evaluate(model, dataloader):
    model.eval()
    for batch in dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        decoder_input_ids = labels[:, :-1]
        decoder_attention_mask = (decoder_input_ids != tokenizer.pad_token_id).float().to(device)

        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask,
                            decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask)

        predictions = torch.argmax(outputs, dim=-1)
        decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
        decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

        rouge.add_batch(predictions=decoded_preds, references=decoded_labels)

    result = rouge.compute()
    return result

# Evaluate
val_dataloader = DataLoader(tokenized_datasets['validation'], batch_size=4)
rouge_scores = evaluate(model, val_dataloader)
print(rouge_scores)


  rouge = load_metric('rouge')
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


NameError: name 'tokenized_datasets' is not defined

In [18]:
# def generate_summary(model, tokenizer, text, max_length=128, num_beams=4):
#     model.eval()
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     model.to(device)

#     # Tokenize the input text
#     inputs = tokenizer(text, max_length=1024, truncation=True, padding="max_length", return_tensors="pt").to(device)

#     # Generate summary
#     summary_ids = model.generate(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'],
#                                  max_length=max_length, num_beams=num_beams, early_stopping=True)

#     # Decode the generated summary
#     summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
#     return summary


def generate_summary(model, tokenizer, text, min_tokens=50, max_length=512, num_beams=4, max_words=100):
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Tokenize the input text
    inputs = tokenizer(text, max_length=1024, truncation=True, padding="max_length", return_tensors="pt").to(device)
    
    summary = ""
    while True:
        # Generate summary
        summary_ids = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_length=max_length,
            num_beams=num_beams,
            early_stopping=True
        )
        
        # Decode the generated summary
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        
        # Check the length of the summary
        summary_words = summary.split()
        if len(summary_words) >= min_tokens:
            break
        else:
            # Optionally, adjust parameters and retry
            max_length += 50  # Increase max_length to allow for longer summaries
            
            if max_length > 1024:
                print("Unable to generate a summary with the minimum required tokens.")
                break

    # Truncate summary to the maximum word limit
    # summary_words = summary_words[:max_words]
    summary = ' '.join(summary_words)
    
    return summary


In [19]:
test_text = """"
In this blog post, we share system design lessons from consolidating several related machine learning models for large-scale search and recommendation systems at Netflix into a single unified model. Given different recommendation use cases, many recommendation systems treat each use-case as a separate machine-learning task and train a bespoke ML model for each task. In contrast, our approach generates recommendations for multiple use cases from a single, multi-task machine learning model. This not only improves the model performance but also simplifies the system architecture, thus improving maintainability. Additionally, building a common extensible framework for search and recommendations has allowed us to build systems for new use-cases faster. We describe the trade-offs we made for achieving this consolidation and lessons we learnt that can be applied generally.

Background

Figure 1: Multiple use cases in a typical recommendation system
In large real world recommender system applications like e-commerce, streaming services, and social media, multiple machine learning models are trained to optimize item recommendations for different parts of the system. There are separate models for different use-cases like notifications (user-to-item recommendations), related items (item-to-item based recommendations), search (query-to-item recommendations), and category exploration (category-to-item recommendations) (Figure 1). However, this can rapidly result in systems management overhead and hidden technical debt in maintaining a large number of specialized models (Sculley et al., 2015). This complexity can lead to increased long-term costs, and reduce the reliability and effectiveness of ML systems (Ehsan & Basillico, 2022).

Figure 2 shows how such an ML system with model proliferation might look. The different use-cases like notifications, related items, search and category exploration have different UI canvases where the user interacts with them. ML systems for these different use-cases often evolve to have multiple offline pipelines that have similar steps such as label generation, featurization and model training. On the online side, different models might be hosted in different services with different inference APIs. However, there are a number of commonalities in both offline pipelines and online infrastructure, which such a design does not leverage.


Figure 2: Model Proliferation in ML Systems
In this blog, we describe our efforts to leverage the commonalities across these tasks to consolidate the offline and online stacks for these models. This methodology not only reduces technical debt but also enhances the effectiveness of the models by leveraging knowledge gained from one task to improve another related task. Additionally, we noticed advantages in terms of efficiently implementing innovative updates across multiple recommendation tasks.

Figure 3 shows the consolidated system design. After an initial step of use-case-specific label preparation, we unify the rest of the offline pipeline and train a single multi-task model. On the online side, a flexible inference pipeline hosts models in different environments based on the latency, data freshness and other requirements, and the model is exposed via a unified canvas-agnostic API.


Figure 3: Consolidated ML System
Offline Design
In an offline model training pipeline, each recommendation task maps to a request context where recommendations need to be shown. The request context schema varies depending on the specific task. For instance, for query-to-item recommendation, the request context would consist of elements like the query, country, and language. On the other hand, for item-to-item recommendation, the request context would also include the source item and country information. The composition of the request context schema is tailored to suit the requirements of each recommendation task.

An offline pipeline trains models from logged interaction data in these stages:

Label preparation: Clean logged interaction data and generate (request_context, label) pairs.

Feature Extraction: Generate feature vectors for the above generated (request_context, label) tuples.

Model Training: Train a model based on (feature_vector, label) rows.

Model Evaluation: Assess the performance of the trained model using appropriate evaluation metrics.

Deployment: Make the model available for online serving.

For model consolidation, we set the unified request context to the union of all context elements across tasks. For specific tasks, missing or unnecessary context values are substituted by sentinel (default) values. We introduce a task_type categorical variable as part of the unified request context to inform the model of target recommendation task.

In label preparation, data from each canvas are cleaned, analyzed and stored with the unified request context schema. This label data from different canvases is then merged together with appropriate stratification to get a unified labeled data set. In feature extraction, not all features contain values for certain tasks and are filled with appropriate default values.

Online Design
Serving a single ML model at-scale presents certain unique online MLOps challenges (Kreuzberger et al., 2022). Each use-case may have different requirements with respect to:

Latency and throughput: Different service-level agreements (SLAs) to guarantee a latency and throughput target to deliver an optimal end-user experience.
Availability: Different guarantees of model serving uptime, without resorting to fallbacks.
Candidate Sets: Different types of items (e.g. videos, games, people, etc) that may be further curated by use-case-specific business requirements.
Budget: Different budget targets for model inferencing costs.
Business Logic: Different pre- and post- processing logic.
Historically, use-case-specific models are tuned to satisfy the unique requirements. The core online MLOps challenge is to support a wide variety of use-cases without regressing towards the lowest-common denominator in terms of model performance.

We approached this challenge by:

Deploying the same model in different system environments per use-case. Each environment has “knobs” to tune the characteristics of the model inference, including model latency, model data freshness and caching policies and, model execution parallelism.
Exposing a generic, use-case-agnostic API for consuming systems. To enable this flexibility, the API enables heterogeneous context input (User, Video, Genre, etc.), heterogeneous candidate selection (User, Video, Genre, etc.), timeout configuration, and fallback configuration.
Lessons Learnt
Consolidating ML models into a single model can be thought of as a form of software refactoring (Cinnéide et al., 2016). Similar to software refactoring, where related code modules are restructured and consolidated to eliminate redundancy and improve maintainability, model consolidation can be thought of as combining different prediction tasks into a single model, and leveraging shared knowledge and representations. There are several benefits to this.

Reduced code and deployment footprint
Supporting a new ML model requires significant investment in code, data and computational resources. There is complexity involved in setting up training pipelines to generate labels, features, train the model and manage deployments. Maintaining such pipelines requires constant upgrades to underlying software frameworks and rolling out bug fixes. Model consolidation acts as an essential leverage in reducing such costs.

Improved Maintainability
Production systems must have high availability: any problems must be detected and resolved quickly. ML Teams often have on-call rotations to ensure continuity of operation. A single unified code base makes the on-call work easier. The benefits include little to no context switching for the on-call, homogeneity of workflows, fewer points of failures, and fewer lines of code.

Apply Model Advancements Quickly to Multiple Canvases
Building a consolidated ML system with a multi-task model allows us to apply advancements in one use-case quickly to other use-cases. For example, if a certain feature is tried for a specific use-case, the common pipeline allows us to try it for other use-cases without additional pipeline work. There is the trade-off of potential regression for other use-cases as features are introduced for one use-case. However, in practice, this has not been a problem if the different use-cases in the consolidated model are sufficiently related.

Better Extensibility
Consolidating multiple use cases into a single model prompts a flexible design with extra thought in incorporating the multiple use cases. Such essential extensibility consequently future-proofs the system. For example, we initially designed the model training infrastructure to consolidate a few use cases. However, the flexible design necessitated to incorporate these multiple use cases has proved effective to onboard new model training use cases on the same infrastructure. In particular, our approach of including variable request context schemas has simplified the process of training models for new use-cases using the same infrastructure.

Final Thoughts
Though ML system consolidation is not a silver bullet and may not be appropriate in all cases, we believe there are many scenarios where such consolidation simplifies code, allows faster innovation and increases the maintainability of systems. Our experience shows that consolidating models that rank similar targets leads to many benefits, but it’s unclear whether models that rank completely different targets and have very different input features would benefit from such consolidation. In future work, we plan to establish more concrete guidelines for when ML model consolidation is most suitable. Finally, large foundation models for NLP and recommendations might have significant impact on ML system design and could lead to even more consolidation at systems level.

Acknowledgements
We thank Vito Ostuni, Moumita Bhattacharya, Justin Basilico, Weidong Zhang, and Xinran Waibel for their contributions to the ML system. Thanks also to Anne Cocos for her valuable feedback on a previous draft.
"""

In [20]:
# # Example usage:

# summary = generate_summary(model, tokenizer, test_text)
# print("Summary:", summary)

summary = generate_summary(model, tokenizer, test_text, min_tokens=50, max_length=512, num_beams=4, max_words=100)
print(summary)

Unable to generate a summary with the minimum required tokens.
of


In [24]:
def generate_summary(model, tokenizer, text, min_tokens=50, initial_max_length=512, num_beams=4, max_words=100):
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Tokenize the input text
    inputs = tokenizer(text, max_length=1024, truncation=True, padding="max_length", return_tensors="pt").to(device)
    
    max_length = initial_max_length
    summary = ""
    
    while True:
        # Generate summary
        summary_ids = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_length=max_length,
            num_beams=num_beams,
            early_stopping=True
        )
        
        # Decode the generated summary
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        
        # Print intermediate outputs for debugging
        print(f"Generated summary (max_length={max_length}): {summary}")

        # Check the length of the summary
        summary_words = summary.split()
        if len(summary_words) >= min_tokens:
            break
        else:
            # Optionally, adjust parameters and retry
            max_length += 50  # Increase max_length to allow for longer summaries
            
            if max_length > 1024:
                print("Unable to generate a summary with the minimum required tokens.")
                break

    # Truncate summary to the maximum word limit
    summary_words = summary_words[:max_words]
    summary = ' '.join(summary_words)
    
    return summary

# Example usage
text = """UK house prices rose by 5.7% in the year to the end of September, according to the latest house price index from lender Halifax. The annual rate of increase picked up from 5.2% in August, Halifax said. Across the UK, the average house price in September was £267,587, up from £263,786 in August, a rise of 1.7%. Russell Galley, managing director at Halifax, said: "Housing market activity has remained solid with decent levels of buyer enquiries. However, some of the drivers of the buoyant market we saw earlier in the year, such as the lack of properties for sale and buyers racing to benefit from the stamp duty holiday, have fallen away to some extent. That said, underlying demand is still strong and is serving to underpin a degree of pricing pressure for homes, which we expect will lead to a further period of sustained house price growth." The stamp duty holiday in England and Northern Ireland was phased out in stages over the summer, coming to a complete end from 1 October. In Wales, the tax break on house purchases ended on 30 June, while in Scotland it ran until 31 March. Halifax said the performance of the housing market was being supported by a number of other factors. These included the continuing low mortgage rate environment, with products priced at close to historical lows, and the ongoing shortage of properties for sale. The lender said the latter was helping to put upward pressure on house prices. However, it added that affordability challenges for buyers remained acute, with the average first-time buyer in the UK now paying the equivalent of 40% of their annual gross income on mortgage repayments. "With pressures on the cost of living mounting, and the prospect of interest rates increasing from the current low level, the house price to income ratio is becoming even more of a constraint," Mr Galley said. "Most experts are anticipating a slowing of house price inflation next year as affordability issues and other economic headwinds exert greater influence." Regional variations Halifax's figures showed that Wales remained the strongest performer across the UK nations and regions, with annual house price inflation of 12.9%. This was followed by Northern Ireland at 10.7%, the South West of England at 9.8%, and the East Midlands at 8.8%. The weakest regions were the North East, where prices rose by 3.9% over the past year, Scotland at 4.4%, and London at 4.5%. Within London, the average house price was £541,920. Halifax said the capital was the only area of the UK where prices remained below their August 2007 peak, before the global financial crisis struck. Separate figures released by the Bank of England on Thursday showed mortgage approvals for house purchases fell in September to their lowest level since June 2020. Some 72,453 mortgages were approved for house purchase, down from 74,145 in August. The Bank's Money and Credit report said mortgage approvals for house purchase had fallen in September for the fifth month in a row. "This is likely reflecting increasing pressures on household finances as well as rising mortgage rates," said Nitesh Patel, strategic economist at the Bank."""
summary = generate_summary(model, tokenizer, text, min_tokens=50, initial_max_length=512, num_beams=4, max_words=100)
print("Final summary:")
print(summary)


Generated summary (max_length=512): of
Generated summary (max_length=562): of
Generated summary (max_length=612): of
Generated summary (max_length=662): of
Generated summary (max_length=712): of
Generated summary (max_length=762): of
Generated summary (max_length=812): of
Generated summary (max_length=862): of
Generated summary (max_length=912): of
Generated summary (max_length=962): of
Generated summary (max_length=1012): of
Unable to generate a summary with the minimum required tokens.
Final summary:
of


In [35]:
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

def generate_summary(model, tokenizer, text, min_tokens=100, initial_max_length=512, num_beams=10):
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Tokenize the input text
    inputs = tokenizer(text, max_length=1024, truncation=True, padding="max_length", return_tensors="pt").to(device)
    print(f"Tokenized inputs: {inputs}")

    max_length = initial_max_length
    summary = ""
    
    while True:
        # Generate summary
        summary_ids = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_length=max_length,
            num_beams=num_beams,
            early_stopping=True
        )
        
        # Decode the generated summary
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        
        # Print intermediate outputs for debugging
        print(f"Generated summary (max_length={max_length}): {summary}")

        # Check the length of the summary in tokens
        summary_tokens = tokenizer.encode(summary, add_special_tokens=False)
        if len(summary_tokens) >= min_tokens:
            break
        else:
            # Optionally, adjust parameters and retry
            max_length += 50  # Increase max_length to allow for longer summaries
            
            if max_length > 1024:
                print("Unable to generate a summary with the minimum required tokens.")
                break

    return summary


In [36]:


text = """UK h/ouse prices rose by 5.7percent in the year to the end of September, according to the latest house price index from lender Halifax. The annual rate of increase picked up from 5.2percentage in August, Halifax said. Across the UK, the average house price in September was £267,587, up from £263,786 in August, a rise of 1.7%. Russell Galley, managing director at Halifax, said: "Housing market activity has remained solid with decent levels of buyer enquiries. However, some of the drivers of the buoyant market we saw earlier in the year, such as the lack of properties for sale and buyers racing to benefit from the stamp duty holiday, have fallen away to some extent. That said, underlying demand is still strong and is serving to underpin a degree of pricing pressure for homes, which we expect will lead to a further period of sustained house price growth." The stamp duty holiday in England and Northern Ireland was phased out in stages over the summer, coming to a complete end from 1 October. In Wales, the tax break on house purchases ended on 30 June, while in Scotland it ran until 31 March. Halifax said the performance of the housing market was being supported by a number of other factors. These included the continuing low mortgage rate environment, with products priced at close to historical lows, and the ongoing shortage of properties for sale. The lender said the latter was helping to put upward pressure on house prices. However, it added that affordability challenges for buyers remained acute, with the average first-time buyer in the UK now paying the equivalent of 40% of their annual gross income on mortgage repayments. "With pressures on the cost of living mounting, and the prospect of interest rates increasing from the current low level, the house price to income ratio is becoming even more of a constraint," Mr Galley said. "Most experts are anticipating a slowing of house price inflation next year as affordability issues and other economic headwinds exert greater influence." Regional variations Halifax's figures showed that Wales remained the strongest performer across the UK nations and regions, with annual house price inflation of 12.9%. This was followed by Northern Ireland at 10.7%, the South West of England at 9.8%, and the East Midlands at 8.8%. The weakest regions were the North East, where prices rose by 3.9% over the past year, Scotland at 4.4%, and London at 4.5%. Within London, the average house price was £541,920. Halifax said the capital was the only area of the UK where prices remained below their August 2007 peak, before the global financial crisis struck. Separate figures released by the Bank of England on Thursday showed mortgage approvals for house purchases fell in September to their lowest level since June 2020. Some 72,453 mortgages were approved for house purchase, down from 74,145 in August. The Bank's Money and Credit report said mortgage approvals for house purchase had fallen in September for the fifth month in a row. "This is likely reflecting increasing pressures on household finances as well as rising mortgage rates," said Nitesh Patel, strategic economist at the Bank."""

summary = generate_summary(model, tokenizer, text, min_tokens=50, initial_max_length=512, num_beams=4)
print("Final summary:")
print(summary)


Tokenized inputs: {'input_ids': tensor([[ 926, 5124,  191,  ...,    0,    0,    0]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0]], device='cuda:0')}
Generated summary (max_length=512): of
Generated summary (max_length=562): of
Generated summary (max_length=612): of
Generated summary (max_length=662): of
Generated summary (max_length=712): of
Generated summary (max_length=762): of
Generated summary (max_length=812): of
Generated summary (max_length=862): of
Generated summary (max_length=912): of
Generated summary (max_length=962): of
Generated summary (max_length=1012): of
Unable to generate a summary with the minimum required tokens.
Final summary:
of


In [46]:
import os
import pickle
if os.path.getsize('./restored.pkl') > 0:
    with open('./restored.pkl', 'rb') as f:
        abc = pickle.load(f)
else:
    abc = None


In [48]:
print(abc)

None


In [43]:
text ="""
Google's AI News Generator Sparks Debate Over Journalism's Future
Google is experimenting with a powerful new artificial intelligence tool codenamed "Genesis" that can automatically generate news articles from information about current events. The cutting-edge technology has sparked a heated debate within the journalism industry over the implications of AI-written content.
According to insiders familiar with the project, Genesis ingests data like news reports and press releases, then uses advanced language models to produce full news stories in a human-like writing style. Google envisions the tool as an "AI assistant" for journalists, automating some of the more routine writing tasks to allow reporters to focus on higher-level aspects like analysis, fact-checking, and in-depth reporting.
"In collaboration with news publishers, particularly smaller publishers, we're in the initial stages of exploring potential applications for AI-enabled tools to support their journalists in their work," a Google spokesperson stated. "These tools are not intended to replace the crucial role journalists play, but could assist by suggesting alternative headlines or writing styles."
However, the project has raised concerns among some media executives who worry about the quality and accuracy of AI-generated articles. They argue that crafting a well-written, factual news story requires human skills like context understanding, applying journalistic ethics, and ensuring truthful reporting.
"Reporting the news is a highly nuanced process that involves far more than just stringing words together," said one anonymous media executive. "I have doubts about whether an AI, no matter how advanced, can truly capture all the subtleties and maintain the high standards we expect from journalism."
Potential Benefits and Risks
Proponents argue AI-generated articles could be a boon for smaller publishers or niche topics that lack resources for large reporting teams. The technology could help fill coverage gaps and keep readers informed on a wider range of stories.
"For a small local paper, having an AI that could produce straightforward reporting on things like town meetings or high school sports could be really valuable," said Mario Garcia, a news design consultant. "It would free up human reporters to focus on deeper enterprise pieces."
But critics worry about the potential consequences of AI news going unchecked. They raise concerns about articles containing inaccuracies, biases, or automatically rewriting copyrighted content from other sources in ways that could violate intellectual property laws.
"We've already seen issues with AI language models sometimes producing biased or toxic outputs during training," said Kalev Leetaru, an AI and data mining researcher. "Deploying these systems for news writing raises risks around amplifying societal biases or spreading misinformation, whether intentionally or not."
There are also broader philosophical questions about what constitutes true journalism and whether AI-written articles lacking human involvement can retain the editorial integrity and pursuit of truth that defines the profession.
Navigating the AI News Frontier
As generative AI capabilities rapidly advance, the journalism world is being forced to grapple with these new realities. While Genesis remains an exploratory project for now, Google is far from alone in this space. Ventures like Anthropic's Claude AI have already demonstrated powerful news writing abilities.
Some media organizations are taking a cautious stance, treating AI writing tools as aids for human journalists rather than full replacements. Others are experimenting with AI-generated content as a way to understand the technology's potential and limitations.
"We're still in very early days, but it's critical that journalists get involved to help shape the development of this technology in responsible ways," said Oren Etzioni, CEO of the Allen Institute for AI. "We need to find the right balance between editorial oversight and technological capabilities."
As the news industry navigates this AI-driven frontier, one thing is clear: The future of journalism will be fundamentally shaped by how these innovations are implemented and governed. Ensuring AI systems bolster rather than undermine journalistic integrity will require ongoing dialogue, experimentation and a steadfast commitment to ethics from both tech companies and news organizations alike.
"""
summary = generate_summary(abc, tokenizer, text, min_tokens=50, initial_max_length=512, num_beams=4)
print("Final summary:")
print(summary)

Tokenized inputs: {'input_ids': tensor([[1058,  131,  116,  ...,    0,    0,    0]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0]], device='cuda:0')}
Generated summary (max_length=512): of
Generated summary (max_length=562): of
Generated summary (max_length=612): of
Generated summary (max_length=662): of
Generated summary (max_length=712): of
Generated summary (max_length=762): of
Generated summary (max_length=812): of
Generated summary (max_length=862): of
Generated summary (max_length=912): of
Generated summary (max_length=962): of
Generated summary (max_length=1012): of
Unable to generate a summary with the minimum required tokens.
Final summary:
of
