# Summarization Frame Work.

### Dowloading and Saving the model

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "facebook/bart-large-cnn"
save_directory = "./bart-large-cnn"

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Save the tokenizer and model locally
tokenizer.save_pretrained(save_directory)
model.save_pretrained(save_directory)

print(f"Model and tokenizer saved to {save_directory}")

### Summarization

In [1]:
import torch
import numpy as np
import pandas as pd
import sklearn
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

  from .autonotebook import tqdm as notebook_tqdm


In [26]:
save_directory = "./bart-large-cnn"
tokenizer = AutoTokenizer.from_pretrained(save_directory)
model = AutoModelForSeq2SeqLM.from_pretrained(save_directory)

In [41]:
def summarizer(text, max_length=150, min_length=40):
    
    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", truncation=True)
    #print("Input Text Is:")
    #print(tokenizer.decode(inputs[0], skip_special_tokens=True))
    summary_ids = model.generate(inputs, min_length=min_length, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return summary

In [38]:
src_text = """
"Atomic Habits" by James Clear is a self-help book that offers a comprehensive guide to building good habits and 
breaking bad ones. The book introduces the concept of "atomic habits," which refers to small habits that, when practiced 
consistently, can lead to significant improvements in our lives. Clear explains that small changes can add up over time, 
much like the way atoms combine to form molecules, and emphasizes that it's the small, incremental changes that can lead to 
lasting results, rather than trying to make drastic changes all at once. He outlines the 4 Laws of Behavior Change - make it 
obvious, make it attractive, make it easy, and make it satisfying - and the 4 stages of habit formation - cue, craving, 
response, and reward. The book provides strategies for building good habits, such as starting small, creating an implementation 
intention, using visual cues, and celebrating milestones, as well as strategies for breaking bad habits, including reframing your 
identity, finding alternative behaviors, and using the 4 Laws of Behavior Change to reverse engineer the bad habit. By 
applying these strategies and techniques, readers can create lasting changes in their lives and make progress towards their goals, 
and the book emphasizes the importance of tracking progress, being patient, and staying consistent in order to achieve success.
"""
print(len(src_text.split(" ")))

216


In [None]:
summarized_text = summarizer(long_text)
print(summarized_text)
print(len(summarized_text.split(" ")))

### For long texts

In [16]:
import torch
import numpy as np
import pandas as pd
import sklearn
import math
from transformers import BartTokenizer, BartForConditionalGeneration

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


# Define the path to the model
model_id = "./bart-large-cnn"
local_model_path = model_id

# Load the tokenizer and model from the local directory
tokenizer = BartTokenizer.from_pretrained(local_model_path)
model = BartForConditionalGeneration.from_pretrained(local_model_path).to(device)

Using device: cuda


In [None]:
def summarize_text(text, max_length=150, min_length=40):
    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=1024, truncation=True).to(device)
    summary_ids = model.generate(inputs, max_length=max_length, min_length=min_length, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

def chunk_text(text, max_tokens=1024):
    tokens = tokenizer.encode(text, return_tensors="pt")[0]
    chunks = []
    for i in range(0, len(tokens), max_tokens):
        chunk = tokens[i:i + max_tokens]
        chunks.append(tokenizer.decode(chunk))
    return chunks

def iterative_summarization(text, target_word_count=100):
    while True:
        chunks = chunk_text(text)
        summaries = [summarize_text(chunk) for chunk in chunks]
        combined_summary = " ".join(summaries)
        
        word_count = len(combined_summary.split())
        if word_count <= target_word_count:
            return combined_summary
        
        text = combined_summary

In [29]:
# Sample text to summarize
long_text = """
"Atomic Habits" by James Clear is a self-help book that offers a comprehensive guide to building good habits and 
breaking bad ones, providing actionable strategies and techniques to help readers create lasting changes in their lives. 
The book introduces the concept of "atomic habits," which refers to small habits that, when practiced consistently, can lead 
to significant improvements in our lives, emphasizing that small changes can add up over time, much like the way atoms combine
to form molecules. Clear illustrates the concept of the "aggregation of marginal gains" using the example of the British cycling 
team, which dominated the Tour de France by making small improvements in nutrition, training, and equipment, and applies this 
concept to habit formation, where small, incremental changes can lead to significant gains over time. The book outlines the 4 Laws 
of Behavior Change, which are: make it obvious, make it attractive, make it easy, and make it satisfying, and provides strategies 
for building good habits, such as starting small, creating an implementation intention, using visual cues, and celebrating milestones.
 Clear also provides strategies for breaking bad habits, including reframing your identity, finding alternative behaviors, and 
 using the 4 Laws of Behavior Change to reverse engineer the bad habit. He emphasizes the importance of tracking progress, being 
 patient, and staying consistent in order to achieve success, and provides examples of how small changes can lead to significant 
 improvements in various areas of life, such as fitness, productivity, and relationships. The book also explores the role of 
 identity and motivation in shaping our habits, and provides strategies for creating an environment that supports good habits, 
 such as eliminating distractions, creating a schedule, and using implementation intentions. Clear also discusses the importance 
 of community and accountability in maintaining good habits, and provides strategies for overcoming obstacles and setbacks, such 
 as using the "2-minute rule" and creating a "habit scorecard." Throughout the book, Clear draws on a wide range of sources, 
 including psychology, neuroscience, and real-life examples, to provide a comprehensive and accessible guide to building good 
 habits and breaking bad ones. By applying the strategies and techniques outlined in the book, readers can create lasting changes 
 in their lives and achieve their goals, and the book provides a valuable resource for anyone looking to improve their habits and 
 achieve success. The book is divided into four main sections, the first section focuses on the fundamentals of habits, the second 
 section focuses on how to build good habits, the third section focuses on how to break bad habits, and the fourth section focuses
on how to create an environment that supports good habits. Each section is filled with actionable strategies and techniques that 
readers can apply to their own lives, and the book provides a comprehensive and accessible guide to building good habits and 
breaking bad ones. Overall, "Atomic Habits" is a valuable resource for anyone looking to improve their habits and achieve success,
and provides a comprehensive and accessible guide to building good habits and breaking bad ones.
"""
print(len(long_text.split(" ")))

510


In [24]:
# Get the final summary
final_summary = iterative_summarization(long_text, target_word_count=100)
print(final_summary)
print("Length of Summary:",len(final_summary.split(" ")))

"Atomic Habits" by James Clear is a self-help book that offers a comprehensive guide to building good habits and breaking bad ones. The book introduces the concept of "atomic habits," which refers to small habits that, when practiced consistently, can lead to significant improvements in our lives.
Length of Summary: 47


In [42]:
import gc
del model
gc.collect()
print("Model has been removed from the device and memory is freed.")

NameError: name 'model' is not defined