In [1]:
import os
# Step up one directory so the relative paths used by later cells resolve from
# the parent folder (presumably the project root -- confirm against the repo layout).
# NOTE(review): this is relative to the *current* working directory, so running
# this cell more than once in the same kernel keeps moving further up the tree.
os.chdir('../')

In [2]:
# Display the working directory to confirm the chdir in the first cell took effect.
%pwd

'c:\\Users\\sjasm\\Documents\\Text-Summarization'

In [19]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelPredictionConfig:
    """Immutable bundle of settings used by the prediction stage.

    The dataclass is frozen, so fields cannot be reassigned after construction.
    The annotations are not enforced at runtime by ``dataclass``; whatever the
    configuration loader supplies is stored as-is.
    """

    # Location passed to AutoModelForSeq2SeqLM.from_pretrained.
    model_path: Path
    # Location passed to AutoTokenizer.from_pretrained.
    tokenizer_path: Path
    # The three fields below are forwarded as keyword arguments to the
    # summarization pipeline call.
    length_penalty: float
    num_beams: int
    max_length: int
    

In [20]:
from textSummarization.constants import *
from textSummarization.utils.common import read_yaml, create_directories

In [24]:
class ConfigurationManager:
    """Read the project's YAML settings and build per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and hand the artifacts root to ``create_directories``.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_prediction_config(self) -> ModelPredictionConfig:
        """Assemble the prediction settings from the loaded YAML data.

        Paths are taken from the ``prediction`` section of the config file and
        generation settings from the ``PredictionArguments`` section of the
        params file.

        Returns:
            A ``ModelPredictionConfig`` populated with those values.
        """
        prediction_section = self.config.prediction
        generation_args = self.params.PredictionArguments

        return ModelPredictionConfig(
            model_path=prediction_section.model_path,
            tokenizer_path=prediction_section.tokenizer_path,
            length_penalty=generation_args.length_penalty,
            num_beams=generation_args.num_beams,
            max_length=generation_args.max_length,
        )

In [25]:
from datasets import load_dataset, load_from_disk
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

In [7]:
from transformers import pipeline

In [8]:
import torch

In [26]:
class ModelPrediction:
    """Summarize text with a seq2seq checkpoint loaded from local paths.

    The tokenizer, model and summarization pipeline are created on the first
    call to ``predict`` and reused afterwards, so repeated predictions do not
    reload the checkpoint from disk.
    """

    def __init__(self, config: "ModelPredictionConfig"):
        """Store the configuration and derive the generation keyword arguments.

        Args:
            config: Object exposing ``model_path``, ``tokenizer_path``,
                ``length_penalty``, ``num_beams`` and ``max_length``.
        """
        self.config = config
        self.gen_kwargs = {
            "length_penalty": config.length_penalty,
            "num_beams": config.num_beams,
            "max_length": config.max_length,
        }
        # Built lazily by _get_pipeline(); constructing the object stays cheap.
        self._pipe = None

    def _get_pipeline(self):
        """Return the cached summarization pipeline, building it on first use."""
        if getattr(self, "_pipe", None) is None:
            # Integer device index understood by transformers pipelines:
            # 0 selects the first CUDA device, -1 selects the CPU.
            device = 0 if torch.cuda.is_available() else -1

            tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_path)
            model = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_path)

            # Let the pipeline place the model: giving it the device keeps the
            # model weights and the tokenized inputs on the same device,
            # instead of moving only the model by hand.
            self._pipe = pipeline(
                "summarization",
                model=model,
                tokenizer=tokenizer,
                device=device,
            )
        return self._pipe

    def predict(self, text):
        """Generate a summary for ``text``.

        Args:
            text: The input passed to the summarization pipeline.

        Returns:
            The ``summary_text`` entry of the first pipeline result.
        """
        summarizer = self._get_pipeline()
        results = summarizer(text, **self.gen_kwargs)
        return results[0]["summary_text"]

In [27]:
# End-to-end check: load the configuration, build the predictor and print the
# summary of a sample paragraph. Exceptions are left to propagate unchanged; a
# catch-and-re-raise wrapper would only add a frame to the traceback.
config = ConfigurationManager()
model_pred_config = config.get_model_prediction_config()
text  = "The schematic diagram provides an overview of how the Text Summarization Streamlit Application functions, from user input to the generation of high-quality summaries, all while maintaining ethical and user-friendly principles.This final product prototype embodies our commitment to delivering a valuable and accessible text summarization tool to a wide range of users, revolutionizing the way they extract insights and knowledge from extensive textual content."
prediction = ModelPrediction(config=model_pred_config)
print(prediction.predict(text =  text))

[2023-09-01 21:48:11,447: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-09-01 21:48:11,458: INFO: common: yaml file: params.yaml loaded successfully]
[2023-09-01 21:48:11,464: INFO: common: created directory at: artifacts]


Your max_length is set to 128, but your input_length is only 80. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=40)


The schematic diagram provides an overview of how the Text Summarization Streamlit Application functions .<n>The final product prototype embodies our commitment to delivering a valuable and accessible text summarization tool to a wide range of users .
