# Article Summarizer

In [1]:
from langchain.llms import HuggingFacePipeline
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import requests
from bs4 import BeautifulSoup
import os




In [2]:
def extract(url, timeout = 10):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch. ``None``, non-string, empty, or
            whitespace-only values are rejected without a network call.
        timeout: Seconds to wait for the server, forwarded to ``requests.get``.

    Returns:
        The paragraph texts joined by single spaces, or ``None`` when the URL
        is blank, the request fails, or the page yields no paragraph text.
        Every failure is reported with ``print`` rather than raised.
    """
    if not isinstance(url, str) or not url.strip():
        print("Error: Empty URL")
        return None
    try:
        # Without a timeout an unresponsive server would block the notebook indefinitely.
        webpage = requests.get(url, timeout = timeout)
        webpage.raise_for_status()

        soup = BeautifulSoup(webpage.text, 'html.parser')
        paragraphs = soup.find_all('p')

        article = ' '.join(p.text for p in paragraphs)

        # Paragraphs that contain only whitespace are as useless as no paragraphs at all.
        if not article.strip():
            raise ValueError("The artticle appears to be empty or poorly formated")
        return article

    except requests.exceptions.RequestException as req_err:
        print(f"An HTTP error occured: {req_err}")

    except ValueError as val_err:
        print(f"Value error occured: {val_err}")

    # Catch-all so unexpected failures (e.g. parser errors) are reported, not raised.
    except Exception as e:
        print(f"An error occurred: {e}")

    return None

In [3]:
def load_model(name = "google/flan-t5-large", max_length = 100):
    """Build a LangChain LLM wrapper around a Hugging Face seq2seq pipeline.

    Args:
        name: Model identifier passed to both ``from_pretrained`` calls.
        max_length: ``max_length`` setting forwarded to the pipeline.

    Returns:
        A ``HuggingFacePipeline`` wrapping a "text2text-generation" pipeline
        built from the loaded tokenizer and model.
    """
    tokenizer = AutoTokenizer.from_pretrained(name, clean_up_tokenization_spaces = True)
    model = AutoModelForSeq2SeqLM.from_pretrained(name, device_map = "auto")
    pipe = pipeline("text2text-generation", model = model, tokenizer = tokenizer, max_length = max_length)
    return HuggingFacePipeline(pipeline = pipe)

In [4]:
def summarize(art_text):
    """Generate a summary and a title for an article.

    Args:
        art_text: Article text substituted for ``{text}`` in both prompts.

    Returns:
        A ``(summary, title)`` tuple holding the outputs of the two chains.
    """
    # PromptTemplate's field is `input_variables`; `input_var` is not a recognised keyword.
    summary_prompt = PromptTemplate(input_variables = ["text"],
                                    template = "Summarize the article in detail :\n{text}")
    title_prompt = PromptTemplate(input_variables = ["text"],
                                 template = "Generate a title for this article :\n{text}")
    llm = load_model()

    # NOTE(review): the recorded run warned that the input was 754 tokens against a
    # 512-token limit; long articles may need truncation or chunking — confirm.
    summary_chain = LLMChain(llm = llm, prompt = summary_prompt)
    summary = summary_chain.run(text = art_text)

    title_chain = LLMChain(llm = llm, prompt = title_prompt)
    title = title_chain.run(text = art_text)

    return summary, title

In [5]:
def save(url, title, summary, filename = "Summaries.txt"):
    """Append one formatted summary record to a text file.

    Args:
        url: Source address written on the record's first line.
        title: Generated title for the article.
        summary: Generated summary text.
        filename: File to append to; opened in append mode with UTF-8 encoding.
    """
    divider = '=' * 50
    record = (
        f"URL : {url}\n"
        f" GeneratedTitle : {title}\n"
        f" Summary:\n"
        f" {summary}\n\n"
        f"{divider}\n\n"
    )

    # Append mode keeps earlier records in the file intact.
    with open(filename, mode = "a", encoding = "utf-8") as out_file:
        out_file.write(record)

    print(f"saved successfully to {filename}")

In [6]:
def main():
    """Prompt for a URL, then extract, summarize, print, and save the article.

    Nothing further happens when extraction yields no text; ``extract`` is
    the one that reports the reason.
    """
    url = input("Enter URL: ").strip()
    art_text = extract(url)

    # Guard clause: stop here when there is no article text to work with.
    if not art_text:
        return

    print("Successfully extracted.Generating summary and title :\n")
    summary, title = summarize(art_text)

    print(f"Title:\n{title}\n")
    print(f"Sumary:\n{summary}\n")

    save(url, title, summary)
        
# Entry point: run the summarizer only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

Enter URL:  https://www.teachermagazine.com/sea_en/articles/a-new-approach-to-personalised-learning


Successfully extracted.Generating summary and title :



  return HuggingFacePipeline(pipeline = pipe)
  summary_chain = LLMChain(llm = llm, prompt = summary_prompt)
  summary = summary_chain.run(text = art_text)
Token indices sequence length is longer than the specified maximum sequence length for this model (754 > 512). Running this sequence through the model will result in indexing errors


Title:
A global study calls for personalised education

Sumary:
A global study aims to reimagine the future of education, to build more resilient and sustainable education systems.

saved successfully to Summaries.txt
