In [2]:
from flask import Flask, request, jsonify
from transformers import pipeline, BartTokenizer
import pdfplumber
import requests
from bs4 import BeautifulSoup

app = Flask(__name__)

# Initialize the summarizer and tokenizer
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")

def summarize_text_in_chunks(text, max_length=150, min_length=50):
    """
    Summarizes large text by splitting it into chunks and summarizing each chunk.
    """
    chunks = [text[i:i+1000] for i in range(0, len(text), 1000)]
    summaries = []
    for chunk in chunks:
        try:
            summary = summarizer(chunk, max_length=max_length, min_length=min_length)
            summaries.append(summary[0]['summary_text'])
        except Exception as e:
            print(f"Error summarizing chunk: {e}")
    return " ".join(summaries)

@app.route('/summarize-text', methods=['POST'])
def summarize_text():
    data = request.json
    text = data.get("text", "")
    if not text.strip():
        return jsonify({"error": "No text provided"}), 400
    try:
        summary = summarize_text_in_chunks(text)
        return jsonify({"summary": summary})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

@app.route('/summarize-pdf', methods=['POST'])
def summarize_pdf():
    file = request.files.get('file')
    if not file:
        return jsonify({"error": "No PDF file provided"}), 400
    try:
        text = ""
        with pdfplumber.open(file) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    text += page_text
        if not text.strip():
            return jsonify({"error": "No readable text in the PDF"}), 400
        summary = summarize_text_in_chunks(text)
        return jsonify({"summary": summary})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

@app.route('/summarize-website', methods=['POST'])
def summarize_website():
    data = request.json
    url = data.get("url", "")
    if not url.strip():
        return jsonify({"error": "No URL provided"}), 400
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        paragraphs = soup.find_all('p')
        content = ' '.join([p.get_text() for p in paragraphs])
        if not content.strip():
            return jsonify({"error": "No readable content on the webpage"}), 400
        summary = summarize_text_in_chunks(content)
        return jsonify({"summary": summary})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

if __name__ == "__main__":
    app.run(port=5001)


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5001
Press CTRL+C to quit
Your max_length is set to 150, but your input_length is only 4. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=2)
127.0.0.1 - - [08/Dec/2024 23:24:31] "POST /summarize-text HTTP/1.1" 200 -
Your max_length is set to 150, but your input_length is only 4. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=2)
127.0.0.1 - - [08/Dec/2024 23:24:56] "POST /summarize-text HTTP/1.1" 200 -
