In [None]:
import PyPDF2
from transformers import pipeline
import os

# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF into a single string.

    Validation failures are reported by printing the error message rather
    than by raising, so callers only need to check for ``None``.

    Args:
        pdf_path: Path to the PDF file on disk.

    Returns:
        The concatenated text of all pages that yielded text, or ``None``
        when the path does not exist, is not a regular file, does not end
        in ``.pdf``, or no text could be extracted.
    """
    try:
        # Check if the file exists
        if not os.path.exists(pdf_path):
            raise FileNotFoundError(f"Error: File '{pdf_path}' not found.")

        # A directory (even one named "*.pdf") exists but cannot be opened
        # as a file; reject it here instead of letting open() raise an
        # OSError that this function does not handle.
        if not os.path.isfile(pdf_path):
            raise ValueError(f"Error: '{pdf_path}' is not a file.")

        # Check if the file has a .pdf extension
        if not pdf_path.lower().endswith('.pdf'):
            raise ValueError("Error: The file provided is not a PDF.")

        # Pages must be read while the file handle is still open.
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            page_texts = [page.extract_text() for page in reader.pages]

        # Skip pages that produced no text (None or empty string).
        text = ''.join(page_text for page_text in page_texts if page_text)
        if not text.strip():
            raise ValueError("Error: The PDF appears to be empty or contains no extractable text.")

        return text
    except (FileNotFoundError, ValueError) as e:
        print(e)
    return None

# Summarize the extracted text
def summarize_text(text, max_length=150, min_length=40):
    """Summarize ``text`` with the ``facebook/bart-large-cnn`` model.

    Args:
        text: The text to summarize.
        max_length: Upper bound on the generated summary length, passed to
            the summarization pipeline.
        min_length: Lower bound on the generated summary length. Clamped
            so it never exceeds ``max_length``.

    Returns:
        The ``summary_text`` of the first result returned by the pipeline.
    """
    # A minimum above the maximum is contradictory; clamp it so that small
    # max_length values still produce a valid request.
    min_length = min(min_length, max_length)

    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    summary = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Ask the tokenizer to cut inputs that exceed the model's maximum
        # input size instead of failing inside the model.
        truncation=True,
    )
    return summary[0]['summary_text']

# Function to combine extraction and summarization
def summarize_pdf(pdf_path, summary_length=150):
    """Extract the text of a PDF and return a summary of it.

    Args:
        pdf_path: Path to the PDF file.
        summary_length: Maximum summary length handed to ``summarize_text``.

    Returns:
        The summary string, or ``None`` when text extraction failed.
    """
    extracted = extract_text_from_pdf(pdf_path)
    if extracted is None:
        return None

    # Keep only the first 5000 characters so the input is not too long for
    # the model; slicing leaves shorter text unchanged.
    clipped = extracted[:5000]

    return summarize_text(clipped, max_length=summary_length)

# Function to get and validate user input for summary length
def get_summary_length():
    """Prompt until the user enters a summary length between 50 and 500.

    Returns:
        The accepted length as an ``int`` in the inclusive range 50-500.
    """
    while True:
        raw = input("Enter the desired summary length (e.g., 100-300): ")

        # Parse separately from the range check so each failure gets its
        # own clear message instead of the raw int() error text.
        try:
            length = int(raw)
        except ValueError:
            print("Invalid input: not a whole number. Please enter a valid number between 50 and 500.")
            continue

        if 50 <= length <= 500:
            return length
        print("Invalid input: Summary length should be between 50 and 500.")

# Main function to handle user input
def main():
    """Prompt for a PDF path and summary length, then print the summary.

    Nothing is printed by this function when no summary was produced.
    """
    # Ask the user to input the file path
    raw_path = input("Please enter the path to the PDF file: ")

    # Pasted paths often carry stray whitespace or surrounding quotes
    # (e.g. from a file manager's "copy as path"); remove them so the
    # existence check sees the real path.
    pdf_path = raw_path.strip().strip('"\'').strip()

    # Ask the user for desired summary length
    summary_length = get_summary_length()

    # Generate and display the summary
    summary = summarize_pdf(pdf_path, summary_length)
    if summary:
        print("\nSummary of the Paper:\n")
        print(summary)

# Run the interactive prompt only when this file is executed directly;
# importing it as a module defines the functions without prompting.
if __name__ == "__main__":
    main()


Please enter the path to the PDF file:  C:\Users\Sahilsharma\Desktop\Reaserch paper.pdf
Enter the desired summary length (e.g., 100-300):  100
