In [2]:
!pip install PyPDF2 transformers



In [3]:
import PyPDF2
from transformers import pipeline
import os


In [4]:

def get_uploaded_file(directory='/content'):
    """Return the name of a PDF file found in ``directory``, or ``None``.

    Args:
        directory: Folder to scan. Defaults to ``/content``, the folder this
            notebook reads uploaded files from.

    Returns:
        The file name (not the full path) of the alphabetically first regular
        file whose name ends in ``.pdf`` (case-insensitive), or ``None`` when
        the directory does not exist or contains no such file.
    """
    try:
        entries = os.listdir(directory)
    except FileNotFoundError:
        # The upload folder may not exist (e.g. outside Colab); report
        # "no file" so the caller's fallback message is shown, not a traceback.
        return None

    # os.listdir() returns entries in arbitrary order, so sort to make the
    # selected file reproducible from run to run.
    pdf_names = sorted(
        name for name in entries
        if name.lower().endswith('.pdf')
        and os.path.isfile(os.path.join(directory, name))
    )
    return pdf_names[0] if pdf_names else None

In [5]:
def extract_text_from_pdf(pdf_name, max_pages=None):
    """Extract the text of a PDF stored under ``/content``.

    Args:
        pdf_name: File name of the PDF, relative to ``/content``.
        max_pages: Maximum number of leading pages to read. ``None`` (the
            default) reads every page; ``0`` or a negative value reads none.

    Returns:
        The text of the selected pages joined with newlines. A page for which
        the reader yields no text contributes an empty string.
    """
    pdf_path = f'/content/{pdf_name}'
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        total_pages = len(reader.pages)
        # Compare against None explicitly so that max_pages=0 means
        # "no pages" instead of silently falling back to "all pages".
        if max_pages is None:
            num_pages = total_pages
        else:
            num_pages = max(0, min(total_pages, max_pages))
        # `or ""` guards against a reader returning None for a page.
        page_texts = [
            reader.pages[index].extract_text() or ""
            for index in range(num_pages)
        ]
    # Join with a newline so the last word of one page does not fuse with the
    # first word of the next.
    return "\n".join(page_texts)

In [6]:
def preprocess_text(text: str) -> str:
    """Collapse every run of whitespace in ``text`` into a single space.

    Leading and trailing whitespace is dropped, so newlines and tabs never
    survive into the returned string.
    """
    # str.split() with no argument splits on any whitespace run and discards
    # empty pieces at both ends.
    words = text.split()
    single_spaced = " ".join(words)
    return single_spaced

In [7]:
# Loaded pipelines keyed by model name, so the model is built once per kernel
# session instead of on every summarize_text() call.
_SUMMARIZER_CACHE = {}


def _get_summarizer(model_name):
    """Return the summarization pipeline for ``model_name``, creating it on first use."""
    if model_name not in _SUMMARIZER_CACHE:
        _SUMMARIZER_CACHE[model_name] = pipeline("summarization", model=model_name)
    return _SUMMARIZER_CACHE[model_name]


def summarize_text(text, max_length=150, min_length=50, model_name="facebook/bart-large-cnn"):
    """Summarize ``text`` with a transformer summarization pipeline.

    Args:
        text: The text to summarize.
        max_length: Forwarded to the pipeline as ``max_length``.
        min_length: Forwarded to the pipeline as ``min_length``.
        model_name: Model identifier passed to ``pipeline``. Defaults to
            ``facebook/bart-large-cnn``.

    Returns:
        The ``summary_text`` of the first pipeline result, or an empty string
        when ``text`` is empty or contains only whitespace.
    """
    # Nothing to summarize: skip loading the model altogether.
    if not text or not text.strip():
        return ""

    summarizer = _get_summarizer(model_name)
    results = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Ask the tokenizer to cut over-long input down to what the model
        # accepts instead of failing on long documents.
        truncation=True,
    )
    return results[0]['summary_text']


In [8]:
if __name__ == "__main__":
    # Only the first few pages are summarized; raise this to cover more of
    # the document.
    max_pages = 3

    # Automatically get the uploaded PDF file name
    file_name = get_uploaded_file()

    if not file_name:
        print("No PDF file found in the Colab environment.")
    else:
        # Extract and preprocess text from the first few pages of the PDF
        text = extract_text_from_pdf(file_name, max_pages=max_pages)
        processed_text = preprocess_text(text)

        if not processed_text:
            # Scanned or image-only PDFs can yield no text at all; there is
            # nothing meaningful to hand to the summarizer in that case.
            print(f"No extractable text found in '{file_name}'.")
        else:
            summary = summarize_text(processed_text, max_length=150, min_length=50)

            print(f"\nSummary of '{file_name}':")
            print(summary)



Summary of '211010206_Aditya_Kaul(SDE).pdf':
Aditya Kaul IIIT, Naya Raipur 7.5/10 B.Tech in Electronics and Communication Dec. 2021 – May 2025 Experience Software Developmen Intern June 2023 – July 2023 Birlasoft Gurugram. Designed and implemented functions to generate tabular Excel data.
