# PDF Form Field Analysis

This notebook demonstrates how to extract form fields from a PDF using PyPDF2.

First, let's install and import the required library

In [1]:
!pip install PyPDF2



In [2]:
from PyPDF2 import PdfReader
from pathlib import Path

Create a sample PDF with form fields for demonstration

In [3]:
# Using a sample blank PDF for demonstration
sample_pdf_path = Path("sample.pdf")
if not sample_pdf_path.exists():
    from reportlab.pdfgen import canvas
    c = canvas.Canvas(str(sample_pdf_path))
    c.save()

ModuleNotFoundError: No module named 'reportlab'

Load the PDF file and get the form fields

In [None]:
reader = PdfReader(sample_pdf_path)

# Get form fields from the PDF
form_fields = reader.get_form_text_fields()

# Save form fields to a text file
with open('form_fields.txt', 'w') as f:
    for field_name, value in form_fields.items():
        f.write(f"Field: {field_name}, Current Value: {value}\n")

Now we can also see what kinds of fields are available (checkboxes, text fields etc.)

In [None]:
# Get all fields including their types
all_fields = {}
if '/AcroForm' in reader.trailer['/Root']:
    form = reader.trailer['/Root']['/AcroForm']
    if '/Fields' in form:
        fields = form['/Fields']
        for i in range(len(fields)):
            field = fields[i].get_object()
            name = field.get('/T')
            type = field.get('/FT')
            all_fields[name] = type

# Save detailed field information
with open('field_types.txt', 'w') as f:
    for name, type in all_fields.items():
        f.write(f"Field Name: {name}, Type: {type}\n")