# 📄 Image → PDF & PDF → OCR (Searchable)
Convert **images to a PDF**, or turn a **scanned PDF into a searchable PDF** using OCR.
Works on **PC & Mobile** in Google Colab.

In [ ]:
# ⬇️ Install dependencies
!pip -q install pillow pytesseract pymupdf
!apt-get -qq update
!apt-get -qq install -y tesseract-ocr
import pytesseract, fitz
from PIL import Image
print('✅ Setup complete!')

## 1) 🖼️ Images → PDF

In [ ]:
# Build a single PDF from the uploaded images (one image per page, in the
# order the upload dialog returned them) and download it.
from google.colab import files
from PIL import ImageOps
import io

print('📤 Upload images...')
uploaded = files.upload()
if not uploaded:
    # A cancelled/empty upload would otherwise crash below with an opaque
    # IndexError on images[0].
    raise ValueError('No images were uploaded - run this cell again and choose at least one image.')

filenames = list(uploaded.keys())
# exif_transpose applies the EXIF orientation tag (common on phone photos) so
# pages are not rotated in the PDF; convert('RGB') drops alpha/palette modes,
# which the PDF writer cannot store directly.
images = [
    ImageOps.exif_transpose(Image.open(io.BytesIO(uploaded[f]))).convert('RGB')
    for f in filenames
]

output_pdf = 'images_to_pdf_output.pdf'
# save_all with an empty append_images list writes a one-page PDF, so the
# single-image case needs no separate branch.
images[0].save(output_pdf, save_all=True, append_images=images[1:])

print('✅ Done:', output_pdf)
files.download(output_pdf)

## 2) 📄 PDF → OCR (Searchable PDF)

In [ ]:
# Rasterize every page of the uploaded PDF, OCR it with Tesseract, and stitch
# the resulting one-page searchable PDFs into a single output document.
# Imported here as well so this section can run without the Images → PDF cell.
from google.colab import files

print('📤 Upload a PDF...')
uploaded_pdf = files.upload()
if not uploaded_pdf:
    # A cancelled/empty upload would otherwise fail with an IndexError.
    raise ValueError('No PDF was uploaded - run this cell again and choose a PDF file.')
input_pdf_name = next(iter(uploaded_pdf))  # only the first uploaded file is processed

output_pdf = 'output_ocr.pdf'
# Tesseract language codes are joined with '+', e.g. 'eng+hin' for
# English+Hindi (that needs the tesseract-ocr-hin language data installed).
lang = 'eng'
zoom = 3.0    # increase for sharper OCR
# Pages are rendered at 72 * zoom pixels per inch; telling Tesseract this DPI
# keeps the output pages at the original physical size instead of ~zoom times larger.
render_dpi = round(72 * zoom)

# Context managers close both documents even if OCR fails midway.
with fitz.open(stream=uploaded_pdf[input_pdf_name], filetype='pdf') as doc, fitz.open() as new_doc:
    for page in doc:
        # alpha=False guarantees 3 bytes per pixel, matching the 'RGB' mode below.
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
        img = Image.frombytes('RGB', (pix.width, pix.height), pix.samples)
        pdf_bytes = pytesseract.image_to_pdf_or_hocr(
            img, lang=lang, extension='pdf', config=f'--dpi {render_dpi}'
        )
        # Each OCR result is a one-page PDF; close it once it has been copied.
        with fitz.open(stream=pdf_bytes, filetype='pdf') as ocr_page:
            new_doc.insert_pdf(ocr_page)
    new_doc.save(output_pdf)

print('✅ OCR PDF created:', output_pdf)
files.download(output_pdf)