In [2]:
!pip install pytesseract

Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.13


In [3]:
# --- CELL 1: SETUP & UPLOAD ---
import os
import cv2
import numpy as np
import pytesseract
import matplotlib.pyplot as plt
from google.colab import files
import shutil

# 1. Install Libraries
!sudo apt-get install -y tesseract-ocr
!pip install -q pytesseract opencv-python-headless matplotlib kaggle

# 2. OPTIONAL: Kaggle Setup (To show proficiency, even if using Drive images)
# Replace these with your actual details if you want to demonstrate API usage
os.environ['KAGGLE_USERNAME'] = "cymero"
os.environ['KAGGLE_KEY'] = "KGAT_c184dd075b61cba51a2dae890609653b"

# 3. UPLOAD DRIVE IMAGES
# When this runs, click "Choose Files" and select the images you downloaded from the Drive link.
print(">>> ACTION REQUIRED: Upload the Test Images from the Drive Link now.")
os.makedirs("drive_test_images", exist_ok=True)
uploaded = files.upload()

# Move images to the folder
for filename in uploaded.keys():
    shutil.move(filename, f"drive_test_images/{filename}")

print(f"\n[SUCCESS] {len(uploaded)} images ready for processing.")

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr is already the newest version (4.1.1-2.1build1).
0 upgraded, 0 newly installed, 0 to remove and 1 not upgraded.
>>> ACTION REQUIRED: Upload the Test Images from the Drive Link now.


Saving IMG_5154.JPG to IMG_5154.JPG
Saving IMG_5153.JPG to IMG_5153.JPG
Saving IMG_5152.PNG to IMG_5152.PNG
Saving IMG_5151.JPG to IMG_5151.JPG

[SUCCESS] 4 images ready for processing.


In [4]:
# --- CELL 2: PYTESSERACT PIPELINE ---
class PyTesseract_Pipeline:
    """OCR pipeline for a single image: load, preprocess, then run Tesseract.

    Attributes:
        img_path: Path the image was loaded from.
        image: BGR array returned by ``cv2.imread``; the methods below treat
            ``None`` as "no image available".
        processed: Binarized image produced by ``preprocess_unique``, or
            ``None`` until that method has run successfully.
    """

    def __init__(self, img_path):
        """Load the image at ``img_path`` with OpenCV."""
        self.img_path = img_path
        self.image = cv2.imread(img_path)
        self.processed = None

    def display(self, img, title="Preview"):
        """Show ``img`` with matplotlib.

        Args:
            img: 2-D (grayscale) or BGR colour array. ``None`` is tolerated
                so the method can be called with ``self.processed`` even
                when preprocessing produced nothing.
            title: Title drawn above the figure.
        """
        if img is None:
            print(f"[WARN] Nothing to display for '{title}'.")
            return

        plt.figure(figsize=(10, 10))
        if len(img.shape) == 2:
            plt.imshow(img, cmap='gray')
        else:
            # matplotlib expects RGB, OpenCV arrays are BGR.
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.title(title)
        plt.axis('off')
        plt.show()

    def preprocess_unique(self):
        """
        Unique Pipeline: Grayscale -> Bilateral Filter -> Adaptive Threshold.
        Bilateral Filter is used instead of Gaussian Blur to keep text edges sharp.

        Returns:
            The thresholded image (also stored in ``self.processed``), or
            ``None`` when no image is available.
        """
        if self.image is None:
            return None

        gray = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)

        # KEY DIFFERENTIATOR: Bilateral Filter
        # Removes noise while keeping edges sharp (Better than Gaussian for OCR)
        denoised = cv2.bilateralFilter(gray, 9, 75, 75)

        # Adaptive Thresholding for shadows
        self.processed = cv2.adaptiveThreshold(
            denoised, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )
        return self.processed

    def run_ocr(self):
        """Run Tesseract on the preprocessed image, or on the raw image as a fallback.

        Returns:
            The recognized text, or an empty string when no image is
            available (mirrors the ``None`` guard in ``preprocess_unique``).
        """
        # PSM 6 (Single Uniform Block) is usually best for document snippets
        config = '--psm 6'

        if self.processed is not None:
            img = self.processed
        elif self.image is not None:
            img = self.image
            # pytesseract assumes RGB channel order while OpenCV loads BGR,
            # so a raw colour image must be converted before OCR.
            if img.ndim == 3 and img.shape[2] == 3:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        else:
            # Nothing was loaded: report "no text" instead of handing None
            # to pytesseract, which would raise.
            return ""

        return pytesseract.image_to_string(img, config=config)

print("Pipeline Class Loaded.")

Pipeline Class Loaded.


In [5]:
# --- CELL 3: EXECUTION ---
import glob

# Get the images you uploaded.
# glob returns paths in arbitrary order, so sort for a reproducible report.
test_images = sorted(glob.glob("drive_test_images/*"))
results_A = {}

print(f"{'Filename':<20} | {'Extracted Text Snippet'}")
print("-" * 60)

for img_file in test_images:
    filename = os.path.basename(img_file)
    pipeline = PyTesseract_Pipeline(img_file)

    # Skip anything OpenCV could not decode (non-image or corrupt upload)
    # so one bad file does not abort the whole batch.
    if pipeline.image is None:
        print(f"{filename:<20} | [SKIPPED] could not be read as an image")
        continue

    # 1. Apply Unique Preprocessing
    pipeline.preprocess_unique()

    # 2. Run OCR
    text = pipeline.run_ocr()

    # 3. Store Results (full text, keyed by file name)
    results_A[filename] = text

    # Print a single-line snippet to the screen
    clean_text = text.replace('\n', ' ')
    print(f"{filename:<20} | {clean_text[:50]}...")

    # Uncomment next line if you want to see the preprocessed image
    # pipeline.display(pipeline.processed, f"Processed: {filename}")

Filename             | Extracted Text Snippet
------------------------------------------------------------
IMG_5152.PNG         | SThis-is* a handwritten example Write. as good as....
IMG_5154.JPG         | of ...
IMG_5151.JPG         | mARRING | AW J& f RIDAY (222m oO ey | : | - a ...
IMG_5153.JPG         | totecaterent soeseeacadesy: EN ERR See SS SS Ss So...
