In [None]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import ToTensor
from PIL import Image
import os
import json
from tqdm import tqdm
import random

In [10]:
# Mount Google Drive at /content/drive; the dataset paths used by later cells live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
import torch
from torchvision.transforms import ToTensor
from PIL import Image
import os
import json

# Directory that holds the image files referenced by the annotations
images_dir = "/content/drive/MyDrive/DocumentAnalysisRCNN/images"

# Parse the COCO-style annotation file into a plain dict
with open("/content/drive/MyDrive/DocumentAnalysisRCNN/annotations.json") as f:
    coco_data = json.load(f)

# One id per entry of the "images" list, in file order
image_ids = [image_entry['id'] for image_entry in coco_data['images']]

# Custom COCO Dataset class
class COCODataset(torch.utils.data.Dataset):
    """Object-detection dataset backed by a parsed COCO-style annotation dict.

    Each item is an ``(image_tensor, target)`` pair. ``target`` is a dict with
    ``boxes`` (float32, ``[x1, y1, x2, y2]``), ``labels`` (int64, the raw
    ``category_id`` values), ``image_id``, ``area`` (float32) and ``iscrowd``
    (int64).

    Args:
        images_dir: Directory that each image's ``file_name`` is joined onto.
        coco_data: Dict with ``images``, ``annotations`` and ``categories`` lists.
        image_ids: Ids of the images this dataset instance serves.
        transform: Stored on the instance; ``__getitem__`` does not apply it.
    """

    def __init__(self, images_dir, coco_data, image_ids, transform=None):
        self.images_dir = images_dir
        self.coco_data = coco_data
        self.image_ids = image_ids
        self.transform = transform

        # Lookup tables built once so __getitem__ never scans the full lists
        self.image_id_to_info = {img['id']: img for img in coco_data['images']}
        self.image_id_to_anns = {}
        for ann in coco_data['annotations']:
            self.image_id_to_anns.setdefault(ann['image_id'], []).append(ann)

        self.categories = {cat['id']: cat['name'] for cat in coco_data['categories']}

    def __len__(self):
        """Number of image ids served (including ids that end up being skipped)."""
        return len(self.image_ids)

    def _usable_annotations(self, img_id):
        """Return the annotations of ``img_id`` whose box has positive width and height."""
        usable = []
        for ann in self.image_id_to_anns.get(img_id, []):
            _, _, w, h = ann['bbox']
            # torchvision detection models reject boxes with non-positive size in training
            if w > 0 and h > 0:
                usable.append(ann)
        return usable

    def __getitem__(self, idx):
        """Return ``(image_tensor, target)`` for ``idx``.

        If the image at ``idx`` has no usable annotation, the next image (wrapping
        around) that has one is returned instead.

        Raises:
            IndexError: ``idx`` is outside the range of ``image_ids``.
            RuntimeError: no image in this dataset has a usable annotation.
        """
        # Plain indexing first so out-of-range access still raises IndexError,
        # which sequence-style iteration relies on to terminate.
        self.image_ids[idx]

        count = len(self)
        start = idx % count
        # Bounded scan replaces unbounded recursion; annotations are checked
        # before any image is opened so skipped images cost no disk read.
        for offset in range(count):
            img_id = self.image_ids[(start + offset) % count]
            anns = self._usable_annotations(img_id)
            if anns:
                break
        else:
            raise RuntimeError(
                "COCODataset: none of the selected images has a usable annotation"
            )

        image_info = self.image_id_to_info[img_id]
        img_path = os.path.join(self.images_dir, image_info['file_name'])
        # Context manager releases the file handle once the pixels are converted
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")
        img_tensor = ToTensor()(img)

        boxes, labels, areas, iscrowd = [], [], [], []

        for ann in anns:
            x, y, w, h = ann['bbox']
            # COCO stores [x, y, width, height]; convert to corner format
            boxes.append([x, y, x + w, y + h])
            labels.append(ann['category_id'])
            # Fall back to the box area when the annotation omits 'area'
            areas.append(ann.get('area', w * h))
            iscrowd.append(ann.get('iscrowd', 0))

        target = {
            'boxes': torch.tensor(boxes, dtype=torch.float32),
            'labels': torch.tensor(labels, dtype=torch.int64),
            'image_id': torch.tensor([img_id]),
            'area': torch.tensor(areas, dtype=torch.float32),
            'iscrowd': torch.tensor(iscrowd, dtype=torch.int64)
        }

        return img_tensor, target

# Dataset over every image id listed in the annotation file (no train/val split applied here)
dataset = COCODataset(images_dir, coco_data, image_ids)

In [12]:
# === Load dataset ===
coco_json_path = "/content/drive/MyDrive/DocumentAnalysisRCNN/annotations.json"
images_dir = "/content/drive/MyDrive/DocumentAnalysisRCNN/images"

with open(coco_json_path) as f:
    coco_data = json.load(f)

# === Split image IDs ===
# A fixed seed keeps the train/val membership identical across kernel restarts,
# so a checkpoint reloaded later is not validated on images it was trained on.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8

all_image_ids = [img['id'] for img in coco_data['images']]
# Dedicated RNG instance: the split does not depend on, or disturb, the global random state
random.Random(SPLIT_SEED).shuffle(all_image_ids)

split_idx = int(TRAIN_FRACTION * len(all_image_ids))
train_ids = all_image_ids[:split_idx]
val_ids = all_image_ids[split_idx:]


In [13]:
# One dataset per split, both reading from the same image directory and annotation dict
train_dataset = COCODataset(images_dir, coco_data, train_ids)
val_dataset = COCODataset(images_dir, coco_data, val_ids)

# The collate function regroups a list of (image, target) pairs into
# (tuple_of_images, tuple_of_targets) instead of stacking them into one tensor.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    collate_fn=lambda x: tuple(zip(*x)),
)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=2,
    shuffle=False,
    collate_fn=lambda x: tuple(zip(*x)),
)

In [14]:
# `weights=` is the supported way to request pretrained weights; the older
# `pretrained=True` flag is deprecated in current torchvision releases.
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")

# The dataset emits raw category ids as labels and index 0 is the background class,
# so the head needs at least max(category id) + 1 outputs. This equals
# len(categories) + 1 when ids are exactly 1..N and is never smaller than that.
# Assumes category ids are integers -- TODO confirm against the annotation file.
category_ids = list(train_dataset.categories)
num_classes = max(len(category_ids), max(category_ids, default=0)) + 1

# Swap the box predictor head for one sized to this dataset's classes
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [15]:
# Hand only the parameters that still require gradients to the optimizer
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

In [None]:
num_epochs = 10

for epoch in range(num_epochs):
    # Training mode: calling the model with targets yields a dict of loss terms
    model.train()
    total_loss = 0
    progress = tqdm(train_loader, desc=f"Epoch {epoch+1} Training")

    for images, targets in progress:
        # Move every image and every target tensor onto the model's device
        images = list(map(lambda img: img.to(device), images))
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        # Sum the individual loss terms into one scalar to backpropagate
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        total_loss += losses.item()

    # Report the mean per-batch loss for the epoch
    print(f"Epoch {epoch+1}, Training Loss: {total_loss/len(train_loader):.4f}")

Epoch 1 Training:   3%|▎         | 13/400 [13:39<6:17:28, 58.52s/it]

In [None]:
# Persist only the weights (state_dict); the path is relative to the current working directory
checkpoint_path = "Document Analysis.pth"
torch.save(model.state_dict(), checkpoint_path)
print(checkpoint_path)

In [None]:
import os
import random
import shutil

# WARNING: this cell permanently deletes files from images_dir.

# Set your image directory path
images_dir = "/content/drive/MyDrive/DocumentAnalysisRCNN/images"  # Update with your path
# NOTE: despite its name, this value is the path of the annotations JSON file used elsewhere in the notebook
annotations_dir = "/content/drive/MyDrive/DocumentAnalysisRCNN/annotations.json"  # Update with your annotations directory path

# List all image files in the directory (change to the correct extension if needed)
all_images = [f for f in os.listdir(images_dir) if f.endswith(('.jpg', '.png'))]

# Select up to 1000 random images
num_images_to_keep = 1000
# random.sample raises ValueError when asked for more items than the population holds,
# so cap the request at the number of images actually present.
images_to_keep = random.sample(all_images, min(num_images_to_keep, len(all_images)))

# Everything not selected is removed
images_to_delete = set(all_images) - set(images_to_keep)

# Delete extra images from the images directory
for image in images_to_delete:
    image_path = os.path.join(images_dir, image)
    if os.path.exists(image_path):
        os.remove(image_path)  # Delete the image file
        print(f"Deleted {image_path}")

# Optional: delete per-image XML annotation files that sit under annotations_dir.
# While annotations_dir points at a regular file, none of these paths can exist
# and this loop removes nothing.
for image in images_to_delete:
    # splitext swaps only the trailing extension, never a ".jpg"/".png" inside the name
    stem, _ = os.path.splitext(image)
    annotation_path = os.path.join(annotations_dir, stem + '.xml')
    if os.path.exists(annotation_path):
        os.remove(annotation_path)  # Delete the annotation file
        print(f"Deleted {annotation_path}")

# Report the number actually kept, which can be below the requested amount
print(f"Kept {len(images_to_keep)} images and deleted the rest.")


In [None]:
import os
import json

# Rewrites the annotation JSON in place so it only references images still present on disk.

# Set your image directory and JSON file path
images_dir = "./images"  # Update with your image directory path
json_file_path = "./annotations.json"  # Update with your JSON file path

# List all remaining image files in the directory (change to correct extension if needed)
existing_images = [f for f in os.listdir(images_dir) if f.endswith(('.jpg', '.png'))]

# Load the existing JSON file
with open(json_file_path, 'r') as json_file:
    data = json.load(json_file)

# Create a set of filenames for fast lookups
existing_image_filenames = set(existing_images)

# Keep only the image entries whose file still exists
updated_images = [img for img in data['images'] if img['file_name'] in existing_image_filenames]

# Build the id set once; rebuilding a list for every annotation made this step quadratic
kept_image_ids = {img['id'] for img in updated_images}
updated_annotations = [anno for anno in data['annotations'] if anno['image_id'] in kept_image_ids]

# Create the updated JSON structure; "info" and "licenses" are carried over only when
# the source file has them, instead of failing with KeyError when they are absent.
updated_data = {key: data[key] for key in ("info", "licenses") if key in data}
updated_data["images"] = updated_images
updated_data["annotations"] = updated_annotations
updated_data["categories"] = data["categories"]

# Save the updated JSON file
with open(json_file_path, 'w') as json_file:
    json.dump(updated_data, json_file, indent=4)

print(f"Updated JSON file to reflect only the remaining {len(updated_images)} images and their annotations.")


In [None]:
pip install torch torchvision torchaudio
