In [None]:

import os
import shutil
import glob
from google.colab import drive
from PIL import Image
import random
from tqdm.notebook import tqdm

# --- Configuration -----------------------------------------------------------
# Mount Google Drive and define every path the rest of the notebook relies on.
drive.mount('/content/drive')
BASE_PATH = '/content/drive/MyDrive/inDrive_hackathon'
# Archive of source car images that the crops are generated from.
CLEAN_CARS_ZIP_PATH = os.path.join(BASE_PATH, 'data', 'raw', 'Car_model_detection_subset.zip')
# Scratch directories on the Colab VM's local disk (wiped on every run of this cell).
TEMP_DIR = '/content/temp_clean_cars_for_cropping'
LOCAL_CROPS_DIR = '/content/local_crops'
# Persistent destination for the generated crops on Google Drive.
OUTPUT_CROPS_DIR_DRIVE = os.path.join(BASE_PATH, 'data', 'processed', 'clean_crops')

# Fail fast: verify the input archive exists *before* wiping/creating any
# directory and before announcing that setup succeeded.
if not os.path.exists(CLEAN_CARS_ZIP_PATH):
    print("\n❌ ERROR: Raw zip file not found! Please check the path and filename.")
    raise FileNotFoundError(f"Raw zip file not found: {CLEAN_CARS_ZIP_PATH}")
print("\n✅ Successfully located raw zip file.")

# Recreate both scratch directories from empty so that files left over from a
# previous run cannot leak into this one.
for scratch_dir in (TEMP_DIR, LOCAL_CROPS_DIR):
    if os.path.exists(scratch_dir):
        shutil.rmtree(scratch_dir)
    os.makedirs(scratch_dir)

# The Drive output directory is kept between runs; only make sure it exists.
os.makedirs(OUTPUT_CROPS_DIR_DRIVE, exist_ok=True)

print("✅ Setup complete!")
print(f"Will read clean cars from: {CLEAN_CARS_ZIP_PATH}")
# Was `OUTPUT_CROPS_DIR`, a name that is never defined in this notebook and
# raises NameError on a fresh kernel.
print(f"Will save new crops to: {OUTPUT_CROPS_DIR_DRIVE}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Setup complete!
Will read clean cars from: /content/drive/MyDrive/inDrive_hackathon/data/raw/Car_model_detection_subset.zip
Will save new crops to: /content/drive/MyDrive/inDrive_hackathon/data/processed/clean_crops

✅ Successfully located raw zip file.


In [None]:


# Extract the raw archive into the local scratch directory.
archive_name = os.path.basename(CLEAN_CARS_ZIP_PATH)
print(f"Unzipping {archive_name}...")
shutil.unpack_archive(CLEAN_CARS_ZIP_PATH, TEMP_DIR)
print("Unzipping complete.")

# Collect every path with a dot in its final component under the 'train' and
# 'valid' folders, at any nesting depth.
train_images, valid_images = (
    glob.glob(os.path.join(TEMP_DIR, split, '**', '*.*'), recursive=True)
    for split in ('train', 'valid')
)

# Keep only paths with an image extension (case-insensitive); train entries
# come first, followed by valid entries.
all_source_images = []
for candidate in train_images + valid_images:
    if candidate.lower().endswith(('.png', '.jpg', '.jpeg')):
        all_source_images.append(candidate)

print(f"\nFound {len(all_source_images)} source images to generate crops from.")

Unzipping Car_model_detection_subset.zip...
Unzipping complete.

Found 481 source images to generate crops from.


In [None]:

# --- Crop generation ---------------------------------------------------------
# Size of every output image, in pixels.
CROP_WIDTH = 640
CROP_HEIGHT = 640
# Maximum number of random crops taken from one sufficiently large image.
NUM_CROPS_PER_IMAGE = 4
# Seed for the crop-position sampler. Makes the sampled offsets repeatable for
# a given ordering of `all_source_images`.
CROP_SEED = 42

print(f"Starting crop generation with corrected logic...")
print(f"Generating up to {NUM_CROPS_PER_IMAGE} crops of size {CROP_WIDTH}x{CROP_HEIGHT} from each image.")

# Dedicated RNG so the sampling does not depend on (or disturb) the global
# `random` state used elsewhere in the session.
crop_rng = random.Random(CROP_SEED)

crop_counter = 0    # number of files written so far; also drives the file names
failed_images = 0   # number of source images that raised while being processed
for img_path in tqdm(all_source_images, desc="Processing images"):
    try:
        # Context-manage the image returned by Image.open (the object that owns
        # the file handle) and keep a converted RGB copy to work with. The
        # original `with` was managing the converted copy instead.
        with Image.open(img_path) as source_img:
            img = source_img.convert('RGB')
        img_width, img_height = img.size

        if img_width < CROP_WIDTH or img_height < CROP_HEIGHT:
            # Too small to crop from: emit one image resized to the target size.
            # Note that this does not preserve the aspect ratio.
            outputs = [img.resize((CROP_WIDTH, CROP_HEIGHT))]
        else:
            # Largest valid top-left corner; the same for every crop of this image.
            max_x = img_width - CROP_WIDTH
            max_y = img_height - CROP_HEIGHT
            # An image that is exactly the crop size has a single possible crop;
            # taking it several times would only write identical duplicates.
            crops_for_image = 1 if (max_x == 0 and max_y == 0) else NUM_CROPS_PER_IMAGE

            outputs = []
            for _ in range(crops_for_image):
                random_x = crop_rng.randint(0, max_x)
                random_y = crop_rng.randint(0, max_y)
                box = (random_x, random_y, random_x + CROP_WIDTH, random_y + CROP_HEIGHT)
                outputs.append(img.crop(box))

        for out_img in outputs:
            save_path = os.path.join(LOCAL_CROPS_DIR, f"clean_crop_{crop_counter:05d}.jpg")
            out_img.save(save_path, 'JPEG', quality=95)
            # Count only after a successful save so the counter matches the
            # number of files actually written.
            crop_counter += 1

    except Exception as e:
        # Best effort: one unreadable image must not abort the whole run, but
        # keep a tally so the summary below does not hide the failures.
        failed_images += 1
        print(f"Warning: Could not process image {img_path}. Error: {e}")


print(f"\n🎉 Crop generation complete!")
print(f"Successfully generated and saved {crop_counter} clean crop images to the LOCAL directory.")
if failed_images:
    print(f"⚠️ {failed_images} source image(s) could not be processed; see the warnings above.")

Starting crop generation with corrected logic...
Generating up to 4 crops of size 640x640 from each image.


Processing images:   0%|          | 0/481 [00:00<?, ?it/s]


🎉 Crop generation complete!
Successfully generated and saved 742 clean crop images to the LOCAL directory.


In [None]:


# --- Persist the crops to Google Drive ---------------------------------------
# Copy with shutil instead of `!cp`: a shell command's failure is silently
# ignored by the notebook, so the success message below used to be printed even
# when nothing was copied. shutil.copyfile raises on failure, and paths
# containing spaces need no shell quoting.
# LOCAL_CROPS_DIR is recreated empty by the setup cell and the crop cell writes
# files directly into it, so a flat, per-file copy is sufficient here.
crop_filenames = sorted(os.listdir(LOCAL_CROPS_DIR))

print(f"Copying {len(crop_filenames)} files from local disk to Google Drive...")
print("This may take a few minutes...")

for crop_name in tqdm(crop_filenames, desc="Copying to Drive"):
    # Existing files with the same name on Drive are overwritten, as with `cp`.
    shutil.copyfile(
        os.path.join(LOCAL_CROPS_DIR, crop_name),
        os.path.join(OUTPUT_CROPS_DIR_DRIVE, crop_name),
    )

# Only reached when every copy above succeeded.
print("\n✅ Copying complete!")
print("Your clean crops are now permanently saved in your Google Drive.")

Copying 742 files from local disk to Google Drive...
This may take a few minutes...

✅ Copying complete!
Your clean crops are now permanently saved in your Google Drive.


In [None]:

# --- Merge the clean crops into the main damage dataset ----------------------
# Build the destination from BASE_PATH instead of repeating the absolute Drive
# path, so a change to the project location only has to be made in one place.
DAMAGE_IMAGES_DIR = os.path.join(BASE_PATH, 'data', 'processed', 'damage_dataset_yolo', 'images')

print("Copying clean crops into the main damage dataset...")

# `!cp` reported nothing to Python on failure, so "Copying complete." used to be
# printed unconditionally. Check the preconditions explicitly and let
# shutil.copyfile raise if an individual copy fails.
if not os.path.isdir(DAMAGE_IMAGES_DIR):
    raise FileNotFoundError(f"Damage dataset image directory not found: {DAMAGE_IMAGES_DIR}")

# Same selection as the shell pattern `clean_crops/*.jpg` (hidden files excluded).
clean_crop_paths = sorted(glob.glob(os.path.join(OUTPUT_CROPS_DIR_DRIVE, '*.jpg')))
if not clean_crop_paths:
    raise FileNotFoundError(f"No .jpg crops found in: {OUTPUT_CROPS_DIR_DRIVE}")

for crop_path in clean_crop_paths:
    # Files already present under the same name are overwritten, as with `cp`.
    shutil.copyfile(crop_path, os.path.join(DAMAGE_IMAGES_DIR, os.path.basename(crop_path)))
print("Copying complete.")


print("\nVerifying the total number of images in the dataset:")
# Equivalent of `ls -1 <dir> | wc -l`: count directory entries, skipping
# dot-prefixed names, which `ls` does not list by default.
print(sum(1 for entry in os.listdir(DAMAGE_IMAGES_DIR) if not entry.startswith('.')))

Copying clean crops into the main damage dataset...
Copying complete.

Verifying the total number of images in the dataset:
3435
