<a href="https://colab.research.google.com/github/ddecosmo-dev/thread-checker/blob/main/threadCheckerBoundingBoxTrainer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
#import libraries and download testing data from kaggle
#!kaggle datasets download -d devindecosmo/bolt-only-thread-checker-rev-1
import tensorflow as tf
from tensorflow.keras.layers import Rescaling
import numpy as np
import matplotlib.pyplot as plt
import os
import json
import shutil
import random
from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

# Kaggle API credentials.
# If no key is installed yet, you will be prompted to upload your
# 'kaggle.json' API key file. This only needs to be done once per session.
# NOTE(review): the check looks in /root/.kaggle while the shell commands
# below install into ~/.kaggle; these are the same directory only when the
# notebook runs as root (presumably the case on Colab) - confirm.
if not os.path.exists("/root/.kaggle/kaggle.json"):
    from google.colab import files
    print("Please upload your kaggle.json file")
    # Opens the upload widget; the `cp` below expects the uploaded file to
    # be named exactly 'kaggle.json' in the current working directory.
    files.upload()
    # Install the key where the kaggle CLI looks for it and restrict its
    # permissions to the owner (read/write only).
    !mkdir -p ~/.kaggle
    !cp kaggle.json ~/.kaggle/
    !chmod 600 ~/.kaggle/kaggle.json

# The name of the main folder that gets created after unzipping.
# Change 'images' if your main folder is named something else.
# Its presence (relative to the current working directory) is used as the
# "dataset already downloaded" marker below.
dataset_folder = 'images'

if not os.path.exists(dataset_folder):
  print('Dataset not found. Downloading...')
  # Kaggle API download command for the dataset; replace it with the command
  # copied from your own dataset's Kaggle page if you use a different one.
  !kaggle datasets download -d sujan97/screws-and-nuts-image

  # Archive name must match the .zip produced by the download above;
  # replace it if you changed the dataset.
  !unzip -q screws-and-nuts-image.zip
  print('Download and unzip complete.')
else:
  # Folder already present: skip the download and unzip entirely.
  print('Dataset already exists.')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset already exists.


In [8]:
# Root of the YOLO-style dataset tree: <root>/{images,labels}/{train,val}
dataset_root = "yolo_dataset"

# Build every (kind, split) leaf directory; existing ones are left untouched.
for _kind in ("images", "labels"):
    for _split in ("train", "val"):
        _leaf_dir = os.path.join(dataset_root, _kind, _split)
        os.makedirs(_leaf_dir, exist_ok=True)

print("Directory structure created.")

Directory structure created.


In [9]:
def convert_coco_to_yolo(json_file, images_dir, labels_dir):
    """
    Converts a single COCO JSON file to YOLO format.

    For every annotated image, one ``<image name without extension>.txt``
    file is written to ``labels_dir``. Each line describes one annotation as
    ``class_id x_center y_center width height``, with the box values divided
    by the image width/height recorded in the JSON and formatted to six
    decimal places.

    Each label file is rewritten from scratch on every call, so running the
    conversion again (e.g. re-executing the cell) does not duplicate
    annotation lines. Images without annotations get no label file.

    Args:
        json_file (str): Path to the COCO JSON file.
        images_dir (str): Path to the directory containing the images.
            Currently not read by the conversion; kept so existing callers
            keep working.
        labels_dir (str): Path to the directory to save the YOLO labels.
            Created if it does not exist.

    Raises:
        KeyError: If an annotation references an image id or category id
            that is not declared in the JSON file.
    """
    print(f"Starting conversion for {json_file}...")

    with open(json_file, 'r') as f:
        coco_data = json.load(f)

    # Create a mapping from image_id to image filename and dimensions
    img_map = {img['id']: {'file_name': img['file_name'], 'width': img['width'], 'height': img['height']}
               for img in coco_data['images']}

    # Create a mapping from category_id to a new, zero-indexed class ID.
    # The index follows the order of the 'categories' list in this file.
    # NOTE(review): train and val JSONs must list categories in the same
    # order for class IDs to agree across splits - confirm.
    category_map = {cat['id']: i for i, cat in enumerate(coco_data['categories'])}

    # Collect all label lines per output file first, preserving annotation
    # order, so each file is opened and written exactly once.
    lines_by_label = {}
    for ann in coco_data['annotations']:
        img_info = img_map[ann['image_id']]
        img_width = img_info['width']
        img_height = img_info['height']

        # COCO bounding box: [x_min, y_min, width, height]
        x_min, y_min, box_width, box_height = ann['bbox'][:4]

        # YOLO format: [class_id, x_center, y_center, width, height]
        class_id = category_map[ann['category_id']]

        # Calculate normalized coordinates
        x_center = (x_min + box_width / 2) / img_width
        y_center = (y_min + box_height / 2) / img_height
        norm_width = box_width / img_width
        norm_height = box_height / img_height

        # Label file shares the image's name, minus the extension
        file_name_no_ext = os.path.splitext(img_info['file_name'])[0]
        lines_by_label.setdefault(file_name_no_ext, []).append(
            f"{class_id} {x_center:.6f} {y_center:.6f} {norm_width:.6f} {norm_height:.6f}\n"
        )

    os.makedirs(labels_dir, exist_ok=True)

    # 'w' (not 'a'): overwrite any label file left over from a previous run.
    for file_name_no_ext, label_lines in lines_by_label.items():
        with open(os.path.join(labels_dir, f'{file_name_no_ext}.txt'), 'w') as label_file:
            label_file.writelines(label_lines)

    print(f"Conversion complete for {json_file}. {len(coco_data['annotations'])} annotations processed.")

# --- Convert the training and validation annotations ---

# Location of the downloaded dataset (adjust if yours lives elsewhere)
local_dataset_path = "/content/" # Update this if needed
train_json_file = os.path.join(local_dataset_path, "mvtec_screws_train.json")
val_json_file = os.path.join(local_dataset_path, "mvtec_screws_val.json")

# Both splits share one image folder; only the label destination differs.
_source_images_dir = os.path.join(local_dataset_path, "images")
for _split_name, _split_json in (("train", train_json_file), ("val", val_json_file)):
    convert_coco_to_yolo(
        _split_json,
        _source_images_dir,
        os.path.join(dataset_root, "labels", _split_name),
    )

Starting conversion for /content/mvtec_screws_train.json...
Conversion complete for /content/mvtec_screws_train.json. 3119 annotations processed.
Starting conversion for /content/mvtec_screws_val.json...
Conversion complete for /content/mvtec_screws_val.json. 647 annotations processed.


In [None]:
import os
import json
import shutil

def get_image_filenames_from_json(json_path):
    """Collect the 'file_name' of every entry under 'images' in a COCO JSON file.

    Args:
        json_path (str): Path to the COCO JSON file.

    Returns:
        set: The distinct image filenames listed in the file.
    """
    with open(json_path, 'r') as handle:
        coco = json.load(handle)

    filenames = set()
    for image_entry in coco['images']:
        filenames.add(image_entry['file_name'])
    return filenames

# Define paths.
# Use the same dataset root the COCO->YOLO conversion step reads from
# ("/content/"): that is where the annotation JSONs and the unzipped
# "images" folder were found. Update this if your data lives elsewhere.
local_dataset_path = "/content/"
yolo_dataset_path = "yolo_dataset"
original_images_dir = os.path.join(local_dataset_path, "images")

train_json_path = os.path.join(local_dataset_path, "mvtec_screws_train.json")
val_json_path = os.path.join(local_dataset_path, "mvtec_screws_val.json")

train_dest_dir = os.path.join(yolo_dataset_path, "images", "train")
val_dest_dir = os.path.join(yolo_dataset_path, "images", "val")

# Make sure the destinations exist. Without this, shutil.move() would rename
# the first image to a *file* called "train"/"val" instead of moving it into
# a directory of that name.
os.makedirs(train_dest_dir, exist_ok=True)
os.makedirs(val_dest_dir, exist_ok=True)

# Get sets of image filenames for each split
train_filenames = get_image_filenames_from_json(train_json_path)
val_filenames = get_image_filenames_from_json(val_json_path)

# Move the image files to their correct directories.
# NOTE: this relocates (does not copy) the files, so the original images
# folder is emptied of train/val images after the first successful run.
moved_train_count = 0
moved_val_count = 0
skipped_count = 0
for filename in sorted(os.listdir(original_images_dir)):
    src_path = os.path.join(original_images_dir, filename)

    # Only regular files are images; leave any sub-directories alone.
    if not os.path.isfile(src_path):
        continue

    # Passing the full destination file path (rather than just the directory)
    # lets a re-run replace an image that is already present instead of
    # raising "Destination path already exists".
    if filename in train_filenames:
        shutil.move(src_path, os.path.join(train_dest_dir, filename))
        moved_train_count += 1
        print(f"Moved {filename} to training directory.")
    elif filename in val_filenames:
        shutil.move(src_path, os.path.join(val_dest_dir, filename))
        moved_val_count += 1
        print(f"Moved {filename} to validation directory.")
    else:
        # This handles the test images or any other images not in the train/val splits
        skipped_count += 1
        print(f"Skipping {filename} (not in training or validation set).")

print(f"\nMoved {moved_train_count} training and {moved_val_count} validation images; skipped {skipped_count}.")
print("\nImage reorganization complete.")