# Extract Plan Images and Masks from JSON format to JPG 

In [1]:
import json
import base64
from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np

In [2]:
#LABELFORM

def _draw_shape(canvas, shape, class_colors):
    """Draw one annotated shape onto ``canvas`` in place.

    Args:
        canvas: Writable H x W x 3 uint8 array to draw on (modified in place).
        shape: One entry of the JSON ``shapes`` list. Only the ``shape_type``,
            ``label`` and ``points`` keys are read.
        class_colors: Mapping from label name to the colour tuple to draw with.

    Shapes whose type is neither ``rectangle`` nor ``polygon``, or whose label
    has no entry in ``class_colors``, are reported with ``print`` and skipped.
    """
    shape_type = shape.get('shape_type')
    if shape_type not in ('rectangle', 'polygon'):
        print(f'Unsupported shape type: {shape_type}')
        return

    # Only the label selects the colour. Guard against non-string values so an
    # unexpected (unhashable) label cannot crash the dictionary lookup.
    label = shape.get('label')
    color = class_colors.get(label) if isinstance(label, str) else None
    if color is None:
        print(f'Invalid or missing key in class_colors dictionary: {label}')
        return

    if shape_type == 'rectangle':
        # The first two points are used as opposite corners of the box.
        points = shape['points']
        x0, y0, x1, y1 = points[0][0], points[0][1], points[1][0], points[1][1]
        cv2.rectangle(
            canvas,
            (int(x0), int(y0)),
            (int(x1), int(y1)),
            color=color,
            thickness=5,
        )
    else:
        # OpenCV drawing functions need integer vertex coordinates.
        points = np.array(shape['points'], dtype=np.int32)
        cv2.polylines(canvas, [points], isClosed=True, color=color, thickness=5)
        cv2.fillPoly(canvas, [points], color=color)


def JSONtoJPG(folder_name):
    """Export the embedded image and an annotated copy for every JSON file.

    For each ``*.json`` file in ``./<folder_name>`` the base64 string stored
    under ``imageData`` is decoded and saved to ``Data/<stem>_plan.jpg``. A
    copy of that image with every supported shape from ``shapes`` drawn on top
    of it is saved to ``Data/<stem>_mask.jpg``.

    Args:
        folder_name: Name of the folder (relative to the current working
            directory) that contains the annotation JSON files.

    Returns:
        None. Results are written to the ``Data`` folder, which is created if
        it does not exist.

    NOTE(review): the key layout (``imageData``, ``shapes``, ``shape_type``,
    ``points``, ``label``) looks like LabelMe output - confirm with the
    annotation tool actually used.
    """
    folder_path = os.path.join('.', folder_name)
    output_folder = 'Data'

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Colour used for each known class label (the arrays drawn on are RGB)
    class_colors = {
        'banyo': (255, 0, 0),
        'room': (0, 255, 0),
        'eyvan': (0, 0, 255),
    }

    for filename in os.listdir(folder_path):
        if not filename.endswith('.json'):
            continue

        file_path = os.path.join(folder_path, filename)
        with open(file_path, 'r', encoding='utf-8') as f:
            json_file = json.load(f)

        # Without embedded image data there is nothing to export for this
        # file; report it and carry on instead of failing in b64decode.
        image_data = json_file.get('imageData')
        if not image_data:
            print(f'No embedded image data in {filename}; skipping')
            continue

        ## EXTRACT IMAGE

        img = Image.open(BytesIO(base64.b64decode(image_data))).convert('RGB')

        stem = os.path.splitext(filename)[0]
        img.save(os.path.join(output_folder, stem + '_plan.jpg'))

        ## EXTRACT MASK

        # np.array() copies the pixels, so drawing does not touch ``img``.
        img_np = np.array(img)
        for shape in json_file['shapes']:
            _draw_shape(img_np, shape, class_colors)

        # Convert the array back to an image and save it.
        # NOTE(review): JPEG is lossy, so class colours in the saved file are
        # not guaranteed to be exact - confirm downstream code tolerates this.
        mask_img = Image.fromarray(img_np)
        mask_img.save(os.path.join(output_folder, stem + '_mask.jpg'))


In [3]:
JSONtoJPG(folder_name='labelform')

Unsupported shape type: mask
Unsupported shape type: mask
Unsupported shape type: mask


# Split the data folder (80% Train / 20% Test)

In [4]:
import os
import shutil
from sklearn.model_selection import train_test_split

In [5]:
# Split the exported plan/mask pairs in ``Data`` into train and test folders.
# Each pair is copied (not moved), so ``Data`` itself is left untouched.

# Input folder contains all data
input_folder = "Data"

# Train and Test folders
output_folder_train = "images/train"
output_folder_test = "images/test"

# Create the output folders if they don't exist
os.makedirs(output_folder_train, exist_ok=True)
os.makedirs(output_folder_test, exist_ok=True)

# Read dataset and match each image with its mask.
# os.listdir() returns names in arbitrary order, so sort them: otherwise the
# fixed random_state below would not give the same split on every machine.
plan_suffix = "_plan.jpg"
mask_suffix = "_mask.jpg"
data_pairs = []
for filename in sorted(os.listdir(input_folder)):
    if not filename.endswith(plan_suffix):
        continue

    image_path = os.path.join(input_folder, filename)
    # Swap only the trailing suffix; str.replace would also rewrite any
    # earlier "_plan.jpg" occurrence inside the name.
    mask_filename = filename[:-len(plan_suffix)] + mask_suffix
    mask_path = os.path.join(input_folder, mask_filename)

    # Images without a matching mask are left out of the split
    if os.path.exists(mask_path):
        data_pairs.append((image_path, mask_path))

# Separate the data set into train (80%) and test (20%)
train_data, test_data = train_test_split(data_pairs, test_size=0.2, random_state=42)

# Copy each image/mask pair into the folder of the subset it belongs to
for subset, destination in ((train_data, output_folder_train), (test_data, output_folder_test)):
    for image_path, mask_path in subset:
        shutil.copy(image_path, os.path.join(destination, os.path.basename(image_path)))
        shutil.copy(mask_path, os.path.join(destination, os.path.basename(mask_path)))
    