In [None]:
import numpy as np
from skimage.io import imshow
import matplotlib.pyplot as plt

from skimage.transform import resize
from skimage.io import imread
import os

def color_map(N=256, normalized=True):
    def bitget(byteval, idx):
        return ((byteval & (1 << idx)) != 0)

    dtype = 'float32' if normalized else 'uint8'
    cmap = np.zeros((N, 3), dtype=dtype)
    for i in range(N):
        r = g = b = 0
        c = i
        for j in range(8):
            r = r | (bitget(c, 0) << 7-j)
            g = g | (bitget(c, 1) << 7-j)
            b = b | (bitget(c, 2) << 7-j)
            c = c >> 3

        cmap[i] = np.array([r, g, b])

    cmap = cmap/255 if normalized else cmap
    return cmap


def color_map_viz():
    labels = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor', 'void']
    nclasses = 21
    row_size = 50
    col_size = 500
    cmap = color_map()
    array = np.empty((row_size*(nclasses+1), col_size, cmap.shape[1]), dtype=cmap.dtype)
    for i in range(nclasses):
        array[i*row_size:i*row_size+row_size, :] = cmap[i]
    array[nclasses*row_size:nclasses*row_size+row_size, :] = cmap[-1]

    imshow(array)
    plt.yticks([row_size*i+row_size/2 for i in range(nclasses+1)], labels)
    plt.xticks([])
    plt.show()

In [None]:
import numpy as np
from skimage.transform import resize
from skimage.io import imread
import os

def preprocess_and_save(image_path, mask_path, cmap, output_size=(224, 224), output_dir='output'):
    # Ensure output directory exists
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Load and resize image
    image = imread(image_path)
    image = resize(image, output_size, anti_aliasing=True)
    image /= 255.0  # Normalize to [0, 1]
    image_flattened = image.flatten()

    # Load and resize mask
    mask_image = imread(mask_path)
    mask_image = resize(mask_image, output_size, anti_aliasing=False, preserve_range=True)
    
    # Initialize 22-channel binary mask
    channels = np.zeros((*output_size, 22), dtype=np.float32)

    # Map each pixel to the correct channel
    for i in range(output_size[0]):
        for j in range(output_size[1]):
            color = np.round(mask_image[i, j]).astype(int)  # Ensure colors are integers
            for idx, val in enumerate(cmap):
                if np.array_equal(color, val):  # Strict equality check
                    try:
                        channels[i, j, idx] = 1
                    except:
                        print('ignore')
                    break

    mask_flattened = channels.flatten()

    # Save flattened image and mask to text files
    image_filename = os.path.join(output_dir, os.path.basename(image_path) + '_image.txt')
    mask_filename = os.path.join(output_dir, os.path.basename(mask_path) + '_mask.txt')

    np.savetxt(image_filename, image_flattened, fmt='%.6f')
    np.savetxt(mask_filename, mask_flattened, fmt='%d')

def process_dataset(image_dir, mask_dir, cmap):
    file_names = os.listdir(image_dir)
    count = 0
    for file_name in file_names:
        if file_name.endswith('.jpg'):
            mask_name = file_name[:-4] + '.png'  # Change extension for mask
            mask_path = os.path.join(mask_dir, mask_name)
            image_path = os.path.join(image_dir, file_name)
            
            # Check if the mask file exists
            if os.path.exists(mask_path):
                preprocess_and_save(image_path, mask_path, cmap)
            else:
                n = 0
                # print(f"Mask for {file_name} not found, skipping...")

# Example usage
cmap = color_map()  # Assuming this function returns the correct RGB values for each class
image_dir = 'data/VOC2007/JPEGImages'
mask_dir = 'data/VOC2007/SegmentationClass'
process_dataset(image_dir, mask_dir, cmap)

In [14]:
import numpy as np
from skimage.transform import resize
from skimage.io import imread
import os
from concurrent.futures import ThreadPoolExecutor

def preprocess_and_save(image_path, mask_path, cmap, output_size=(224, 224), output_dir='output'):
    # Load and resize image
    image = imread(image_path)
    image = resize(image, output_size, anti_aliasing=True) / 255.0  # Normalize to [0, 1]

    # Load and resize mask
    mask_image = imread(mask_path)
    mask_image = resize(mask_image, output_size, anti_aliasing=False, preserve_range=True).astype(int)

    # Initialize 22-channel binary mask
    channels = np.zeros((*output_size, 22), dtype=np.float32)

    # Vectorized mask processing
    for idx, val in enumerate(cmap):
        try:
            channels[:, :, idx] = np.all(mask_image == val, axis=-1)
        except Exception as e:
            print(e)
        except:
            print('amme kharab')

    # Save processed image and mask using numpy.save
    image_filename = os.path.join(output_dir, os.path.basename(image_path) + '_image.npy')
    mask_filename = os.path.join(output_dir, os.path.basename(mask_path) + '_mask.npy')
    print(image_filename)
    np.save(image_filename, image)
    np.save(mask_filename, channels)
    return image_filename

def process_dataset(image_dir, mask_dir, cmap):
    file_names = os.listdir(image_dir)
    if not os.path.exists('output'):
        os.makedirs('output')

    # Parallel processing
    with ThreadPoolExecutor(max_workers=4) as executor:
        for file_name in file_names:
            if file_name.endswith('.jpg'):
                mask_name = file_name[:-4] + '.png'  # Change extension for mask
                mask_path = os.path.join(mask_dir, mask_name)
                image_path = os.path.join(image_dir, file_name)

                if os.path.exists(mask_path):
                    executor.submit(preprocess_and_save, image_path, mask_path, cmap)

# Assume color_map function is defined and provides the cmap array
cmap = color_map()
image_dir = 'data/VOC2007/JPEGImages'
mask_dir = 'data/VOC2007/SegmentationClass'
process_dataset(image_dir, mask_dir, cmap)


In [13]:
import numpy as np
from skimage.transform import resize
from skimage.io import imread
import os
from concurrent.futures import ThreadPoolExecutor

def preprocess_and_save(image_path, mask_path, cmap, output_size=(224, 224), output_dir='output'):
    # Load and resize image
    image = imread(image_path)
    image = resize(image, output_size, anti_aliasing=True) / 255.0  # Normalize to [0, 1]

    # Load and resize mask
    mask_image = imread(mask_path)
    mask_image = resize(mask_image, output_size, anti_aliasing=False, preserve_range=True).astype(int)

    # Initialize 22-channel binary mask
    channels = np.zeros((*output_size, 22), dtype=np.float32)

    # Vectorized mask processing
    for idx, val in enumerate(cmap):
        channels[:, :, idx] = np.all(mask_image == val, axis=-1)

    # Save processed image and mask using numpy.save
    image_filename = os.path.join(output_dir, os.path.basename(image_path) + '_image.npy')
    mask_filename = os.path.join(output_dir, os.path.basename(mask_path) + '_mask.npy')
    np.save(image_filename, image)
    np.save(mask_filename, channels)

def process_dataset(image_dir, mask_dir, cmap):
    file_names = os.listdir(image_dir)
    if not os.path.exists('output'):
        os.makedirs('output')

    # Parallel processing
    with ThreadPoolExecutor(max_workers=4) as executor:
        for file_name in file_names:
            if file_name.endswith('.jpg'):
                mask_name = file_name[:-4] + '.png'  # Change extension for mask
                mask_path = os.path.join(mask_dir, mask_name)
                image_path = os.path.join(image_dir, file_name)
                
                if os.path.exists(mask_path):
                    executor.submit(preprocess_and_save, image_path, mask_path, cmap)

# Assume color_map function is defined and provides the cmap array
cmap = color_map()
image_dir = 'data/VOC2007/JPEGImages'
mask_dir = 'data/VOC2007/SegmentationClass'
process_dataset(image_dir, mask_dir, cmap)
import numpy as np
from skimage.transform import resize
from skimage.io import imread
import os
from concurrent.futures import ThreadPoolExecutor

def preprocess_and_save(image_path, mask_path, cmap, output_size=(224, 224), output_dir='output'):
    # Load and resize image
    image = imread(image_path)
    image = resize(image, output_size, anti_aliasing=True) / 255.0  # Normalize to [0, 1]

    # Load and resize mask
    mask_image = imread(mask_path)
    mask_image = resize(mask_image, output_size, anti_aliasing=False, preserve_range=True).astype(int)

    # Initialize 22-channel binary mask
    channels = np.zeros((*output_size, 22), dtype=np.float32)

    # Vectorized mask processing
    for idx, val in enumerate(cmap):
        channels[:, :, idx] = np.all(mask_image == val, axis=-1)

    # Ensure output directory exists
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Save processed image and mask using numpy.save
    image_filename = os.path.join(output_dir, os.path.basename(image_path) + '_image.npy')
    mask_filename = os.path.join(output_dir, os.path.basename(mask_path) + '_mask.npy')
    np.save(image_filename, image)
    np.save(mask_filename, channels)

def process_dataset(image_dir, mask_dir, cmap):
    file_names = os.listdir(image_dir)
    # Ensure the output directory exists before starting processing
    output_dir = 'output_new'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Parallel processing
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = []
        for file_name in file_names:
            if file_name.endswith('.jpg'):
                mask_name = file_name[:-4] + '.png'  # Change extension for mask
                mask_path = os.path.join(mask_dir, mask_name)
                image_path = os.path.join(image_dir, file_name)
                
                if os.path.exists(mask_path):
                    futures.append(executor.submit(preprocess_and_save, image_path, mask_path, cmap, output_size=(224, 224), output_dir=output_dir))
        
        # Wait for all the futures to complete
        for future in futures:
            future.result()  # This will raise exceptions if any occurred during execution

# Assume color_map function is defined and provides the cmap array
cmap = color_map()
image_dir = 'data/VOC2007/JPEGImages'
mask_dir = 'data/VOC2007/SegmentationClass'
process_dataset(image_dir, mask_dir, cmap)

IndexError: index 22 is out of bounds for axis 2 with size 22

In [None]:

import os
import shutil

def find_and_copy_matching_images(source_dir1, source_dir2, target_dir1, target_dir2):
    # Create target directories if they don't exist
    os.makedirs(target_dir1, exist_ok=True)
    os.makedirs(target_dir2, exist_ok=True)

    # List all files in both directories
    files1 = set(os.listdir(source_dir1))
    files2 = set(os.listdir(source_dir2))
    
    # Find matching files with different extensions
    matched_files = []
    for file1 in files1:
        name1, ext1 = os.path.splitext(file1)
        for file2 in files2:
            name2, ext2 = os.path.splitext(file2)
            if name1 == name2 and ext1 != ext2:  # Same name but different extension
                matched_files.append((file1, file2))
    
    # Copy matched files to the target directories
    for file1, file2 in matched_files:
        shutil.copy(os.path.join(source_dir1, file1), os.path.join(target_dir1, file1))
        shutil.copy(os.path.join(source_dir2, file2), os.path.join(target_dir2, file2))
        print(f"Copied {file1} to {target_dir1} and {file2} to {target_dir2}")

# Example usage
source_dir1 = 'data/VOC2007/JPEGImages'
source_dir2 = 'data/VOC2007/SegmentationClass'
target_dir1 = 'data_out/images'
target_dir2 = 'data_out/masks'
find_and_copy_matching_images(source_dir1, source_dir2, target_dir1, target_dir2)