Import libraries

In [1]:
import os
import cv2
import numpy as np
import time

Load the images under `dataset_path` (one sub-folder per class), convert them from BGR to RGB, and resize them to `image_size`.

In [2]:
def load_and_preprocess_data(dataset_path, image_size=(128, 128)):
    """Load every readable image under ``dataset_path`` as a resized RGB array.

    Each immediate sub-directory of ``dataset_path`` is treated as one class.
    The sorted sub-directory names define the integer labels (0, 1, ...).
    Entries that ``cv2.imread`` cannot decode are reported and skipped.

    Args:
        dataset_path: Directory containing one sub-directory per class.
        image_size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument (OpenCV interprets it as ``(width, height)``).

    Returns:
        A tuple ``(images, labels, class_map)`` where ``images`` is
        ``np.array`` of the resized RGB images, ``labels`` is ``np.array`` of
        the matching integer class ids, and ``class_map`` maps each class
        name to its id.
    """
    images = []
    labels = []

    class_names = sorted(
        entry
        for entry in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, entry))
    )
    class_map = {name: index for index, name in enumerate(class_names)}

    print(f'loading classes: {class_names}')

    for class_name in class_names:
        class_path = os.path.join(dataset_path, class_name)
        print(f'processing {class_name} folder')

        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore the saved arrays) reproducible.
        for image_name in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_name)
            image = cv2.imread(image_path)

            # imread signals failure by returning None instead of raising.
            if image is None:
                print(f'could not load {image_path} !')
                continue

            # OpenCV decodes to BGR channel order; convert to RGB.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, image_size)

            images.append(image)
            labels.append(class_map[class_name])

    print('finished processing data')
    return np.array(images), np.array(labels), class_map

In [3]:
def save_data(datasets=('dataset1', 'dataset2'), image_size=(128, 128),
              data_dir='data', output_dir='preprocessed_data'):
    """Preprocess each dataset once and cache it as a compressed ``.npz`` file.

    For every name in ``datasets`` the images are read from
    ``<data_dir>/<name>`` via ``load_and_preprocess_data`` and written to
    ``<output_dir>/<name>_processed.npz``. A dataset whose archive already
    exists is skipped, so re-running the cell is cheap.

    Args:
        datasets: Names of the dataset folders inside ``data_dir``.
        image_size: Target size forwarded to ``load_and_preprocess_data``.
        data_dir: Directory that contains the raw dataset folders.
        output_dir: Directory for the archives; created if missing.

    Returns:
        None. The archives are written to disk as a side effect.
    """
    os.makedirs(output_dir, exist_ok=True)

    for path in datasets:
        # os.path.join picks the platform's separator; a literal backslash in
        # the f-string only worked on Windows and is an invalid escape ('\{').
        output_file_path = os.path.join(output_dir, f'{path}_processed.npz')
        print(f'Dataset: {path}')
        print(f'Output file: {output_file_path}\n')

        if os.path.exists(output_file_path):
            print(f'{output_file_path} already exists.')
            continue

        start_time = time.perf_counter()

        images, labels, class_map = load_and_preprocess_data(
            os.path.join(data_dir, path), image_size=image_size
        )

        elapsed = time.perf_counter() - start_time
        print(f'\n{len(images)} images processed')
        print(f' in {elapsed:.2f} seconds.')

        print('\nsaving to a file...')
        # NOTE: class_map is a dict, so NumPy stores it as a pickled object
        # array; readers need np.load(..., allow_pickle=True) and
        # data['class_map'].item() to get the dict back.
        np.savez_compressed(
            output_file_path,
            images=images,
            labels=labels,
            class_map=class_map
        )
        print('completed')


In [5]:
# Build the .npz archive for each dataset; an archive that already exists is left untouched.
save_data()

Dataset: dataset1
Output file: preprocessed_data\dataset1_processed.npz

loading classes: ['dew', 'fogsmog', 'frost', 'glaze', 'hail', 'lightning', 'rain', 'rainbow', 'rime', 'sandstorm', 'snow']
processing dew folder
processing fogsmog folder
processing frost folder
processing glaze folder
processing hail folder
processing lightning folder
processing rain folder
processing rainbow folder
processing rime folder
processing sandstorm folder
processing snow folder
finished processing data

6862 images processed
 in 66.10 seconds.

saving to a file...
completed
Dataset: dataset2
Output file: preprocessed_data\dataset2_processed.npz

loading classes: ['cloudy', 'foggy', 'lightning', 'rainbow', 'rainy', 'rime', 'sandstorm', 'sunrise']
processing cloudy folder
processing foggy folder
processing lightning folder
processing rainbow folder
processing rainy folder
processing rime folder
processing sandstorm folder
processing sunrise folder
finished processing data

3116 images processed
 in 35.58