In [23]:
#Importing relevant libraries:::
import os
import cv2
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import LabelEncoder


In [24]:
# Configuration: paths to the dataset in downloads folder
path = 'KittiDataset'
train_image_path = os.path.join(path, 'image_2', 'training')
train_label_path = os.path.join(path, 'label_2')
train_calib_path = os.path.join(path, 'calib', 'training')

test_image_path = os.path.join(path, 'image_2', 'testing')
test_calib_path = os.path.join(path, 'calib', 'testing')

In [25]:
# Verify Dataset Paths
def verify_paths(paths):
    for p in paths:
        if not os.path.exists(p):
            raise FileNotFoundError(f"Path does not exist: {p}")
        else:
            print(f"Verified path: {p}")

paths = [train_image_path, train_label_path, train_calib_path, test_image_path, test_calib_path]
verify_paths(paths)

Verified path: KittiDataset/image_2/training
Verified path: KittiDataset/label_2
Verified path: KittiDataset/calib/training
Verified path: KittiDataset/image_2/testing
Verified path: KittiDataset/calib/testing


In [28]:
#Data Preprocessing:::
#add in data here, augmentation here too
def data_preprocessing(image_path, label_path=None):
    print(f"Processing images in: {image_path}")
    if not isinstance(image_path, str):
        raise TypeError(f"Expected a string for image_path, but got {type(image_path)}")

    images, labels = [], []

    #lopping through to find all images in folders 
    for file_name in os.listdir(image_path):
        #print(f"Contents of {image}: {os.listdir(image)}")
        if file_name.lower().endswith('.png'):
            #loading
            img_file = os.path.join(image_path, file_name)
            img = cv2.imread(img_file)
            if img is None:
                print(f"Warning: Unable to load image: {img_file}")
                continue
            #resizing and normalizing data on image
            img = cv2.resize(img, (64, 64))
            img = img / 255.0 
            images.append(img)



    #lopping through to find all labels in folders
        if label_path:
                label_file = os.path.join(label_path, file_name.replace('.png', '.txt'))
                if os.path.exists(label_file):
                    with open(label_file, 'r') as f:
                        labels.append(f.read().strip())
                else:
                    labels.append(None)

    return np.array(images), labels



In [29]:
#loading test and training data
print("Loading and preprocessing training data...")
train_images, train_labels = data_preprocessing(train_image_path, train_label_path)

print("Loading and preprocessing testing data...")
test_images, _ = data_preprocessing(test_image_path)

Loading and preprocessing training data...
Processing images in: KittiDataset/image_2/training
Loading and preprocessing testing data...
Processing images in: KittiDataset/image_2/testing


libpng error: IDAT: CRC error


In [30]:
# Encode Labels
if train_labels:
    print("Encoding labels...")
    label_encoder = LabelEncoder()
    encoded_labels = label_encoder.fit_transform([label for label in train_labels if label is not None])



Encoding labels...


In [31]:
#DATA AUGMENTATION
# Data Augmentation for training data, not validation
def data_augmentation(X_train, y_train, batch_size=32):
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    #train_data = datagen.flow(X_train, y_train, batch_size=batch_size)
    #return train_data
    return datagen.flow(X_train, y_train, batch_size=batch_size)

print("Applying data augmentation...")
train_data = data_augmentation(train_images, encoded_labels)
    


Applying data augmentation...


ValueError: `x` (images tensor) and `y` (labels) should have the same length. Found: x.shape = (21, 64, 64, 3), y.shape = (7,)

In [32]:
#calling functions above in order:::
#data_preprocessed = data_preprocessing(data)

print("Applying data augmentation...")
train_data = data_augmentation(train_images, train_labels)

Applying data augmentation...


In [22]:
# #saving data
print("Saving data...")
np.save('train_images.npy', train_images)
np.save('train_labels.npy', encoded_labels)
np.save('test_images.npy', test_images)
#np.save('test_calib.npy', test_calib)

print("Data preprocessing complete. Files saved.")


Saving data...
Data preprocessing complete. Files saved.
