This notebook builds the dataset that is used to train and test the model.


In [5]:
# Importing libraries
import os
import pickle
import pandas as pd
import numpy as np
from skimage.transform import resize
from skimage.io import imread

Extract the chosen dataset archive from the `dataset` folder into the `ExtractedSet` folder.

In [4]:
#Unzipping the dataset to the ExtractedSet folder
from zipfile import ZipFile

# Name of the archive to unpack (change this to switch datasets)
dataSetname = 'dataset (4).zip'

# The archive and the folder that receives its contents both live
# under ../dataset, relative to this notebook
dataset_root = os.path.join('..', 'dataset')
zip_file_path = os.path.join(dataset_root, dataSetname)
extract_directory = os.path.join(dataset_root, 'ExtractedSet')

# Unpack every member of the archive; the context manager closes the file
with ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(extract_directory)

print(f"All files extracted from '{zip_file_path}' to '{extract_directory}'")

All files extracted from '..\dataset\dataset (4).zip' to '..\dataset\ExtractedSet'


In [6]:
# Importing the Database from the ExtractedSet
import os

# Location of the extracted dataset, relative to this notebook
dataDir = os.path.join('..', 'dataset', 'ExtractedSet', 'dataset')

# Report the resolved absolute location and whether it is present on disk
for label, value in (("path", os.path.abspath(dataDir)),
                     ("exists", os.path.isdir(dataDir))):
    print(f"Data directory {label}: {value}")

Data directory path: c:\Users\makjo\GithubPrograms\cs306_project2\dataset\ExtractedSet\dataset
Data directory exists: True


In [7]:
#Imports
import os

# Directory that contains the extracted pictures
picDirectory = "../dataset/ExtractedSet/"

# Folder name of the dataset
fileName = "dataset/"

# Full path to the folder holding one sub-folder per category
picPath = os.path.join(picDirectory, fileName)


def rename_category_files(path, prefix):
    """Rename the files in ``path`` to ``<prefix><n>.jpg`` and return how many were renamed.

    Files that already follow the naming scheme (``<prefix><n>.jpg`` or the
    recoloured variant ``<prefix><n>_c.jpg``) are left untouched, so the cell
    can be re-run safely: the ``_c`` marker is never stripped and no image is
    recoloured twice. Every other regular file receives the lowest index that
    is not in use yet, so no file is silently skipped.

    Args:
        path: directory whose files are renamed.
        prefix: category name used as the start of every new file name.

    Returns:
        The number of files that were renamed.
    """
    # Sort for a reproducible numbering (os.listdir order is arbitrary) and
    # ignore sub-directories, which must not be renamed to "*.jpg".
    names = sorted(
        name for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    )

    usedIndices = set()
    pending = []
    for name in names:
        stem, ext = os.path.splitext(name)
        number = stem[len(prefix):] if stem.startswith(prefix) else ""
        if number.endswith("_c"):
            # Recoloured file: its index is taken as well, otherwise a new
            # "<prefix><n>.jpg" would later collide with "<prefix><n>_c.jpg".
            number = number[:-2]
        if ext == ".jpg" and number.isdecimal():
            usedIndices.add(int(number))
        else:
            pending.append(name)

    nextIndex = 1
    for name in pending:
        # Advance to the first index that is neither reserved nor on disk
        while (nextIndex in usedIndices
               or os.path.exists(os.path.join(path, f"{prefix}{nextIndex}.jpg"))):
            nextIndex += 1
        # NOTE: as before, the new name always ends in ".jpg", whatever the
        # extension of the original file was.
        os.rename(os.path.join(path, name),
                  os.path.join(path, f"{prefix}{nextIndex}.jpg"))
        usedIndices.add(nextIndex)

    return len(pending)


# Rename the files within every category folder
categories = ["green", "sheep", "slow", "stop", "yellow", "free"]
for classes in categories:
    print(f"renaming... category : {classes}")
    path = os.path.join(picPath, classes)

    # Check if path exists
    if not os.path.isdir(path):
        print(f"Directory not found: {path}")
        continue

    rename_category_files(path, classes)

    print(f"renaming {classes} finished")



renaming... category : green
renaming green finished
renaming... category : sheep
renaming sheep finished
renaming... category : slow
renaming slow finished
renaming... category : stop
renaming stop finished
renaming... category : yellow
renaming yellow finished
renaming... category : free
renaming free finished


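The following two cells create the pre-processed (recoloured) images. Each image is overwritten in place and then renamed so that `_c` is appended to its name, before the extension (for example `green1.jpg` becomes `green1_c.jpg`).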

In [None]:
import cv2
import numpy as np

# Recolouring parameters read by adjust_hls
hue_shift_deg = 80    # hue rotation, in degrees
sat_percent = 100     # saturation assigned to every pixel, as a percentage of 255
light_percent = 10    # lightness added to every pixel, as a percentage of 255

def adjust_hls(path):
    """Recolour the image stored at ``path`` in HLS space and overwrite the file.

    The hue is rotated by ``hue_shift_deg``, the saturation of every pixel is
    replaced by ``sat_percent`` and the lightness is raised by
    ``light_percent`` (clipped to the valid range). The three settings are
    read from the module-level variables of the same name.

    Args:
        path: location of the image file; the result is written back to it.

    Raises:
        OSError: if the image cannot be read from, or written back to, ``path``.
    """
    bgr = cv2.imread(path)
    if bgr is None:
        # cv2.imread signals a missing or undecodable file by returning None
        # instead of raising, which would otherwise surface as an obscure
        # cvtColor failure.
        raise OSError(f"Could not read image: {path}")

    hls = cv2.cvtColor(bgr, cv2.COLOR_BGR2HLS)
    H, L, S = cv2.split(hls)

    # Adjust: H in 0–179, L and S in 0–255
    # Hue is stored as degrees / 2, hence the halved shift and the wrap at 180.
    H = ((H.astype(np.float32) + hue_shift_deg / 2.0) % 180).astype(np.uint8)
    S = np.full(S.shape, int(sat_percent / 100.0 * 255), dtype=np.uint8)
    L = np.clip(L.astype(np.float32) + light_percent / 100.0 * 255, 0, 255).astype(np.uint8)

    hls_adj = cv2.merge([H, L, S])
    bgr_out = cv2.cvtColor(hls_adj, cv2.COLOR_HLS2BGR)

    # cv2.imwrite reports failure through its return value, not an exception
    if not cv2.imwrite(path, bgr_out):
        raise OSError(f"Could not write image: {path}")

In [None]:
import os
from pathlib import Path


# Folder that holds one sub-folder of images per category.
# A dedicated name is used on purpose: the loading cell builds its paths from
# `picDirectory` + "dataset", so overwriting `picDirectory` here would make it
# look in ".../dataset/dataset" and load nothing.
preprocessDir = "../dataset/ExtractedSet/dataset"

# Only files with these extensions are handed to adjust_hls
imageExtensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

# Recolour the images of every category
categories = ["green", "sheep", "slow", "stop", "yellow", "free"]
for classes in categories:
    print(f"preprocessing... category : {classes}")
    path = os.path.join(preprocessDir, classes)

    # Check if path exists
    if not os.path.isdir(path):
        print(f"Directory not found: {path}")
        continue

    # Loop through the image folder, recolouring and then marking each file
    for filename in sorted(os.listdir(path)):
        sourcePath = os.path.join(path, filename)
        nameWithoutExt = Path(filename).stem
        extension = Path(filename).suffix

        # Skip folders and anything that is not an image file
        if not os.path.isfile(sourcePath) or extension.lower() not in imageExtensions:
            continue

        # Skip images that were already recoloured by an earlier run
        if nameWithoutExt.endswith("_c"):
            continue

        adjust_hls(sourcePath)

        # Mark the file as processed: "_c" goes in front of its own extension
        colourimagePath = os.path.join(path, f"{nameWithoutExt}_c{extension}")
        os.rename(sourcePath, colourimagePath)

    print(f"recolouring {classes} finished")

In [11]:
flat_data_arr = []
target_arr = []
categories = ["green", "sheep", "slow", "stop", "yellow", "free"]

# Resolve the dataset folder here instead of relying on `picDirectory`:
# that variable is assigned different values by earlier cells, so the result
# would depend on which of them happened to run last.
datasetDir = os.path.join('..', 'dataset', 'ExtractedSet', 'dataset')

# Flattening the images into an array
for classes in categories:
  print(f'loading... category : {classes}')
  path = os.path.join(datasetDir, classes)

  # Check if path exists
  if not os.path.isdir(path):
    print(f"Directory not found: {path}")
    continue

  counter = 0
  # Sorted so the row order of the data frame is the same on every run
  for img in sorted(os.listdir(path)):
    img_path = os.path.join(path, img)

    # Skip folders - only process image files
    if not os.path.isfile(img_path):
      continue

    # Skip non-image files
    if not img.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
      continue

    img_array = imread(img_path)
    img_array = np.squeeze(img_array)

    # Drop the alpha channel of RGBA images; otherwise resizing to three
    # channels would interpolate across R, G, B and alpha.
    if img_array.ndim == 3 and img_array.shape[2] == 4:
      img_array = img_array[..., :3]

    img_resized = resize(img_array, (224, 224, 3))

    # Append flattened image to list
    flat_data_arr.append(img_resized.flatten())

    # Append class label to list
    target_arr.append(classes)

    counter += 1

  print(f'loaded category {classes} : {counter} images successfully')

if not flat_data_arr:
  print(f"Warning: no images were loaded from {datasetDir} - the data frame will be empty")

# Convert lists to NumPy arrays
flat_data = np.array(flat_data_arr)
target = np.array(target_arr)

print(f"flat_data shape: {flat_data.shape}")
print(f"target shape: {target.shape}")

# Compiling the data frame for the training:
# one row per image, one column per pixel value, plus the class label
df = pd.DataFrame(flat_data)
df['Target'] = target
print("Data Frame created")
print(f"DataFrame shape: {df.shape}")

loading... category : green
loaded category green : 321 images successfully
loading... category : sheep
loaded category sheep : 478 images successfully
loading... category : slow
loaded category slow : 336 images successfully
loading... category : stop
loaded category stop : 360 images successfully
loading... category : yellow
loaded category yellow : 308 images successfully
loading... category : free
loaded category free : 408 images successfully
flat_data shape: (2211, 150528)
target shape: (2211,)
Data Frame created
DataFrame shape: (2211, 150529)


In [10]:
# Save DataFrame to a local file
import pandas as pd
import os

# Destination of the pickled data frame, relative to this notebook
FramePath = "./DataFrame/testdataframe.pkl"

# Make sure the target folder is there before writing into it
targetFolder, _ = os.path.split(FramePath)
os.makedirs(targetFolder, exist_ok=True)

# Write the data frame; a failure is reported but does not stop the notebook
try:
    df.to_pickle(FramePath)
    print(f"DataFrame successfully saved to {FramePath}")
except PermissionError:
    # The folder exists but this user is not allowed to write to it
    print(f"Permission denied: Unable to save the file to {FramePath}. Please check the directory path and your file permissions.")
except Exception as err:
    print(f"An error occurred: {err}")

DataFrame successfully saved to ./DataFrame/testdataframe.pkl
