# Create CSV spreadsheet from a folder of images
##### Note: This is only necessary if not using a provided Master Dataset with corresponding Master Spreadsheet
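##### Note: Only the EXIF camera settings are filled in automatically; the remaining columns (e.g. Site, Substrate, FNU) are left blank and must be entered manually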

In [3]:
import os
from PIL import Image
import csv

# Function to extract metadata from an image
def extract_image_metadata(image_path):
    """Read selected EXIF camera settings from an image file.

    Args:
        image_path: Path to the image file to inspect.

    Returns:
        A dict with the keys "ISO", "ShutterSpeed", "FocalLength", "FStop"
        and "WhiteBalance" (a value is None when that tag is absent), or
        None when the file cannot be opened as an image or carries no EXIF
        data at all.
    """
    try:
        with Image.open(image_path) as img:
            # Parse the EXIF block once; every _getexif() call re-reads it.
            exif = img._getexif()
    except (AttributeError, OSError):
        # AttributeError: the image format has no _getexif().
        # OSError: missing file, directory, permission problem, or a file
        # Pillow cannot identify/decode (UnidentifiedImageError is an OSError).
        return None

    if exif is None:
        return None

    return {
        "ISO": exif.get(34855),  # ISO
        "ShutterSpeed": exif.get(33434),  # Shutter Speed
        "FocalLength": exif.get(37386),  # Focal Length
        "FStop": exif.get(33437),  # F-Stop
        "WhiteBalance": exif.get(41987)  # White Balance
    }

# Folder containing the images
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
image_folder = "/home/twilight/Honors Research Project/AI_Turbidity_Honors2023-4/Datasets/IndividualCollections/2024-2-26"

# Output CSV file
csv_file = "output.csv"

# Extensions (lower-case, including the dot) that count as image files.
# The dot matters: without it a name such as "notajpg" would also match.
image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')

# Get a list of image files in the folder. os.listdir() returns entries in
# arbitrary order, so sort to make the CSV row order reproducible.
image_files = sorted(
    name for name in os.listdir(image_folder)
    if name.lower().endswith(image_extensions)
)

# Prepare the CSV header and rows
header = ["Image Name", "Site", "Substrate", "Auto", "ISO", "ShutterSpeed", "FocalLength", "FStop", "WhiteBalance", "FNU", "DOC", "Flow", "Secchi", "TSS"]
# Header columns that are filled from EXIF; all other columns stay blank.
exif_columns = ["ISO", "ShutterSpeed", "FocalLength", "FStop", "WhiteBalance"]
rows = []

# Iterate through image files and extract metadata
for image_name in image_files:
    image_path = os.path.join(image_folder, image_name)
    # Fall back to an empty dict so images without EXIF get blank cells.
    metadata = extract_image_metadata(image_path) or {}
    exif_values = [metadata.get(column, "") for column in exif_columns]

    # One row per image: name, 3 blank columns (Site, Substrate, Auto),
    # the EXIF values, then 5 blank columns (FNU, DOC, Flow, Secchi, TSS).
    rows.append([image_name, "", "", ""] + exif_values + [""] * 5)

# Write to CSV
with open(csv_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    writer.writerows(rows)

print("CSV file created successfully:", csv_file)


CSV file created successfully: output.csv


-----
# Code for splitting all pictures in a master dataset folder into range folders

In [1]:
# 11 weighted categories, each an inclusive (start, end) pair of FNU bounds;
# the bins get wider as the values increase.
folder_ranges = [
    (0, 0.49),
    (0.5, 0.99),
    (1, 2.49),
    (2.5, 4.99),
    (5, 9.99),
    (10, 14.99),
    (15, 20.99),
    (21, 28.99),
    (29, 36.99),
    (37, 44.99),
    (45, 55),
]


In [2]:
# Master spreadsheet (CSV) that supplies each image's name and FNU value
csv_file = "./Datasets/Combined/_Master-Combined.csv"

In [3]:
# Parent directory under which one sub-folder per FNU range is created;
# the images are copied into those sub-folders.
parent_directory = "./Datasets/Combined/temp/"

In [4]:
# Input folder of images; each image name taken from the first column of the
# CSV is looked up in this folder.
image_folder = "./Datasets/Combined/_Master/"

In [5]:
import os
import shutil
import csv


def _folder_for_fnu(fnu_value, ranges):
    """Return the "<start>-<end>" folder name for an FNU value, or None.

    The listed range ends leave small gaps (e.g. 0.49 -> 0.5), so each range
    except the last is treated as reaching up to, but not including, the next
    range's start. The last range is inclusive of its end. Values below the
    first start, above the last end, or NaN yield None.

    Assumes `ranges` is sorted in ascending order.
    """
    last_index = len(ranges) - 1
    for index, (start, end) in enumerate(ranges):
        if index == last_index:
            below_upper = fnu_value <= end
        else:
            below_upper = fnu_value < ranges[index + 1][0]
        if start <= fnu_value and below_upper:
            return f"{start}-{end}"
    return None


# Create one destination folder per FNU range
for start, end in folder_ranges:
    folder_name = f"{start}-{end}"
    os.makedirs(os.path.join(parent_directory, folder_name), exist_ok=True)

# Rows that could not be sorted, as (image name, reason) pairs
skipped_rows = []

# Read the CSV file and copy images to the corresponding folders
# (newline='' is what the csv module expects for files it reads)
with open(csv_file, 'r', newline='') as file:
    reader = csv.reader(file)
    next(reader, None)  # Skip header (tolerates an empty file)

    for row in reader:
        if not row:
            continue  # blank line in the CSV

        image_name = row[0]
        try:
            fnu_value = float(row[9])  # FNU column
        except (IndexError, ValueError):
            # FNU is entered by hand, so it may be missing or mistyped.
            skipped_rows.append((image_name, "missing or non-numeric FNU"))
            continue

        # Determine the folder based on the FNU value. Without this check an
        # unmatched value would reuse the previous row's folder.
        destination_folder = _folder_for_fnu(fnu_value, folder_ranges)
        if destination_folder is None:
            skipped_rows.append((image_name, f"FNU {fnu_value} is outside all ranges"))
            continue

        # Copy the image to the corresponding folder
        source_path = os.path.join(image_folder, image_name)
        destination_path = os.path.join(parent_directory, destination_folder, image_name)

        shutil.copyfile(source_path, destination_path)

print("Images copied to the corresponding folders in:", parent_directory)

# Report anything left out so it can be corrected in the spreadsheet
for skipped_name, reason in skipped_rows:
    print(f"Skipped {skipped_name}: {reason}")


Images copied to the corresponding folders in: ./Datasets/Combined/temp/


# Code for taking the created range folders and splitting into train, test, validate folders
##### Note: With a small dataset the Val folder tends to receive very few images, so check that there is at least one picture in each category

In [7]:
import os
import shutil
import random
import math

# Source and destination directories
# NOTE: run this against an empty destination_root. Files copied by an earlier
# run are not removed, so a changed dataset could leave the same image in
# more than one split.
source_root = './Datasets/Combined/temp/'
destination_root = './Datasets/Combined/_Processed11/'

# Percentage split for training, validation, and testing
train_percentage = 0.75  # 75% for training
test_percentage = 0.15   # 15% for testing
val_percentage = 0.1    # 10% for validation

# Fixed seed so that re-running the cell reproduces the same split
split_seed = 42

if not math.isclose(train_percentage + test_percentage + val_percentage, 1.0):
    raise ValueError("train, test and val percentages must sum to 1")


def _split_counts(num_files, val_fraction, test_fraction):
    """Return (num_train, num_val, num_test) for one class of num_files images.

    Validation and test sizes are their fraction of num_files rounded to the
    nearest integer; training receives the remainder. With three or more
    files every split gets at least one file. With fewer files training is
    filled first, then validation.
    """
    if num_files <= 0:
        return 0, 0, 0
    if num_files == 1:
        return 1, 0, 0
    if num_files == 2:
        return 1, 1, 0

    num_val = max(1, math.floor(val_fraction * num_files + 0.5))
    num_test = max(1, math.floor(test_fraction * num_files + 0.5))
    # Keep at least one file for each of the other splits.
    num_val = min(num_val, num_files - 2)
    num_test = min(num_test, num_files - 1 - num_val)
    return num_files - num_val - num_test, num_val, num_test


def _copy_files(file_names, source_dir, destination_dir):
    """Copy each named file from source_dir into destination_dir."""
    for file_name in file_names:
        shutil.copy(os.path.join(source_dir, file_name),
                    os.path.join(destination_dir, file_name))


# Create destination directories
for split_name in ('train', 'val', 'test'):
    os.makedirs(os.path.join(destination_root, split_name), exist_ok=True)

# Private generator: reproducible shuffles without touching the global seed
rng = random.Random(split_seed)

# Iterate through each class directory in the source root (sorted, because
# os.listdir() order is arbitrary and would change the seeded shuffles)
for class_name in sorted(os.listdir(source_root)):
    class_source_dir = os.path.join(source_root, class_name)
    if not os.path.isdir(class_source_dir):
        continue  # stray file such as .DS_Store, not a class folder

    # Create destination directories for the current class
    for split_name in ('train', 'val', 'test'):
        os.makedirs(os.path.join(destination_root, split_name, class_name), exist_ok=True)

    # Get a sorted list of all regular files in the current class directory
    files = sorted(
        entry for entry in os.listdir(class_source_dir)
        if os.path.isfile(os.path.join(class_source_dir, entry))
    )
    num_files = len(files)

    # Calculate the number of files for training, validation, and testing
    num_train, num_val, num_test = _split_counts(num_files, val_percentage, test_percentage)

    # Shuffle the files randomly (deterministic for a given split_seed)
    rng.shuffle(files)

    # Copy consecutive slices of the shuffled list into train, val and test
    _copy_files(files[:num_train], class_source_dir,
                os.path.join(destination_root, 'train', class_name))
    _copy_files(files[num_train:num_train + num_val], class_source_dir,
                os.path.join(destination_root, 'val', class_name))
    _copy_files(files[num_train + num_val:], class_source_dir,
                os.path.join(destination_root, 'test', class_name))

    # Per-class summary so empty or very small splits are easy to spot
    print(f"{class_name}: train={num_train}, val={num_val}, test={num_test}")
