In [2]:
import os
from PIL import Image
import csv

# Function to extract metadata from an image
def extract_image_metadata(image_path):
    """Read a handful of EXIF camera settings from one image file.

    Args:
        image_path: Path of the image to open with PIL.

    Returns:
        A dict with the keys "ISO", "ShutterSpeed", "FocalLength", "FStop"
        and "WhiteBalance" (a value is None when that tag is absent), or
        None when the path cannot be opened as an image or the image
        provides no EXIF data.
    """
    # EXIF tag id looked up for each output key.
    exif_tags = {
        "ISO": 34855,           # ISOSpeedRatings
        "ShutterSpeed": 33434,  # ExposureTime
        "FocalLength": 37386,   # FocalLength
        "FStop": 33437,         # FNumber
        "WhiteBalance": 41987,  # WhiteBalance
    }
    try:
        with Image.open(image_path) as img:
            # Image objects without a _getexif() method are treated as
            # carrying no EXIF data. The EXIF block is parsed once only.
            read_exif = getattr(img, "_getexif", None)
            exif = read_exif() if read_exif is not None else None
    except (AttributeError, OSError):
        # OSError covers missing paths, directories, permission problems and
        # files that PIL cannot identify as an image, so one bad file does
        # not abort a whole folder scan.
        return None

    if exif is None:
        return None
    return {name: exif.get(tag) for name, tag in exif_tags.items()}

# Folder containing the images
# NOTE(review): absolute, machine-specific path; the later cells use paths
# relative to the project folder ("./Datasets/...").
image_folder = "/home/twilight/Honors Research Project/AI_Turbidity_Honors2023-4/Datasets/With Secchi/2023-10-5-4gal-inkAndSediment1"

# Output CSV file
csv_file = "output.csv"

# File extensions (compared lower-case, including the dot) treated as images
image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')

# Get the image files in the folder. os.listdir() returns names in arbitrary
# order, so sort them to get the same row order on every run.
image_files = sorted(
    file for file in os.listdir(image_folder)
    if file.lower().endswith(image_extensions)
)

# Prepare the CSV header and rows
header = ["Image Name", "Site", "Substrate", "Auto", "ISO", "ShutterSpeed", "FocalLength", "FStop", "WhiteBalance", "FNU", "DOC", "Flow", "Secchi", "TSS"]
# Columns filled from EXIF; every other column except the name is left blank
# to be completed by hand.
metadata_columns = ["ISO", "ShutterSpeed", "FocalLength", "FStop", "WhiteBalance"]
rows = []

# Iterate through image files and extract metadata
for image_name in image_files:
    image_path = os.path.join(image_folder, image_name)
    metadata = extract_image_metadata(image_path)

    # Blank EXIF cells when no metadata could be read for this image
    exif_cells = [metadata[column] if metadata else "" for column in metadata_columns]

    # Layout: name, Site/Substrate/Auto (blank), EXIF values, FNU..TSS (blank)
    rows.append([image_name, "", "", ""] + exif_cells + [""] * 5)

# Write to CSV
with open(csv_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    writer.writerows(rows)

print("CSV file created successfully:", csv_file)


CSV file created successfully: output.csv


# Make sure to add the measured values and tidy up the CSV in Excel, and also rename the file (e.g. to `_Master-Secchi.csv`, which the splitting code below reads)

### Code for splitting all pictures in a master dataset folder into range folders

In [3]:
# Five FNU buckets, each written as (low, high)
folder_ranges = [
    (0, 10),
    (11, 21),
    (22, 32),
    (33, 43),
    (44, 55),
]

In [None]:
# Eleven FNU buckets, each written as (low, high). This assigns the same
# variable as the 5-bucket cell, so whichever of the two ran last is used.
folder_ranges = [
    (0, 5),
    (6, 10),
    (11, 15),
    (16, 20),
    (21, 25),
    (26, 30),
    (31, 35),
    (36, 40),
    (41, 45),
    (46, 50),
    (51, 55),
]

In [4]:
# Input CSV file. The copy cell below skips its first row as a header and
# reads the image name from column 0 and the FNU value from column 9.
csv_file = "./Datasets/With Secchi/_Master-Secchi.csv"

In [5]:
# Specify the parent directory where you want to create the folders
# (one "<start>-<end>" folder per entry of folder_ranges is created in here).
# NOTE(review): the train/val/test split cell reads its class folders from
# './Datasets/With Secchi/Grouped/', not from this directory - confirm
# whether the range folders are moved there by hand.
parent_directory = "./Datasets/With Secchi/"

In [6]:
# Specify path to input folder of images. Each image name taken from
# column 0 of the CSV is joined onto this folder to find the source file.
image_folder = "./Datasets/With Secchi/_Master/"

In [7]:
import os
import shutil
import csv


# Create one folder per FNU range, named "<start>-<end>"
for start, end in folder_ranges:
    folder_name = f"{start}-{end}"
    os.makedirs(os.path.join(parent_directory, folder_name), exist_ok=True)

# (image name, FNU) pairs whose FNU value fell inside none of the ranges
skipped_rows = []

# Read the CSV file and copy images to the corresponding folders
with open(csv_file, 'r', newline='') as file:
    reader = csv.reader(file)
    header = next(reader)  # Skip header

    for row in reader:
        image_name = row[0]
        fnu_value = float(row[9])  # FNU column

        # Determine the folder based on the FNU value (bounds are inclusive)
        for start, end in folder_ranges:
            if start <= fnu_value <= end:
                destination_folder = f"{start}-{end}"
                break
        else:
            # No range matched, e.g. a fractional value lying between two
            # integer-bounded ranges. Skip the row rather than reuse the
            # folder chosen for the previous image.
            skipped_rows.append((image_name, fnu_value))
            continue

        # Copy the image to the corresponding folder
        source_path = os.path.join(image_folder, image_name)  # Update with your image folder path
        destination_path = os.path.join(parent_directory, destination_folder, image_name)

        shutil.copyfile(source_path, destination_path)

print("Images copied to the corresponding folders in:", parent_directory)
if skipped_rows:
    print(f"Skipped {len(skipped_rows)} image(s) whose FNU value matched no range:", skipped_rows)


Images copied to the corresponding folders in: ./Datasets/With Secchi/


### Code for taking the created range folders and splitting into train, test, validate folders
##### With a small dataset the split tends to put few (or no) images in the val folder; make sure there is at least one picture in each category

In [8]:
import os
import shutil
import random
import math

# Source and destination directories
source_root = './Datasets/With Secchi/Grouped/'
destination_root = './Datasets/With Secchi/_Processed/'

# Percentage split for training, validation, and testing.
# The val and test counts are derived from their percentages; train receives
# whatever is left (about train_percentage when the three sum to 1).
train_percentage = 0.8  # 80% for training
test_percentage = 0.15   # 15% for testing
val_percentage = 0.05    # 5% for validation

# Fixed seed: files are copied (not moved) and the destination is never
# cleared, so a different shuffle on a re-run would leave the same image in
# more than one split. A seeded shuffle over sorted names repeats the same
# assignment as long as the source folders are unchanged.
split_seed = 0
rng = random.Random(split_seed)

split_names = ('train', 'val', 'test')

# Create destination directories
for split_name in split_names:
    os.makedirs(os.path.join(destination_root, split_name), exist_ok=True)

# Iterate through each class directory in the source root
for class_name in sorted(os.listdir(source_root)):
    class_source_dir = os.path.join(source_root, class_name)

    # Ignore stray files sitting next to the class folders
    if not os.path.isdir(class_source_dir):
        continue

    # Create destination directories for the current class
    for split_name in split_names:
        os.makedirs(os.path.join(destination_root, split_name, class_name), exist_ok=True)

    # Get a sorted list of the regular files in the current class directory
    files = sorted(
        name for name in os.listdir(class_source_dir)
        if os.path.isfile(os.path.join(class_source_dir, name))
    )
    num_files = len(files)

    # Calculate the number of files for validation, testing and training.
    # val gets at least one file whenever the class has two or more; both
    # val and test are capped so that train always keeps at least one file.
    num_val = min(max(1, round(val_percentage * num_files)), max(num_files - 1, 0))
    num_test = min(round(test_percentage * num_files), max(num_files - num_val - 1, 0))
    num_train = num_files - num_val - num_test

    # Shuffle the files (reproducibly, see split_seed)
    rng.shuffle(files)

    # Consecutive, non-overlapping slices of the shuffled list
    split_files = {
        'train': files[:num_train],
        'val': files[num_train:num_train + num_val],
        'test': files[num_train + num_val:],
    }

    # Copy each file into its split folder
    for split_name, names in split_files.items():
        for file_name in names:
            source_file = os.path.join(class_source_dir, file_name)
            destination_file = os.path.join(destination_root, split_name, class_name, file_name)
            shutil.copy(source_file, destination_file)


-----
### Code for splitting and adding all at once
#### Not as good

In [37]:
import os
import shutil
import csv
import random

# Input CSV file
csv_file = "./Dataset1.csv"

# Specify the parent directory where you want to create the folders
parent_directory = "./Datasets/D1Set/"

# Folder holding the source images named in column 0 of the CSV
source_image_folder = "./Datasets/Dataset1"  # Update with your image folder path

# Create folders for different FNU ranges and subsets inside the parent directory
folder_ranges = [(0, 10), (11, 21), (22, 32), (33, 43), (44, 55)]
subsets = ['train', 'test', 'validate']

for start, end in folder_ranges:
    folder_name = f"{start}-{end}"
    for subset in subsets:
        os.makedirs(os.path.join(parent_directory, subset, folder_name), exist_ok=True)

# Define the percentages for train, test, and validate
train_percentage = 0.7
test_percentage = 0.3
validate_percentage = 0.00

# Initialize counters for each FNU range and subset
counters = {range_: {subset: 0 for subset in subsets} for range_ in folder_ranges}

# (image name, FNU) pairs whose FNU value fell inside none of the ranges
skipped_rows = []

# Read the CSV file and copy images to the corresponding folders
with open(csv_file, 'r', newline='') as file:
    reader = csv.reader(file)
    header = next(reader)  # Skip header

    for row in reader:
        image_name = row[0]
        fnu_value = float(row[9])  # FNU column

        # Determine the folder based on the FNU value (bounds are inclusive)
        for start, end in folder_ranges:
            if start <= fnu_value <= end:
                fnu_range = (start, end)
                break
        else:
            # No range matched, e.g. a fractional value lying between two
            # integer-bounded ranges. Skip the row rather than reuse the
            # range chosen for the previous image.
            skipped_rows.append((image_name, fnu_value))
            continue

        # Determine the subset based on the counters and percentages.
        # Each subset is also capped per range at (percentage * 100) images;
        # once a cap is hit the draw falls through to the next branch.
        rand_num = random.random()
        if rand_num < train_percentage and counters[fnu_range]['train'] < train_percentage * 100:
            subset_folder = 'train'
        elif rand_num < train_percentage + test_percentage and counters[fnu_range]['test'] < test_percentage * 100:
            subset_folder = 'test'
        elif counters[fnu_range]['validate'] < validate_percentage * 100:
            subset_folder = 'validate'
        else:
            subset_folder = 'train'  # Default to 'train' if not within the percentages

        # Update the counters and copy the image to the corresponding subset folder
        counters[fnu_range][subset_folder] += 1
        destination_folder = f"{fnu_range[0]}-{fnu_range[1]}"
        source_path = os.path.join(source_image_folder, image_name)
        destination_path = os.path.join(parent_directory, subset_folder, destination_folder, image_name)

        shutil.copyfile(source_path, destination_path)

print("Images copied to the corresponding folders in:", parent_directory)
if skipped_rows:
    print(f"Skipped {len(skipped_rows)} image(s) whose FNU value matched no range:", skipped_rows)


Images copied to the corresponding folders in: ./Datasets/D1Set/


# Now for training the classification model

In [9]:
# Check whether PyTorch can see a CUDA device; the result is shown as the
# cell output.
import torch
torch.cuda.is_available()

True

In [10]:
# Clear whatever this cell has already displayed
from IPython import display
display.clear_output()

# Print the Ultralytics environment summary (see the cell output)
import ultralytics
ultralytics.checks()

Ultralytics YOLOv8.0.138 🚀 Python-3.11.4 torch-2.0.1 CUDA:0 (NVIDIA GeForce RTX 4080, 16376MiB)
Setup complete ✅ (24 CPUs, 30.9 GB RAM, 113.9/1006.9 GB disk)


In [11]:
from ultralytics import YOLO

# NOTE: this import rebinds two names used earlier in the notebook.
# `display` was the IPython.display module (from `from IPython import display`)
# and becomes the display() function; `Image` was PIL's Image module
# (from `from PIL import Image`) and becomes IPython.display.Image.
from IPython.display import display, Image

In [12]:
# Show the installed NumPy version as the cell output
import numpy
numpy.version.version

'1.25.0'

In [13]:
#%cd '/content'
# Train Initial
# Initial training run: trains from the yolov8l-cls.pt classification weights
# on ./Datasets/With Secchi/_Processed for 500 epochs at image size 320;
# results are written under ./Training/2023-10-8.
# NOTE(review): patience=0 presumably disables early stopping - confirm
# against the Ultralytics docs for the installed version.
!yolo task=classify mode=train model='yolov8l-cls.pt' data="./Datasets/With Secchi/_Processed" epochs=500 imgsz=320 patience=0 project="./Training/2023-10-8" #resume model="./FirstTrain/something.pt" 

New https://pypi.org/project/ultralytics/8.0.195 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.138 🚀 Python-3.11.4 torch-2.0.1 CUDA:0 (NVIDIA GeForce RTX 4080, 16376MiB)
[34m[1mengine/trainer: [0mtask=classify, mode=train, model=yolov8l-cls.pt, data=./Datasets/With Secchi/_Processed, epochs=500, patience=0, batch=16, imgsz=320, save=True, save_period=-1, cache=False, device=None, workers=8, project=./Training/2023-10-8, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=None, visualize=False, a

In [23]:
# Validate the best checkpoint of the training run against the processed
# dataset; results are written under ./Training/2023-10-8.
!yolo classify val model="./Training/2023-10-8/train/weights/best.pt" data="./Datasets/With Secchi/_Processed/" project="./Training/2023-10-8"

Ultralytics YOLOv8.0.138 🚀 Python-3.11.4 torch-2.0.1 CUDA:0 (NVIDIA GeForce RTX 4080, 16376MiB)
YOLOv8l-cls summary (fused): 133 layers, 36190981 parameters, 0 gradients
  return F.conv2d(input, weight, bias, self.stride,
               classes   top1_acc   top5_acc: 100%|██████████| 1/1 [00:04<00:00,
                   all      0.875          1
Speed: 0.1ms preprocess, 5.1ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mTraining/2023-10-8/val2[0m


In [18]:
# Inference: run the best checkpoint on every file matched by the
# _Processed/val/**/* glob (the validation split's class folders); results
# are written under ./Training/2023-10-8.
!yolo task=classify mode=predict model="./Training/2023-10-8/train/weights/best.pt" conf=0.5 source="./Datasets/With Secchi/_Processed/val/**/*" project="./Training/2023-10-8"

Ultralytics YOLOv8.0.138 🚀 Python-3.11.4 torch-2.0.1 CUDA:0 (NVIDIA GeForce RTX 4080, 16376MiB)
YOLOv8l-cls summary (fused): 133 layers, 36190981 parameters, 0 gradients

  return F.conv2d(input, weight, bias, self.stride,
image 1/8 /home/twilight/Honors Research Project/AI_Turbidity_Honors2023-4/Datasets/With Secchi/_Processed/val/0-10/P1010004.JPG: 320x320 0-10 1.00, 22-32 0.00, 11-21 0.00, 33-43 0.00, 44-55 0.00, 3.4ms
image 2/8 /home/twilight/Honors Research Project/AI_Turbidity_Honors2023-4/Datasets/With Secchi/_Processed/val/0-10/P1010040.JPG: 320x320 0-10 1.00, 22-32 0.00, 33-43 0.00, 44-55 0.00, 11-21 0.00, 3.3ms
image 3/8 /home/twilight/Honors Research Project/AI_Turbidity_Honors2023-4/Datasets/With Secchi/_Processed/val/0-10/P1010055.JPG: 320x320 0-10 1.00, 22-32 0.00, 33-43 0.00, 44-55 0.00, 11-21 0.00, 4.1ms
image 4/8 /home/twilight/Honors Research Project/AI_Turbidity_Honors2023-4/Datasets/With Secchi/_Processed/val/0-10/P1010056.JPG: 320x320 0-10 1.00, 33-43 0.00, 11-21 0