## Deploying Models 

**Pipeline for using the classifier during the cruise**<br>
1 - Convert the ROI files to PNG images (you will need the pyifcb environment for this step)<br>
2 - Make CSV files listing the PNG image paths<br>
3 - Preprocess the images for the TensorFlow classifier<br>
4 - Load and apply the classifier<br>
5 - Plot 20 random images with their predicted labels, with and without detritus<br>
6 - Merge the classified CSV files for a day<br>
7 - Summarize the counts of each group per day


In [None]:
# You will need a Python 3.10.12 environment to install the pyifcb package; it doesn't work with newer versions.
pip install git+https://github.com/joefutrelle/pyifcb

In [None]:
#STEP 1  CONVERT ROI TO PNG
import os
from PIL import Image
import glob
import ifcb  # Assuming ifcb is the correct module for opening .roi files
import numpy as np

# Directory containing the .roi files (together with their .adc and .hdr files)
directory_path = r"D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB"#your folder with roi,adc and hdr 

# First and last filenames (inclusive) of the range to convert.
# The comparison below is a plain string comparison on the file name.
start_file = "D20230928T020332_IFCB165.roi"
end_file = "D20231003T073748_IFCB165.roi"

# Find all .roi files in the directory
all_roi_files = glob.glob(os.path.join(directory_path, '*.roi'))

# Keep only the files inside the requested range; sorted so the run order is deterministic
filtered_roi_files = sorted(
    file for file in all_roi_files
    if start_file <= os.path.basename(file) <= end_file
)

for roi_file in filtered_roi_files:
    try:
        # The base filename (without extension) names the output folder for this sample
        base_filename = os.path.splitext(os.path.basename(roi_file))[0]
        new_folder_path = os.path.join(directory_path, base_filename)
        os.makedirs(new_folder_path, exist_ok=True)

        # Open the .roi file to access the image data
        with ifcb.open_raw(roi_file) as sample_bin:
            # NOTE(review): files are numbered with a running index starting at 1, not with
            # the `image_name` key of sample_bin.images. Presumably that key is the ROI number;
            # confirm whether the PNG names should follow it instead.
            for index, (image_name, image_data) in enumerate(sample_bin.images.items(), start=1):
                if image_data.size == 0:
                    # np.max() would raise on an empty array and abort the whole sample
                    print(f"Warning: empty image {image_name} in {roi_file}. Skipping this image.")
                    continue

                # Non-integer image data is rescaled to the uint8 range expected for PNG output
                if not np.issubdtype(image_data.dtype, np.integer):
                    peak = float(np.max(image_data))
                    if peak > 0:
                        scaled = 255 * (image_data / peak)
                    else:
                        # All-zero (or non-positive) image: avoid dividing by zero
                        scaled = np.zeros_like(image_data, dtype=np.float64)
                    image_data = np.clip(scaled, 0, 255).astype(np.uint8)

                img = Image.fromarray(image_data)

                # e.g. D20230928T020332_IFCB165.00001.png
                filename = f"{base_filename}.{index:05}.png"
                output_path = os.path.join(new_folder_path, filename)

                try:
                    img.save(output_path)
                finally:
                    # Release the image resources even if saving fails
                    img.close()

        print(f"Processed and saved images for: {roi_file}")

    except Exception as e:
        # Best effort: report the failing file and continue with the next one
        print(f"Error processing {roi_file}: {e}")

# Process complete
print("Processing complete for all files.")


Processed and saved images for: D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D20230928T020332_IFCB165.roi
Processed and saved images for: D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D20230928T022715_IFCB165.roi
Processed and saved images for: D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D20230928T025059_IFCB165.roi
Processed and saved images for: D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D20230928T031442_IFCB165.roi
Processed and saved images for: D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D20230928T033825_IFCB165.roi
Processed and saved images for: D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D20230928T040208_IFCB165.roi
Processed 

In [None]:
# STEP 2- MAKE THE CSV FILES
#Parent directory, make csv file of each image folder 
import os
import pandas as pd

# Define the parent directory containing the image folders
parent_dir = r"D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB"

# Iterate through each folder in the parent directory (sorted for a deterministic order)
for folder_name in sorted(os.listdir(parent_dir)):
    folder_path = os.path.join(parent_dir, folder_name)

    # Only sub-folders can hold images; skip plain files such as .roi/.adc/.hdr
    if not os.path.isdir(folder_path):
        continue

    # Collect the full path of every PNG image in the folder
    image_paths = sorted(
        os.path.join(folder_path, filename)
        for filename in os.listdir(folder_path)
        if filename.endswith(".png")  # Assuming images are in PNG format
    )

    # Folders without PNGs (e.g. the processed-output folder created in STEP 3)
    # would otherwise receive an empty CSV file
    if not image_paths:
        print(f"No PNG images found in folder '{folder_name}'. Skipping...")
        continue

    # One row per image
    df = pd.DataFrame(image_paths, columns=['Image_Path'])

    # The CSV is stored next to the images and named after the folder
    csv_path = os.path.join(folder_path, f"{folder_name}_Image_Path.csv")
    df.to_csv(csv_path, index=False)

    print(f"Image paths for folder '{folder_name}' saved to {csv_path}")


Image paths for folder 'D20230914T150627_IFCB165' saved to D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D20230914T150627_IFCB165\D20230914T150627_IFCB165_Image_Path.csv
Image paths for folder 'D20230915T062640_IFCB165' saved to D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D20230915T062640_IFCB165\D20230915T062640_IFCB165_Image_Path.csv
Image paths for folder 'D20230915T092536_IFCB165' saved to D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D20230915T092536_IFCB165\D20230915T092536_IFCB165_Image_Path.csv
Image paths for folder 'D20230915T230926_IFCB165' saved to D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D20230915T230926_IFCB165\D20230915T230926_IFCB165_Image_Path.csv
Image paths for folder 'D20230916T054202_IFCB165' saved to D:\Sikuliaq_23_raw\ctd-sample

In [None]:
#STEP 3 PROCESS IMAGES
##3.1- Define functions
#Preprocess the image and saving function
import os
import cv2
import numpy as np

def preprocess_input(image, fixed_size=128):
    """Resize an image to fit a square canvas and pad the remainder with black.

    The longest side is scaled to ``fixed_size`` pixels while keeping the aspect
    ratio; the shorter side is then padded evenly on both sides.

    Parameters
    ----------
    image : numpy.ndarray
        Image array whose first two axes are (height, width).
    fixed_size : int, optional
        Side length in pixels of the square output. Defaults to 128.

    Returns
    -------
    numpy.ndarray
        The resized image centered on a ``fixed_size`` x ``fixed_size`` black canvas.
    """
    image_size = image.shape[:2]  # (y_dim, x_dim)

    # The ratio needed to make the longest side of the image `fixed_size` pixels
    ratio = float(fixed_size) / max(image_size)

    # Scale both dimensions; clamp to at least 1 pixel because a very elongated
    # image would otherwise truncate to 0 on its short side and cv2.resize would fail
    new_size = tuple(max(1, int(x * ratio)) for x in image_size)

    # cv2.resize takes the target size as (width, height)
    img = cv2.resize(image, (new_size[1], new_size[0]))

    # Padding needed to reach the square canvas, split as evenly as possible
    delta_w = fixed_size - new_size[1]
    delta_h = fixed_size - new_size[0]
    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
    left, right = delta_w // 2, delta_w - (delta_w // 2)

    # Pad with a constant black border
    color = [0, 0, 0]  # 0,0,0 -> Black
    rescaled_image = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

    return rescaled_image

def process_and_save_images(folder_path, destination_folder):
    """Preprocess every PNG in a folder and store each result as a grayscale .npy file.

    Parameters
    ----------
    folder_path : str
        Folder holding the source ``.png`` images.
    destination_folder : str
        Folder in which a ``<folder name>_processed`` sub-folder is created
        to receive the ``.npy`` files.
    """
    # Output goes to <destination_folder>/<source folder name>_processed
    out_dir = os.path.join(destination_folder, os.path.basename(folder_path) + "_processed")
    os.makedirs(out_dir, exist_ok=True)

    for entry in os.listdir(folder_path):
        # Only PNG images are handled
        if not entry.endswith(".png"):
            continue

        img_path = os.path.join(folder_path, entry)
        loaded = cv2.imread(img_path)

        # cv2.imread signals an unreadable file by returning None
        if loaded is None:
            print(f"Warning: Could not load image {img_path}. Skipping this file.")
            continue

        # Resize/pad first, then collapse the channels to grayscale
        gray = cv2.cvtColor(preprocess_input(loaded), cv2.COLOR_BGR2GRAY)

        # Same file name as the source image, with an .npy extension
        np.save(os.path.join(out_dir, entry.replace('.png', '.npy')), gray)

    print(f"Processed images saved as .npy files to {out_dir}")





In [None]:
#STEP 3 PROCESS IMAGES
##3.2 Iterate through the folders and apply the processing functions (THIS CAN TAKE TIME)

parent_dir = r"D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB"# Define the parent directory containing the folders

# Create the new destination directory for all processed folders
destination_folder = os.path.join(parent_dir, "D202309_class_processed_v2")#Change this based on folder name
os.makedirs(destination_folder, exist_ok=True)

# Iterate through each folder in the parent directory (sorted for a deterministic order)
for folder_name in sorted(os.listdir(parent_dir)):
    folder_path = os.path.join(parent_dir, folder_name)

    # Only sub-folders hold images
    if not os.path.isdir(folder_path):
        continue

    # The destination folder lives inside parent_dir; processing it would only
    # create an empty "<destination>_processed" folder inside itself
    if folder_path == destination_folder:
        continue

    print(f"Processing folder: {folder_name}")
    process_and_save_images(folder_path, destination_folder)


Processing folder: D20230914T150627_IFCB165
Processed images saved as .npy files to D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\D20230914T150627_IFCB165_processed
Processing folder: D20230915T062640_IFCB165
Processed images saved as .npy files to D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\D20230915T062640_IFCB165_processed
Processing folder: D20230915T092536_IFCB165
Processed images saved as .npy files to D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\D20230915T092536_IFCB165_processed
Processing folder: D20230915T230926_IFCB165
Processed images saved as .npy files to D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\D20230915T230926_IFCB165_processed


In [None]:
#STEP 4 APPLY CLASSIFIER
## 4.2 A2 for APPLY ECOFOCI_v2 model (Classifier EcoFOCI batch) — with input normalization added 
import os
import numpy as np
import pandas as pd
import glob
import cv2
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder

# === Load model and label encoder ===
model_path = r"C:\Users\kurta\OneDrive - UW\Desktop\Dyson_M2\EcoFOCI_v2_20epoch_model.h5" #Pathway of CLassifier EcoFOCI batch model
model = load_model(model_path)

# Load label classes in correct order
# NOTE(review): allow_pickle=True can execute arbitrary code stored in the file;
# only load label files that come from a trusted source.
class_labels = np.load(r"C:\Users\kurta\OneDrive - UW\Desktop\Dyson_M2\class_labels.npy", allow_pickle=True) #Label order for EcoFOCI batch model
label_encoder = LabelEncoder()
# Setting classes_ directly makes inverse_transform map class index -> label
label_encoder.classes_ = class_labels

# Shape of one model input: the 128 x 128 grayscale images written in STEP 3 plus a channel axis
expected_shape = (128, 128, 1)

# === Base directory setup ===
parent_dir = r"D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2" #Pathway of your processed images(.npy files)
csv_output_folder = os.path.join(parent_dir, "classification_results_v2")#v2 means model 2 (batch model)
os.makedirs(csv_output_folder, exist_ok=True)

# === Process each *_processed folder ===
for folder_name in sorted(os.listdir(parent_dir)):
    folder_path = os.path.join(parent_dir, folder_name)

    if not (os.path.isdir(folder_path) and folder_name.endswith("_processed")):
        continue

    print(f"\n🔍 Processing folder: {folder_name}")

    # Recursively find all .npy files (sorted so the CSV row order is deterministic)
    npy_files = sorted(glob.glob(os.path.join(folder_path, '**', '*.npy'), recursive=True))

    image_data = []
    image_paths = []

    for npy_path in npy_files:
        try:
            image = np.load(npy_path)

            # Normalize to [0.0, 1.0]
            image = image.astype(np.float32) / 255.0

            # Add the channel axis: (128, 128) -> (128, 128, 1)
            if image.ndim == 2:
                image = np.expand_dims(image, axis=-1)
        except Exception as e:
            print(f"⚠️ Error loading {npy_path}: {e}")
            continue

        # A single wrongly shaped file would make stacking fail and drop the
        # whole folder, so reject it here and keep the rest
        if image.shape != expected_shape:
            print(f"⚠️ Skipping {npy_path}: shape {image.shape} does not match {expected_shape}")
            continue

        image_data.append(image)
        image_paths.append(npy_path)

    if len(image_data) == 0:
        print(f"❌ No valid .npy files found in: {folder_name}. Skipping...")
        continue

    # Stack into one batch of shape (n_images, 128, 128, 1)
    image_batch = np.stack(image_data)

    # === PREDICT ===
    predictions = model.predict(image_batch)
    predicted_classes = np.argmax(predictions, axis=1)  # index of the highest score per image
    predicted_labels = label_encoder.inverse_transform(predicted_classes)

    # === SAVE TO CSV ===
    results_df = pd.DataFrame({
        'Image_Path': image_paths,
        'Predicted_Label': predicted_labels
    })

    output_csv_path = os.path.join(csv_output_folder, f"{folder_name}_classification_results2.csv")
    results_df.to_csv(output_csv_path, index=False)

    print(f"✅ Saved results to: {output_csv_path}")

# === DONE ===
print("\n🎉 Done classifying all folders.")





🔍 Processing folder: D20230914T150627_IFCB165_processed
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 51ms/step
✅ Saved results to: D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\classification_results_v2\D20230914T150627_IFCB165_processed_classification_results2.csv

🔍 Processing folder: D20230915T062640_IFCB165_processed
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 59ms/step
✅ Saved results to: D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\classification_results_v2\D20230915T062640_IFCB165_processed_classification_results2.csv

🔍 Processing folder: D20230915T092536_IFCB165_processed
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 50ms/step
✅ Saved results to: D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D20

In [None]:
#STEP 5 QA/QC PLOT PREDICTED LABELS
##5.1 Iterate through the classified files and plot 20 images WITH DETRITUS
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Define the folder containing the classification results CSV files
classification_folder = r"D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\classification_results_v2"#Folder with csv files and predicted labels

# Iterate through each results CSV file written in STEP 4
for csv_file in sorted(os.listdir(classification_folder)):
    if not csv_file.endswith("_classification_results2.csv"):
        continue

    csv_path = os.path.join(classification_folder, csv_file)
    results_df = pd.read_csv(csv_path)

    # Nothing to plot for an empty results file
    if results_df.empty:
        print(f"No images found in {csv_file}. Skipping...")
        continue

    # Sample 20 random images, or all of them if the file has fewer than 20 rows
    # (sample() raises when n is larger than the number of rows)
    random_sample = results_df.sample(n=min(20, len(results_df)), random_state=42)  # random_state ensures reproducibility

    # Create the plot
    plt.figure(figsize=(25, 10))

    for i, (index, row) in enumerate(random_sample.iterrows()):
        # Get the image path and predicted label
        image_path = row['Image_Path']
        predicted_label = row['Predicted_Label']

        # Load the image from the .npy file
        image = np.load(image_path)

        # Plot the image
        plt.subplot(4, 5, i + 1)  # 4 rows, 5 columns grid
        plt.imshow(image, cmap='gray')  # Assuming the image is grayscale
        plt.title(f"Predicted: {predicted_label}")
        plt.axis('off')

    plt.tight_layout()

    # Base name without the "_processed_classification_results2.csv" part.
    # The suffix must match the file names written in STEP 4, otherwise the
    # plot is saved as "<csv file name>.png"
    base_name = csv_file.split("_processed_classification_results2.csv")[0]

    # Define the path to save the PNG file
    output_png_path = os.path.join(classification_folder, f"{base_name}.png")

    # Save the plot as a PNG file
    plt.savefig(output_png_path)
    plt.close()  # Close the plot to free up memory

    print(f"Random 20 images plot saved as {output_png_path}")


Random 20 images plot saved as D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\classification_results_v2\D20230914T150627_IFCB165_processed_classification_results2.csv.png
Random 20 images plot saved as D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\classification_results_v2\D20230915T062640_IFCB165_processed_classification_results2.csv.png
Random 20 images plot saved as D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\classification_results_v2\D20230915T092536_IFCB165_processed_classification_results2.csv.png
Random 20 images plot saved as D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\classification_results_v2\D20230915T230926_IFCB165_processed_classificati

In [None]:
#STEP 5 QA/QC PLOT PREDICTED LABELS
##5.2 Iterate through the classified files and plot 20 images WITHOUT DETRITUS
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Define the folder containing the classification results CSV files
classification_folder = r"D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\classification_results_v2"

# Iterate through each results CSV file written in STEP 4.
# The suffix must match the "_classification_results2.csv" names used there,
# otherwise no file is selected and nothing is plotted.
for csv_file in sorted(os.listdir(classification_folder)):
    if not csv_file.endswith("_classification_results2.csv"):
        continue

    csv_path = os.path.join(classification_folder, csv_file)
    results_df = pd.read_csv(csv_path)

    # Filter out images classified as "Detritus"
    filtered_df = results_df[results_df['Predicted_Label'] != "Detritus"]

    # If no images remain after filtering, skip to the next CSV file
    if filtered_df.empty:
        print(f"No non-Detritus images found in {csv_file}. Skipping...")
        continue

    # Sample 20 random images from the filtered DataFrame, or less if fewer than 20 non-Detritus images exist
    random_sample = filtered_df.sample(n=min(20, len(filtered_df)), random_state=42)  # random_state ensures reproducibility

    # Create the plot
    plt.figure(figsize=(25, 10))

    for i, (index, row) in enumerate(random_sample.iterrows()):
        # Get the image path and predicted label
        image_path = row['Image_Path']
        predicted_label = row['Predicted_Label']

        # Load the image from the .npy file
        image = np.load(image_path)

        # Plot the image
        plt.subplot(4, 5, i + 1)  # 4 rows, 5 columns grid
        plt.imshow(image, cmap='gray')  # Assuming the image is grayscale
        plt.title(f"Predicted: {predicted_label}")
        plt.axis('off')

    plt.tight_layout()

    # Base name without the "_processed_classification_results2.csv" part
    base_name = csv_file.split("_processed_classification_results2.csv")[0]

    # Define the path to save the PNG file
    output_png_path = os.path.join(classification_folder, f"{base_name}_non_det.png")

    # Save the plot as a PNG file
    plt.savefig(output_png_path)
    plt.close()  # Close the plot to free up memory

    print(f"Random 20 non-Detritus images plot saved as {output_png_path}")



In [None]:
# STEP 6 Merge all the classified CSV files for a day
import os
import pandas as pd
import glob

# Directory containing the CSV files
csv_directory = r"D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\classification_results_v2"

# Only the per-sample results written in STEP 4 are merged. A plain '*.csv'
# pattern would also pick up the summary CSV that STEP 7 saves into this same
# folder, adding one extra row per label every time the notebook is re-run.
csv_files = sorted(glob.glob(os.path.join(csv_directory, '*_classification_results2.csv')))

# pd.concat fails with an unhelpful message on an empty list, so stop early
if not csv_files:
    raise FileNotFoundError(f"No '*_classification_results2.csv' files found in {csv_directory}")

# Read each CSV file into a dataframe
df_list = [pd.read_csv(csv_file) for csv_file in csv_files]

# Concatenate all dataframes into one
merged_df = pd.concat(df_list, ignore_index=True)



In [None]:
#STEP 7 SUMMARIZE THE RESULTS
import pandas as pd

# Count how many images were assigned to each predicted label and lay the
# result out as a two-column table (label, count), most frequent label first
summary_df = (
    merged_df['Predicted_Label']
    .value_counts()
    .rename_axis('Predicted_Label')
    .reset_index(name='Count')
)

# Destination of the summary table
output_path = r"D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\classification_results_v2\CTD_summary.csv"#your summary csv file

# Write the table without the row index
summary_df.to_csv(output_path, index=False)

print(f"Summary saved at: {output_path}")



Summary saved at: D:\Sikuliaq_23_raw\ctd-samples\depth-samples-DBO23-IFCB-20250609T230844Z-1-001\depth-samples-DBO23-IFCB\D202309_class_processed_v2\classification_results_v2\CTD_summary.csv
