In [None]:
import os
import shutil

# Define source and target directories
base_dir = "../data/dataset_balanced/train"  # Replace with your actual dataset path
augmented_dir = "../data/dataset_balanced/train_augmented"
original_dir = "../data/dataset_balanced/train_original"

# Category sub-folders expected under base_dir
categories = ["cardboard", "metal", "paper", "plastic", "glass", "trash"]


def separate_augmented(base_dir, augmented_dir, original_dir, categories, marker="_aug"):
    """Move the files of each category folder into an 'augmented' or 'original' tree.

    For every name in ``categories`` the folders ``augmented_dir/<category>`` and
    ``original_dir/<category>`` are created if needed. Each regular file found in
    ``base_dir/<category>`` is then moved to the augmented tree when its filename
    contains ``marker``, and to the original tree otherwise.

    Args:
        base_dir: Directory holding one sub-folder per category.
        augmented_dir: Destination root for files whose name contains ``marker``.
        original_dir: Destination root for all other files.
        categories: Iterable of category folder names to process.
        marker: Substring that identifies an augmented file.

    Returns:
        dict mapping each category to a ``(n_original, n_augmented)`` tuple with
        the number of files moved into each tree.
    """
    moved = {}
    for category in categories:
        category_path = os.path.join(base_dir, category)
        aug_category_path = os.path.join(augmented_dir, category)
        orig_category_path = os.path.join(original_dir, category)

        os.makedirs(aug_category_path, exist_ok=True)
        os.makedirs(orig_category_path, exist_ok=True)

        n_original = 0
        n_augmented = 0

        # A missing category folder is reported and skipped instead of
        # aborting the whole run with FileNotFoundError.
        if not os.path.isdir(category_path):
            print(f"Warning: category folder not found, skipping: {category_path}")
            moved[category] = (n_original, n_augmented)
            continue

        for filename in os.listdir(category_path):
            src_path = os.path.join(category_path, filename)

            # Only regular files are moved; nested directories (for example
            # notebook checkpoint folders) stay where they are.
            if not os.path.isfile(src_path):
                continue

            if marker in filename:  # Augmented file
                dest_path = os.path.join(aug_category_path, filename)
                n_augmented += 1
            else:
                dest_path = os.path.join(orig_category_path, filename)
                n_original += 1

            shutil.move(src_path, dest_path)

        moved[category] = (n_original, n_augmented)
    return moved


# Only touch the filesystem when the source dataset is actually present, so a
# wrong path does not leave empty output trees behind.
if os.path.isdir(base_dir):
    move_counts = separate_augmented(base_dir, augmented_dir, original_dir, categories)
    for category, (n_original, n_augmented) in move_counts.items():
        print(f"{category}: moved {n_original} original / {n_augmented} augmented file(s)")
    print("✅ Dataset successfully separated into original and augmented folders!")
else:
    print(f"Warning: source directory not found, nothing moved: {base_dir}")


In [None]:
import os
from collections import defaultdict

# Define dataset paths
original_dir = "../data/dataset_balanced/train_original"
augmented_dir = "../data/dataset_balanced/train_augmented"

# Define categories
categories = ["cardboard", "metal", "paper", "trash", "plastic", "glass"]


def group_augmentations(original_names, augmented_names, marker="_aug_"):
    """Group augmented filenames under the original filename they derive from.

    An augmented file is matched to an original when the part of its name
    before the first ``marker`` equals the original's filename without its
    extension.

    Args:
        original_names: Iterable of original image filenames.
        augmented_names: Iterable of augmented image filenames.
        marker: Separator between the original stem and the augmentation suffix.

    Returns:
        ``(grouped, unmatched)`` where ``grouped`` is a ``defaultdict(list)``
        mapping original filename -> list of augmented filenames (originals
        without any augmentation do not appear), and ``unmatched`` is the list
        of augmented filenames for which no original was found.
    """
    # Index originals by stem (filename without extension) for exact matching
    stems = {os.path.splitext(name)[0]: name for name in original_names}

    grouped = defaultdict(list)
    unmatched = []
    for aug_name in augmented_names:
        base = aug_name.split(marker)[0]
        if base in stems:
            grouped[stems[base]].append(aug_name)
        else:
            # Kept so the caller can report files that would otherwise vanish silently
            unmatched.append(aug_name)
    return grouped, unmatched


# Dictionary to store original images and their augmented versions
image_pairs = {}

for category in categories:
    orig_category_path = os.path.join(original_dir, category)
    aug_category_path = os.path.join(augmented_dir, category)

    if not (os.path.isdir(orig_category_path) and os.path.isdir(aug_category_path)):
        print(f"Warning: skipping '{category}': {orig_category_path} or {aug_category_path} not found")
        image_pairs[category] = defaultdict(list)
        continue

    # os.listdir order is arbitrary; sorting keeps the pairing (and the example
    # picked from it later) reproducible across runs and machines.
    grouped_images, unmatched = group_augmentations(
        sorted(os.listdir(orig_category_path)),
        sorted(os.listdir(aug_category_path)),
    )

    if unmatched:
        print(
            f"Warning: {len(unmatched)} augmented file(s) in '{category}' "
            f"have no matching original (e.g. {unmatched[0]})"
        )

    # Store matched images
    image_pairs[category] = grouped_images

# Print a few sample matches for verification
for key, value in list(image_pairs["cardboard"].items())[:5]:
    print(f"Original: {key}, Augmented: {value}")


In [None]:
import cv2
import matplotlib.pyplot as plt

# Choose a category for analysis
category = "cardboard"
orig_category_path = os.path.join(original_dir, category)
aug_category_path = os.path.join(augmented_dir, category)

# Select the first original image that has at least one augmentation.
# A default is supplied so an empty match set produces a readable error
# instead of a bare StopIteration.
example_original, example_augments = next(
    ((orig, augs) for orig, augs in image_pairs[category].items() if augs),
    (None, []),
)
if example_original is None:
    raise ValueError(f"No original image with augmentations found for category '{category}'")

# Load original image (cv2.imread returns None when the file cannot be read)
original_path = os.path.join(orig_category_path, example_original)
original_img = cv2.imread(original_path)
if original_img is None:
    raise FileNotFoundError(f"Could not read image: {original_path}")
original_rgb = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; matplotlib expects RGB

# Size the grid from the panels actually drawn: the original plus at most
# four augmentations. Sizing it from the full augmentation count would leave
# empty columns and shrink every panel.
num_augments = len(example_augments)
shown_augments = example_augments[:4]  # Limit to 4 augmentations
num_panels = len(shown_augments) + 1

plt.figure(figsize=(14, 5))
plt.subplot(1, num_panels, 1)
plt.imshow(original_rgb)
plt.title("Original Image")
plt.axis("off")

# Plot the augmented images dynamically
for i, aug_img_name in enumerate(shown_augments):
    plt.subplot(1, num_panels, i + 2)
    plt.axis("off")

    aug_img = cv2.imread(os.path.join(aug_category_path, aug_img_name))
    if aug_img is None:
        # Keep the panel slot but flag the unreadable file rather than crashing
        plt.title(f"Aug {i+1} (unreadable)")
        continue

    aug_rgb = cv2.cvtColor(aug_img, cv2.COLOR_BGR2RGB)
    plt.imshow(aug_rgb)
    plt.title(f"Aug {i+1}")
    plt.axis("off")

plt.suptitle(f"Comparison of Original vs. Augmented Images for '{category}'", fontsize=14)
plt.show()


In [None]:
import numpy as np
from skimage.metrics import structural_similarity as ssim

# Convert original to grayscale
gray_original = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)

# Store SSIM & PSNR values for the example image's augmentations
ssim_scores = []
psnr_values = []

for aug_img_name in example_augments[:4]:  # Limit to 4 augmentations
    aug_img = cv2.imread(os.path.join(aug_category_path, aug_img_name))
    if aug_img is None:
        # cv2.imread returns None when the file cannot be read
        print(f"Augmented Image: {aug_img_name} | skipped (could not be read)")
        continue
    gray_aug = cv2.cvtColor(aug_img, cv2.COLOR_BGR2GRAY)

    # Compute SSIM & PSNR
    ssim_score = ssim(gray_original, gray_aug)

    # The grayscale arrays are uint8: subtracting or squaring them directly
    # wraps around modulo 256 and yields a meaningless MSE. Promote to float
    # before doing the arithmetic.
    diff = gray_original.astype(np.float64) - gray_aug.astype(np.float64)
    mse = np.mean(diff ** 2)
    psnr = 10 * np.log10(255 ** 2 / mse) if mse != 0 else float('inf')

    ssim_scores.append(ssim_score)
    psnr_values.append(psnr)

    print(f"Augmented Image: {aug_img_name} | SSIM: {ssim_score:.4f} | PSNR: {psnr:.2f} dB")


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.metrics import structural_similarity as ssim
import matplotlib.pyplot as plt
import seaborn as sns

# Define dataset paths
original_dir = "../data/dataset_balanced/train_original"
augmented_dir = "../data/dataset_balanced/train_augmented"

# Define categories
categories = ["cardboard", "metal", "paper", "trash", "plastic", "glass"]

# One record per (original, augmented) pair that could be scored
ssim_scores_list = []
skipped_pairs = 0  # pairs dropped because an image was unreadable or sizes differ

# Loop variables use names distinct from `category`, `original_img`,
# `aug_category_path`, ... so re-running the single-image cells after this one
# still sees the values they defined.
for cat_name in categories:
    cat_orig_dir = os.path.join(original_dir, cat_name)
    cat_aug_dir = os.path.join(augmented_dir, cat_name)

    # Process all matched original-augmented pairs
    for original_name, aug_names in image_pairs[cat_name].items():
        original_path = os.path.join(cat_orig_dir, original_name)

        if not os.path.exists(original_path) or not aug_names:
            continue  # Skip if no matching augmented images

        # Load the original once per group; cv2.imread returns None on failure
        original_gray = cv2.imread(original_path, cv2.IMREAD_GRAYSCALE)
        if original_gray is None:
            skipped_pairs += len(aug_names)
            continue

        for aug_name in aug_names:
            augmented_gray = cv2.imread(os.path.join(cat_aug_dir, aug_name), cv2.IMREAD_GRAYSCALE)

            # SSIM needs two readable images of identical dimensions; skip
            # (and count) anything else instead of aborting the whole run.
            if augmented_gray is None or augmented_gray.shape != original_gray.shape:
                skipped_pairs += 1
                continue

            # Store results
            ssim_scores_list.append({
                "Category": cat_name,
                "Original": original_name,
                "Augmented": aug_name,
                "SSIM": ssim(original_gray, augmented_gray),
            })

if skipped_pairs:
    print(f"Warning: skipped {skipped_pairs} pair(s) (unreadable image or size mismatch)")

# Convert to DataFrame. Columns are given explicitly so the frame has the
# expected schema even when no pair could be scored.
df_ssim = pd.DataFrame(ssim_scores_list, columns=["Category", "Original", "Augmented", "SSIM"])




In [None]:
import pandas as pd

# Preview the per-pair SSIM table: report how many pairs were scored, then let
# the notebook render the first rows as a table rather than printing the
# whole frame as text.
print(f"{len(df_ssim)} original/augmented pairs scored")
df_ssim.head(10)


In [None]:
# Persist the per-pair SSIM table to disk, leaving out the DataFrame index column.
output_csv = "ssim_scores.csv"
df_ssim.to_csv(output_csv, index=False)
print("SSIM scores saved to ssim_scores.csv")

In [None]:
# Dataset-wide SSIM statistics, plus the mean broken down by category
ssim_column = df_ssim["SSIM"]
mean_ssim, std_ssim = ssim_column.mean(), ssim_column.std()
category_mean_ssim = ssim_column.groupby(df_ssim["Category"]).mean()

# Report the summary
for summary_line in (
    f"📊 Overall Mean SSIM: {mean_ssim:.4f}",
    f"📊 Overall SSIM Standard Deviation: {std_ssim:.4f}",
    "\n📊 Category-wise Mean SSIM:",
):
    print(summary_line)
print(category_mean_ssim)


In [None]:
# Figure 1: histogram (with KDE overlay) of every SSIM score in the dataset
hist_fig, hist_ax = plt.subplots(figsize=(10, 5))
sns.histplot(df_ssim["SSIM"], bins=30, kde=True, color="royalblue", ax=hist_ax)
hist_ax.set_title("Distribution of SSIM Scores Across Dataset")
hist_ax.set_xlabel("SSIM Score")
hist_ax.set_ylabel("Frequency")
hist_ax.grid()
plt.show()

# Figure 2: one box per category showing the spread of SSIM scores
box_fig, box_ax = plt.subplots(figsize=(12, 5))
sns.boxplot(data=df_ssim, x="Category", y="SSIM", palette="Set2", ax=box_ax)
box_ax.set_title("SSIM Score Distribution by Category")
box_ax.set_xlabel("Category")
box_ax.set_ylabel("SSIM Score")
box_ax.tick_params(axis="x", labelrotation=45)
box_ax.grid()
plt.show()
