In [63]:
# CSV that is read into `train_df` further down. The path is relative, so it is
# resolved against the kernel's current working directory.
TRAIN_CSV_PATH = "../data_duplicate/train_images_filtered_no_duplicates.csv"

In [64]:

import pandas as pd
import numpy as np
import os
import torch
import imagehash
import glob
import matplotlib.pyplot as plt

from tqdm.auto import tqdm
from PIL import Image

In [None]:
# Load the training table (one row per image).
train_df = pd.read_csv(TRAIN_CSV_PATH)

# Later cells look rows up by "image_id" and read "labels"; fail here, with a
# clear message, rather than deep inside the augmentation loop.
missing_columns = {"image_id", "labels"} - set(train_df.columns)
assert not missing_columns, f"{TRAIN_CSV_PATH} is missing columns: {sorted(missing_columns)}"

# Show the size plus a small sample instead of printing the whole frame.
print(train_df.shape)
train_df.head()

In [None]:
# Number of images per label, most frequent label first.
unique_labels = train_df["labels"].value_counts()

# One index entry per distinct label, so the length of the counts Series is the
# number of labels. (Calling .nunique() on the counts would instead count how
# many *different frequencies* occur, which is smaller whenever two labels
# happen to have the same number of images.)
num_unique_labels = len(unique_labels)
print(unique_labels)

In [67]:
# Folder holding the training .jpg files. By default it is the "train_images"
# folder next to the training CSV, so the notebook no longer depends on one
# user's absolute home-directory path. Set the TRAIN_IMAGES_DIR environment
# variable to point somewhere else.
# NOTE(review): assumes the images live beside TRAIN_CSV_PATH, as the previous
# absolute path (".../data_duplicate/train_images") suggests - confirm.
images_path = os.environ.get(
    "TRAIN_IMAGES_DIR",
    os.path.join(os.path.dirname(TRAIN_CSV_PATH), "train_images"),
)

# Sorted so the processing order is stable from run to run (glob order is not).
img_paths = sorted(glob.glob(os.path.join(images_path, "*.jpg")))

if not img_paths:
    print(f"Warning: no .jpg files found under {os.path.abspath(images_path)}")

In [68]:
import cv2
import random

def add_random_noise(image, sigma=10):
    """Return a copy of ``image`` with zero-mean Gaussian noise added.

    The noise is added in floating point and only then rounded and clipped to
    the 0-255 range. Casting the noise itself to ``uint8`` first (as this
    function used to do) cannot represent the negative samples: they wrap
    around or clamp depending on the platform, so the "noise" only ever
    brightened the image.

    Args:
        image: ``uint8`` image array of any shape.
        sigma: Standard deviation of the noise in intensity levels. The default
            of 10 keeps the noise mild.

    Returns:
        A new ``uint8`` array with the same shape as ``image``.

    Note:
        Samples are drawn from NumPy's global random state.
    """
    noise = np.random.normal(0, sigma, image.shape)
    noisy_image = image.astype(np.float32) + noise
    # Round to the nearest level (plain truncation would bias the result
    # darker), then saturate at the valid intensity bounds.
    return np.clip(np.rint(noisy_image), 0, 255).astype(np.uint8)

def add_random_shadow(image):
    """Blend a white horizontal band into ``image`` and return the result.

    Two rows are drawn at random between 30% and 70% of the image height and
    the full-width rectangle between them is filled with white in an otherwise
    black mask. The output is ``ratio * image + (1 - ratio) * mask`` with
    ``ratio`` drawn from [0.3, 0.7], so pixels outside the band are dimmed by
    ``ratio`` and pixels inside it are pulled towards white.

    Args:
        image: ``uint8`` image array; it is not modified.

    Returns:
        A new image array of the same shape.
    """
    height, width = image.shape[0], image.shape[1]

    # Keep the draw order: first row, second row, then the blend ratio.
    first_row = height * np.random.uniform(0.3, 0.7)
    second_row = height * np.random.uniform(0.3, 0.7)

    # Corners of the band; the float rows are truncated by the int32 cast.
    corners = np.array(
        [
            [0, first_row],
            [width, first_row],
            [width, second_row],
            [0, second_row],
        ]
    ).astype(np.int32)

    band_mask = np.zeros_like(image, dtype=np.uint8)
    cv2.fillPoly(band_mask, [corners], (255, 255, 255))

    image_weight = np.random.uniform(0.3, 0.7)
    return cv2.addWeighted(image.copy(), image_weight, band_mask, 1 - image_weight, 0)

def shear_image(image, shear_range=0.2):
    """Apply a random horizontal shear to a 3-dimensional image array.

    Args:
        image: Image array of shape (rows, cols, channels).
        shear_range: The shear factor is drawn uniformly from
            ``[-shear_range, shear_range]``.

    Returns:
        The sheared image, same width and height as the input.
    """
    height, width, _channels = image.shape
    horizontal_shift = width * np.random.uniform(-shear_range, shear_range)

    # 2x3 affine matrix: x' = x + (shift / width) * y, y' = y.
    affine = np.float32(
        [
            [1, horizontal_shift / width, 0],
            [0, 1, 0],
        ]
    )
    return cv2.warpAffine(image, affine, (width, height))

def rotate_image(image, max_angle=30):
    """Rotate a 3-dimensional image array about its centre by a random angle.

    Args:
        image: Image array of shape (rows, cols, channels).
        max_angle: The angle (degrees) is drawn uniformly from
            ``[-max_angle, max_angle]``.

    Returns:
        The rotated image, same width and height as the input and with no
        scaling applied.
    """
    height, width, _channels = image.shape
    degrees = np.random.uniform(-max_angle, max_angle)

    centre = (width / 2, height / 2)
    affine = cv2.getRotationMatrix2D(centre, degrees, 1)
    return cv2.warpAffine(image, affine, (width, height))

def add_random_shadow_new(image):
    """Blend a white cross-shaped mask (one horizontal and one vertical band) into ``image``.

    The horizontal band spans the full width between two random rows, and the
    vertical band spans the full height between two random columns; each
    coordinate is drawn between 30% and 70% of the corresponding dimension.
    The union of both bands is white in an otherwise black mask, and the output
    is ``ratio * image + (1 - ratio) * mask`` with ``ratio`` drawn from
    [0.3, 0.7].

    Args:
        image: ``uint8`` image array; it is not modified.

    Returns:
        A new image array of the same shape.
    """
    height, width = image.shape[0], image.shape[1]

    # Draw order matters for reproducibility: rows first, then columns.
    row_a = int(height * np.random.uniform(0.3, 0.7))
    row_b = int(height * np.random.uniform(0.3, 0.7))
    col_a = int(width * np.random.uniform(0.3, 0.7))
    col_b = int(width * np.random.uniform(0.3, 0.7))

    # Full-width band between the two rows.
    horizontal_band = np.zeros_like(image, dtype=np.uint8)
    horizontal_corners = np.array(
        [[0, row_a], [width, row_a], [width, row_b], [0, row_b]],
        dtype=np.int32,
    )
    cv2.fillPoly(horizontal_band, [horizontal_corners], (255, 255, 255))

    # Full-height band between the two columns.
    vertical_band = np.zeros_like(image, dtype=np.uint8)
    vertical_corners = np.array(
        [[col_a, 0], [col_b, 0], [col_b, height], [col_a, height]],
        dtype=np.int32,
    )
    cv2.fillPoly(vertical_band, [vertical_corners], (255, 255, 255))

    cross_mask = cv2.bitwise_or(horizontal_band, vertical_band)

    image_weight = np.random.uniform(0.3, 0.7)
    return cv2.addWeighted(image.copy(), image_weight, cross_mask, 1 - image_weight, 0)

def _apply_hue_shift(ori_img, hue_value_list, saturation_boost, brightness_boost):
    """Shift the hue of a BGR image by a randomly chosen offset and return RGB.

    Shared implementation behind ``hueAdder1`` .. ``hueAdder4``.

    Args:
        ori_img: ``uint8`` image in BGR channel order.
        hue_value_list: Candidate hue offsets; one is picked with
            ``random.choice``. Offsets may be negative.
        saturation_boost: Multiplier applied to the saturation channel.
        brightness_boost: Multiplier applied to the value channel.

    Returns:
        The adjusted image in **RGB** channel order (not BGR).
    """
    # BGR -> RGB -> HSV, as before.
    img_hsv = cv2.cvtColor(cv2.cvtColor(ori_img, cv2.COLOR_BGR2RGB), cv2.COLOR_RGB2HSV)

    hue_value = random.choice(hue_value_list)

    # The hue channel is uint8. Adding the offset directly overflows modulo 256
    # before the "% 180" wrap for large positive offsets, and a negative Python
    # int cannot be combined with a uint8 array at all under NumPy 2 promotion
    # rules. Widen to a signed type first, wrap, then narrow again.
    shifted_hue = (img_hsv[:, :, 0].astype(np.int16) + hue_value) % 180
    img_hsv[:, :, 0] = shifted_hue.astype(np.uint8)

    # Scale in floating point so the clip really saturates at 255, whatever
    # numeric type the boost factors have.
    saturation = img_hsv[:, :, 1].astype(np.float64) * saturation_boost
    img_hsv[:, :, 1] = np.clip(saturation, 0, 255).astype(np.uint8)
    brightness = img_hsv[:, :, 2].astype(np.float64) * brightness_boost
    img_hsv[:, :, 2] = np.clip(brightness, 0, 255).astype(np.uint8)

    return cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB)


def hueAdder1(ori_img, saturation_boost=1.5, brightness_boost=0.9):
    """Hue-shift a BGR image by one of 175, 105 or 70 and return it as RGB.

    Saturation and brightness are scaled by the given factors to make the new
    hue more prominent.
    """
    return _apply_hue_shift(ori_img, [175, 105, 70], saturation_boost, brightness_boost)


def hueAdder2(ori_img, saturation_boost=1.5, brightness_boost=0.9):
    """Hue-shift a BGR image by one of 85, -40, -67 or 140 and return it as RGB.

    Saturation and brightness are scaled by the given factors to make the new
    hue more prominent.
    """
    return _apply_hue_shift(ori_img, [85, -40, -67, 140], saturation_boost, brightness_boost)


def hueAdder3(ori_img, saturation_boost=1.5, brightness_boost=0.9):
    """Hue-shift a BGR image by one of 30, 150 or 50 and return it as RGB.

    Saturation and brightness are scaled by the given factors to make the new
    hue more prominent.
    """
    return _apply_hue_shift(ori_img, [30, 150, 50], saturation_boost, brightness_boost)


def hueAdder4(ori_img, saturation_boost=1.5, brightness_boost=0.9):
    """Hue-shift a BGR image by one of -67 or 140 and return it as RGB.

    Saturation and brightness are scaled by the given factors to make the new
    hue more prominent.
    """
    return _apply_hue_shift(ori_img, [-67, 140], saturation_boost, brightness_boost)

# Augmentation callables, in the order they are applied to each image.
transformation_functions = [
    add_random_shadow,
    add_random_noise,
    add_random_shadow_new,
    shear_image,
    rotate_image,
    hueAdder1,
    hueAdder2,
    hueAdder3,
    hueAdder4,
]

# Short tag for the callable at the same position above; the tag becomes part
# of the augmented image's file name.
transformation_names = [
    "shadow",
    "noise",
    "new_shadow",
    "shear",
    "rotate",
    "hue1",
    "hue2",
    "hue3",
    "hue4",
]

In [None]:
import cv2
import os
import pandas as pd
from PIL import Image, UnidentifiedImageError

# Augment every labelled training image on disk and register the new files.
#
# For each path in img_paths whose file name has a row in train_df, every
# function in transformation_functions is applied once to the image and the
# result is saved next to the source as "<stem>_<transformation>_<label>.jpg".
# Files without a row in train_df are deleted from disk.
#
# NOTE: the outputs land in the same folder the sources were globbed from, so
# re-building img_paths after this cell will also pick up the augmented files.

# Tags of the transformations whose functions already return RGB; every other
# transformation returns BGR (the channel order cv2.imread produces).
RGB_OUTPUT_TRANSFORMS = {"hue1", "hue2", "hue3", "hue4"}

# image_id -> label, built once instead of scanning the frame for every image.
# keep="first" mirrors taking the first matching row.
label_by_image_id = (
    train_df.drop_duplicates(subset="image_id", keep="first")
    .set_index("image_id")["labels"]
    .to_dict()
)

new_entries = []  # One {"image_id", "labels"} record per augmented file written

for img_path in tqdm(img_paths, desc="Augmenting"):
    base_name_with_ext = os.path.basename(img_path)  # e.g., "Right_37.jpg"

    # Files unknown to the training table are removed from disk.
    if base_name_with_ext not in label_by_image_id:
        print(f'{base_name_with_ext}')
        print(f"No entry found in train table for {base_name_with_ext}. Skipping.")
        os.remove(img_path)
        continue

    label = label_by_image_id[base_name_with_ext]

    # cv2.imread signals an unreadable file by returning None; it does not
    # raise, so this has to be checked explicitly.
    image = cv2.imread(img_path)
    if image is None:
        print(f"Skipping unrecognized or corrupted file: {img_path}")
        continue

    base_name = os.path.splitext(base_name_with_ext)[0]
    output_image_dir = os.path.dirname(img_path)

    for func, tname in zip(transformation_functions, transformation_names):
        transformed_image = func(image)

        # PIL expects RGB. Only convert outputs that are still BGR; converting
        # an RGB output again would swap its red and blue channels.
        if tname in RGB_OUTPUT_TRANSFORMS:
            rgb_image = transformed_image
        else:
            rgb_image = cv2.cvtColor(transformed_image, cv2.COLOR_BGR2RGB)

        new_image_name = f"{base_name}_{tname}_{label}.jpg"
        new_image_path = os.path.join(output_image_dir, new_image_name)
        Image.fromarray(rgb_image).save(new_image_path, format='JPEG')

        new_entries.append({'image_id': new_image_name, 'labels': label})

# Explicit columns keep the frame well-formed even when nothing was augmented.
new_entries_df = pd.DataFrame(new_entries, columns=['image_id', 'labels'])

# Re-running this cell rewrites the same file names; only add rows for files
# that are not registered yet so the table does not accumulate duplicates.
new_entries_df = new_entries_df[~new_entries_df['image_id'].isin(train_df['image_id'])]
train_df = pd.concat([train_df, new_entries_df], ignore_index=True)

# Save the updated table alongside the original CSV.
train_df.to_csv("../data_duplicate/train_images_filtered_no_duplicate_transformed.csv", index=False)

In [70]:
# Write the augmented table to disk, then display it. A bare expression is only
# rendered when it is the last statement of the cell, so it has to come second.
train_df.to_csv("../data_duplicate/train_images_filtered_no_duplicate_transformed.csv", index=False)
train_df

In [None]:
# Show the kernel's working directory; the relative "../data_duplicate/..."
# paths used in this notebook are resolved against it.
os.getcwd()

In [None]:
# List the working directory with plain Python. The bare `ls` relied on
# IPython's automagic, which only applies to single-line cells and does not
# exist outside IPython.
sorted(os.listdir())

In [72]:
# import pandas as pd
# import numpy as np
# from imblearn.over_sampling import SMOTE
# from skimage.io import imread
# import os
# # Step 1: Load Images
# image_paths = "../dataset/train_images/"  

# smote_output_folder = "../dataset/smote_images/"   
# os.makedirs(smote_output_folder, exist_ok=True)

# X = []
# y = []

# for idx, row in train_df.iterrows():
#     image_id = row['image_id']
#     label = row['label']
    
#     # Load each image; image_id is used as the file name as-is (no extension is appended)
#     image = imread(f"{image_paths}{image_id}")
    
#     # Append image and label
#     X.append(image)
#     y.append(label)

# X = np.array(X)  # Convert to numpy array
# y = np.array(y)

In [73]:

# # Step 2: Flatten images if necessary
# X_flattened = X.reshape(X.shape[0], -1)  # Flatten to 1D if needed

# # Step 3: Apply SMOTE
# smote = SMOTE(sampling_strategy='auto', random_state=42)
# X_resampled_flattened, y_resampled = smote.fit_resample(X_flattened, y)

# # Step 4: Reshape back if you flattened
# image_shape = X.shape[1:]  # Original shape
# X_resampled = X_resampled_flattened.reshape(-1, *image_shape)

# # Step 5: Create DataFrame with resampled data
# # Optionally, create synthetic `image_id`s for new samples or add a prefix to differentiate
# df_resampled = pd.DataFrame({
#     'image_id': [f"{i}_synthetic" for i in range(len(y_resampled))],  # New synthetic IDs
#     'class_label': y_resampled
# })

# # Save SMOTE-generated images
# for i, img_array in enumerate(X_resampled[len(X):]):  # Only new images
#     img = Image.fromarray(img_array.astype('uint8'), 'RGB')  # Convert numpy array to Image
#     img.save(f"{smote_output_folder}/smote_image_{i}.jpg")  # Save with unique name


In [74]:
# from imblearn.under_sampling import RandomUnderSampler
# from imblearn.over_sampling import RandomOverSampler

# desired_majority_class_size = 6000

# class_counts = df_train["labels"].value_counts()
# undersample_strategy = {class_counts.idxmax(): desired_majority_class_size}

# rus = RandomUnderSampler(sampling_strategy=undersample_strategy, random_state=109)
# X_under, y_under = rus.fit_resample(
#     df_train["image_id"].values.reshape(-1, 1), df_train["labels"].values
# )

# desired_minority_class_size = 6000

# ros = RandomOverSampler(
#     sampling_strategy={
#         label: desired_minority_class_size
#         for label in class_counts.index
#         if class_counts[label] < desired_minority_class_size
#     },
#     random_state=109,
# )
# X_resampled, y_resampled = ros.fit_resample(X_under, y_under)

# df_train_resampled = pd.DataFrame(
#     {"image_id": X_resampled.flatten(), "labels": y_resampled}
# )

# # Check the new class distribution
# print(df_train_resampled["labels"].value_counts())

# df_train_resampled.reset_index(drop=True, inplace=True)
