<a href="https://colab.research.google.com/github/harshavardhan784/Dataset/blob/main/BackgroundToBirdsX.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; every dataset path used below lives under it.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from PIL import Image


In [None]:
# Dataset roots on the mounted Drive
birds_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/BirdsSegmented/'
backgrounds_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Background/'

# Folder that receives the train/test .npy splits
output_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Splits/'

# Bird class name -> folder holding that class's segmented images
categories = {
    class_name: os.path.join(birds_dir, class_name)
    for class_name in ('WaterBirds', 'LandBirds')
}

# Function to load and resize images from a directory
def load_and_resize_images_from_dir(dir_path, target_size=(256, 256), mode=None):
    """Load every .jpg/.png image in ``dir_path``, resized to ``target_size``.

    Files are visited in sorted filename order so the result (and any seeded
    train/test split made from it) is reproducible; ``os.listdir`` alone
    returns names in arbitrary order. The extension check is case-insensitive.

    Args:
        dir_path: Folder to scan (not recursive).
        target_size: ``(width, height)`` passed to ``Image.resize``.
        mode: Optional PIL mode (e.g. ``'RGB'``) applied after resizing.
            ``None`` keeps each image's own mode; if the folder mixes modes
            with different channel counts the arrays will not stack cleanly.

    Returns:
        ``np.ndarray`` holding one array per image, in sorted filename order
        (an empty array when no file matches).
    """
    images = []
    for filename in sorted(os.listdir(dir_path)):
        if not filename.lower().endswith(('.jpg', '.png')):  # Adjust based on your image file types
            continue
        img_path = os.path.join(dir_path, filename)
        with Image.open(img_path) as img:
            img = img.resize(target_size)
            if mode is not None:
                img = img.convert(mode)
            images.append(np.array(img))
    return np.array(images)

# Cut each bird category 80/20 with a fixed seed and keep both parts by name
splits = {}
for category, path in categories.items():
    images = load_and_resize_images_from_dir(path)
    train_images, test_images = train_test_split(images, test_size=0.2, random_state=42)
    splits.update({f'{category}_train': train_images, f'{category}_test': test_images})
    print(f'{category} - train: {len(train_images)}, test: {len(test_images)}')

# Write every part to <output_dir>/<split name>.npy
os.makedirs(output_dir, exist_ok=True)
for split_name in splits:
    np.save(os.path.join(output_dir, f'{split_name}.npy'), splits[split_name])

print("Splitting and saving completed.")


In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from PIL import Image

# Dataset roots on the mounted Drive
birds_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/BirdsSegmented/'
backgrounds_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Background/'

# Folder that receives the train/test .npy splits
output_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Splits/'


# Background class name -> folder holding that class's images
categories = {
    class_name: os.path.join(backgrounds_dir, class_name)
    for class_name in ('WaterBackgrounds', 'LandBackgrounds')
}

# Function to load and resize images from a directory
def load_and_resize_images_from_dir(dir_path, target_size=(256, 256), mode='RGB'):
    """Load every .jpg/.png image in ``dir_path``, resized and converted to ``mode``.

    Files are visited in sorted filename order so the result (and any seeded
    train/test split made from it) is reproducible; ``os.listdir`` alone
    returns names in arbitrary order. The extension check is case-insensitive.

    Args:
        dir_path: Folder to scan (not recursive).
        target_size: ``(width, height)`` passed to ``Image.resize``.
        mode: PIL mode applied after resizing (default ``'RGB'``, so every
            array has the same channel count). ``None`` skips the conversion.

    Returns:
        ``np.ndarray`` holding one array per image, in sorted filename order
        (an empty array when no file matches).
    """
    images = []
    for filename in sorted(os.listdir(dir_path)):
        if not filename.lower().endswith(('.jpg', '.png')):  # Adjust based on your image file types
            continue
        img_path = os.path.join(dir_path, filename)
        with Image.open(img_path) as img:
            img = img.resize(target_size)
            if mode is not None:
                img = img.convert(mode)
            images.append(np.array(img))
    return np.array(images)

# Cut each background category 80/20 with a fixed seed and keep both parts by name
splits = {}
for category, path in categories.items():
    images = load_and_resize_images_from_dir(path)
    train_images, test_images = train_test_split(images, test_size=0.2, random_state=42)
    for suffix, part in (('train', train_images), ('test', test_images)):
        splits[f'{category}_{suffix}'] = part
    print(f'{category} - train: {len(train_images)}, test: {len(test_images)}')

# Write every part to <output_dir>/<split name>.npy
os.makedirs(output_dir, exist_ok=True)
for split_name, split_data in splits.items():
    target_file = os.path.join(output_dir, f'{split_name}.npy')
    np.save(target_file, split_data)

print("Splitting and saving completed.")


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Folders holding the saved train / test arrays
train_data_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Traindata/'
test_data_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Testdata/'

# Training arrays: land/water backgrounds, then water/land segmented birds
(train_land_backgrounds,
 train_water_backgrounds,
 train_seg_water_birds,
 train_seg_land_birds) = [
    np.load(train_data_dir + stem + '_train.npy')
    for stem in ('LandBackgrounds', 'WaterBackgrounds', 'Seg_WaterBirds', 'Seg_LandBirds')
]

# Test arrays, same order
(test_land_backgrounds,
 test_water_backgrounds,
 test_seg_water_birds,
 test_seg_land_birds) = [
    np.load(test_data_dir + stem + '_test.npy')
    for stem in ('LandBackgrounds', 'WaterBackgrounds', 'Seg_WaterBirds', 'Seg_LandBirds')
]

# Function to plot images
def plot_images(images, title):
    """Show up to the first 10 images of ``images`` in a 2x5 grid.

    Args:
        images: Sequence or array of images; iterated along its first axis.
        title: Figure-level title.

    When fewer than 10 images are supplied the remaining panels stay blank
    instead of raising ``IndexError``.
    """
    fig, axes = plt.subplots(2, 5, figsize=(15, 6))
    axes = axes.flatten()
    # zip() stops at the shorter input, so at most len(axes) == 10 images are drawn.
    for ax, image in zip(axes, images):
        ax.imshow(image)
    # Hide the frame/ticks on every panel, including unused ones.
    for ax in axes:
        ax.axis('off')
    fig.suptitle(title)
    plt.tight_layout()
    plt.show()

# Report the dimensions of an array
def print_sizes(images, title):
    """Print ``"<title> size: <shape>"`` for ``images`` (anything with ``.shape``)."""
    shape = images.shape
    print(f'{title} size: {shape}')

# Each loaded array with its display label (train first, then test)
labelled_arrays = [
    (train_land_backgrounds, 'Train Land Backgrounds'),
    (train_water_backgrounds, 'Train Water Backgrounds'),
    (train_seg_water_birds, 'Train Segmented Water Birds'),
    (train_seg_land_birds, 'Train Segmented Land Birds'),
    (test_land_backgrounds, 'Test Land Backgrounds'),
    (test_water_backgrounds, 'Test Water Backgrounds'),
    (test_seg_water_birds, 'Test Segmented Water Birds'),
    (test_seg_land_birds, 'Test Segmented Land Birds'),
]

# First all the shapes ...
for array, label in labelled_arrays:
    print_sizes(array, label)

# ... then a 10-image preview of each array
for array, label in labelled_arrays:
    plot_images(array, label)


20% segmented birds test data

In [None]:
import numpy as np

import os

# Directories
test_data_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Testdata/'
output_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Testdata/Seg_birds_test/'

# np.save() does not create missing folders, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

# Load the held-out segmented birds. These are TEST arrays, so they get
# test_* names; binding them to train_* names would overwrite the training
# arrays loaded by an earlier cell.
test_seg_land_birds = np.load(test_data_dir + 'Seg_LandBirds_test.npy')
test_seg_water_birds = np.load(test_data_dir + 'Seg_WaterBirds_test.npy')

# Combine data and labels (0 = land bird, 1 = water bird)
# NOTE(review): labels are float64 here while other cells save integer labels - confirm what consumers expect.
test_data = np.concatenate((test_seg_land_birds, test_seg_water_birds), axis=0)
test_labels = np.concatenate(
    (np.zeros(test_seg_land_birds.shape[0]), np.ones(test_seg_water_birds.shape[0])),
    axis=0,
)

# Shuffle data and labels together; the fixed seed makes the saved order reproducible.
p = np.random.default_rng(42).permutation(len(test_data))
test_data = test_data[p]
test_labels = test_labels[p]

# Print sizes of combined arrays
print(f'Test data size: {test_data.shape}')
print(f'Test labels size: {test_labels.shape}')

# Save arrays to npy files
np.save(output_dir + 'test_data.npy', test_data)
np.save(output_dir + 'test_labels.npy', test_labels)

print("Test data and labels saved.")


20% Standard data x = 100

In [None]:
import numpy as np
import os

# Directories and file paths for numpy arrays
birds_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Testdata/Seg_LandBirds_test.npy'
backgrounds_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Testdata/LandBackgrounds_test.npy'
output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Testdata/Standard_birds_test100/'

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Load numpy arrays (one image per entry along the first axis)
birds_array = np.load(birds_dir)
backgrounds_array = np.load(backgrounds_dir)

# Images are paired by position, so both stacks must have the same length
assert len(birds_array) == len(backgrounds_array), "Number of bird images and background images must be the same"

# A pixel belongs to the bird when it is not pure black, i.e. when ANY of its
# first three channels is non-zero. Requiring all channels to be non-zero
# would wrongly treat bird pixels such as pure red (255, 0, 0) as background.
mask = (birds_array[..., :3] != 0).any(axis=-1, keepdims=True)

# Keep bird pixels, fill everything else from the background (whole stack at once)
combined_images_np = np.where(mask, birds_array, backgrounds_array)

# Save combined images as .npy file
output_file = os.path.join(output_dir, 'Combined_LandBirds_and_Backgrounds.npy')
np.save(output_file, combined_images_np)

print(f"Combined images saved as .npy file: {output_file}")


In [None]:
import numpy as np
import os

# Directories and file paths for numpy arrays
birds_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Testdata/Seg_WaterBirds_test.npy'
backgrounds_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Testdata/WaterBackgrounds_test.npy'
output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Testdata/Standard_birds_test100/'

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Load numpy arrays (one image per entry along the first axis)
birds_array = np.load(birds_dir)
backgrounds_array = np.load(backgrounds_dir)

# Images are paired by position, so both stacks must have the same length
assert len(birds_array) == len(backgrounds_array), "Number of bird images and background images must be the same"

# A pixel belongs to the bird when it is not pure black, i.e. when ANY of its
# first three channels is non-zero. Requiring all channels to be non-zero
# would wrongly treat bird pixels such as pure red (255, 0, 0) as background.
mask = (birds_array[..., :3] != 0).any(axis=-1, keepdims=True)

# Keep bird pixels, fill everything else from the background (whole stack at once)
combined_images_np = np.where(mask, birds_array, backgrounds_array)

# Save combined images as .npy file
output_file = os.path.join(output_dir, 'Combined_WaterBirds_and_Backgrounds.npy')
np.save(output_file, combined_images_np)

print(f"Combined images saved as .npy file: {output_file}")


In [None]:
import numpy as np
import os

# Folder holding the two per-class composite stacks (and receiving the merged set)
output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Testdata/Standard_birds_test100/'

# Read both composite stacks
file1_path = os.path.join(output_dir, 'Combined_LandBirds_and_Backgrounds.npy')
file2_path = os.path.join(output_dir, 'Combined_WaterBirds_and_Backgrounds.npy')
combined_file1, combined_file2 = np.load(file1_path), np.load(file2_path)

# Stack land composites first, water composites second
combined_data = np.concatenate([combined_file1, combined_file2], axis=0)

# Matching labels: 0 for LandBirds, 1 for WaterBirds
labels_file1 = np.full(len(combined_file1), 0, dtype=np.int32)
labels_file2 = np.full(len(combined_file2), 1, dtype=np.int32)
combined_labels = np.concatenate([labels_file1, labels_file2], axis=0)

# Persist data and labels next to the inputs
combined_data_output_path = os.path.join(output_dir, 'Test_data_Standard100.npy')
combined_labels_output_path = os.path.join(output_dir, 'Test_labels_Standard100.npy')
for target_path, payload in (
    (combined_data_output_path, combined_data),
    (combined_labels_output_path, combined_labels),
):
    np.save(target_path, payload)

print(f"Combined data saved as .npy file: {combined_data_output_path}")
print(f"Combined labels saved as .npy file: {combined_labels_output_path}")


In [None]:
import os

# Folder that holds the composited test arrays
output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Testdata/Standard_birds_test100/'

# Print the names of the entries currently in that folder
print(os.listdir(output_dir))

# Function to count images in a numpy array file
def count_images_in_npy(file_path):
    """Return the length of the first axis of the array stored in ``file_path``.

    The file is opened memory-mapped (``mmap_mode='r'``), so only the header
    is needed to answer; the image data is not read into RAM.

    Args:
        file_path: Path to a ``.npy`` file.

    Returns:
        ``len()`` of the stored array.
    """
    array = np.load(file_path, mmap_mode='r')
    return len(array)

# Count images in the merged test set (Test_data_Standard100.npy)
file1_path = os.path.join(output_dir, 'Test_data_Standard100.npy')
num_images_file1 = count_images_in_npy(file1_path)


# Report the count under the name of the file that was actually read
print(f"Number of images in {os.path.basename(file1_path)}: {num_images_file1}")


80% Train data Standard

In [None]:
import numpy as np
import os

# Directories and file paths for numpy arrays
birds_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Traindata/Seg_LandBirds_train.npy'
backgrounds_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Traindata/LandBackgrounds_train.npy'
output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Traindata/Standard_birds_train100/'

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Load numpy arrays (one image per entry along the first axis)
birds_array = np.load(birds_dir)
backgrounds_array = np.load(backgrounds_dir)

# Images are paired by position, so both stacks must have the same length
assert len(birds_array) == len(backgrounds_array), "Number of bird images and background images must be the same"

# A pixel belongs to the bird when it is not pure black, i.e. when ANY of its
# first three channels is non-zero. Requiring all channels to be non-zero
# would wrongly treat bird pixels such as pure red (255, 0, 0) as background.
mask = (birds_array[..., :3] != 0).any(axis=-1, keepdims=True)

# Keep bird pixels, fill everything else from the background (whole stack at once)
combined_images_np = np.where(mask, birds_array, backgrounds_array)

# Save combined images as .npy file
output_file = os.path.join(output_dir, 'Combined_LandBirds_and_Backgrounds.npy')
np.save(output_file, combined_images_np)

print(f"Combined images saved as .npy file: {output_file}")


In [None]:
import numpy as np
import os

# Directories and file paths for numpy arrays
birds_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Traindata/Seg_WaterBirds_train.npy'
backgrounds_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Traindata/WaterBackgrounds_train.npy'
output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Traindata/Standard_birds_train100/'

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Load numpy arrays (one image per entry along the first axis)
birds_array = np.load(birds_dir)
backgrounds_array = np.load(backgrounds_dir)

# Images are paired by position, so both stacks must have the same length
assert len(birds_array) == len(backgrounds_array), "Number of bird images and background images must be the same"

# A pixel belongs to the bird when it is not pure black, i.e. when ANY of its
# first three channels is non-zero. Requiring all channels to be non-zero
# would wrongly treat bird pixels such as pure red (255, 0, 0) as background.
mask = (birds_array[..., :3] != 0).any(axis=-1, keepdims=True)

# Keep bird pixels, fill everything else from the background (whole stack at once)
combined_images_np = np.where(mask, birds_array, backgrounds_array)

# Save combined images as .npy file
output_file = os.path.join(output_dir, 'Combined_WaterBirds_and_Backgrounds.npy')
np.save(output_file, combined_images_np)

print(f"Combined images saved as .npy file: {output_file}")


In [None]:
import numpy as np
import os

# Folder holding the two per-class composite stacks (and receiving the merged set)
output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Traindata/Standard_birds_train100/'

# Read both composite stacks
file1_path = os.path.join(output_dir, 'Combined_LandBirds_and_Backgrounds.npy')
file2_path = os.path.join(output_dir, 'Combined_WaterBirds_and_Backgrounds.npy')
combined_file1, combined_file2 = np.load(file1_path), np.load(file2_path)

# Stack land composites first, water composites second
combined_data = np.concatenate([combined_file1, combined_file2], axis=0)

# Matching labels: 0 for LandBirds, 1 for WaterBirds
labels_file1 = np.full(len(combined_file1), 0, dtype=np.int32)
labels_file2 = np.full(len(combined_file2), 1, dtype=np.int32)
combined_labels = np.concatenate([labels_file1, labels_file2], axis=0)

# Persist data and labels next to the inputs
combined_data_output_path = os.path.join(output_dir, 'Train_data_Standard100.npy')
combined_labels_output_path = os.path.join(output_dir, 'Train_labels_Standard100.npy')
for target_path, payload in (
    (combined_data_output_path, combined_data),
    (combined_labels_output_path, combined_labels),
):
    np.save(target_path, payload)

print(f"Combined data saved as .npy file: {combined_data_output_path}")
print(f"Combined labels saved as .npy file: {combined_labels_output_path}")


In [None]:
import os

# Folder that holds the composited training arrays
output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Traindata/Standard_birds_train100/'

# Print the names of the entries currently in that folder
print(os.listdir(output_dir))

# Function to count images in a numpy array file
def count_images_in_npy(file_path):
    """Return the length of the first axis of the array stored in ``file_path``.

    The file is opened memory-mapped (``mmap_mode='r'``), so only the header
    is needed to answer; the image data is not read into RAM.

    Args:
        file_path: Path to a ``.npy`` file.

    Returns:
        ``len()`` of the stored array.
    """
    array = np.load(file_path, mmap_mode='r')
    return len(array)

# Count images in the merged training set (Train_data_Standard100.npy)
file1_path = os.path.join(output_dir, 'Train_data_Standard100.npy')
num_images_file1 = count_images_in_npy(file1_path)


# Report the count under the name of the file that was actually read
print(f"Number of images in {os.path.basename(file1_path)}: {num_images_file1}")


Train Data 75% Direct + 5% Inverse Correlation

In [None]:
import os

# Folder with the per-class training arrays
output_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Traindata/'

# List all entries in the directory
files = os.listdir(output_dir)

# Bare expression: shown as the cell's output
files

['Seg_LandBirds_train.npy',
 'Seg_WaterBirds_train.npy',
 'WaterBackgrounds_train.npy',
 'LandBackgrounds_train.npy',
 'Standard_birds_train100']

In [None]:
import numpy as np
import os

# Directories and file paths for numpy arrays
output_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Traindata/'

# Load numpy arrays
seg_water_birds = np.load(os.path.join(output_dir, 'Seg_WaterBirds_train.npy'))
seg_land_birds = np.load(os.path.join(output_dir, 'Seg_LandBirds_train.npy'))
water_backgrounds = np.load(os.path.join(output_dir, 'WaterBackgrounds_train.npy'))
land_backgrounds = np.load(os.path.join(output_dir, 'LandBackgrounds_train.npy'))

# Number of leading images taken from each array for the inverse-correlation part
# NOTE(review): 589 is hard-coded; presumably derived from the dataset size - confirm.
split_count = 589

# Split Segmented Water Birds (in memory only - nothing is written to disk here)
seg_water_birds_first = seg_water_birds[:split_count]
seg_water_birds_rest = seg_water_birds[split_count:]

# Split Segmented Land Birds
seg_land_birds_first = seg_land_birds[:split_count]
seg_land_birds_rest = seg_land_birds[split_count:]

# Split Water Backgrounds
water_backgrounds_first = water_backgrounds[:split_count]
water_backgrounds_rest = water_backgrounds[split_count:]

# Split Land Backgrounds
land_backgrounds_first = land_backgrounds[:split_count]
land_backgrounds_rest = land_backgrounds[split_count:]

# The message states what actually happened: the slices stay in memory.
print(f"Split each array into its first {split_count} images and the rest (kept in memory, not saved).")


Split and saved first 589 images and rest as separate .npy files.


In [None]:
import numpy as np
import os

def combine(birds_array, backgrounds_array, output_dir, black_threshold=0):
    """Paste segmented birds onto backgrounds, pairing images by position.

    A pixel is taken from the bird image when it is not black, i.e. when ANY
    of its first three channels exceeds ``black_threshold``; every other pixel
    is taken from the background. (Requiring all channels to be non-zero would
    wrongly replace bird pixels such as pure red (255, 0, 0).)

    Args:
        birds_array: Stack of segmented bird images, first axis = image index.
        backgrounds_array: Stack of background images with the same length
            and a shape that broadcasts against ``birds_array``.
        output_dir: Unused; kept so existing calls keep working (nothing is
            written to disk by this function).
        black_threshold: Channel values up to this level count as black.
            The default 0 keeps only exact black as background; a small
            positive value also ignores near-black noise.

    Returns:
        ``np.ndarray`` of combined images, same length as the inputs.

    Raises:
        AssertionError: If the two stacks differ in length.
    """
    # Ensure the number of bird images and background images are the same
    assert len(birds_array) == len(backgrounds_array), "Number of bird images and background images must be the same"

    birds_array = np.asarray(birds_array)
    backgrounds_array = np.asarray(backgrounds_array)

    # keepdims leaves a trailing axis of size 1 so the mask broadcasts over channels
    mask = (birds_array[..., :3] > black_threshold).any(axis=-1, keepdims=True)

    # Whole stack at once: bird where masked, background elsewhere
    return np.where(mask, birds_array, backgrounds_array)


In [None]:
# Target folder passed to combine() below and used for the Train_data95 / Train_labels95 files
output_dir = '/content/drive/My Drive/RBCDSAI/Datasets/Datasets/Waterbird Datasets/NewDataset/Traindata/birds_train95'


In [None]:
# Inverse pairing: the first `split_count` water birds on land backgrounds
waterbirds_5 = combine(seg_water_birds_first, land_backgrounds_first, output_dir)

In [None]:
# Inverse pairing: the first `split_count` land birds on water backgrounds
landbirds_5 = combine(seg_land_birds_first, water_backgrounds_first, output_dir)

In [None]:
# Direct pairing: the remaining water birds on the remaining water backgrounds
waterbirds_75 = combine(seg_water_birds_rest, water_backgrounds_rest, output_dir)

In [None]:
# Direct pairing: the remaining land birds on the remaining land backgrounds
landbirds_75 = combine(seg_land_birds_rest, land_backgrounds_rest, output_dir)

In [None]:
# Image counts of the four composited parts, in the order they are concatenated later
print(*map(len, (waterbirds_5, landbirds_5, waterbirds_75, landbirds_75)))

589 589 1349 6903


In [None]:
import numpy as np
import os

# Create labels (1 = water bird, 0 = land bird), one per composited image
labels_waterbirds_5 = np.ones(len(waterbirds_5), dtype=int)
labels_landbirds_5 = np.zeros(len(landbirds_5), dtype=int)
labels_waterbirds_75 = np.ones(len(waterbirds_75), dtype=int)
labels_landbirds_75 = np.zeros(len(landbirds_75), dtype=int)

# Combine images and labels in the same order so index i of both arrays match
images_combined = np.concatenate((waterbirds_5, landbirds_5, waterbirds_75, landbirds_75), axis=0)
labels_combined = np.concatenate((labels_waterbirds_5, labels_landbirds_5, labels_waterbirds_75, labels_landbirds_75), axis=0)

# np.save() does not create missing folders, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

# Save combined images and labels as .npy files
np.save(os.path.join(output_dir, 'Train_data95.npy'), images_combined)
np.save(os.path.join(output_dir, 'Train_labels95.npy'), labels_combined)

print("Combined images and labels saved as .npy files.")


Combined images and labels saved as .npy files.


In [None]:
import os

# Number of entries in the land-backgrounds folder (bare expression: shown as the cell's output)
len(os.listdir('/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Background/LandBackgrounds'))

In [None]:
# Number of entries in the segmented land-birds folder
birds_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/BirdsSegmented/LandBirds/'
folder = os.listdir(birds_dir)
print(len(folder))

In [None]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Directories: segmented birds and backgrounds (each with per-class subfolders, see the
# loads below), plus the folder that receives the train/test .npy files saved by this cell
birds_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/BirdsSegmented/'
backgrounds_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Background/'
output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Birds95/LandBirdImages/'

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Function to load images
def load_images_from_folder(folder):
    """Open every file in ``folder`` as an RGBA PIL image.

    Files are read in sorted filename order so the returned list is
    reproducible (``os.listdir`` alone returns names in arbitrary order).
    Each file is opened in a ``with`` block: ``convert`` returns a separate
    loaded image, so the file handle can be closed immediately rather than
    left open for every image in the folder.

    Args:
        folder: Directory whose entries are all expected to be image files.

    Returns:
        List of RGBA images, one per directory entry.
    """
    images = []
    for filename in sorted(os.listdir(folder)):
        with Image.open(os.path.join(folder, filename)) as source:
            images.append(source.convert("RGBA"))
    return images

# Load the two bird classes, then the two background classes
land_birds, water_birds = (
    load_images_from_folder(os.path.join(birds_dir, class_dir))
    for class_dir in ('LandBirds', 'WaterBirds')
)
land_backgrounds, water_backgrounds = (
    load_images_from_folder(os.path.join(backgrounds_dir, class_dir))
    for class_dir in ('LandBackgrounds', 'WaterBackgrounds')
)

# 80/20 train/test split; birds and backgrounds of a class are split together,
# so index i of the bird list still lines up with index i of the background list
(land_birds_train, land_birds_test,
 land_backgrounds_train, land_backgrounds_test) = train_test_split(
    land_birds, land_backgrounds, test_size=0.2, random_state=42)
(water_birds_train, water_birds_test,
 water_backgrounds_train, water_backgrounds_test) = train_test_split(
    water_birds, water_backgrounds, test_size=0.2, random_state=42)

# Function to combine bird and background images
def combine_images(bird_img, background_img):
    """Paste a segmented bird onto a background and return an RGB image.

    Both inputs are resized to 256x256 with LANCZOS resampling. A pixel is
    taken from the bird when it is not black, i.e. when ANY of its first three
    channels is non-zero; all other pixels come from the background.
    (Requiring all channels to be non-zero would wrongly replace bird pixels
    such as pure red (255, 0, 0).)

    Args:
        bird_img: PIL image of the segmented bird.
        background_img: PIL image of the background, same mode as ``bird_img``.
            The combined array is interpreted as RGBA, as in the callers in
            this notebook, which load images with ``convert("RGBA")``.

    Returns:
        The combined picture as an RGB PIL image.
    """
    bird_array = np.array(bird_img.resize((256, 256), Image.LANCZOS))
    background_array = np.array(background_img.resize((256, 256), Image.LANCZOS))
    # keepdims leaves a trailing axis of size 1 so the mask broadcasts over channels
    mask = (bird_array[..., :3] != 0).any(axis=-1, keepdims=True)
    combined_array = np.where(mask, bird_array, background_array)
    return Image.fromarray(combined_array, 'RGBA').convert("RGB")

# Function to create the dataset
def create_dataset(bird_images, background_images, correlation="direct"):
    """Composite birds onto backgrounds and label each result.

    Reads the notebook globals ``land_birds_train``, ``land_birds_test``,
    ``water_backgrounds_train``, ``land_backgrounds_train`` and the
    ``combine_images`` helper.

    Args:
        bird_images: Bird images; each must be the same object as an entry of
            the train/test lists (slices of those lists qualify), because
            class membership is decided by object identity.
        background_images: Backgrounds paired with the birds by position. In
            ``"inverse"`` mode they only determine how many pairs are produced.
        correlation: ``"direct"`` uses the paired background as given;
            ``"inverse"`` replaces it with a random training background of the
            opposite class (drawn with ``np.random``; seed it beforehand for
            reproducible output).

    Returns:
        ``(images, labels)`` as numpy arrays; label 0 = land bird, 1 = water bird.
    """
    # Identity lookup: `img in some_list` compares PIL images by content and
    # scans the whole list, which is quadratic over thousands of images.
    land_bird_ids = {id(img) for img in land_birds_train}
    land_bird_ids.update(id(img) for img in land_birds_test)

    combined_images = []
    labels = []
    for bird_img, background_img in zip(bird_images, background_images):
        is_land_bird = id(bird_img) in land_bird_ids
        if correlation == "inverse":
            # Opposite-class background: water for land birds, land for water birds
            pool = water_backgrounds_train if is_land_bird else land_backgrounds_train
            background_img = pool[np.random.randint(0, len(pool))]
        combined_image = combine_images(bird_img, background_img)
        combined_images.append(np.array(combined_image))
        labels.append(0 if is_land_bird else 1)  # 0 for land bird, 1 for water bird
    return np.array(combined_images), np.array(labels)

# Create training dataset (75% direct correlation, 5% inverse correlation)
# The training split is 80% of all data, so "75% direct + 5% inverse" of the
# whole dataset means 75/80 of the training split is direct and the rest inverse.
direct_fraction = 75 / 80
n_land_direct = int(direct_fraction * len(land_birds_train))
n_water_direct = int(direct_fraction * len(water_birds_train))

land_birds_train_75 = land_birds_train[:n_land_direct]
land_birds_train_5 = land_birds_train[n_land_direct:]
water_birds_train_75 = water_birds_train[:n_water_direct]
water_birds_train_5 = water_birds_train[n_water_direct:]

# create_dataset pairs birds and backgrounds by position, so the backgrounds
# must be sliced with the same counts as the birds. Concatenating the full
# background lists would line the water birds up with land backgrounds.
train_images_75, train_labels_75 = create_dataset(
    land_birds_train_75 + water_birds_train_75,
    land_backgrounds_train[:n_land_direct] + water_backgrounds_train[:n_water_direct],
    correlation="direct",
)

# Inverse mode draws random backgrounds; seed so the saved dataset is reproducible.
np.random.seed(42)
train_images_5, train_labels_5 = create_dataset(
    land_birds_train_5 + water_birds_train_5,
    land_backgrounds_train[n_land_direct:] + water_backgrounds_train[n_water_direct:],
    correlation="inverse",
)
train_images = np.concatenate((train_images_75, train_images_5))
train_labels = np.concatenate((train_labels_75, train_labels_5))

# Create test dataset (20% direct correlation)
test_images, test_labels = create_dataset(land_birds_test + water_birds_test, land_backgrounds_test + water_backgrounds_test, correlation="direct")

# Save combined images and labels as numpy files
np.save(os.path.join(output_dir, 'train_images.npy'), train_images)
np.save(os.path.join(output_dir, 'train_labels.npy'), train_labels)
np.save(os.path.join(output_dir, 'test_images.npy'), test_images)
np.save(os.path.join(output_dir, 'test_labels.npy'), test_labels)

# Preview up to 20 test composites in a 4x5 grid, titled with their labels
n_images_to_plot = min(20, len(test_images))

plt.figure(figsize=(20, 10))
preview = zip(test_images[:n_images_to_plot], test_labels)
for position, (img, label) in enumerate(preview, start=1):
    plt.subplot(4, 5, position)  # 4 rows, 5 columns
    plt.imshow(img)
    plt.axis('off')
    plt.title(f"Label {label}")

plt.tight_layout()
plt.show()

print(f"Combined images and labels saved in: {output_dir}")


In [None]:
from PIL import Image
import os
import matplotlib.pyplot as plt
import numpy as np

# Directories
birds_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/BirdsSegmented/LandBirds/'
backgrounds_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Background/LandBackgrounds/'
output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Birds95/LandBirdImages/'

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Get the list of bird images and background images. os.listdir() returns
# names in arbitrary order; sorting makes the bird/background pairing reproducible.
bird_images = sorted(os.listdir(birds_dir))
background_images = sorted(os.listdir(backgrounds_dir))

# Images are paired by position, so both folders must hold the same number of files
assert len(bird_images) == len(background_images), "Number of bird images and background images must be the same"

# Only the first few composites are kept in memory for the preview plot;
# every composite is written to disk inside the loop.
n_preview = 20
combined_images = []

for bird_image_name, background_image_name in zip(bird_images, background_images):
    bird_image_path = os.path.join(birds_dir, bird_image_name)
    background_image_path = os.path.join(backgrounds_dir, background_image_name)

    # Open each file in a `with` block so its handle is closed right away;
    # convert() returns a separate loaded image that outlives the file.
    with Image.open(bird_image_path) as source:
        bird_image = source.convert("RGBA").resize((256, 256), Image.LANCZOS)
    with Image.open(background_image_path) as source:
        background_image = source.convert("RGBA").resize((256, 256), Image.LANCZOS)

    # Convert images to numpy arrays
    bird_array = np.array(bird_image)
    background_array = np.array(background_image)

    # A pixel belongs to the bird when it is not pure black, i.e. when ANY of
    # its RGB channels is non-zero. Requiring all channels to be non-zero would
    # wrongly replace bird pixels such as pure red (255, 0, 0).
    mask = (bird_array[..., :3] != 0).any(axis=-1, keepdims=True)

    # Bird where masked, background elsewhere
    combined_array = np.where(mask, bird_array, background_array)

    # Convert the combined array back to an image
    combined_image = Image.fromarray(combined_array, 'RGBA')
    combined_image = combined_image.convert("RGB")  # Convert to RGB for plotting

    # Save the combined image
    output_image_path = os.path.join(output_dir, f"combined_{bird_image_name}")
    combined_image.save(output_image_path)

    # Keep just enough images for the preview below
    if len(combined_images) < n_preview:
        combined_images.append(combined_image)

# Plot some images
n_images_to_plot = min(20, len(combined_images))

plt.figure(figsize=(20, 10))
for i in range(n_images_to_plot):
    img = combined_images[i]

    plt.subplot(4, 5, i + 1)  # 4 rows, 5 columns
    plt.imshow(img)
    plt.axis('off')
    plt.title(f"Image {i+1}")

plt.tight_layout()
plt.show()

print(f"Combined images saved in: {output_dir}")


In [None]:
# Preview up to 20 of the composites in a 4x5 grid
n_images_to_plot = min(20, len(combined_images))

plt.figure(figsize=(20, 10))
for position, img in enumerate(combined_images[:n_images_to_plot], start=1):
    plt.subplot(4, 5, position)  # 4 rows, 5 columns
    plt.imshow(img)
    plt.axis('off')
    plt.title(f"Image {position}")

plt.tight_layout()
plt.show()


Water birds on water backgrounds

In [None]:
from PIL import Image
import os
import matplotlib.pyplot as plt
import numpy as np

# Directories
birds_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/WaterBirds/'
backgrounds_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/WaterBackgrounds/'
output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/WaterBirdImages/'

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Get the list of bird images and background images.
# os.listdir() returns entries in arbitrary order and the loop below pairs the
# two lists positionally, so sort both to get the same bird/background pairing
# on every run.
bird_images = sorted(os.listdir(birds_dir))
background_images = sorted(os.listdir(backgrounds_dir))

# The positional pairing needs exactly one background per bird.
assert len(bird_images) == len(background_images), "Number of bird images and background images must be the same"

# Store combined images for plotting
combined_images = []

for bird_image_name, background_image_name in zip(bird_images, background_images):
    bird_image_path = os.path.join(birds_dir, bird_image_name)
    background_image_path = os.path.join(backgrounds_dir, background_image_name)

    # Open the bird image and resize to 256x256. convert() returns a new
    # in-memory image, so the file handle is released when the `with` ends.
    with Image.open(bird_image_path) as bird_file:
        bird_image = bird_file.convert("RGBA")
    bird_image = bird_image.resize((256, 256), Image.LANCZOS)

    # Open the background image and resize to 256x256
    with Image.open(background_image_path) as background_file:
        background_image = background_file.convert("RGBA")
    background_image = background_image.resize((256, 256), Image.LANCZOS)

    # Convert images to numpy arrays
    bird_array = np.array(bird_image)
    background_array = np.array(background_image)

    # Create a mask where the bird pixels are not black. A pixel is "not black"
    # when ANY of its RGB channels is non-zero; requiring all three to be
    # non-zero would discard bird pixels such as pure red (255, 0, 0).
    mask = (bird_array[..., :3] != 0).any(axis=-1)

    # Combine the bird and background images using the mask
    combined_array = np.where(mask[..., None], bird_array, background_array)

    # Convert the combined array back to an image
    combined_image = Image.fromarray(combined_array, 'RGBA')
    combined_image = combined_image.convert("RGB")  # Convert to RGB for plotting

    # Save the combined image
    output_image_path = os.path.join(output_dir, f"combined_{bird_image_name}")
    combined_image.save(output_image_path)

    # Append to the list of combined images
    combined_images.append(combined_image)

# Preview the water-bird composites: at most 20 of them on a 4x5 grid.
n_images_to_plot = min(20, len(combined_images))

plt.figure(figsize=(20, 10))
for i, img in enumerate(combined_images[:n_images_to_plot]):
    plt.subplot(4, 5, i + 1)  # 1-based slot in the 4-row, 5-column grid
    plt.imshow(img)
    plt.axis('off')
    plt.title(f"Image {i+1}")

plt.tight_layout()
plt.show()

# Tell the reader where the files ended up.
print(f"Combined images saved in: {output_dir}")


In [None]:
import os
import numpy as np
from PIL import Image
import random
import shutil

# Output location for the generated .npy batches; create it before anything
# tries to write there.
output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Birds95/'
os.makedirs(output_dir, exist_ok=True)

# Input locations: bird images and background images.
birds_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/BirdsSegmented/'
backgrounds_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Background/'

# Function to load images from a folder and yield in batches
def load_images_from_folder(folder, batch_size=100):
    """Yield the images stored in ``folder`` as lists of at most ``batch_size``.

    Args:
        folder: Directory to read. Only regular files directly inside it are
            opened; sub-directories are skipped.
        batch_size: Maximum number of images per yielded list.

    Yields:
        Lists of RGBA ``PIL.Image.Image`` objects. Every list holds
        ``batch_size`` images except possibly the last one. Nothing is yielded
        when the folder contains no files.
    """
    images = []
    # os.listdir() order is arbitrary; sort so the batches are reproducible.
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        if not os.path.isfile(path):
            continue
        # convert() returns an independent in-memory image, so the file handle
        # can be closed as soon as the conversion is done.
        with Image.open(path) as source:
            images.append(source.convert("RGBA"))
        if len(images) == batch_size:
            yield images
            images = []
    if images:
        yield images

# Function to combine bird and background images
def combine_images(bird_img, background_img):
    """Overlay the non-black pixels of ``bird_img`` on ``background_img``.

    Both images are resized to 256x256 with LANCZOS resampling. Every pixel of
    the bird image whose RGB value is not (0, 0, 0) is kept; all other pixels
    are taken from the background.

    Args:
        bird_img: PIL image of a bird in which black marks "no bird". Expected
            in RGBA mode, because the composite is rebuilt as an RGBA image.
        background_img: PIL image used wherever the bird image is black;
            expected in RGBA mode as well.

    Returns:
        The 256x256 composite as an RGB PIL image.
    """
    bird_img = bird_img.resize((256, 256), Image.LANCZOS)
    background_img = background_img.resize((256, 256), Image.LANCZOS)
    bird_array = np.array(bird_img)
    background_array = np.array(background_img)
    # A pixel is "not black" when ANY colour channel is non-zero. Requiring
    # all three channels to be non-zero would discard bird pixels such as
    # pure red (255, 0, 0).
    mask = (bird_array[..., :3] != 0).any(axis=-1)
    combined_array = np.where(mask[..., None], bird_array, background_array)
    return Image.fromarray(combined_array, 'RGBA').convert("RGB")

# Function to process and save batches
def process_and_save_batches(bird_folders, background_images, correlation, split_ratio=0.8, batch_size=100):
    """Composite bird images onto backgrounds and save them as ``.npy`` batches.

    The folders are walked in the given order and their images are counted
    cumulatively. Batches that start before ``int(total * split_ratio)`` are
    written as ``train`` batches, the remaining ones as ``test`` batches. The
    split therefore happens at batch granularity and follows folder order; it
    is neither shuffled nor stratified by class.

    Args:
        bird_folders: Paths of the bird image folders. Images from a folder
            whose path contains ``'LandBirds'`` get label 0, all others 1.
        background_images: Pool of individual background images (not batches)
            sampled with ``random.choice`` unless ``correlation`` is
            ``"inverse"``.
        correlation: ``"inverse"`` pairs land birds with the module-level
            ``water_backgrounds`` and all other birds with the module-level
            ``land_backgrounds``; any other value samples from
            ``background_images``.
        split_ratio: Fraction of the total image count assigned to ``train``.
        batch_size: Batch size forwarded to ``load_images_from_folder``.

    Side effects:
        Writes ``{split}_images_batch_{i}.npy`` and
        ``{split}_labels_batch_{i}.npy`` into the module-level ``output_dir``.
        The file names encode neither ``correlation`` nor ``split_ratio``, so
        a later call overwrites batches with the same split and index.
    """
    total_images = sum(len(os.listdir(folder)) for folder in bird_folders)
    train_count = int(total_images * split_ratio)

    def save_batches(start_idx, end_idx, split_name):
        """Write the batches whose first image index lies in [start_idx, end_idx)."""
        bird_count = 0  # images seen so far, across all folders
        batch_idx = 0  # index of the next batch file for this split
        for bird_folder in bird_folders:
            # Class and background pool depend only on the folder, so resolve
            # them once per folder instead of once per image.
            is_land_bird = 'LandBirds' in bird_folder
            label = 0 if is_land_bird else 1  # 0 for land bird, 1 for water bird
            if correlation == "inverse":
                pool = water_backgrounds if is_land_bird else land_backgrounds
            else:
                pool = background_images

            for bird_images in load_images_from_folder(bird_folder, batch_size):
                if bird_count >= end_idx:
                    return
                if bird_count >= start_idx:
                    batch_combined = [
                        np.array(combine_images(bird_img, random.choice(pool)))
                        for bird_img in bird_images
                    ]
                    combined_images = np.array(batch_combined)
                    labels = np.array([label] * len(bird_images))

                    # Save the batch
                    np.save(os.path.join(output_dir, f'{split_name}_images_batch_{batch_idx}.npy'), combined_images)
                    np.save(os.path.join(output_dir, f'{split_name}_labels_batch_{batch_idx}.npy'), labels)

                    print(f"Processed and saved {split_name} batch {batch_idx + 1} with {len(bird_images)} images")
                    batch_idx += 1
                # Skipped batches still advance the running image count.
                bird_count += len(bird_images)

    # Process training batches
    save_batches(0, train_count, 'train')

    # Process test batches (this pass re-reads the folders from the start)
    save_batches(train_count, total_images, 'test')

# Load backgrounds. load_images_from_folder() yields *batches* (lists of
# images), so flatten them into plain lists of images: the background sampled
# by random.choice() is handed straight to combine_images(), which calls
# .resize() on it.
land_backgrounds = [
    img
    for batch in load_images_from_folder(os.path.join(backgrounds_dir, 'LandBackgrounds'))
    for img in batch
]
water_backgrounds = [
    img
    for batch in load_images_from_folder(os.path.join(backgrounds_dir, 'WaterBackgrounds'))
    for img in batch
]

# Bird folders
bird_folders = [os.path.join(birds_dir, 'LandBirds'), os.path.join(birds_dir, 'WaterBirds')]

# NOTE(review): every call below writes `train_*_batch_{i}.npy` and
# `test_*_batch_{i}.npy` into the same output_dir, so each call overwrites
# files written by the previous one. TODO: confirm the intended 75/5/20 layout.

# Create training and test datasets (75% direct correlation, 5% inverse correlation)
process_and_save_batches(bird_folders, land_backgrounds + water_backgrounds, correlation="direct", split_ratio=0.75)
process_and_save_batches(bird_folders, land_backgrounds + water_backgrounds, correlation="inverse", split_ratio=0.05)

# Create test dataset (20% direct correlation)
process_and_save_batches(bird_folders, land_backgrounds + water_backgrounds, correlation="direct", split_ratio=0.2)

print(f"Combined images and labels saved in: {output_dir}")


In [None]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split

# Destination for the saved batches; make sure it exists first.
output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Birds95/'
os.makedirs(output_dir, exist_ok=True)

# Sources: bird images and background images.
birds_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/BirdsSegmented/'
backgrounds_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Background/'

# Function to load images
def load_images_from_folder(folder, batch_size=100):
    """Yield the images stored in ``folder`` as lists of at most ``batch_size``.

    Args:
        folder: Directory to read. Only regular files directly inside it are
            opened; sub-directories are skipped.
        batch_size: Maximum number of images per yielded list.

    Yields:
        Lists of RGBA ``PIL.Image.Image`` objects. Every list holds
        ``batch_size`` images except possibly the last one. Nothing is yielded
        when the folder contains no files.
    """
    images = []
    # os.listdir() order is arbitrary; sort so the batches are reproducible.
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        if not os.path.isfile(path):
            continue
        # convert() returns an independent in-memory image, so the file handle
        # can be closed as soon as the conversion is done.
        with Image.open(path) as source:
            images.append(source.convert("RGBA"))
        if len(images) == batch_size:
            yield images
            images = []
    if images:
        yield images

# Function to combine bird and background images
def combine_images(bird_img, background_img):
    """Overlay the non-black pixels of ``bird_img`` on ``background_img``.

    Both images are resized to 256x256 with LANCZOS resampling. Every pixel of
    the bird image whose RGB value is not (0, 0, 0) is kept; all other pixels
    are taken from the background.

    Args:
        bird_img: PIL image of a bird in which black marks "no bird". Expected
            in RGBA mode, because the composite is rebuilt as an RGBA image.
        background_img: PIL image used wherever the bird image is black;
            expected in RGBA mode as well.

    Returns:
        The 256x256 composite as an RGB PIL image.
    """
    bird_img = bird_img.resize((256, 256), Image.LANCZOS)
    background_img = background_img.resize((256, 256), Image.LANCZOS)
    bird_array = np.array(bird_img)
    background_array = np.array(background_img)
    # A pixel is "not black" when ANY colour channel is non-zero. Requiring
    # all three channels to be non-zero would discard bird pixels such as
    # pure green (0, 255, 0).
    mask = (bird_array[..., :3] != 0).any(axis=-1)
    combined_array = np.where(mask[..., None], bird_array, background_array)
    return Image.fromarray(combined_array, 'RGBA').convert("RGB")

# Function to create the dataset batch-wise
def create_dataset_batch(bird_images_folder, background_images, correlation="direct", batch_size=100):
    """Composite every bird in a folder onto a background and save ``.npy`` batches.

    Args:
        bird_images_folder: Folder with the bird images. A path containing
            ``'LandBirds'`` marks land birds (label 0); anything else is
            treated as water birds (label 1).
        background_images: Pool of individual background images (not batches)
            used unless ``correlation`` is ``"inverse"``.
        correlation: ``"inverse"`` pairs land birds with the module-level
            ``water_backgrounds`` and water birds with the module-level
            ``land_backgrounds``; any other value samples from
            ``background_images``. The value is also the file-name prefix.
        batch_size: Batch size forwarded to ``load_images_from_folder``.

    Side effects:
        Writes ``{correlation}_images_batch_{i}.npy`` and
        ``{correlation}_labels_batch_{i}.npy`` into the module-level
        ``output_dir``. The names do not encode the bird folder, so calls that
        share a ``correlation`` value overwrite each other's batches.
    """
    # Derive the class from the folder name. Membership tests such as
    # `bird_img in land_birds_train` cannot work here: the images are freshly
    # opened objects and those lists hold batches, so the test is never true.
    is_land_bird = 'LandBirds' in bird_images_folder
    label = 0 if is_land_bird else 1  # 0 for land bird, 1 for water bird

    if correlation == "inverse":
        # Swap the habitat: land birds on water, water birds on land.
        pool = water_backgrounds if is_land_bird else land_backgrounds
    else:
        pool = background_images

    batch_idx = 0
    for bird_images in load_images_from_folder(bird_images_folder, batch_size):
        batch_combined = []
        for bird_img in bird_images:
            background_img = pool[np.random.randint(0, len(pool))]
            combined_image = combine_images(bird_img, background_img)
            batch_combined.append(np.array(combined_image))

        combined_images = np.array(batch_combined)
        labels = np.array([label] * len(bird_images))

        # Save the batch
        np.save(os.path.join(output_dir, f'{correlation}_images_batch_{batch_idx}.npy'), combined_images)
        np.save(os.path.join(output_dir, f'{correlation}_labels_batch_{batch_idx}.npy'), labels)

        print(f"Processed and saved batch {batch_idx + 1} with {len(bird_images)} images")
        batch_idx += 1

# Load all backgrounds. load_images_from_folder() yields *batches* (lists of
# images); flatten them so that indexing a pool returns a single image, which
# is what combine_images() needs.
land_backgrounds = [
    img
    for batch in load_images_from_folder(os.path.join(backgrounds_dir, 'LandBackgrounds'))
    for img in batch
]
water_backgrounds = [
    img
    for batch in load_images_from_folder(os.path.join(backgrounds_dir, 'WaterBackgrounds'))
    for img in batch
]

# Split data into train and test sets (80-20 split). Flatten first so the
# split is made over images rather than over batches of up to 100 images.
land_birds_train, land_birds_test = train_test_split(
    [img for batch in load_images_from_folder(os.path.join(birds_dir, 'LandBirds')) for img in batch],
    test_size=0.2,
    random_state=42,
)
water_birds_train, water_birds_test = train_test_split(
    [img for batch in load_images_from_folder(os.path.join(birds_dir, 'WaterBirds')) for img in batch],
    test_size=0.2,
    random_state=42,
)

# NOTE(review): the calls below pass whole folders, so they do not composite
# the train/test lists built above, and the "training" and "test" calls for
# LandBirds are identical. Calls that share a correlation value also write to
# the same `{correlation}_*_batch_{i}.npy` names. TODO: confirm the intended
# 75/5/20 layout.

# Create training dataset (75% direct correlation, 5% inverse correlation)
create_dataset_batch(os.path.join(birds_dir, 'LandBirds'), land_backgrounds, correlation="direct")
create_dataset_batch(os.path.join(birds_dir, 'WaterBirds'), water_backgrounds, correlation="inverse")

# Create test dataset (20% direct correlation)
create_dataset_batch(os.path.join(birds_dir, 'LandBirds'), land_backgrounds, correlation="direct")
create_dataset_batch(os.path.join(birds_dir, 'WaterBirds'), water_backgrounds, correlation="direct")

print(f"Combined images and labels saved in: {output_dir}")


In [None]:
# import os
# import numpy as np
# from PIL import Image
# from sklearn.model_selection import train_test_split
# import matplotlib.pyplot as plt
# from collections import Counter

# # Directories
# birds_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/BirdsSegmented/'
# backgrounds_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Background/'
# output_dir = '/content/drive/My Drive/RBCDSAI/Experiment4/Datasets/Waterbird Datasets/NewDataset/Birds95/'

# # Ensure output directory exists
# os.makedirs(output_dir, exist_ok=True)

# # Function to load images
# def load_images_from_folder(folder):
#     images = []
#     for filename in os.listdir(folder):
#         img = Image.open(os.path.join(folder, filename)).convert("RGBA")
#         if img is not None:
#             images.append(img)
#     return images

# # Load all images
# land_birds = load_images_from_folder(os.path.join(birds_dir, 'LandBirds'))
# water_birds = load_images_from_folder(os.path.join(birds_dir, 'WaterBirds'))
# land_backgrounds = load_images_from_folder(os.path.join(backgrounds_dir, 'LandBackgrounds'))
# water_backgrounds = load_images_from_folder(os.path.join(backgrounds_dir, 'WaterBackgrounds'))

# # Split data into train and test sets (80-20 split)
# land_birds_train, land_birds_test, land_backgrounds_train, land_backgrounds_test = train_test_split(land_birds, land_backgrounds, test_size=0.2, random_state=42)
# water_birds_train, water_birds_test, water_backgrounds_train, water_backgrounds_test = train_test_split(water_birds, water_backgrounds, test_size=0.2, random_state=42)

# # Function to combine bird and background images
# def combine_images(bird_img, background_img):
#     bird_img = bird_img.resize((256, 256), Image.LANCZOS)
#     background_img = background_img.resize((256, 256), Image.LANCZOS)
#     bird_array = np.array(bird_img)
#     background_array = np.array(background_img)
#     mask = bird_array[..., :3] != [0, 0, 0]
#     mask = mask.all(axis=-1)
#     combined_array = np.where(mask[..., None], bird_array, background_array)
#     return Image.fromarray(combined_array, 'RGBA').convert("RGB")

# # Function to create the dataset
# def create_dataset(bird_images, background_images, correlation="direct"):
#     combined_images = []
#     labels = []
#     for bird_img in bird_images:
#         if correlation == "inverse":
#             background_img = water_backgrounds_train[np.random.randint(0, len(water_backgrounds_train))] if bird_img in land_birds_train else land_backgrounds_train[np.random.randint(0, len(land_backgrounds_train))]
#         else:
#             background_img = background_images[np.random.randint(0, len(background_images))]
#         combined_image = combine_images(bird_img, background_img)
#         combined_images.append(np.array(combined_image))
#         labels.append(0 if bird_img in land_birds_train or bird_img in land_birds_test else 1)  # 0 for land bird, 1 for water bird
#     return np.array(combined_images), np.array(labels)

# # Create training dataset (75% direct correlation, 5% inverse correlation)
# land_birds_train_75 = land_birds_train[:int(0.75 * len(land_birds_train))]
# land_birds_train_5 = land_birds_train[int(0.75 * len(land_birds_train)):]
# water_birds_train_75 = water_birds_train[:int(0.75 * len(water_birds_train))]
# water_birds_train_5 = water_birds_train[int(0.75 * len(water_birds_train)):]

# train_images_75, train_labels_75 = create_dataset(land_birds_train_75 + water_birds_train_75, land_backgrounds_train + water_backgrounds_train, correlation="direct")
# train_images_5, train_labels_5 = create_dataset(land_birds_train_5 + water_birds_train_5, land_backgrounds_train + water_backgrounds_train, correlation="inverse")
# train_images = np.concatenate((train_images_75, train_images_5))
# train_labels = np.concatenate((train_labels_75, train_labels_5))

# # Create test dataset (20% direct correlation)
# test_images, test_labels = create_dataset(land_birds_test + water_birds_test, land_backgrounds_test + water_backgrounds_test, correlation="direct")

# # Save combined images and labels as numpy files
# np.save(os.path.join(output_dir, 'train_images.npy'), train_images)
# np.save(os.path.join(output_dir, 'train_labels.npy'), train_labels)
# np.save(os.path.join(output_dir, 'test_images.npy'), test_images)
# np.save(os.path.join(output_dir, 'test_labels.npy'), test_labels)

# # Plot some train images
# n_images_to_plot = min(20, len(train_images))

# plt.figure(figsize=(20, 10))
# for i in range(n_images_to_plot):
#     img = train_images[i]

#     plt.subplot(4, 5, i + 1)  # 4 rows, 5 columns
#     plt.imshow(img)
#     plt.axis('off')
#     plt.title(f"Label {train_labels[i]}")

# plt.tight_layout()
# plt.show()

# print(f"Combined images and labels saved in: {output_dir}")

# # Print the number of labels for each class in train_labels_75 and train_labels_5
# train_labels_75_counter = Counter(train_labels_75)
# train_labels_5_counter = Counter(train_labels_5)

# print(f"Number of labels in train_labels_75: {train_labels_75_counter}")
# print(f"Number of labels in train_labels_5: {train_labels_5_counter}")


In [None]:
# Preview up to 20 training images on a 4x5 grid, titled with their labels.
# NOTE(review): `train_images` and `train_labels` are not assigned in this
# cell - confirm the cell that creates them has been run first.
n_images_to_plot = min(20, len(train_images))

plt.figure(figsize=(20, 10))
for i, img in enumerate(train_images[:n_images_to_plot]):
    plt.subplot(4, 5, i + 1)  # 1-based slot in the 4-row, 5-column grid
    plt.imshow(img)
    plt.axis('off')
    plt.title(f"Label {train_labels[i]}")

plt.tight_layout()
plt.show()


In [None]:
# Tally how often each class label occurs in the two training label arrays.
# NOTE(review): `Counter`, `train_labels_75` and `train_labels_5` come from
# outside this cell - confirm they are defined before it runs.
train_labels_75_counter, train_labels_5_counter = (
    Counter(train_labels_75),
    Counter(train_labels_5),
)

print(f"Number of labels in train_labels_75: {train_labels_75_counter}")
print(f"Number of labels in train_labels_5: {train_labels_5_counter}")
