### **Dataset Preparation**

In [None]:
!pip install bing-image-downloader

Collecting bing-image-downloader
  Downloading bing_image_downloader-1.1.2-py3-none-any.whl.metadata (2.8 kB)
Downloading bing_image_downloader-1.1.2-py3-none-any.whl (5.9 kB)
Installing collected packages: bing-image-downloader
Successfully installed bing-image-downloader-1.1.2


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells read and write the dataset
# under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from bing_image_downloader import downloader

# Search queries double as class names: the downloader stores each query's
# images in its own sub-folder of `output_dir` (e.g. dataset/panda).
search_terms = ["panda", "duck"]
num_images = 100  # images requested per search term

for term in search_terms:
    downloader.download(
        term,
        limit=num_images,
        output_dir='dataset',
        # Keep Bing's adult-content filter enabled: passing True here would
        # switch safe search OFF, which an animal dataset has no reason to do.
        adult_filter_off=False,
        force_replace=False,
        timeout=60,
    )

print("Images downloaded successfully!")


[%] Downloading Images to /content/dataset/panda


[!!]Indexing page: 1

[%] Indexed 35 Images on Page 1.


[%] Downloading Image #1 from http://upload.wikimedia.org/wikipedia/commons/c/cd/Panda_Cub_from_Wolong%2C_Sichuan%2C_China.JPG
[%] File Downloaded !

[%] Downloading Image #2 from http://upload.wikimedia.org/wikipedia/commons/0/0f/Grosser_Panda.JPG
[%] File Downloaded !

[%] Downloading Image #3 from https://i.natgeofe.com/k/75ac774d-e6c7-44fa-b787-d0e20742f797/giant-panda-eating_3x2.jpg
[%] File Downloaded !

[%] Downloading Image #4 from https://img.freepik.com/photos-premium/bebe-panda-mignon-humoristique-suspendu-du-bambou_410516-25873.jpg?w=2000
[%] File Downloaded !

[%] Downloading Image #5 from https://img.freepik.com/premium-photo/cute-little-baby-panda-bear-giant-panda-cub_691560-1717.jpg?w=1380
[%] File Downloaded !

[%] Downloading Image #6 from https://www.fodors.com/wp-content/uploads/2018/11/shutterstock_374877793.jpg
[%] File Downloaded !

[%] Downloading Image #7

In [None]:
import shutil

# Copy the freshly downloaded dataset folder into the mounted Google Drive.
source_folder = '/content/dataset'
destination_folder = '/content/drive/MyDrive/MLDataset'

# dirs_exist_ok=True keeps the cell re-runnable: without it copytree raises
# FileExistsError as soon as the destination folder already exists.
# The call is the last expression, so the notebook displays the returned
# destination path.
shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)

'/content/drive/MyDrive/MLDataset'

In [None]:
import os
folder_path = '/content/drive/MyDrive/MLDataset/panda'  # Replace with your folder path

# Count the regular files directly inside the folder; sub-directories are not counted.
file_count = sum(
    1
    for entry_name in os.listdir(folder_path)
    if os.path.isfile(os.path.join(folder_path, entry_name))
)

print(f"Number of files in '{folder_path}':", file_count)

Number of files in '/content/drive/MyDrive/MLDataset/panda': 100


In [None]:
import os
folder_path = '/content/dataset/duck'  # Replace with your folder path

# Keep only the directory entries that are regular files, then count them.
regular_files = [
    entry_name
    for entry_name in os.listdir(folder_path)
    if os.path.isfile(os.path.join(folder_path, entry_name))
]
file_count = len(regular_files)

print(f"Number of files in '{folder_path}':", file_count)

Number of files in '/content/dataset/duck': 100


In [None]:
import torch
import torchvision
import torch.optim as optim
from torchvision import transforms
from PIL import Image
from torch.utils.data import DataLoader, random_split,Dataset
from torch.utils.tensorboard import SummaryWriter
import matplotlib.pyplot as plt
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from tqdm import tqdm


In [None]:

class ImageLabelDataset(Dataset):
  """Flat-folder image dataset whose class is encoded in each filename.

  Every regular file directly inside ``data_dir`` is one sample. The label is
  the part of the filename before the first underscore: ``panda`` maps to
  ``[0, 1]`` and any other prefix maps to ``[1, 0]``.

  Args:
    data_dir: Folder containing the image files.
    transform: Optional callable applied to the RGB PIL image before it is
      returned.
  """

  def __init__(self, data_dir, transform=None):
    self.data_dir = data_dir
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # index -> file mapping identical on every run.
    self.image_paths = sorted(
        os.path.join(data_dir, f)
        for f in os.listdir(data_dir)
        if os.path.isfile(os.path.join(data_dir, f))
    )
    self.transform = transform

  def __len__(self):
    """Return the number of image files found in ``data_dir``."""
    return len(self.image_paths)

  def __getitem__(self, idx):
    """Return ``(image, label)`` for the sample at position ``idx``.

    The label is a two-element Python list (see the class docstring).
    NOTE(review): a list label is collated element-wise by the default
    DataLoader collate function — confirm the training loop expects that.
    """
    image_path = self.image_paths[idx]
    # The context manager closes the underlying file as soon as the pixels
    # have been converted, instead of leaving the handle to the garbage
    # collector.
    with Image.open(image_path) as source_image:
      image = source_image.convert('RGB')
    # Filename prefix before the first "_" is the class name.
    label = os.path.splitext(os.path.basename(image_path))[0].split("_")[0]
    if label == 'panda':
        label = [0,1]
    else:
        label = [1,0]
    if self.transform:
      image = self.transform(image)
    return image, label


In [None]:
import os
import shutil

# Merge both class folders into one flat folder. Each copied file gets its
# class name as a filename prefix ("panda_..." / "duck_...").
panda_folder = '/content/drive/My Drive/MLDataset/panda'
duck_folder = '/content/drive/My Drive/MLDataset/duck'
combined_folder = '/content/drive/My Drive/MLDataset/combine'

# Create the combined folder if it doesn't exist
os.makedirs(combined_folder, exist_ok=True)

# One loop for both classes instead of two copy-pasted loops.
for class_name, class_folder in (("panda", panda_folder), ("duck", duck_folder)):
    for filename in os.listdir(class_folder):
        source_path = os.path.join(class_folder, filename)
        # shutil.copy() raises on directories, so only regular files are copied.
        if not os.path.isfile(source_path):
            continue
        destination_path = os.path.join(combined_folder, f"{class_name}_{filename}")
        shutil.copy(source_path, destination_path)

# Verify the count
print(f"Number of files in '{combined_folder}':", len(os.listdir(combined_folder)))


Number of files in '/content/drive/My Drive/MLDataset/combine': 200


In [None]:
data_dir = '/content/drive/My Drive/MLDataset/combine'

# Preprocessing applied to every image when the dataset returns a sample.
data_transforms = transforms.Compose(
    [
        # Bring every image to a fixed 200x200 resolution.
        transforms.Resize((200, 200)),
        transforms.ToTensor(),
        # Per-channel normalisation with the commonly used ImageNet mean/std values.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
dataset = ImageLabelDataset(data_dir, transform=data_transforms)


In [None]:
import os
folder_path = '/content/drive/MyDrive/MLDataset/combine'  # Replace with your folder path

# Walk the directory listing once and tally the entries that are regular files.
file_count = 0
for entry_name in os.listdir(folder_path):
    if os.path.isfile(os.path.join(folder_path, entry_name)):
        file_count += 1

print(f"Number of files in '{folder_path}':", file_count)

Number of files in '/content/drive/MyDrive/MLDataset/combine': 200


In [None]:
# Sanity check: load the first sample and print the shape of its image tensor
# after the transform pipeline has been applied.
print(dataset[0][0].shape)

torch.Size([3, 200, 200])


In [None]:
# Hold out 20% of the samples for testing.
test_size = 0.2
train_size = int(len(dataset) * (1 - test_size))

# A dedicated, seeded generator makes random_split pick the same indices on
# every run; without it the split changes each time the cell is executed.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, len(dataset) - train_size],
    generator=split_generator,
)

# Create training and testing loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

Augmenting

In [None]:
# Augmentation pipeline. ToTensor() runs first, so the remaining transforms
# receive a tensor rather than a PIL image.
data_transforms_ = transforms.Compose(
    [
        transforms.ToTensor(),
        # p=1: every image is always mirrored left-to-right ...
        transforms.RandomHorizontalFlip(p=1),
        # ... and always flipped top-to-bottom as well.
        transforms.RandomVerticalFlip(p=1),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
        # Random rotation angle drawn from [-90, 90] degrees.
        transforms.RandomRotation(degrees=(-90, 90)),
    ]
)

In [None]:
# src_dir: folder holding the combined panda/duck images.
# dest_dir: folder that receives the originals plus their augmented copies.
src_dir="/content/drive/MyDrive/MLDataset/combine/"
dest_dir="/content/drive/MyDrive/MLDataset/combine_augmented/"

In [None]:
import os
from PIL import Image
from torchvision import transforms

src_dir="/content/drive/MyDrive/MLDataset/combine/"
dest_dir="/content/drive/MyDrive/MLDataset/combine_augmented/"

# exist_ok=True keeps the cell re-runnable and also creates missing parents.
os.makedirs(dest_dir, exist_ok=True)

# Regular files of the source directory, sorted so they are processed in the
# same order on every run (os.listdir() order is arbitrary).
onlyfiles = sorted(f for f in os.listdir(src_dir) if os.path.isfile(os.path.join(src_dir, f)))

to_pil = transforms.ToPILImage()  # built once instead of once per image
skipped_files = []

for file in onlyfiles:
    # "<class>_Image_<n>.<ext>" -> ["<class>", "<n>"]. Check the name BEFORE
    # decoding and augmenting so files that get skipped cost nothing.
    label = os.path.splitext(file)[0].split("_Image_")
    if len(label) < 2:
        skipped_files.append(file)
        continue

    # The context manager closes the source file once the pixels are converted.
    with Image.open(os.path.join(src_dir, file)) as source_image:
        image = source_image.convert('RGB')

    # The augmentation pipeline yields a tensor; convert back to PIL for saving.
    image_aug = to_pil(data_transforms_(image))

    # Save the original image to the destination directory
    image.save(os.path.join(dest_dir, f"{label[0]}_{label[1]}.jpg"))
    # Save the augmented image; the "_a_" infix marks it as the augmented copy.
    image_aug.save(os.path.join(dest_dir, f"{label[0]}_a_{label[1]}.jpg"))

# Report skipped files instead of dropping them silently.
if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) without '_Image_' in the name: {skipped_files}")
print("Images have been successfully processed and saved.")




Images have been successfully processed and saved.


In [None]:
# Preprocessing for the augmented dataset: fixed 200x200 size, tensor
# conversion, then per-channel normalisation.
data_transforms = transforms.Compose(
    [
        transforms.Resize((200, 200)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
import os
folder_path = '/content/drive/MyDrive/MLDataset/combine_augmented/'  # Replace with your folder path

# Build the full path of every directory entry, then count the regular files.
entry_paths = [os.path.join(folder_path, entry_name) for entry_name in os.listdir(folder_path)]
file_count = len([entry_path for entry_path in entry_paths if os.path.isfile(entry_path)])

print(f"Number of files in '{folder_path}':", file_count)

Number of files in '/content/drive/MyDrive/MLDataset/combine_augmented/': 400


In [None]:
dataset_ = ImageLabelDataset(dest_dir,data_transforms)

test_size = 0.2

# Every source image is stored twice in the augmented folder: "<class>_<n>.jpg"
# (original) and "<class>_a_<n>.jpg" (augmented copy). Splitting single files
# at random can place an original in the training set and its augmented twin
# in the test set, leaking test data into training. Group both variants of an
# image under one key and split by group instead.
variant_groups = {}
for sample_index, sample_path in enumerate(dataset_.image_paths):
    stem = os.path.splitext(os.path.basename(sample_path))[0]
    group_key = stem.replace("_a_", "_", 1)  # "panda_a_7" -> "panda_7"
    variant_groups.setdefault(group_key, []).append(sample_index)

# Sorted keys plus a seeded generator give the same split on every run.
group_keys = sorted(variant_groups)
split_generator = torch.Generator().manual_seed(42)
shuffled_groups = torch.randperm(len(group_keys), generator=split_generator).tolist()
train_group_count = int(len(group_keys) * (1 - test_size))

train_indices = [
    index
    for group_position in shuffled_groups[:train_group_count]
    for index in variant_groups[group_keys[group_position]]
]
test_indices = [
    index
    for group_position in shuffled_groups[train_group_count:]
    for index in variant_groups[group_keys[group_position]]
]
train_size = len(train_indices)

# Subset is the same wrapper type that random_split returns.
train_dataset_ = torch.utils.data.Subset(dataset_, train_indices)
test_dataset_ = torch.utils.data.Subset(dataset_, test_indices)

# Create training and testing loaders
train_loader_ = torch.utils.data.DataLoader(train_dataset_, batch_size=32, shuffle=True)
test_loader_ = torch.utils.data.DataLoader(test_dataset_, batch_size=32, shuffle=False)

In [None]:
# Number of samples in the held-out test split of the augmented dataset
# (last expression of the cell, so the notebook displays it).
len(test_loader_.dataset)

80