In [15]:
import torch
import os
from torchvision import utils as vutils
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset
from torch.utils.data import random_split
from PIL import Image

class ImageFolder(Dataset):
    """Label-free dataset over the image files located directly inside ``root``.

    The directory is scanned once, at construction time; sub-directories are
    not traversed. Each item is the image at the given index converted to RGB
    and, when a ``transform`` was supplied, passed through that transform.

    NOTE: this class intentionally keeps the name ``ImageFolder`` for existing
    callers, which means it shadows the ``torchvision.datasets.ImageFolder``
    imported at the top of this file.

    Args:
        root: Directory containing the image files.
        transform: Optional callable applied to the RGB ``PIL.Image``.
    """

    # Suffixes accepted as images; matching is case-insensitive so files such
    # as ``IMG_0001.JPG`` are not silently skipped.
    IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')

    def __init__(self, root, transform=None):
        super(ImageFolder, self).__init__()
        self.root = root

        # Sorted list of full paths to every accepted image under ``root``.
        self.frame = self._parse_frame()
        self.transform = transform

    def _parse_frame(self):
        """Return the sorted list of image paths found directly in ``self.root``."""
        return [
            os.path.join(self.root, name)
            for name in sorted(os.listdir(self.root))
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        ]

    def __len__(self):
        """Number of image files discovered at construction time."""
        return len(self.frame)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and apply the optional transform."""
        # ``convert`` yields an independent in-memory image, so the underlying
        # file can be closed as soon as the ``with`` block ends.
        with Image.open(self.frame[idx]) as handle:
            img = handle.convert('RGB')

        if self.transform:
            img = self.transform(img)

        return img


# Step 1: Load every image under datasets/BV/img through the dataset class,
# converting each one to a tensor.
transform_list = [
        transforms.ToTensor(),
    ]
trans = transforms.Compose(transform_list)

data_root = os.path.join(os.getcwd(), "datasets", "BV", "img")
dataset = ImageFolder(root=data_root, transform=trans)

# Step 2: Decide the order in which images are assigned to the two splits.
# A single seeded permutation drives the split, so the result is reproducible
# and `shuffle_dataset = False` really yields an unshuffled (sequential) split.
shuffle_dataset = True
random_seed = 42
if shuffle_dataset:
    # Keep seeding the global RNG as before for any later code that relies on it.
    torch.manual_seed(random_seed)
    split_generator = torch.Generator().manual_seed(random_seed)
    indices = torch.randperm(len(dataset), generator=split_generator).tolist()
else:
    indices = list(range(len(dataset)))

# Step 3: Split the (optionally shuffled) indices into train and test subsets
# Replace 'train_ratio' with the desired ratio for the training set (e.g., 0.8 for 80% training data)
train_ratio = 0.9
train_size = int(train_ratio * len(dataset))
test_size = len(dataset) - train_size

print ('Dataset size %d' % len(dataset))
print ('Train size %d' % train_size)
print ('Test size %d' % test_size)
train_dataset = torch.utils.data.Subset(dataset, indices[:train_size])
test_dataset = torch.utils.data.Subset(dataset, indices[train_size:])

# Step 4: Write each subset to its own directory as sequentially numbered files
train_dir = os.path.join(os.getcwd(), "datasets", "BV", "train")
test_dir = os.path.join(os.getcwd(), "datasets", "BV", "test")


def _save_split(split, out_dir):
    """Save every image tensor in `split` to `out_dir` as '<position>.jpg'."""
    os.makedirs(out_dir, exist_ok=True)
    for i in range(len(split)):
        # NOTE(review): the '.jpg' suffix means images are re-encoded on save;
        # confirm that lossy output is acceptable for this dataset.
        vutils.save_image(split[i], os.path.join(out_dir, '%d.jpg' % i))


_save_split(train_dataset, train_dir)
_save_split(test_dataset, test_dir)

print("Data saved to different directories successfully.")
# The counts below include any files left over from an earlier run, because the
# output directories are reused and never cleared.
print (len([name for name in os.listdir(train_dir) if os.path.isfile(os.path.join(train_dir, name))]))
print (len([name for name in os.listdir(test_dir) if os.path.isfile(os.path.join(test_dir, name))]))


Dataset size 6252
Train size 5626
Test size 626
Data saved to different directories successfully.
5626
626


In [1]:
import os
import numpy as np
from PIL import Image
from torchvision import transforms

# Step 1: Define the folders for train and test images
train_dir = os.path.join(os.getcwd(), 'datasets', 'BV', 'train')
test_dir = os.path.join(os.getcwd(), 'datasets', 'BV', 'test')

# Step 2: Process the images (e.g., resizing, transformations, etc.)
# Replace these transformations with the desired ones for your use case
data_transform = transforms.Compose([
    transforms.ToTensor(),          # Convert images to tensors
])


def _numeric_name_key(filename):
    """Sort key placing purely numeric stems ('2.jpg' < '10.jpg') in numeric order."""
    stem = os.path.splitext(filename)[0]
    if stem.isdigit():
        return (0, int(stem), filename)
    # Non-numeric names come after the numeric ones, in plain string order.
    return (1, 0, filename)


def _load_images_as_array(directory, transform):
    """Load every regular file in `directory` as an image and stack the results.

    Args:
        directory: Folder whose files are opened with PIL.
        transform: Callable applied to each opened image; its result must
            provide `.numpy()`.

    Returns:
        One NumPy array stacking the transformed images along a new first axis,
        ordered by file name.

    Raises:
        ValueError: If `directory` contains no regular files.
    """
    # os.listdir returns entries in arbitrary order, so sort to make the row
    # order of the saved array reproducible; skip sub-directories (for example
    # Jupyter's '.ipynb_checkpoints'), which cannot be opened as images.
    names = sorted(
        (name for name in os.listdir(directory)
         if os.path.isfile(os.path.join(directory, name))),
        key=_numeric_name_key,
    )
    if not names:
        raise ValueError("No image files found in %r" % directory)

    arrays = []
    for name in names:
        # Transform inside the `with` block so pixel data is read before the
        # file is closed.
        with Image.open(os.path.join(directory, name)) as img:
            arrays.append(transform(img).numpy())
    # np.stack requires all images to share one shape.
    return np.stack(arrays)


# Step 3: Load each split and combine its images into a single NumPy matrix
train_data = _load_images_as_array(train_dir, data_transform)
test_data = _load_images_as_array(test_dir, data_transform)

# Save the train and test data as .npy files (optional)
np.save('train_data.npy', train_data)
np.save('test_data.npy', test_data)

print("Train and test data converted to NumPy matrices successfully.")


Train and test data converted to NumPy matrices successfully.
