In [None]:
# Make Google Drive visible to this Colab runtime; the dataset paths used by
# the later cells live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader  # Import Dataset and DataLoader
import os
import shutil
from glob import glob
from sklearn.model_selection import train_test_split

In [None]:
# Split the images of every painter into train / val / test folders.
#
# Input layout read by this cell:    <base_dir>/<painter>/*.jpg
# Output layout written by this cell: <output_dir>/{train,val,test}/<painter>/<image>
#
# NOTE: files are only ever copied into output_dir, never removed. If this cell
# is re-run with different ratios or a different seed, images copied by the
# earlier run stay in place and may end up in more than one split. Clear
# output_dir first when changing the split configuration.

# Define paths
base_dir = '/content/drive/My Drive/colab_data/wikiart_dataset'
output_dir = '/content/drive/My Drive/colab_data/wikiart_split'
os.makedirs(output_dir, exist_ok=True)

# Define split ratios
train_ratio = 0.3
val_ratio = 0.10
test_ratio = 0.60

# The train share is implied by the other two (train_test_split only receives
# the val/test fractions), so make sure the three values actually agree.
if min(train_ratio, val_ratio, test_ratio) <= 0 or abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-9:
    raise ValueError(
        "train_ratio, val_ratio and test_ratio must all be positive and sum to 1 "
        f"(got {train_ratio}, {val_ratio}, {test_ratio})"
    )

image_pattern = '*.jpg'  # Adjust extension if needed
random_seed = 42

# Painters for which no split could be produced (too few images)
skipped_painters = []

# Loop through each painter's folder in the base directory.
# Sorted so the run order does not depend on the filesystem.
for painter in sorted(os.listdir(base_dir)):
    painter_dir = os.path.join(base_dir, painter)

    # Skip if not a directory
    if not os.path.isdir(painter_dir):
        continue

    # Get list of image files for the current painter.
    # glob returns paths in filesystem-dependent order; sorting makes the
    # seeded shuffle below yield the same split on every run.
    images = sorted(glob(os.path.join(painter_dir, image_pattern)))

    # Split the images into train, validation, and test sets.
    # train_test_split raises ValueError when one side of a split would be
    # empty, which happens for painters with only a handful of images; skip
    # those instead of aborting the whole run.
    try:
        train_images, temp_images = train_test_split(
            images, test_size=(val_ratio + test_ratio), random_state=random_seed
        )
        val_images, test_images = train_test_split(
            temp_images, test_size=test_ratio / (val_ratio + test_ratio), random_state=random_seed
        )
    except ValueError:
        print(f"Not enough images to split for {painter}. Skipping...")
        skipped_painters.append(painter)
        continue

    # Create the painter's folders only once the split succeeded, so skipped
    # painters do not leave empty class folders behind, then copy the images.
    for split, split_images in (('train', train_images), ('val', val_images), ('test', test_images)):
        split_dir = os.path.join(output_dir, split, painter)
        os.makedirs(split_dir, exist_ok=True)
        for img_path in split_images:
            shutil.copy(img_path, split_dir)

if skipped_painters:
    print(f"Data split finished, but {len(skipped_painters)} painter(s) were skipped: {skipped_painters}")
else:
    print("Data split into train, validation, and test sets successfully for each painter!")

Data split into train, validation, and test sets successfully for each painter!
