In [1]:
import os
# Run everything from the project data folder so the relative paths used
# later in the notebook resolve against it.
project_dir = 'F:\\ba865 project data'
os.chdir(project_dir)

# Echo the resulting directory as a quick sanity check.
print("Current Working Directory:", os.getcwd())

Current Working Directory: F:\ba865 project data


In [2]:
from PIL import Image
import os
import numpy as np
import random

# Loaded (image_array, label) pairs, grouped by label.
data = {'ai': [], 'human': []}
# Upper bound on how many AI images and how many human images are kept per genre.
sample_size = 700

# Seed the sampler/shuffler so a re-run draws the same subset
# (given the same directory listing order).
SEED = 42
random.seed(SEED)

# Folder (relative to the working directory) holding the per-genre image folders.
base_image_dir = "dataforuse"

def _list_image_paths(directory):
    """Return the full paths of the entries in `directory`, or [] if it does not exist."""
    if not os.path.exists(directory):
        return []
    # os.listdir order is arbitrary; sorting keeps seeded sampling repeatable.
    return [os.path.join(directory, name) for name in sorted(os.listdir(directory))]


def _load_image_array(image_path):
    """Load one image as a 256x256 RGB array with values scaled to [0, 1]."""
    # The context manager closes the underlying file once the pixels are copied out.
    with Image.open(image_path) as source:
        # Force three channels: grayscale, palette and RGBA files would otherwise
        # produce arrays that do not fit a (256, 256, 3) slot.
        rgb = source.convert('RGB').resize((256, 256))
        return np.array(rgb) / 255.0


def _collect_images(image_paths, label, description):
    """Sample at most `sample_size` paths and append (array, label) pairs to `data[label]`."""
    if len(image_paths) > sample_size:
        image_paths = random.sample(image_paths, sample_size)

    for image_path in image_paths:
        try:
            data[label].append((_load_image_array(image_path), label))
        except IOError as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Could not read {description} image {image_path}: {e}")


def process_images(ai_sd_dir, ai_ld_dir, human_dir, ai_label, human_label):
    """Load a random sample of AI and human images into the module-level `data` dict.

    The entries of `ai_sd_dir` and `ai_ld_dir` are pooled into one AI list; the
    entries of `human_dir` form the human list. Each list is cut down to at most
    `sample_size` (module-level) randomly chosen files. Every readable file is
    converted to RGB, resized to 256x256, scaled to [0, 1] and appended to
    `data[label]` as an `(array, label)` tuple. Directories that do not exist
    are skipped, and unreadable files are reported with `print` and skipped.

    Args:
        ai_sd_dir: First directory of AI images.
        ai_ld_dir: Second directory of AI images.
        human_dir: Directory of human-made images.
        ai_label: Key in `data` (and label) used for the AI images.
        human_label: Key in `data` (and label) used for the human images.

    Returns:
        None. Results are accumulated in `data`.
    """
    ai_image_paths = _list_image_paths(ai_sd_dir) + _list_image_paths(ai_ld_dir)
    _collect_images(ai_image_paths, ai_label, "AI")

    # Process Human images
    _collect_images(_list_image_paths(human_dir), human_label, "Human")

# Each genre is looked up in three folders under base_image_dir:
# AI_SD_<genre>, AI_LD_<genre> and Human_<genre>.
genres = ["art_nouveau", "baroque", "expressionism", "impressionism", "post_impressionism",
          "realism", "renaissance", "romanticism", "ukiyo-e"]

for genre in genres:
    ai_sd_dir = os.path.join(base_image_dir, f"AI_SD_{genre}")
    ai_ld_dir = os.path.join(base_image_dir, f"AI_LD_{genre}")
    human_dir = os.path.join(base_image_dir, f"Human_{genre}")
    process_images(ai_sd_dir, ai_ld_dir, human_dir, 'ai', 'human')

# Concatenation builds a new list, so shuffling it leaves data['ai'] and
# data['human'] in their original order.
combined_data = data['ai'] + data['human']

random.shuffle(combined_data)

# zip(*[]) yields nothing to unpack, so fall back to empty tuples when no
# image could be loaded (e.g. the genre folders were not found).
if combined_data:
    images, labels = zip(*combined_data)
else:
    images, labels = (), ()

print(f"Total AI images: {len(data['ai'])}")
print(f"Total Human images: {len(data['human'])}")
print(f"Total images in combined dataset: {len(images)}")


Total AI images: 6300
Total Human images: 6300
Total images in combined dataset: 12600


In [3]:
pip install h5py

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
import h5py

# Relative names resolve against the working directory set in the first cell,
# and match the file names the export cell reads back later.
ai_output_filename = 'ai_artwork_dataset.h5'
human_output_filename = 'human_artwork_dataset.h5'

def save_images_to_hdf5(image_label_pairs, output_filename):
    """Write (image, label) pairs into a new HDF5 file.

    The file is opened in 'w' mode and receives two datasets with one row per pair:
    'images' (float32, 256x256x3 per row, chunked one image at a time) and
    'labels' (variable-length strings). Both are resizable along the first axis.

    Args:
        image_label_pairs: Sized sequence of (image_array, label) tuples.
        output_filename: Path of the HDF5 file to create.
    """
    image_shape = (256, 256, 3)

    with h5py.File(output_filename, 'w') as h5_file:
        total = len(image_label_pairs)

        image_store = h5_file.create_dataset(
            'images',
            shape=(total, *image_shape),
            maxshape=(None, *image_shape),
            dtype='float32',
            chunks=(1, *image_shape),
        )
        label_store = h5_file.create_dataset(
            'labels',
            shape=(total,),
            maxshape=(None,),
            dtype=h5py.string_dtype(),
        )

        for row, pair in enumerate(image_label_pairs):
            pixels, tag = pair
            # Only cast when the array is not already float32.
            image_store[row] = pixels if pixels.dtype == np.float32 else pixels.astype('float32')
            label_store[row] = tag

# Write one HDF5 file per class: AI first, then human.
for label_key, target_file in (('ai', ai_output_filename), ('human', human_output_filename)):
    save_images_to_hdf5(data[label_key], target_file)


In [6]:
from PIL import Image
import h5py
import os

def save_images_from_hdf5(hdf5_filename, output_folder):
    """Export every image stored in an HDF5 file as a PNG.

    Reads the 'images' and 'labels' datasets of `hdf5_filename` and writes one
    file per row to `output_folder`, named '<label>_<row index>.png'. The folder
    is created if it does not exist.

    Args:
        hdf5_filename: Path of the HDF5 file to read.
        output_folder: Directory that receives the PNG files.
    """
    os.makedirs(output_folder, exist_ok=True)

    with h5py.File(hdf5_filename, 'r') as hdf:
        # Labels are small, so read them in one go.
        labels = hdf['labels'][:]

        # Iterating the dataset reads one image at a time instead of
        # materialising the whole array in memory.
        for i, (image, label) in enumerate(zip(hdf['images'], labels)):
            # Round before casting: astype('uint8') truncates, so float round-off
            # just below an integer would otherwise lower the channel value by one.
            pixels = (image * 255).round().clip(0, 255).astype('uint8')

            # Labels may come back as bytes or as str; accept both.
            label_text = label.decode() if isinstance(label, bytes) else str(label)
            output_filename = os.path.join(output_folder, f'{label_text}_{i}.png')

            Image.fromarray(pixels).save(output_filename, 'PNG')

# Source HDF5 file and destination folder for each class.
ai_hdf5_filename, ai_output_folder = 'ai_artwork_dataset.h5', 'ai_images'
human_hdf5_filename, human_output_folder = 'human_artwork_dataset.h5', 'human_images'

# Export the AI set first, then the human set.
for source_file, target_folder in (
    (ai_hdf5_filename, ai_output_folder),
    (human_hdf5_filename, human_output_folder),
):
    save_images_from_hdf5(source_file, target_folder)

print("All images have been saved from the HDF5 files.")

All images have been saved from the HDF5 files.


The extracted image files are too large for us to upload to GitHub. If you would like to view the images, please use this Google Drive folder link: https://drive.google.com/drive/folders/1UGPs65hHUtpPiEvUwpDFmNp5vpyx7EZz?usp=sharing