In [28]:
import pandas as pd

# Path to CSV art dataset description file
csv_file_path = "../datasets/art_dataset/artists.csv"

# Read the CSV file into a pandas DataFrame
df = pd.read_csv(csv_file_path)

df.head(15)


Unnamed: 0,id,name,years,genre,nationality,bio,wikipedia,paintings
0,0,Amedeo Modigliani,1884 - 1920,Expressionism,Italian,Amedeo Clemente Modigliani (Italian pronunciat...,http://en.wikipedia.org/wiki/Amedeo_Modigliani,193
1,1,Vasiliy Kandinskiy,1866 - 1944,"Expressionism,Abstractionism",Russian,Wassily Wassilyevich Kandinsky (Russian: Васи́...,http://en.wikipedia.org/wiki/Wassily_Kandinsky,88
2,2,Diego Rivera,1886 - 1957,"Social Realism,Muralism",Mexican,Diego María de la Concepción Juan Nepomuceno E...,http://en.wikipedia.org/wiki/Diego_Rivera,70
3,3,Claude Monet,1840 - 1926,Impressionism,French,Oscar-Claude Monet (; French: [klod mɔnɛ]; 14 ...,http://en.wikipedia.org/wiki/Claude_Monet,73
4,4,Rene Magritte,1898 - 1967,"Surrealism,Impressionism",Belgian,René François Ghislain Magritte (French: [ʁəne...,http://en.wikipedia.org/wiki/René_Magritte,194
5,5,Salvador Dali,1904 - 1989,Surrealism,Spanish,Salvador Domingo Felipe Jacinto Dalí i Domènec...,http://en.wikipedia.org/wiki/Salvador_Dalí,139
6,6,Edouard Manet,1832 - 1883,"Realism,Impressionism",French,Édouard Manet (US: ; UK: ; French: [edwaʁ manɛ...,http://en.wikipedia.org/wiki/Édouard_Manet,90
7,7,Andrei Rublev,1360 - 1430,Byzantine Art,Russian,"Andrei Rublev (Russian: Андре́й Рублёв, IPA: [...",http://en.wikipedia.org/wiki/Andrei_Rublev,99
8,8,Vincent van Gogh,1853 – 1890,Post-Impressionism,Dutch,Vincent Willem van Gogh (Dutch: [ˈvɪnsɛnt ˈʋɪl...,http://en.wikipedia.org/wiki/Vincent_van_Gogh,877
9,9,Gustav Klimt,1862 - 1918,"Symbolism,Art Nouveau",Austrian,"Gustav Klimt (July 14, 1862 – February 6, 1918...",http://en.wikipedia.org/wiki/Gustav_Klimt,117


In [3]:
# Split multiple genres and create a list of all genres
all_genres = [genre.strip() for sublist in df['genre'].str.split(',') for genre in sublist]

# Get unique genres
unique_genres = set(all_genres)

print("All possible genres:")
for genre in unique_genres:
    print(genre)

All possible genres:
Surrealism
Realism
Northern Renaissance
Mannerism
High Renaissance
Neoplasticism
Romanticism
Early Renaissance
Social Realism
Baroque
Suprematism
Pop Art
Cubism
Expressionism
Impressionism
Post-Impressionism
Primitivism
Byzantine Art
Symbolism
Art Nouveau
Proto Renaissance
Muralism
Abstract Expressionism
Abstractionism


In [44]:
# Get artists by genre
def preprocess_artist_names(artists):
    return [artist.replace(' ','_') for artist in artists]

def get_artists_by_genre(df, genre, verbose = False):
    artists = df[df['genre'].str.contains(genre, na=False)]['name'].to_list()
    processed_artists = preprocess_artist_names(artists)
    if verbose:
        print(f"Artists with {genre} works:")
        print(processed_artists)
    return processed_artists

# Get artists by genre
expressionism_artists = get_artists_by_genre(df, 'Expressionism', verbose= True)
surrealism_artists = get_artists_by_genre(df, 'Surrealism', verbose= True)
abstractionism_artists = get_artists_by_genre(df, 'Abstractionism', verbose= True)

Artists with Expressionism works:
['Amedeo_Modigliani', 'Vasiliy_Kandinskiy', 'Edvard_Munch', 'Paul_Klee', 'Jackson_Pollock']
Artists with Surrealism works:
['Rene_Magritte', 'Salvador_Dali', 'Frida_Kahlo', 'Paul_Klee', 'Joan_Miro']
Artists with Abstractionism works:
['Vasiliy_Kandinskiy', 'Paul_Klee']


In [None]:
import os 
import glob
from typing import List
import shutil

# Get images for artists of a specific style
def get_style_images(images_path, artists):
    all_images = []
    for artist_name in artists:
        artist_directory = os.path.join(images_path, artist_name)
        artist_images = glob.glob(os.path.join(artist_directory, '*.jpg'))
        all_images.extend(artist_images)
    return all_images

# Find all works of Expressionism artists
images_path = '../datasets/art_dataset/images'


genres_list = [expressionism_artists, 
               surrealism_artists,
               abstractionism_artists]

def create_style_specific_ds(artists_by_genre: List, images_path: str):
    """Create the training dataset for the CycleGAN model based
       on works by one or multiple art genres."""
    full_dataset = []
    destination_dir = "../datasets/art_dataset/train"
    for artists in artists_by_genre:
        style_images = get_style_images(images_path, artists)
        full_dataset.extend(style_images)
        
    # Create trainset directory
    if os.path.exists(destination_dir):
        raise FileExistsError(f"Training directory '{destination_dir}' already exists.")
    else:
        os.makedirs(destination_dir, exist_ok=False)

    for image_path in full_dataset:
        image_name = os.path.basename(image_path)
        destination_path = os.path.join(destination_dir, image_name)
        shutil.copyfile(image_path, destination_path)

    print(f"Art style dataset created with {len(full_dataset)} images!")

create_style_specific_ds(genres_list, images_path)

In [52]:
import random

def create_full_balanced_ds(unique_genres, images_path, total_images):
    """Create a dataset from all styles where each style is equally 
       represented"""
    all_artists = []
    total_styles = len(unique_genres)
    
    # Get artists for each genre
    for genre in unique_genres:
        genre_artists = get_artists_by_genre(df, genre)
        all_artists.append(genre_artists)
    
    full_dataset = []
    destination_dir = "../datasets/art_dataset/train"
    
    # Check if the destination directory already exists
    if os.path.exists(destination_dir):
        raise FileExistsError(f"Destination directory '{destination_dir}' already exists.")
    
    # Create the destination directory
    os.makedirs(destination_dir)
    
    # Calculate number of images to sample from each style
    num_images_per_style = total_images // total_styles
    
    for artists in all_artists:
        style_images = get_style_images(images_path, artists)
        
        # Sample an equal proportion of images from each style
        sampled_images = random.sample(style_images, min(num_images_per_style, len(style_images)))
        
        full_dataset.extend(sampled_images)

    # Copy images from full dataset to destination directory
    for image_path in full_dataset:
        image_name = os.path.basename(image_path)
        destination_path = os.path.join(destination_dir, image_name)
        shutil.copyfile(image_path, destination_path)

    print(f"Art styles dataset created with {len(full_dataset)} images!")

create_full_balanced_ds(unique_genres, images_path, 3680)

Art styles dataset created with 3157 images!
