In [1]:
from google.colab import drive

In [18]:
import pandas as pd
import numpy as np
import os
import re
import shutil

In [3]:
# Step 1: Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


We want to delete the empty folders. First I want to create a list of the folders that will be deleted.

In [4]:
# Define the path to your main dataset folder
dataset_path = '/content/drive/MyDrive/colab_data/wikiart_dataset'

# Initialize a list to collect empty folders
empty_folders = []

# Traverse the directory structure from bottom-up
for root, dirs, files in os.walk(dataset_path, topdown=False):
    # If a folder is empty, add it to the list
    if not files and not dirs:
        empty_folders.append(root)

# Display the list of empty folders that would be deleted
print("Folders that would be deleted:")
for folder in empty_folders:
    print(folder)

Folders that would be deleted:
/content/drive/MyDrive/colab_data/wikiart_dataset/Baroque
/content/drive/MyDrive/colab_data/wikiart_dataset/Color_Field_Painting
/content/drive/MyDrive/colab_data/wikiart_dataset/Contemporary_Realism
/content/drive/MyDrive/colab_data/wikiart_dataset/Minimalism
/content/drive/MyDrive/colab_data/wikiart_dataset/New_Realism
/content/drive/MyDrive/colab_data/wikiart_dataset/Northern_Renaissance
/content/drive/MyDrive/colab_data/wikiart_dataset/Rococo
/content/drive/MyDrive/colab_data/wikiart_dataset/Romanticism


In [5]:
# Define the path to your main dataset folder
dataset_path = '/content/drive/MyDrive/colab_data/wikiart_dataset'

# Traverse the directory structure from bottom-up to safely remove empty folders
for root, dirs, files in os.walk(dataset_path, topdown=False):
    # If a folder is empty (i.e., has no files or subdirectories), remove it
    if not files and not dirs:
        os.rmdir(root)  # Delete the empty folder
        print(f"Deleted empty folder: {root}")

print("All empty folders have been deleted.")

Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Baroque
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Color_Field_Painting
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Contemporary_Realism
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Minimalism
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/New_Realism
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Northern_Renaissance
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Rococo
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Romanticism
All empty folders have been deleted.


Now we'll count the number of paintings to double check that.

In [6]:
file_count = 0
for root, _, files in os.walk(dataset_path):
    for filename in files:
        # Check if it's a file
        if os.path.isfile(os.path.join(root, filename)):
            file_count += 1
print(file_count)

6561


We'll create folders for the painters.

In [8]:
# Define the path to the main dataset folder
dataset_path = '/content/drive/MyDrive/colab_data/wikiart_dataset'

# List of folder names to create
painter_folders = [
    'claude monet', 'pierre auguste renoir', 'vincent van gogh',
    'paul cezanne', 'pablo picasso', 'georges braque',
    'salvador dali', 'rene magritte'
]

In [9]:
# Create each folder in the specified path
for painter in painter_folders:
    folder_path = os.path.join(dataset_path, painter)
    os.makedirs(folder_path, exist_ok=True)  # Creates folder if it doesn't already exist
    print(f"Created folder: {folder_path}")

print("All specified folders have been created.")

Created folder: /content/drive/MyDrive/colab_data/wikiart_dataset/claude monet
Created folder: /content/drive/MyDrive/colab_data/wikiart_dataset/pierre auguste renoir
Created folder: /content/drive/MyDrive/colab_data/wikiart_dataset/vincent van gogh
Created folder: /content/drive/MyDrive/colab_data/wikiart_dataset/paul cezanne
Created folder: /content/drive/MyDrive/colab_data/wikiart_dataset/pablo picasso
Created folder: /content/drive/MyDrive/colab_data/wikiart_dataset/georges braque
Created folder: /content/drive/MyDrive/colab_data/wikiart_dataset/salvador dali
Created folder: /content/drive/MyDrive/colab_data/wikiart_dataset/rene magritte
All specified folders have been created.


In [16]:
filtered_df= pd.read_csv('/content/drive/MyDrive/colab_data/filtered_data.csv')

In [17]:
filtered_df.head()

Unnamed: 0,painting,normalized_painting,genre,painter
0,georges-braque_a-girl(1).jpg,georges braque a girl 1 jpg,analytical_cubism,georges braque
1,georges-braque_bottle-and-fishes-1910.jpg,georges braque bottle and fishes 1910 jpg,analytical_cubism,georges braque
2,georges-braque_castle-at-la-roche-guyon-1909.jpg,georges braque castle at la roche guyon 1909 jpg,analytical_cubism,georges braque
3,georges-braque_clarinet-and-bottle-of-rum-on-a...,georges braque clarinet and bottle of rum on a...,analytical_cubism,georges braque
4,georges-braque_fruitdish-and-glass-1912.jpg,georges braque fruitdish and glass 1912 jpg,analytical_cubism,georges braque


In [20]:
# Iterate over files in the dataset folder and move them to corresponding painter folders
for root, _, files in os.walk(dataset_path):
    for filename in files:
        # Find the painter for this file based on 'filtered_df'
        painter_row = filtered_df[filtered_df['painting'] == filename]

        # If a corresponding painter is found and is valid
        if not painter_row.empty:
            painter_name = painter_row['painter'].values[0]

            # Check if painter_name is a valid string (not NaN or empty)
            if isinstance(painter_name, str) and painter_name:
                source_path = os.path.join(root, filename)
                destination_path = os.path.join(dataset_path, painter_name, filename)

                # Move the file to the corresponding painter's folder
                shutil.move(source_path, destination_path)
                print(f"Moved {filename} to {destination_path}")
            else:
                print(f"Skipped {filename} due to missing painter information.")

print("All paintings have been moved to their corresponding painter folders.")





[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Moved pierre-auguste-renoir_le-jardin-de-la-poste-cagnes-1906.jpg to /content/drive/MyDrive/colab_data/wikiart_dataset/pierre auguste renoir/pierre-auguste-renoir_le-jardin-de-la-poste-cagnes-1906.jpg
Moved pierre-auguste-renoir_le-place-clichy.jpg to /content/drive/MyDrive/colab_data/wikiart_dataset/pierre auguste renoir/pierre-auguste-renoir_le-place-clichy.jpg
Moved pierre-auguste-renoir_le-poste-at-cagnes-study-1905.jpg to /content/drive/MyDrive/colab_data/wikiart_dataset/pierre auguste renoir/pierre-auguste-renoir_le-poste-at-cagnes-study-1905.jpg
Moved pierre-auguste-renoir_leaving-the-bath-1890.jpg to /content/drive/MyDrive/colab_data/wikiart_dataset/pierre auguste renoir/pierre-auguste-renoir_leaving-the-bath-1890.jpg
Moved pierre-auguste-renoir_leaving-the-conservatoire-1877.jpg to /content/drive/MyDrive/colab_data/wikiart_dataset/pierre auguste renoir/pierre-auguste-renoir_leaving-the-conservatoire-1877.jpg
Move

In [21]:
# Define the path to your main dataset folder
dataset_path = '/content/drive/MyDrive/colab_data/wikiart_dataset'

# Initialize a list to collect empty folders
empty_folders = []

# Traverse the directory structure from bottom-up
for root, dirs, files in os.walk(dataset_path, topdown=False):
    # If a folder is empty, add it to the list
    if not files and not dirs:
        empty_folders.append(root)

# Display the list of empty folders that would be deleted
print("Folders that would be deleted:")
for folder in empty_folders:
    print(folder)

Folders that would be deleted:
/content/drive/MyDrive/colab_data/wikiart_dataset/Analytical_Cubism
/content/drive/MyDrive/colab_data/wikiart_dataset/Art_Nouveau_Modern
/content/drive/MyDrive/colab_data/wikiart_dataset/Cubism
/content/drive/MyDrive/colab_data/wikiart_dataset/Expressionism
/content/drive/MyDrive/colab_data/wikiart_dataset/Fauvism
/content/drive/MyDrive/colab_data/wikiart_dataset/Impressionism
/content/drive/MyDrive/colab_data/wikiart_dataset/Naive_Art_Primitivism
/content/drive/MyDrive/colab_data/wikiart_dataset/Pointillism
/content/drive/MyDrive/colab_data/wikiart_dataset/Realism
/content/drive/MyDrive/colab_data/wikiart_dataset/Abstract_Expressionism


Now we delete the folders. I will delete Post_Impressionism folder manually. There was paul-gauguin painting.

In [22]:
# Define the path to your main dataset folder
dataset_path = '/content/drive/MyDrive/colab_data/wikiart_dataset'

# Traverse the directory structure from bottom-up to safely remove empty folders
for root, dirs, files in os.walk(dataset_path, topdown=False):
    # If a folder is empty (i.e., has no files or subdirectories), remove it
    if not files and not dirs:
        os.rmdir(root)  # Delete the empty folder
        print(f"Deleted empty folder: {root}")

print("All empty folders have been deleted.")

Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Analytical_Cubism
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Art_Nouveau_Modern
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Cubism
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Expressionism
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Fauvism
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Impressionism
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Naive_Art_Primitivism
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Pointillism
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Realism
Deleted empty folder: /content/drive/MyDrive/colab_data/wikiart_dataset/Abstract_Expressionism
All empty folders have been deleted.


Now to double check we'll count the number of paintings again.

In [23]:
file_count = 0
for root, _, files in os.walk(dataset_path):
    for filename in files:
        # Check if it's a file
        if os.path.isfile(os.path.join(root, filename)):
            file_count += 1
print(file_count)

6468


113 paintings are missing.