# Import

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
sns.set_theme(style="darkgrid")
import matplotlib.pyplot as plt
import os
from shutil import copyfile

In [3]:
from neuralart.data import *

# Get Data

In [4]:
# All three locations share one root directory, so the sub-paths are derived from it.
# csv_path and image_path are handed to get_data; output_path is handed to get_dataset.
output_path = "../raw_data/wikiart"
csv_path = f"{output_path}/csv-chan"
image_path = f"{output_path}/wikiart"

In [5]:
# Load the working table through the project helper get_data (pulled in by the
# star import from neuralart.data). Later cells treat `data` as a pandas
# DataFrame and read columns such as 'path', 'artist', 'genre', 'movement',
# 'title' and the 'cs-split-*' columns.
data = get_data(csv_path, image_path)

In [9]:
# Movement (style) labels that merge_mov1 maps onto themselves.
_mov1_kept = [
    'Art_Nouveau_Modern', 'Baroque', 'Early_Renaissance', 'Expressionism',
    'High_Renaissance', 'Impressionism', 'Mannerism_Late_Renaissance',
    'Naive_Art_Primitivism', 'Northern_Renaissance', 'Post_Impressionism',
    'Realism', 'Rococo', 'Romanticism', 'Symbolism', 'Ukiyo_e',
]
# Movement labels that merge_mov1 maps to None
# (presumably excluded by get_dataset -- TODO confirm against neuralart.data).
_mov1_unmapped = [
    'Abstract_Expressionism', 'Action_painting', 'Analytical_Cubism',
    'Color_Field_Painting', 'Contemporary_Realism', 'Cubism', 'Fauvism',
    'Minimalism', 'New_Realism', 'Pointillism', 'Pop_Art', 'Synthetic_Cubism',
]

# Class mapping passed as `class_` to get_dataset: a name plus a
# {movement: target label or None} dictionary under 'dico'.
# sorted() yields the keys in alphabetical order.
merge_mov1 = {
    'name': 'merge_mov1',
    'dico': {
        movement: (movement if movement in _mov1_kept else None)
        for movement in sorted(_mov1_kept + _mov1_unmapped)
    },
}

In [10]:
# Every combination of sample size, strategy, keep_genre flag and class mapping
# (3 x 2 x 2 x 2 = 24 calls), enumerated with the class mapping varying fastest.
dataset_grid = [
    (sample_size, strategy, keep_genre, class_map)
    for sample_size in (100, 1000, None)
    for strategy in ('drop', 'max')
    for keep_genre in (False, True)
    for class_map in (merge_mov1, None)
]

# The return value of get_dataset is not kept; each call receives output_path.
for sample_size, strategy, keep_genre, class_map in dataset_grid:
    get_dataset(data, target="movement", class_=class_map, n=sample_size,
                strategy=strategy, random_state=123,
                output_path=output_path, keep_genre=keep_genre)


# Data Visualization

In [None]:
col = ['cs-split-artist','cs-split-genre','cs-split-style','path']

# Display label for each counted column. Attaching labels by column name keeps
# every bar correctly labelled even if the counts rank in a different order.
target_labels = {'path': 'total',
                 'cs-split-style': 'cs-movement (style)',
                 'cs-split-genre': 'cs-genre',
                 'cs-split-artist': 'cs-artist'}

# Non-null rows per column, computed once, largest first, then relabelled.
target_counts = (data[col].count()
                 .sort_values(ascending=False)
                 .rename(index=target_labels))

fig, ax = plt.subplots(1,1,figsize=(15,10))
sns.barplot(y=target_counts.index,
            x=target_counts.values,
            order=target_counts.index,
            ax=ax);

# Write each bar's count at its tip; bar i sits at y position i.
for i, v in enumerate(target_counts.values):
    ax.text(v, i , str(v), color='blue', fontweight='bold')

ax.set_title("Number of images per target");
ax.set_xlabel("Count");
ax.set_ylabel("Target");

In [None]:
col = ['artist','genre','movement']

# Distinct values per target column, computed once and sorted largest first so
# the bars and their annotations share a single ordering.
class_counts = data[col].nunique().sort_values(ascending=False)

fig, ax = plt.subplots(1,1,figsize=(15,10))
sns.barplot(y=class_counts.index,
            x=class_counts.values,
            order=class_counts.index,
            ax=ax);

# Write each bar's count at its tip; bar i sits at y position i.
for i, v in enumerate(class_counts.values):
    ax.text(v, i , str(v), color='blue', fontweight='bold')

ax.set_title("Number of classes per target (all images)");
ax.set_xlabel("Count");
ax.set_ylabel("Target");

In [None]:
# Images per movement; value_counts() orders the movements most frequent first.
movement_counts = data["movement"].value_counts()

fig, ax = plt.subplots(1, 1, figsize=(15, 10))
sns.countplot(data=data, y="movement", order=movement_counts.index, ax=ax)

# Annotate every bar with its count (bar at position `row` holds `total` images).
for row, total in enumerate(movement_counts.values):
    ax.text(total, row, str(total), color='blue', fontweight='bold')

ax.set(title="Number of images per movement (style)",
       ylabel="Movement (style)");

In [None]:
# `dataset` is not created by any earlier cell (the get_dataset calls discard
# their results), so this cell used to fail with NameError on a fresh kernel.
# Build it here by applying the merge_mov1 mapping to the movement column;
# movements mapped to None become missing and are left out of the counts.
# NOTE(review): assumed to match the labels get_dataset(class_=merge_mov1)
# produces -- confirm against neuralart.data.
dataset = data.assign(movement=data["movement"].map(merge_mov1["dico"]))
merged_counts = dataset["movement"].value_counts()

fig, ax = plt.subplots(1,1,figsize=(15,10));
sns.countplot(data=dataset, y="movement",
              order = merged_counts.index,
              ax=ax);

# Write each bar's count at its tip; bar i sits at y position i.
for i, v in enumerate(merged_counts.values):
    ax.text(v, i , str(v), color='blue', fontweight='bold')

ax.set_title("Number of images per merged movement");
ax.set_ylabel("Movement (style)");

In [None]:
# Images per genre; value_counts() orders the genres most frequent first.
genre_counts = data["genre"].value_counts()

fig, ax = plt.subplots(1, 1, figsize=(15, 10))
sns.countplot(data=data, y="genre", order=genre_counts.index, ax=ax)

# Annotate every bar with its count (bar at position `row` holds `total` images).
for row, total in enumerate(genre_counts.values):
    ax.text(total, row, str(total), color='blue', fontweight='bold')

ax.set_title("Number of images per genre");

In [None]:
# Distinct artists per movement, computed once (the group-by is the expensive
# part) and sorted largest first so bars and annotations share one ordering.
artists_per_movement = (data.groupby("movement").artist.nunique()
                        .sort_values(ascending=False))

fig, ax = plt.subplots(1,1,figsize=(15,10));
sns.barplot(y=artists_per_movement.index,
            x=artists_per_movement.values,
            order=artists_per_movement.index,
            ax=ax);

# Write each bar's count at its tip; bar i sits at y position i.
for i, v in enumerate(artists_per_movement.values):
    ax.text(v, i , str(v), color='blue', fontweight='bold')

ax.set_title("Number of artists per movement");
ax.set_xlabel("Count");

In [None]:
# Rows that have a value in 'cs-split-genre'.
data2 = data.loc[data["cs-split-genre"].notna()]
# Per-movement image count within that subset, most frequent first.
genre_movement_counts = data2["movement"].value_counts()

fig, ax = plt.subplots(1, 1, figsize=(15, 10))
sns.countplot(data=data2, y="movement", order=genre_movement_counts.index, ax=ax)

# Annotate every bar with its count (bar at position `row` holds `total` images).
for row, total in enumerate(genre_movement_counts.values):
    ax.text(total, row, str(total), color='blue', fontweight='bold')

ax.set(title="Number of images with genre per movement (style)",
       ylabel="Movement (style)");

In [None]:
# Preview Abstract_Expressionism rows that have a 'cs-split-genre' value.
# Both conditions are combined into one mask on `data`; chaining two [] filters
# applied a mask built on the full frame to an already-filtered frame, which
# makes pandas warn that the boolean key will be reindexed.
data.loc[
    data["cs-split-genre"].notnull()
    & (data["movement"] == "Abstract_Expressionism")
].head(100)

In [None]:
def show_samples(df, sample_size=10, root_path='../raw_data/wikiart/wikiart/',
                 random_state=None):
    """Display randomly sampled images from ``df`` stacked in one column.

    Each image is captioned ``"<movement> - <title> by <artist>"`` using the
    values from the same row as the image path.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'path', 'movement', 'title' and 'artist'.
        Any index is accepted.
    sample_size : int, default 10
        Number of rows to draw. Capped at ``len(df)``; nothing is drawn when
        the result is zero or negative.
    root_path : str, default '../raw_data/wikiart/wikiart/'
        Directory that each 'path' value is joined onto.
    random_state : optional
        Forwarded to ``DataFrame.sample`` to make the selection reproducible.

    Returns
    -------
    None
    """
    n_images = min(sample_size, len(df))
    if n_images <= 0:
        return

    # Sample whole rows rather than only the 'path' column: captions must come
    # from the sampled rows, not from whichever rows carry index labels 0..n-1.
    picked = df[['path', 'movement', 'title', 'artist']].sample(
        n=n_images, random_state=random_state)

    # squeeze=False keeps `axes` two-dimensional even for a single image.
    _, axes = plt.subplots(n_images, 1, figsize=(10, 5 * n_images),
                           constrained_layout=True, squeeze=False)

    rows = picked.itertuples(index=False, name=None)
    for ax, (rel_path, movement, title, artist) in zip(axes[:, 0], rows):
        # `os` comes from the notebook's import cell.
        image = plt.imread(os.path.join(root_path, rel_path))
        ax.text(x=10, y=-2, s=f'{movement} - {title} by {artist}')
        ax.imshow(image)