# Artist representation

The goal of this notebook is to build a representation of each artist from their work: the mean of the code vectors of all the artworks attributed to that artist.

In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Root directory that contains every dataset used in this notebook.
BASE_PATH = '/root/work/datasets/'

# Dataset sub-directories, both resolved relative to BASE_PATH.
BASE_SEQUENCE_PATH, EMBEDDINGS_PATH = (
    os.path.join(BASE_PATH, folder_name)
    for folder_name in ('artwork_sequence', 'Word Embeddings Pretrained Vectors')
)

## Load dataset

**Load metadata**

In [3]:
# Read the artwork metadata table and preview its first rows.
df_all_metadata = pd.read_csv(
    os.path.join(BASE_SEQUENCE_PATH, 'all_metadata.csv')
)
df_all_metadata.head(n=5)

Unnamed: 0,id,author,data,image_url,title,tour_path
0,1,n.v. haagsche plateelfabriek rozenburg,1914,https://lh3.googleusercontent.com/IJn7rB4WvYvv...,vaas beschilderd met paarse seringen en op een...,/root/work/datasets/artwork_sequence/rijksmuse...
1,2,n.v. haagsche plateelfabriek rozenburg,1900,https://lh3.googleusercontent.com/l0ccWh5aCgP5...,vaas,/root/work/datasets/artwork_sequence/rijksmuse...
2,3,n.v. haagsche plateelfabriek rozenburg,1900,https://lh3.googleusercontent.com/uNQWFg-BhiPZ...,vase with lily decoration,/root/work/datasets/artwork_sequence/rijksmuse...
3,4,n.v. haagsche plateelfabriek rozenburg,1902,https://lh3.googleusercontent.com/QRdRjQDGyvDp...,vaas beschilderd met takken met seringen en ee...,/root/work/datasets/artwork_sequence/rijksmuse...
4,6,theo colenbrander,1886,https://lh3.googleusercontent.com/TZqVQVxb-1kl...,garniture of five vases,/root/work/datasets/artwork_sequence/rijksmuse...


In [4]:
# Number of rows and columns in the metadata table.
df_all_metadata.shape

(633, 6)

**Load the artworks' codes**

In [5]:
# Load the saved matrix of artwork codes and show its dimensions.
all_metadata_matrix = np.load(
    os.path.join(BASE_SEQUENCE_PATH, 'all_code_matrix.npy')
)
np.shape(all_metadata_matrix)

(633, 300)

### Get artists

In [6]:
# One row per distinct author, kept as a single-column DataFrame.
# Each row keeps the index label of that author's first occurrence.
artists = (
    df_all_metadata['author']
    .drop_duplicates()
    .to_frame()
)
artists.shape

(356, 1)

### Compute artist's work mean

In [7]:
def get_artist_work_mean(artist, df_all_metadata, all_metadata_matrix):
    """Return the element-wise mean of the code vectors of one artist's artworks.

    Parameters
    ----------
    artist : str
        Author name, compared for equality against the ``'author'`` column.
    df_all_metadata : pandas.DataFrame
        Artwork metadata with an ``'author'`` column. The row at position
        ``i`` is paired with ``all_metadata_matrix[i]``.
    all_metadata_matrix : numpy.ndarray
        Array holding one artwork code per row.

    Returns
    -------
    numpy.ndarray
        Mean over the selected rows (``axis=0``). When no row matches
        ``artist`` the selection is empty and NumPy returns NaN values
        (emitting a ``RuntimeWarning``).
    """
    # Locate the artist's artworks by row *position*, not by index label:
    # the matrix is a plain NumPy array, so labels only coincide with
    # positions when the DataFrame carries a default RangeIndex.
    is_artist_work = (df_all_metadata['author'] == artist).values
    artist_work_positions = np.flatnonzero(is_artist_work)
    artist_work_matrix = all_metadata_matrix[artist_work_positions]

    # Average the selected codes column by column.
    return np.mean(artist_work_matrix, axis=0)
    

In [8]:
# Attach to every artist the mean code vector of their artworks.
artists['work mean'] = artists['author'].apply(
    lambda author_name: get_artist_work_mean(
        author_name, df_all_metadata, all_metadata_matrix
    )
)
artists.head()

Unnamed: 0,author,work mean
0,n.v. haagsche plateelfabriek rozenburg,"[-0.107390165, 0.19273052, 1.124429, 0.8956701..."
4,theo colenbrander,"[-0.051470514, 0.067177355, 0.8284261, 0.66352..."
5,manufacture de sevres,"[-0.07540219, 0.18844432, 1.2512336, 0.6224126..."
6,rene lalique,"[0.01522399, 0.15122974, 1.1221374, 0.7420414,..."
7,lucien gaillard,"[-0.055912737, 0.052373398, 1.2759172, 0.86524..."


### Save data

In [10]:
# Persist only the author names; the DataFrame index is not written.
artists_to_save = artists[['author']]
artists_to_save.to_csv(
    os.path.join(BASE_SEQUENCE_PATH, 'all_artists.csv'),
    index=False,
)

In [17]:
# Stack the per-artist mean vectors into a single 2-D array, one row per artist.
artists_code_list = artists['work mean'].tolist()
artists_code_matrix = np.vstack(artists_code_list)
# NOTE(review): no visible cell writes artists_code_matrix to disk — confirm it is saved elsewhere.

In [18]:
# Dimensions of the stacked per-artist matrix.
artists_code_matrix.shape

(356, 300)