# Artist representation

The goal of this notebook is to build a representation of each artist from their work: the mean of the code vectors of all the artworks attributed to that artist.

In [None]:
import pandas as pd
import numpy as np
import os

In [None]:
# Root directory holding the datasets. Defaults to the original location and
# can be overridden with the ARTWORK_DATASETS_PATH environment variable so the
# notebook can run on a machine with a different directory layout.
BASE_PATH = os.environ.get('ARTWORK_DATASETS_PATH', '/root/work/datasets/')
# Sub-directory holding the artwork-sequence files.
BASE_SEQUENCE_PATH = os.path.join(BASE_PATH, 'artwork_sequence')


In [None]:
# Input and output locations for the artwork-sequence files.
# The two *_matrix result entries carry no extension: np.save appends '.npy'
# to a file name that does not already end with it.
sequence_path = dict(
    base_path=BASE_SEQUENCE_PATH,
    path_metadata=os.path.join(BASE_SEQUENCE_PATH, 'all_metadata.csv'),
    path_matrix=os.path.join(BASE_SEQUENCE_PATH, 'all_code_matrix.npy'),
    result_artist_list=os.path.join(BASE_SEQUENCE_PATH, 'all_artists.csv'),
    result_artist_matrix=os.path.join(BASE_SEQUENCE_PATH, 'all_artist_code_matrix'),
    result_metadata_artist_matrix=os.path.join(
        BASE_SEQUENCE_PATH, 'all_metadata_artist_code_matrix'
    ),
)


In [None]:
# Input and output locations for the files stored directly under BASE_PATH.
# NOTE(review): the metadata file name ends in '_encoded' while the matrix
# file name ends in '_encode' - confirm both names match the files on disk.
all_data_path = dict(
    base_path=BASE_PATH,
    path_metadata=os.path.join(BASE_PATH, 'train_mayors_style_encoded.csv'),
    path_matrix=os.path.join(BASE_PATH, 'train_mayors_style_encode.npy'),
    result_artist_list=os.path.join(BASE_PATH, 'all_artists.csv'),
    result_artist_matrix=os.path.join(BASE_PATH, 'all_artist_code_matrix'),
    result_metadata_artist_matrix=os.path.join(
        BASE_PATH, 'train_mayors_style_artist_code_matrix'
    ),
)


In [None]:
# Active path set: every later cell reads its inputs from and writes its
# outputs to the locations stored in `work_path`. Assign `sequence_path`
# instead to process the artwork-sequence files.
work_path = all_data_path

## Load dataset

In [None]:
def get_all_metadata(path_metadata, path_matrix):
    """Load the metadata table and its code matrix from disk.

    Parameters
    ----------
    path_metadata : path of a CSV file, read with ``pd.read_csv``.
    path_matrix : path of a NumPy file, read with ``np.load``.

    Returns
    -------
    tuple
        ``(metadata DataFrame, loaded matrix)``, in that order.
    """
    return pd.read_csv(path_metadata), np.load(path_matrix)

**Load metadata**

In [None]:
df_all_metadata, all_metadata_matrix = get_all_metadata(
    path_metadata=work_path['path_metadata'],
    path_matrix=work_path['path_matrix'],
)

# The rest of the notebook refers to the artist column as 'author'.
df_all_metadata = df_all_metadata.rename(columns={'artist': 'author'})

In [None]:
print(df_all_metadata.shape)
print(all_metadata_matrix.shape)

# Later cells use the row positions of the metadata to pick rows of the code
# matrix, so fail early if the two inputs do not have the same number of rows.
assert len(df_all_metadata) == all_metadata_matrix.shape[0], (
    'metadata rows and code-matrix rows are not aligned'
)

In [None]:
# Preview the first rows of the metadata after the 'artist' -> 'author' rename.
df_all_metadata.head()

### Get artists

In [None]:
# One row per distinct author, kept as a one-column DataFrame so that further
# per-artist columns can be attached to it.
artists = df_all_metadata['author'].drop_duplicates().to_frame()
artists.shape

### Compute artist's work mean

In [None]:
def get_artist_work_mean(artist, df_all_metadata,all_metadata_matrix):
    """Return the mean code vector of all artworks attributed to ``artist``.

    Parameters
    ----------
    artist : value compared against the 'author' column.
    df_all_metadata : DataFrame with an 'author' column; its i-th row is
        taken to describe the i-th row of ``all_metadata_matrix``.
    all_metadata_matrix : array whose rows are the artwork codes.

    Returns
    -------
    numpy.ndarray
        Mean over the selected rows (``axis=0``). When no row matches, the
        mean of an empty selection is NaN.
    """
    # Select rows by position rather than by index label: the labels only
    # coincide with positions for a default RangeIndex, so a filtered or
    # re-indexed DataFrame would otherwise pick wrong rows or raise IndexError.
    is_artist_work = (df_all_metadata['author'] == artist).values
    artist_work_positions = np.flatnonzero(is_artist_work)
    artist_work_matrix = all_metadata_matrix[artist_work_positions]

    # Compute the work mean
    artist_work_mean = np.mean(artist_work_matrix, axis=0)

    return artist_work_mean
    

In [None]:
# Attach to every artist the mean code vector of their artworks; each cell of
# the new column holds the array returned by get_artist_work_mean.
artists['work mean'] = artists['author'].apply(
    lambda name: get_artist_work_mean(name, df_all_metadata, all_metadata_matrix)
)
artists.head()

## Add artist's work mean to the dataset

In [None]:
# Drop any 'work mean' column left by a previous run of this cell before
# merging; otherwise re-running it yields 'work mean_x' / 'work mean_y' and
# the later lookup of df_all_metadata['work mean'] fails.
df_all_metadata = pd.merge(
    df_all_metadata.drop(columns=['work mean'], errors='ignore'),
    artists,
    left_on='author',
    right_on='author',
    how='left',
)
df_all_metadata.head()

In [None]:
# Shape of the metadata after the 'work mean' column has been merged in.
df_all_metadata.shape

### Save data

In [None]:
# Persist only the author names, without the index, as the artist list.
artists_to_save = artists[['author']]
artists_to_save.to_csv(work_path['result_artist_list'], index=False)

In [None]:
# Stack the per-artist mean vectors into one matrix with a row per artist,
# in the same order as the rows of `artists`, and save it.
artists_code_list = [work_mean for work_mean in artists['work mean']]
artists_code_matrix = np.vstack(artists_code_list)
np.save(work_path['result_artist_matrix'], artists_code_matrix)

In [None]:
# Stack the 'work mean' vector of every metadata row into one matrix with a
# row per artwork, in the same order as `df_all_metadata`, and save it.
artists_metadata_code_list = [
    work_mean for work_mean in df_all_metadata['work mean']
]
artists_metadata_code_matrix = np.vstack(artists_metadata_code_list)
np.save(work_path['result_metadata_artist_matrix'], artists_metadata_code_matrix)