In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier
%matplotlib inline

In [2]:
artworks = pd.read_csv('https://media.githubusercontent.com/media/MuseumofModernArt/collection/master/Artworks.csv')

In [3]:
# Select Columns.
artworks = artworks[['Artist', 'Nationality', 'Gender', 'Date', 'Department',
                    'DateAcquired', 'URL', 'ThumbnailURL', 'Height (cm)', 'Width (cm)']]

#convert URLs to booleans
artworks['URL'] = artworks['URL'].notnull()
artworks['ThumbnailURL'] = artworks['ThumbnailURL'].notnull()

#Drop films and some other tricky rows
artworks = artworks[artworks['Department'] != 'Film']
artworks = artworks[artworks['Department'] != 'Media and PErformance Art']
artworks = artworks[artworks['Department'] != 'Fluxus Collection']

#drop missing data
artworks = artworks.dropna()

In [5]:
#examine data types
artworks.dtypes

Artist           object
Nationality      object
Gender           object
Date             object
Department       object
DateAcquired     object
URL                bool
ThumbnailURL       bool
Height (cm)     float64
Width (cm)      float64
dtype: object

In [6]:
artworks['DateAcquired'] = pd.to_datetime(artworks.DateAcquired)
artworks['YearAcquired'] = artworks.DateAcquired.dt.year
artworks['YearAcquired'].dtype

dtype('int64')

In [7]:
#remove multiple nationalities, genders, and artists
artworks.loc[artworks['Gender'].str.contains('\) \('), 'Gender'] = '\(multiple_persons\)'
artworks.loc[artworks['Nationality'].str.contains('\) \('), 'Nationality'] = '\(multiple_nationalities\)'
artworks.loc[artworks['Artist'].str.contains(','), 'Artist'] = 'Multiple_Artists'

#convert dates to start date, cutting down number of distinct examples
artworks['Date'] = pd.Series(artworks.Date.str.extract(
    '([0-9]{4})', expand=False))[:-1]

#final column drops and NA drop
X = artworks.drop(['Department', 'DateAcquired', 'Artist', 'Nationality', 'Date'], 1)

#create dummies separately 
artists = pd.get_dummies(artworks.Artist)
nationalities = pd.get_dummies(artworks.Nationality)
dates = pd.get_dummies(artworks.Date)

#concat with other variables, but artists slows this way down, so keep it out for now
X = pd.get_dummies(X, sparse=True)
X = pd.concat([X, nationalities, dates], axis=1)

Y = artworks.Department

# One 100-perceptron layer 

In [10]:
#Establish and fit the model, with a single, 1000 perceptron
mlp = MLPClassifier(hidden_layer_sizes=(100,))
mlp.fit(X, Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=200, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [11]:
mlp.score(X, Y)

0.7112123303427651

In [12]:
Y.value_counts()

Drawings & Prints        67719
Photography              24569
Architecture & Design    12255
Painting & Sculpture      3653
Media and Performance      479
Name: Department, dtype: int64

In [14]:
cross_val_score(mlp, X, Y, cv=5)



array([0.70426941, 0.71963563, 0.6590449 , 0.67273982, 0.61687834])

# Two 100-perceptron layers

In [15]:
#Establish and fit the model, with a single, 1000 perceptron
mlp = MLPClassifier(hidden_layer_sizes=(100,100))
mlp.fit(X, Y)



MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100, 100), learning_rate='constant',
              learning_rate_init=0.001, max_iter=200, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [16]:
mlp.score(X, Y)

0.8323809523809523

In [17]:
Y.value_counts()

Drawings & Prints        67719
Photography              24569
Architecture & Design    12255
Painting & Sculpture      3653
Media and Performance      479
Name: Department, dtype: int64

In [18]:
cross_val_score(mlp, X, Y, cv=5)



array([0.71264262, 0.75565881, 0.64602503, 0.67499425, 0.57279588])

# Four 50-perceptron layers

In [19]:
#Establish and fit the model, with a single, 1000 perceptron
mlp = MLPClassifier(hidden_layer_sizes=(50,50,50,50))
mlp.fit(X, Y)



MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(50, 50, 50, 50), learning_rate='constant',
              learning_rate_init=0.001, max_iter=200, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [20]:
mlp.score(X, Y)

0.8275776397515527

In [21]:
Y.value_counts()

Drawings & Prints        67719
Photography              24569
Architecture & Design    12255
Painting & Sculpture      3653
Media and Performance      479
Name: Department, dtype: int64

In [22]:
cross_val_score(mlp, X, Y, cv=5)



array([0.73905042, 0.74526132, 0.68131211, 0.68465608, 0.57992822])

# Ten 20-perceptron layers

In [23]:
#Establish and fit the model, with a single, 1000 perceptron
mlp = MLPClassifier(hidden_layer_sizes=(20,20,20,20,20,20,20,20,20,20))
mlp.fit(X, Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(20, 20, 20, 20, 20, 20, 20, 20, 20, 20),
              learning_rate='constant', learning_rate_init=0.001, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [24]:
mlp.score(X, Y)

0.7941108810674028

In [25]:
Y.value_counts()

Drawings & Prints        67719
Photography              24569
Architecture & Design    12255
Painting & Sculpture      3653
Media and Performance      479
Name: Department, dtype: int64

In [26]:
cross_val_score(mlp, X, Y, cv=5)



array([0.72529444, 0.73932646, 0.67390504, 0.65636071, 0.61628014])