In [23]:
import timeit

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

%matplotlib inline

In [2]:
# Load the Museum of Modern Art collection export (Artworks.csv) straight from
# the museum's public GitHub repository into a DataFrame.
artworks = pd.read_csv(
    filepath_or_buffer='https://media.githubusercontent.com/media/MuseumofModernArt/collection/master/Artworks.csv'
)

In [3]:
# Select Columns.
# The explicit .copy() makes `artworks` an independent frame, so the column
# assignments below modify it directly instead of writing into a slice of the
# raw data (which is what triggers pandas' SettingWithCopyWarning).
artworks = artworks[['Artist', 'Nationality', 'Gender', 'Date', 'Department',
                     'DateAcquired', 'URL', 'ThumbnailURL', 'Height (cm)', 'Width (cm)']].copy()

# Convert URL's to booleans: True when a link is present, False when it is missing.
artworks['URL'] = artworks['URL'].notnull()
artworks['ThumbnailURL'] = artworks['ThumbnailURL'].notnull()

# Departments left out of the analysis, filtered in a single pass.
excluded_departments = [
    # Films and some other tricky rows.
    'Film',
    'Media and Performance Art',
    'Fluxus Collection',
    # Removed for the sake of computational efficiency.
    'Architecture & Design',
    'Drawings',
    'Painting & Sculpture',
]
# Rows with a missing Department pass this filter (as they did with the
# chained `!=` comparisons) and are removed by the dropna() below.
artworks = artworks[~artworks['Department'].isin(excluded_departments)]

# Drop missing data.
artworks = artworks.dropna()

In [4]:
# Report (rows, columns) of the cleaned frame, then preview its first five records.
print((len(artworks.index), len(artworks.columns)))
artworks.head(n=5)

(79297, 10)


Unnamed: 0,Artist,Nationality,Gender,Date,Department,DateAcquired,URL,ThumbnailURL,Height (cm),Width (cm)
7955,K. P. Brehmer,(German),(Male),1971,Prints & Illustrated Books,1973-01-04,True,True,14.9,16.6
7956,"Various Artists, Wolf Vostell, Sigmar Polke, K...",(Various) (German) (German) (German) (German) ...,() (Male) (Male) (Male) (Male) (Male) (Male),1971,Prints & Illustrated Books,1973-01-04,True,True,20.8,24.0
7957,K. H. Hödicke,(German),(Male),1971,Prints & Illustrated Books,1973-01-04,True,True,10.8,8.1
7958,Konrad Lueg,(German),(Male),1971,Prints & Illustrated Books,1973-01-04,True,True,2.6,2.0
7959,Unknown Artist,(Nationality Unknown),(),c. 1920,Prints & Illustrated Books,2001-01-24,True,True,18.7,44.8


In [5]:
# Get data types.
# Shows the dtype pandas holds for each remaining column, which tells us which
# columns still need conversion before modelling.
artworks.dtypes

Artist           object
Nationality      object
Gender           object
Date             object
Department       object
DateAcquired     object
URL                bool
ThumbnailURL       bool
Height (cm)     float64
Width (cm)      float64
dtype: object

In [6]:
# Parse the acquisition date strings into real datetimes...
artworks['DateAcquired'] = pd.to_datetime(artworks['DateAcquired'])
# ...and keep just the calendar year as a numeric feature.
artworks['YearAcquired'] = artworks['DateAcquired'].dt.year
# Display the dtype of the new column as a sanity check.
artworks['YearAcquired'].dtype

dtype('int64')

In [7]:
# Remove multiple nationalities, genders, and artists.
# The patterns are raw strings so that `\)` and `\(` reach the regex engine as
# escaped parentheses. The replacement labels are plain text (not regexes), so
# they carry no backslashes and match the '(...)' style of the other values.
artworks.loc[artworks['Gender'].str.contains(r'\) \('), 'Gender'] = '(multiple_persons)'
artworks.loc[artworks['Nationality'].str.contains(r'\) \('), 'Nationality'] = '(multiple_nationalities)'
artworks.loc[artworks['Artist'].str.contains(','), 'Artist'] = 'Multiple_Artists'

# Convert dates to start date, cutting down number of distinct examples.
# Keeps the first 4-digit year found in each Date string; rows without one
# become NaN. Every row is kept here: slicing off the last element before
# assigning would silently blank the final row's year via index alignment.
artworks['Date'] = artworks['Date'].str.extract('([0-9]{4})', expand=False)

# Final column drops: everything that is either the target or is one-hot
# encoded separately below. `columns=` replaces the positional axis argument,
# which pandas 2.0 no longer accepts.
X = artworks.drop(columns=['Department', 'DateAcquired', 'Artist', 'Nationality', 'Date'])

# Create dummies separately.
# NOTE: `artists` is computed but deliberately left out of X (see below).
artists = pd.get_dummies(artworks.Artist)
nationalities = pd.get_dummies(artworks.Nationality)
dates = pd.get_dummies(artworks.Date)  # NaN years yield an all-zero dummy row

# Concat with other variables, but artists slows this wayyyyy down so we'll keep it out for now
X = pd.get_dummies(X, sparse=True)
X = pd.concat([X, nationalities, dates], axis=1)

# Target: the department each artwork belongs to.
Y = artworks.Department

In [10]:
# Split features and target with a fixed seed. train_test_split returns the
# train part first, so Xt/Yt hold 90% of the rows and Xz/Yz the remaining 10%.
Xt, Xz, Yt, Yz = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=42,
)

## Config 1: 1 Layer, 1000 

In [71]:
# Data prep is done, so build the first model.
# Config 1: one hidden layer of 1000 units, every other setting left at its default.
# Neural networks are computationally intensive; the timer below brackets both
# construction and fitting so the configurations can be compared on runtime.
start = timeit.default_timer()
mlp1 = MLPClassifier(
    hidden_layer_sizes=(1000,),
    random_state=42,
).fit(Xz, Yz)
stop = timeit.default_timer()

runtime1 = stop - start
print('Runtime: ', runtime1)

Runtime:  8.933158241767842


In [78]:
# Mean accuracy of the fitted Config 1 network on the Xz/Yz split.
# NOTE(review): Xz/Yz is also the split the model is fit on, so this reads as a
# training-set score rather than a held-out one -- confirm that is intended.
mlp1.score(X=Xz, y=Yz)

0.6102143757881463

In [79]:
# 5-fold cross-validation of the Config 1 architecture on the Xz/Yz split.
# cross_val_score fits a fresh clone of the estimator for each fold, so the
# returned array holds one out-of-fold score per fold.
# (cross_val_score is imported once, in the notebook's import cell.)
cross_val_score(mlp1, Xz, Yz, cv=5)

array([0.68978562, 0.75598991, 0.75094578, 0.70554855, 0.73266078])

## Config 2: 1 Layer, 1000 with alpha = 0.5

In [53]:
# Config 2: one hidden layer of 1000 units, with the regularisation term alpha set to 0.5.
# The timer brackets both construction and fitting so the configurations can be
# compared on runtime.
start = timeit.default_timer()
mlp2 = MLPClassifier(
    hidden_layer_sizes=(1000,),
    alpha=0.5,
    random_state=42,
).fit(Xz, Yz)
stop = timeit.default_timer()

runtime2 = stop - start
print('Runtime: ', runtime2)

Runtime:  7.140291721827225


In [54]:
# Mean accuracy of the fitted Config 2 network on the Xz/Yz split.
# NOTE(review): Xz/Yz is also the split the model is fit on, so this reads as a
# training-set score rather than a held-out one -- confirm that is intended.
mlp2.score(X=Xz, y=Yz)

0.6915510718789407

In [55]:
# 5-fold cross-validation of the Config 2 architecture on the Xz/Yz split.
# cross_val_score fits a fresh clone of the estimator for each fold, so the
# returned array holds one out-of-fold score per fold.
# (cross_val_score is imported once, in the notebook's import cell.)
cross_val_score(mlp2, Xz, Yz, cv=5)

array([0.73896595, 0.31525851, 0.6998739 , 0.71941992, 0.70996217])

Increasing alpha decreased the runtime and increased the score on this data. Of course, this result is specific to this particular dataset: alpha is the L2 regularization strength, which penalizes very large parameters and shrinks them.

## Config 3: 1 Layer, 1000 with activation = logistic

In [59]:
# Config 3: one hidden layer of 1000 units using the logistic activation function.
# The timer brackets both construction and fitting so the configurations can be
# compared on runtime.
start = timeit.default_timer()
mlp3 = MLPClassifier(
    hidden_layer_sizes=(1000,),
    activation='logistic',
    random_state=42,
).fit(Xz, Yz)
stop = timeit.default_timer()

runtime3 = stop - start
print('Runtime: ', runtime3)

Runtime:  69.90398368867204


In [60]:
# Mean accuracy of the fitted Config 3 network on the Xz/Yz split.
# NOTE(review): Xz/Yz is also the split the model is fit on, so this reads as a
# training-set score rather than a held-out one -- confirm that is intended.
mlp3.score(X=Xz, y=Yz)

0.7601513240857504

In [61]:
# 5-fold cross-validation of the Config 3 architecture on the Xz/Yz split.
# cross_val_score fits a fresh clone of the estimator for each fold, so the
# returned array holds one out-of-fold score per fold.
# (cross_val_score is imported once, in the notebook's import cell.)
cross_val_score(mlp3, Xz, Yz, cv=5)

array([0.69167718, 0.69167718, 0.73076923, 0.69167718, 0.69167718])

Moving to a logistic activation function increases the runtime dramatically, but also increases performance. This makes sense, as this activation function allows for continuous outputs, giving more accuracy at the cost of much more computational resources.

## Config 4: 1 Layer, 100

In [75]:
# Config 4: one hidden layer of 100 units, every other setting left at its default.
# The timer brackets both construction and fitting so the configurations can be
# compared on runtime.
start = timeit.default_timer()
mlp4 = MLPClassifier(
    hidden_layer_sizes=(100,),
    random_state=42,
).fit(Xz, Yz)
stop = timeit.default_timer()

runtime4 = stop - start
print('Runtime: ', runtime4)

Runtime:  1.7770648221999181


In [76]:
# Mean accuracy of the fitted Config 4 network on the Xz/Yz split.
# NOTE(review): Xz/Yz is also the split the model is fit on, so this reads as a
# training-set score rather than a held-out one -- confirm that is intended.
mlp4.score(X=Xz, y=Yz)

0.7064312736443884

In [77]:
# 5-fold cross-validation of the Config 4 architecture on the Xz/Yz split.
# cross_val_score fits a fresh clone of the estimator for each fold, so the
# returned array holds one out-of-fold score per fold.
# (cross_val_score is imported once, in the notebook's import cell.)
cross_val_score(mlp4, Xz, Yz, cv=5)

array([0.7629256 , 0.71941992, 0.6443884 , 0.74842371, 0.78184111])

As expected, reducing the width of the first layer greatly reduced the runtime. Unexpectedly, it also increased the accuracy.

## Config 5: 3 Layers, 1000, 100, 10

In [62]:
# Config 5: three hidden layers that narrow from 1000 to 100 to 10 units.
# The timer brackets both construction and fitting so the configurations can be
# compared on runtime.
start = timeit.default_timer()
mlp5 = MLPClassifier(
    hidden_layer_sizes=(1000, 100, 10),
    random_state=42,
).fit(Xz, Yz)
stop = timeit.default_timer()

runtime5 = stop - start
print('Runtime: ', runtime5)

Runtime:  11.405581517726205


In [63]:
# Mean accuracy of the fitted Config 5 network on the Xz/Yz split.
# NOTE(review): Xz/Yz is also the split the model is fit on, so this reads as a
# training-set score rather than a held-out one -- confirm that is intended.
mlp5.score(X=Xz, y=Yz)

0.35182849936948296

In [64]:
# 5-fold cross-validation of the Config 5 architecture on the Xz/Yz split.
# cross_val_score fits a fresh clone of the estimator for each fold, so the
# returned array holds one out-of-fold score per fold.
# (cross_val_score is imported once, in the notebook's import cell.)
cross_val_score(mlp5, Xz, Yz, cv=5)

array([0.32786885, 0.31021438, 0.69167718, 0.69167718, 0.69041614])

Adding in extra layers did increase runtime, but not by that much. Interestingly, it also drastically reduced the accuracy.

## Config 6: 3 Layers, 100, 100, 100

In [68]:
# Config 6: three hidden layers of 100 units each.
# The timer brackets both construction and fitting so the configurations can be
# compared on runtime.
start = timeit.default_timer()
mlp6 = MLPClassifier(
    hidden_layer_sizes=(100, 100, 100),
    random_state=42,
).fit(Xz, Yz)
stop = timeit.default_timer()

runtime6 = stop - start
print('Runtime: ', runtime6)

Runtime:  1.5941980375680487


In [69]:
# Mean accuracy of the fitted Config 6 network on the Xz/Yz split.
# NOTE(review): Xz/Yz is also the split the model is fit on, so this reads as a
# training-set score rather than a held-out one -- confirm that is intended.
mlp6.score(X=Xz, y=Yz)

0.3566204287515763

In [70]:
# 5-fold cross-validation of the Config 6 architecture on the Xz/Yz split.
# cross_val_score fits a fresh clone of the estimator for each fold, so the
# returned array holds one out-of-fold score per fold.
# (cross_val_score is imported once, in the notebook's import cell.)
cross_val_score(mlp6, Xz, Yz, cv=5)

array([0.68852459, 0.69546028, 0.69167718, 0.69167718, 0.69041614])

This configuration has very similar results to the previous one, despite having a very different layer layout. This could be because they both have the same amount of neuron connections in total.

## Config 7: 7 Layers, 10, 10, 10, 10, 10, 10, 10

In [49]:
# Config 7: seven hidden layers of 10 units each.
# The timer brackets both construction and fitting so the configurations can be
# compared on runtime.
start = timeit.default_timer()
mlp7 = MLPClassifier(
    hidden_layer_sizes=(10,) * 7,
    random_state=42,
).fit(Xz, Yz)
stop = timeit.default_timer()

runtime7 = stop - start
print('Runtime: ', runtime7)

Runtime:  1.2823904581418901


In [50]:
# Mean accuracy of the fitted Config 7 network on the Xz/Yz split.
# NOTE(review): Xz/Yz is also the split the model is fit on, so this reads as a
# training-set score rather than a held-out one -- confirm that is intended.
mlp7.score(X=Xz, y=Yz)

0.6916771752837326

In [51]:
# 5-fold cross-validation of the Config 7 architecture on the Xz/Yz split.
# cross_val_score fits a fresh clone of the estimator for each fold, so the
# returned array holds one out-of-fold score per fold.
# (cross_val_score is imported once, in the notebook's import cell.)
cross_val_score(mlp7, Xz, Yz, cv=5)

array([0.69167718, 0.69167718, 0.74842371, 0.69167718, 0.69167718])

This configuration has the lowest runtime, while still having the 3rd highest accuracy, and being very close to the top 2 as well.  Having multiple layers with small groups seems to be a good configuration for this data.

# Conclusion

I found that increasing alpha can drastically increase the performance and accuracy of the MLP. Additionally, the computation tradeoff for the logistic activation function is probably too great for the accuracy boost it gives.

The number of layers is not anywhere near as important as the total number of neurons, which seems to be much more important when it comes to dictating accuracy and runtime. Also, more neurons does not necessarily equate to more accuracy. The optimal layer and neuron configuration appears to be very dependent on the data.