In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
artworks = pd.read_csv('https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Artworks.csv')

In [3]:
artworks.columns

Index(['Title', 'Artist', 'ConstituentID', 'ArtistBio', 'Nationality',
       'BeginDate', 'EndDate', 'Gender', 'Date', 'Medium', 'Dimensions',
       'CreditLine', 'AccessionNumber', 'Classification', 'Department',
       'DateAcquired', 'Cataloged', 'ObjectID', 'URL', 'ThumbnailURL',
       'Circumference (cm)', 'Depth (cm)', 'Diameter (cm)', 'Height (cm)',
       'Length (cm)', 'Weight (kg)', 'Width (cm)', 'Seat Height (cm)',
       'Duration (sec.)'],
      dtype='object')

In [4]:
# Select Columns.
artworks = artworks[['Artist', 'Nationality', 'Gender', 'Date', 'Department',
                    'DateAcquired', 'URL', 'ThumbnailURL', 'Height (cm)', 'Width (cm)']]

# Convert URL's to booleans.
artworks['URL'] = artworks['URL'].notnull()
artworks['ThumbnailURL'] = artworks['ThumbnailURL'].notnull()

# Drop films and some other tricky rows.
artworks = artworks[artworks['Department']!='Film']
artworks = artworks[artworks['Department']!='Media and Performance Art']
artworks = artworks[artworks['Department']!='Fluxus Collection']

# Drop missing data.
artworks = artworks.dropna()

In [5]:
artworks.head()

Unnamed: 0,Artist,Nationality,Gender,Date,Department,DateAcquired,URL,ThumbnailURL,Height (cm),Width (cm)
0,Otto Wagner,(Austrian),(Male),1896,Architecture & Design,1996-04-09,True,True,48.6,168.9
1,Christian de Portzamparc,(French),(Male),1987,Architecture & Design,1995-01-17,True,True,40.6401,29.8451
2,Emil Hoppe,(Austrian),(Male),1903,Architecture & Design,1997-01-15,True,True,34.3,31.8
3,Bernard Tschumi,(),(Male),1980,Architecture & Design,1995-01-17,True,True,50.8,50.8
4,Emil Hoppe,(Austrian),(Male),1903,Architecture & Design,1997-01-15,True,True,38.4,19.1


In [6]:
# Get data types.
artworks.dtypes

Artist           object
Nationality      object
Gender           object
Date             object
Department       object
DateAcquired     object
URL                bool
ThumbnailURL       bool
Height (cm)     float64
Width (cm)      float64
dtype: object

In [7]:
artworks['DateAcquired'] = pd.to_datetime(artworks.DateAcquired)
artworks['YearAcquired'] = artworks.DateAcquired.dt.year
artworks['YearAcquired'].dtype

dtype('int64')

In [8]:
# Remove multiple nationalities, genders, and artists.
artworks.loc[artworks['Gender'].str.contains('\) \('), 'Gender'] = '\(multiple_persons\)'
artworks.loc[artworks['Nationality'].str.contains('\) \('), 'Nationality'] = '\(multiple_nationalities\)'
artworks.loc[artworks['Artist'].str.contains(','), 'Artist'] = 'Multiple_Artists'

# Convert dates to start date, cutting down number of distinct examples.
artworks['Date'] = pd.Series(artworks.Date.str.extract(
    '([0-9]{4})', expand=False))[:-1]

# Final column drops and NA drop.
X = artworks.drop(['Department', 'DateAcquired', 'Artist', 'Nationality', 'Date'], 1)

# Create dummies separately.
artists = pd.get_dummies(artworks.Artist)
nationalities = pd.get_dummies(artworks.Nationality)
dates = pd.get_dummies(artworks.Date)

# Concat with other variables, but artists slows this wayyyyy down so we'll keep it out for now
X = pd.get_dummies(X, sparse=True)
X = pd.concat([X, nationalities, dates], axis=1)

Y = artworks.Department

In [9]:
X_sample = X.sample(5000, random_state=123)
Y_sample = Y[X_sample.index]

## Network: 1000 neurons wide and 1 layer deep

In [10]:
# Import the model.
from sklearn.neural_network import MLPClassifier

# Establish and fit the model, with a single, 1000 perceptron layer.
mlp = MLPClassifier(hidden_layer_sizes=(1000,))
mlp.fit(X_sample, Y_sample)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(1000,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [11]:
# Return the mean accuracy on the given test data and labels
mlp.score(X_sample, Y_sample)

0.7096

In [12]:
Y_sample.value_counts()/len(Y_sample)

Drawings & Prints        0.6224
Photography              0.2238
Architecture & Design    0.1148
Painting & Sculpture     0.0328
Media and Performance    0.0062
Name: Department, dtype: float64

In [13]:
from sklearn.model_selection import cross_val_score
cross_val_score(mlp, X_sample, Y_sample, cv=5)

array([0.697, 0.549, 0.702, 0.529, 0.598])

## Network: 500 neurons wide and 1 layer deep

In [15]:
# Establish and fit the model, with a single, 500 perceptron layer.
mlp = MLPClassifier(hidden_layer_sizes=(500,))
mlp.fit(X_sample, Y_sample)

# Return the mean accuracy on the given test data and labels
mlp.score(X_sample, Y_sample)

0.232

In [17]:
cross_val_score(mlp, X_sample, Y_sample, cv=5)

array([0.705, 0.631, 0.586, 0.662, 0.58 ])

Not as overfit as the 1000 wide one layer network, but much less accurate.

## Network: 500 neurons wide and 2 layers deep

In [18]:
# Establish and fit the model, with two, 500 perceptron layers.
mlp = MLPClassifier(hidden_layer_sizes=(500, 2))
mlp.fit(X_sample, Y_sample)

# Return the mean accuracy on the given test data and labels
mlp.score(X_sample, Y_sample)

0.6224

In [19]:
cross_val_score(mlp, X_sample, Y_sample, cv=5)



array([0.623, 0.622, 0.622, 0.622, 0.623])

More accurate than the 500 wide two layer network and nearing the accuracy of the 1000 wide one layer network. About three times as slow as the 1000 wide one layer network, but not overfit.

## Network: 500 neurons wide and 3 layers deep

In [20]:
# Establish and fit the model, with three, 500 perceptron layers.
mlp = MLPClassifier(hidden_layer_sizes=(500, 3))
mlp.fit(X_sample, Y_sample)

# Return the mean accuracy on the given test data and labels
mlp.score(X_sample, Y_sample)

0.6224

In [21]:
cross_val_score(mlp, X_sample, Y_sample, cv=5)



array([0.623, 0.698, 0.622, 0.622, 0.623])

No improvement in accuracy over the 500 neuron wide two layer network, but took a bit longer to run. Not overfit.

## Network: 1000 neurons wide and 2 layers deep

In [22]:
# Establish and fit the model, with three, 500 perceptron layers.
mlp = MLPClassifier(hidden_layer_sizes=(1000, 2))
mlp.fit(X_sample, Y_sample)

# Return the mean accuracy on the given test data and labels
mlp.score(X_sample, Y_sample)

0.6224

In [23]:
cross_val_score(mlp, X_sample, Y_sample, cv=5)



array([0.623, 0.622, 0.622, 0.622, 0.623])

No improvement in accuracy over the 500 neuron wide two layer network, but took a bit longer to run. Not overfit.

## Conclusions
"A single-layer neural network can only be used to represent linearly separable functions. This means very simple problems where, say, the two classes in a classification problem can be neatly separated by a line." 

"A Multilayer Perceptron can be used to represent convex regions. This means that in effect, they can learn to draw shapes around examples in some high-dimensional space that can separate and classify them, overcoming the limitation of linear separability."

"Since a single sufficiently large hidden layer is adequate for approximation of most functions, why would anyone ever use more? One reason hangs on the words “sufficiently large”. Although a single hidden layer is optimal for some functions, there are others for which a single-hidden-layer-solution is very inefficient compared to solutions with more layers."

(https://machinelearningmastery.com/how-to-configure-the-number-of-layers-and-nodes-in-a-neural-network/)

From my experimentation, similar accuracies can be reached with 1000 wide 1 layer deep networks and 500 wide 2 layer deep networks, with the 500 wide 2 layer deep network demonstrating significantly less overfitting. However, the trade off is computational time.