# Supervised neural nets

In [4]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
%matplotlib inline

artworks = pd.read_csv('https://media.githubusercontent.com/media/MuseumofModernArt/collection/master/Artworks.csv')
artworks.columns

Index(['Title', 'Artist', 'ConstituentID', 'ArtistBio', 'Nationality',
       'BeginDate', 'EndDate', 'Gender', 'Date', 'Medium', 'Dimensions',
       'CreditLine', 'AccessionNumber', 'Classification', 'Department',
       'DateAcquired', 'Cataloged', 'ObjectID', 'URL', 'ThumbnailURL',
       'Circumference (cm)', 'Depth (cm)', 'Diameter (cm)', 'Height (cm)',
       'Length (cm)', 'Weight (kg)', 'Width (cm)', 'Seat Height (cm)',
       'Duration (sec.)'],
      dtype='object')

In [5]:
# Select Columns.
artworks = artworks[['Artist', 'Nationality', 'Gender', 'Date', 'Department',
                    'DateAcquired', 'URL', 'ThumbnailURL', 'Height (cm)', 'Width (cm)']]

# Convert URL's to booleans.
artworks['URL'] = artworks['URL'].notnull()
artworks['ThumbnailURL'] = artworks['ThumbnailURL'].notnull()

# Drop films and some other tricky rows.
artworks = artworks[artworks['Department']!='Film']
artworks = artworks[artworks['Department']!='Media and Performance Art']
artworks = artworks[artworks['Department']!='Fluxus Collection']

# Drop missing data.
artworks = artworks.dropna()

In [19]:
artworks['DateAcquired'] = pd.to_datetime(artworks.DateAcquired)
artworks['YearAcquired'] = artworks.DateAcquired.dt.year

# Remove multiple nationalities, genders, and artists.
artworks.loc[artworks['Gender'].str.contains('\) \('), 'Gender'] = '\(multiple_persons\)'
artworks.loc[artworks['Nationality'].str.contains('\) \('), 'Nationality'] = '\(multiple_nationalities\)'
artworks.loc[artworks['Artist'].str.contains(','), 'Artist'] = 'Multiple_Artists'

# Convert dates to start date, cutting down number of distinct examples.
artworks['Date'] = pd.Series(artworks.Date.str.extract(
    '([0-9]{4})', expand=False))[:-1]

# Final column drops and NA drop.
X = artworks.drop(['Department', 'DateAcquired', 'Artist', 'Nationality', 'Date'], 1)

# Create dummies separately.
artists = pd.get_dummies(artworks.Artist)
nationalities = pd.get_dummies(artworks.Nationality)
dates = pd.get_dummies(artworks.Date)

# Concat with other variables, but artists slows this wayyyyy down so we'll keep it out for now
X = pd.get_dummies(X, sparse=True)
X = pd.concat([X, nationalities, dates], axis=1)

Y = artworks.Department

X.head()

Unnamed: 0,URL,ThumbnailURL,Height (cm),Width (cm),YearAcquired,Gender_(),Gender_(Female),Gender_(Male),Gender_(male),Gender_\(multiple_persons\),...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,True,True,48.6,168.9,1996,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,True,True,40.6401,29.8451,1995,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,True,True,34.3,31.8,1997,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,True,True,50.8,50.8,1995,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,True,True,38.4,19.1,1997,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
mlp = MLPClassifier(hidden_layer_sizes=(1000,))
mlp.fit(X, Y)

In [None]:
mlp.score(X, Y)

In [None]:
Y.value_counts()/len(Y)

In [None]:
cross_val_score(mlp, X, Y, cv=5)

We included one parameter: hidden layer size. How many layers to include is determined by two things: computational resources and cross validation searching for convergence. It's generally < # of input variables.

You can also set an alpha. Neural networks like this use a regularization parameter that penalizes large coefficients – alpha scales that penalty.

Lastly, the activation function determines whether the output from an individual perceptron is binary or continuous. By default, this is a 'relu' ('rectified linear unit function') function. Sigmoid (called 'logistic' by SKLearn, because it's a 'logistic sigmoid function') is a reasonable alternative. The sigmoid allows for continuous variables between 0 and 1, which results in a more nuanced model. It does come at the cost of increased computational complexity.

## Drill: Playing with layers

Experiment with different hidden layer structures. You can try this on a subset of the data to improve runtime. See how things vary. See what seems to matter the most. Feel free to manipulate other parameters as well. It may also be beneficial to do some real feature selection work.

In [20]:
X = X[:math.floor(X.shape[0]/2)]
Y = Y[:math.floor(Y.shape[0]/2)]

print(X.shape, Y.shape)

(52667, 314) (52667,)


In [21]:
mlp_2_lyrs = MLPClassifier(hidden_layer_sizes=(1000,1000))
mlp_2_lyrs.fit(X, Y)
cross_val_score(mlp_2_lyrs, X, Y, cv=5)

array([0.62572378, 0.6886273 , 0.71565556, 0.77992974, 0.44948728])

In [22]:
# alpha : float, optional, default 0.0001
mlp_lower_alpha = MLPClassifier(hidden_layer_sizes=(1000,), alpha=0.00001)
mlp_lower_alpha.fit(X, Y)
cross_val_score(mlp_lower_alpha, X, Y, cv=5)

array([0.58224964, 0.64828175, 0.53640938, 0.44308364, 0.4736992 ])

In [23]:
mlp_higher_alpha = MLPClassifier(hidden_layer_sizes=(1000,), alpha=0.001)
mlp_higher_alpha.fit(X, Y)
cross_val_score(mlp_higher_alpha, X, Y, cv=5)

array([0.59155197, 0.48091893, 0.76511915, 0.76654325, 0.46771743])

In [24]:
# activation – default 'relu'
mlp_sigmoid = MLPClassifier(hidden_layer_sizes=(1000,), activation='logistic')
mlp_sigmoid.fit(X, Y)
cross_val_score(mlp_sigmoid, X, Y, cv=5)

array([0.62857143, 0.69128536, 0.77328396, 0.79882275, 0.43163692])