# Multi Layer (Deep) Neural Network

In [7]:
from __future__ import print_function # for python 2 and 3 compatibility

import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import linkage, dendrogram
from scipy.spatial.distance import pdist

## Load data

In [17]:
# read csv
all_charts = pd.read_csv('BillboardLyricData.txt', sep='\t', encoding='utf-8')
all_charts = all_charts.dropna()

# countvecotrize data
num_features = 500
vectorizer = CountVectorizer(max_df=0.5, min_df=0.0, max_features=num_features, stop_words='english')
X = np.asarray(vectorizer.fit_transform(all_charts.lyrics).todense()).astype(np.float32)

# y to ints
labels = np.unique(all_charts.chart).tolist()
num_labels = len(labels)
class_mapping = {label:idx for idx,label in enumerate(labels)}
y = all_charts.chart.map(class_mapping)

# split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# scale
std_scaler = StandardScaler()
X_train_std = std_scaler.fit_transform(X_train)
X_test_std = std_scaler.transform(X_test)


## Fit data with MLPClassifier
### http://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html

In [18]:
model = MLPClassifier(alpha=1e-5,
                      hidden_layer_sizes=(100),
                      activation='logistic',
                      batch_size=10,
                      learning_rate_init=0.01,
                      learning_rate='constant')
model.fit(X_train_std, y_train)

MLPClassifier(activation='logistic', alpha=1e-05, batch_size=10, beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 50), learning_rate='constant',
       learning_rate_init=0.01, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [16]:
# evaluate model
train_acc = model.score(X_train_std, y_train)
test_acc = model.score(X_test_std, y_test)
print('Train accuracy: {}'.format(train_acc))
print('Test accuracy: {}'.format(test_acc))

Train accuracy: 0.934837092732
Test accuracy: 0.421052631579


## Singular Value Decomposition of Hidden Representations

In [None]:
# compute hidden representations
hiddens_mat = ? # in-class exercise

In [None]:
# compute the SVD
U, s, V = np.linalg.svd(hiddens_mat)

# plot U, s, V
fig1, axarr1 = plt.subplots(1, 2)
for ax, mat, ytlabels in zip(axarr1, [U, V], [item_labels, feature_labels]):
    ax.imshow(mat, interpolation='none')
    ax.set_yticks(np.arange(V.shape[0]) + 0.5, minor=False)
    ax.set_yticklabels(ytlabels, minor=False)
    ax.set_xticks([])
    ax.plot(s)

# dimensionality reduction to 2D using first two columns of U
fig2, ax2 = plt.subplots()
ax2.plot(U[:, 0], U[:,1], "o")
ax2.set_xlabel('dim1')
ax2.set_ylabel('dim2')
plt.suptitle('Dimensionality Reduction with First 2 PCs', fontweight='bold', fontsize=14)

plt.show()

## Hierarchical Clustering of Hidden Representations

In [None]:
# compute distances, and linkages
data_dist = pdist(hiddens_mat) 
data_link = linkage(data_dist) 

# plot the cluster diagram using linkages
fig3, ax3 = plt.subplots()
dendrogram(data_link, ax=ax3)
ax3.set_xlabel('Items')
ax3.set_ylabel('Distance')
plt.suptitle('Hierarchical clustering', fontweight='bold', fontsize=14)

plt.show()