In [None]:
# Adapted from sample digits recognition client on Scikit-Learn site.

import numpy as np
from sklearn import linear_model, datasets
from sklearn.cross_validation import train_test_split
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn.lda import LDA


In [None]:
# import some data to play with
iris = datasets.load_iris()
iris.viewkeys()



In [36]:
features = iris.data
print(features)

[[ 5.1  3.5  1.4  0.2]
 [ 4.9  3.   1.4  0.2]
 [ 4.7  3.2  1.3  0.2]
 [ 4.6  3.1  1.5  0.2]
 [ 5.   3.6  1.4  0.2]
 [ 5.4  3.9  1.7  0.4]
 [ 4.6  3.4  1.4  0.3]
 [ 5.   3.4  1.5  0.2]
 [ 4.4  2.9  1.4  0.2]
 [ 4.9  3.1  1.5  0.1]
 [ 5.4  3.7  1.5  0.2]
 [ 4.8  3.4  1.6  0.2]
 [ 4.8  3.   1.4  0.1]
 [ 4.3  3.   1.1  0.1]
 [ 5.8  4.   1.2  0.2]
 [ 5.7  4.4  1.5  0.4]
 [ 5.4  3.9  1.3  0.4]
 [ 5.1  3.5  1.4  0.3]
 [ 5.7  3.8  1.7  0.3]
 [ 5.1  3.8  1.5  0.3]
 [ 5.4  3.4  1.7  0.2]
 [ 5.1  3.7  1.5  0.4]
 [ 4.6  3.6  1.   0.2]
 [ 5.1  3.3  1.7  0.5]
 [ 4.8  3.4  1.9  0.2]
 [ 5.   3.   1.6  0.2]
 [ 5.   3.4  1.6  0.4]
 [ 5.2  3.5  1.5  0.2]
 [ 5.2  3.4  1.4  0.2]
 [ 4.7  3.2  1.6  0.2]
 [ 4.8  3.1  1.6  0.2]
 [ 5.4  3.4  1.5  0.4]
 [ 5.2  4.1  1.5  0.1]
 [ 5.5  4.2  1.4  0.2]
 [ 4.9  3.1  1.5  0.1]
 [ 5.   3.2  1.2  0.2]
 [ 5.5  3.5  1.3  0.2]
 [ 4.9  3.1  1.5  0.1]
 [ 4.4  3.   1.3  0.2]
 [ 5.1  3.4  1.5  0.2]
 [ 5.   3.5  1.3  0.3]
 [ 4.5  2.3  1.3  0.3]
 [ 4.4  3.2  1.3  0.2]
 [ 5.   3.5

In [43]:
target = iris.target
print(target)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [45]:
X = iris.data  # we only take the first two features.
Y = iris.target
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=10)

In [46]:
print('X:')
print(X)
print
print('Y:')
print(Y)

X:
[[ 5.1  3.5  1.4  0.2]
 [ 4.9  3.   1.4  0.2]
 [ 4.7  3.2  1.3  0.2]
 [ 4.6  3.1  1.5  0.2]
 [ 5.   3.6  1.4  0.2]
 [ 5.4  3.9  1.7  0.4]
 [ 4.6  3.4  1.4  0.3]
 [ 5.   3.4  1.5  0.2]
 [ 4.4  2.9  1.4  0.2]
 [ 4.9  3.1  1.5  0.1]
 [ 5.4  3.7  1.5  0.2]
 [ 4.8  3.4  1.6  0.2]
 [ 4.8  3.   1.4  0.1]
 [ 4.3  3.   1.1  0.1]
 [ 5.8  4.   1.2  0.2]
 [ 5.7  4.4  1.5  0.4]
 [ 5.4  3.9  1.3  0.4]
 [ 5.1  3.5  1.4  0.3]
 [ 5.7  3.8  1.7  0.3]
 [ 5.1  3.8  1.5  0.3]
 [ 5.4  3.4  1.7  0.2]
 [ 5.1  3.7  1.5  0.4]
 [ 4.6  3.6  1.   0.2]
 [ 5.1  3.3  1.7  0.5]
 [ 4.8  3.4  1.9  0.2]
 [ 5.   3.   1.6  0.2]
 [ 5.   3.4  1.6  0.4]
 [ 5.2  3.5  1.5  0.2]
 [ 5.2  3.4  1.4  0.2]
 [ 4.7  3.2  1.6  0.2]
 [ 4.8  3.1  1.6  0.2]
 [ 5.4  3.4  1.5  0.4]
 [ 5.2  4.1  1.5  0.1]
 [ 5.5  4.2  1.4  0.2]
 [ 4.9  3.1  1.5  0.1]
 [ 5.   3.2  1.2  0.2]
 [ 5.5  3.5  1.3  0.2]
 [ 4.9  3.1  1.5  0.1]
 [ 4.4  3.   1.3  0.2]
 [ 5.1  3.4  1.5  0.2]
 [ 5.   3.5  1.3  0.3]
 [ 4.5  2.3  1.3  0.3]
 [ 4.4  3.2  1.3  0.2]
 [ 5.   

In [47]:
print('X_train:')
print(X_train)
print
print('y_train:')
print(Y_train)

X_train:
[[ 6.6  2.9  4.6  1.3]
 [ 6.2  2.9  4.3  1.3]
 [ 7.2  3.   5.8  1.6]
 [ 5.8  2.8  5.1  2.4]
 [ 6.3  2.5  5.   1.9]
 [ 4.6  3.2  1.4  0.2]
 [ 6.7  3.3  5.7  2.1]
 [ 6.9  3.2  5.7  2.3]
 [ 7.7  2.6  6.9  2.3]
 [ 6.9  3.1  5.1  2.3]
 [ 5.   3.4  1.6  0.4]
 [ 5.   3.5  1.6  0.6]
 [ 5.2  2.7  3.9  1.4]
 [ 4.5  2.3  1.3  0.3]
 [ 6.3  3.3  4.7  1.6]
 [ 5.2  4.1  1.5  0.1]
 [ 6.9  3.1  4.9  1.5]
 [ 5.9  3.2  4.8  1.8]
 [ 5.6  2.8  4.9  2. ]
 [ 6.7  3.3  5.7  2.5]
 [ 6.2  2.2  4.5  1.5]
 [ 7.2  3.6  6.1  2.5]
 [ 5.5  2.4  3.7  1. ]
 [ 6.   2.9  4.5  1.5]
 [ 6.4  3.2  4.5  1.5]
 [ 5.8  4.   1.2  0.2]
 [ 5.3  3.7  1.5  0.2]
 [ 6.1  2.9  4.7  1.4]
 [ 5.4  3.4  1.7  0.2]
 [ 6.4  3.1  5.5  1.8]
 [ 4.7  3.2  1.6  0.2]
 [ 4.6  3.1  1.5  0.2]
 [ 4.9  2.5  4.5  1.7]
 [ 5.1  2.5  3.   1.1]
 [ 6.7  3.1  5.6  2.4]
 [ 4.9  3.1  1.5  0.1]
 [ 6.9  3.1  5.4  2.1]
 [ 4.8  3.   1.4  0.3]
 [ 5.8  2.7  3.9  1.2]
 [ 6.   2.7  5.1  1.6]
 [ 4.4  3.2  1.3  0.2]
 [ 6.8  3.2  5.9  2.3]
 [ 7.9  3.8  6.4  2. ]
 [

In [48]:
print('X_test:')
print(X_test)
print
print('y_test:')
print(Y_test)

X_test:
[[ 6.3  2.3  4.4  1.3]
 [ 6.4  2.7  5.3  1.9]
 [ 5.4  3.7  1.5  0.2]
 [ 6.1  3.   4.6  1.4]
 [ 5.   3.3  1.4  0.2]
 [ 5.   2.   3.5  1. ]
 [ 6.3  2.5  4.9  1.5]
 [ 5.8  2.7  4.1  1. ]
 [ 5.1  3.4  1.5  0.2]
 [ 5.7  2.8  4.5  1.3]
 [ 5.6  3.   4.5  1.5]
 [ 5.8  2.7  5.1  1.9]
 [ 5.5  2.3  4.   1.3]
 [ 4.9  3.   1.4  0.2]
 [ 5.1  3.8  1.5  0.3]
 [ 6.8  3.   5.5  2.1]
 [ 6.   3.4  4.5  1.6]
 [ 4.4  3.   1.3  0.2]
 [ 5.1  3.7  1.5  0.4]
 [ 5.   3.2  1.2  0.2]
 [ 7.1  3.   5.9  2.1]
 [ 6.4  2.8  5.6  2.2]
 [ 6.2  2.8  4.8  1.8]
 [ 4.8  3.4  1.9  0.2]
 [ 5.9  3.   4.2  1.5]
 [ 4.7  3.2  1.3  0.2]
 [ 5.7  3.   4.2  1.2]
 [ 5.5  2.6  4.4  1.2]
 [ 6.8  2.8  4.8  1.4]
 [ 7.7  3.8  6.7  2.2]]

y_test:
[1 2 0 1 0 1 1 1 0 1 1 2 1 0 0 2 1 0 0 0 2 2 2 0 1 0 1 1 1 2]


In [56]:

# Models we will use
rbm = BernoulliRBM(n_iter=50,random_state=None, verbose=True)
logistic = linear_model.LogisticRegression()
classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])
lda = LDA(n_components=3)



In [57]:
#########################################################################

# Training RBM-Logistic Pipeline
logistic.fit(X_train, Y_train)
classifier.fit(X_train, Y_train)


[BernoulliRBM] Iteration 1, pseudo-likelihood = -22.67, time = 0.00s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -20.20, time = 0.00s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -16.91, time = 0.01s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -51.97, time = 0.01s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -62.19, time = 0.01s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -71.51, time = 0.01s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -76.74, time = 0.01s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -72.90, time = 0.01s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -85.93, time = 0.00s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -108.94, time = 0.01s
[BernoulliRBM] Iteration 11, pseudo-likelihood = -114.69, time = 0.01s
[BernoulliRBM] Iteration 12, pseudo-likelihood = -97.83, time = 0.01s
[BernoulliRBM] Iteration 13, pseudo-likelihood = -158.78, time = 0.01s
[BernoulliRBM] Iteration 14, pseudo-likelihood = -151.87, time = 0.01s
[BernoulliRBM] Iteration 

Pipeline(steps=[('rbm', BernoulliRBM(batch_size=10, learning_rate=0.1, n_components=256, n_iter=50,
       random_state=None, verbose=True)), ('logistic', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))])

In [55]:

#########################################################################

# Get predictions
print "The RBM model:"
print "Predict: ", classifier.predict(X_test)
print "Real:    ", Y_test

print

print "Linear Discriminant Analysis: "
lda.fit(X_train, Y_train)
print "Predict: ", lda.predict(X_test)
print "Real:    ", Y_test 

The RBM model:
Predict:  [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
Real:     [1 2 0 1 0 1 1 1 0 1 1 2 1 0 0 2 1 0 0 0 2 2 2 0 1 0 1 1 1 2]

Linear Discriminant Analysis: 
Predict:  [1 2 0 1 0 1 1 1 0 1 1 2 1 0 0 2 1 0 0 0 2 2 2 0 1 0 1 1 1 2]
Real:     [1 2 0 1 0 1 1 1 0 1 1 2 1 0 0 2 1 0 0 0 2 2 2 0 1 0 1 1 1 2]
