### Perceptrons and SGD  
In scikit learn, there is a (one layer) perceptron binary classifier that is the basis of multi-layer neural nets. It is similar to stochastic gradient descent SGD). Here, we run both on the whole Iris dataset focusing on classifying one species against the rest.  
Scikit-learn has multi-layer perceptrons as well but they are not suitable for heavy processing. Here, we use them on the breast cancer set (binary) and Iris (3 classes)

In [126]:
import numpy as np 
from sklearn.linear_model import SGDClassifier, Perceptron
from sklearn.datasets import load_iris 
iris = load_iris()
X = iris.data
y = (iris.target == 0).astype(np.int) # is it setosa

In [127]:
clf_p = Perceptron(random_state = 777)
clf_s = SGDClassifier()
clf_p.fit(X,y)
clf_s.fit(X,y)
print(clf_p.predict([[5.1,3.3,1.5,.3]]))
print(clf_s.predict([[3.1,3.3,1.5,1.3]]))

[1]
[1]


In [128]:
print(clf_s, clf_s.coef_, clf_s.intercept_)
print(clf_p, clf_p.coef_, clf_p.intercept_)

SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1,
       penalty='l2', power_t=0.5, random_state=None, shuffle=True,
       verbose=0, warm_start=False) [[  7.43281875  21.72670097 -34.30531732 -14.86563751]] [ 9.92051582]
Perceptron(alpha=0.0001, class_weight=None, eta0=1.0, fit_intercept=True,
      n_iter=5, n_jobs=1, penalty=None, random_state=777, shuffle=True,
      verbose=0, warm_start=False) [[ 2.6  5.9 -8.9 -3.9]] [ 1.]


### Multi-layer perceptrons

In [129]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
scaler = StandardScaler()
X = cancer['data']
y = cancer['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify = y, random_state=707)  

In [130]:
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [131]:
# 3 layers, with 90, 60 and 30 neurons 
mlp = MLPClassifier(hidden_layer_sizes=(90,60,30), activation='tanh', max_iter=300) 
mlp.fit(X_train,y_train)

MLPClassifier(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(90, 60, 30), learning_rate='constant',
       learning_rate_init=0.001, max_iter=300, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [132]:
predictions = mlp.predict(X_test)
print(confusion_matrix(y_test,predictions))
print(classification_report(y_test,predictions))

[[51  2]
 [ 2 88]]
             precision    recall  f1-score   support

          0       0.96      0.96      0.96        53
          1       0.98      0.98      0.98        90

avg / total       0.97      0.97      0.97       143



In [133]:
#print(mlp.coefs_[3]) # for each layer neurons, weights
#print(mlp.intercepts_[0]) # for each layer, biases per neuron

### 3 classes, iris data

In [153]:
X2 = iris.data  # iris defined above
y2 = iris.target

In [156]:
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, stratify=y2, train_size=0.75, random_state=717)
scaler2 = StandardScaler()
scaler2.fit(X2_train)
X2_train = scaler2.transform(X2_train)
X2_test = scaler2.transform(X2_test)
mlp2 = MLPClassifier(solver='lbfgs',hidden_layer_sizes=(4,8,4,8,4), 
                     activation='tanh', max_iter=300, random_state = 0) # 
mlp2.fit(X2_train,y2_train)
predictions2 = mlp2.predict(X2_test)
print(confusion_matrix(y2_test,predictions2))
print(classification_report(y2_test,predictions2))

[[12  0  0]
 [ 0 13  0]
 [ 0  0 13]]
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        12
          1       1.00      1.00      1.00        13
          2       1.00      1.00      1.00        13

avg / total       1.00      1.00      1.00        38



In [136]:
mlp

MLPClassifier(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(90, 60, 30), learning_rate='constant',
       learning_rate_init=0.001, max_iter=300, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

### Note:   
This does not mean we achieved perfect prediction. Change the splitted samples using another random state and see.