# Block 6 Exercise 1: Non-Linear Classification

## MNIST Data
We return to the MNIST data set on handwritten digits to compare non-linear classification algorithms ...   

In [2]:
#imports 
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import fetch_openml

In [3]:
# Load data from https://www.openml.org/d/554
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)
data_op = X, y

In [4]:
#the full MNIST data set contains 70k samples of digits 0-9 as 28*28 gray scale images (represented as 784 dim vectors)
np.shape(X)

(70000, 784)

In [5]:
X.min()

0.0

In [6]:
#look at max/min value in the data
X.max()

255.0

### E1.1: Cross-Validation and Support Vector Machines
Train and optimize  C-SVM classifier on MNIST (https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC)
* use a RBF kernel
* use *random search* with cross-validation to find the best settings for *gamma* and *C* (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV)

In [7]:
# Import train_test_split function
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn import svm, metrics

In [8]:
# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) # 70% training and 30% test

In [12]:
#Create a svm Classifier
clf = svm.SVC(kernel='rbf', max_iter= 15) # RBF Kernel

#Train the model using the training sets
clf.fit(X_train, y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)



In [13]:
# Model Accuracy: how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.7301904761904762


In [78]:
C = [0.20,0.40,0.60,0.80,1.0,1.1]
gamma = ["auto","scale"]

parameters = {'C': C,'gamma': gamma}

Randomized_s = RandomizedSearchCV(clf, parameters,n_iter=5,n_jobs=-2)

search = Randomized_s.fit(X_train, y_train)



In [80]:
print("The best parameters are ",search.best_params_)

The best parameters are  {'gamma': 'scale', 'C': 1.0}


### E1.2: Pipelines and simple Neural Networks
Split the MNIST data into  train- and test-sets and then train and evaluate a simple Multi Layer Perceptron (MLP) network. Since the non-linear activation functions of MLPs are sensitive to the scaling on the input (recall the *sigmoid* function), we need to scale all input values to [0,1] 

* combine all steps of your training in a SKL pipeline (https://scikit-learn.org/stable/modules/compose.html#pipeline)
* use a SKL-scaler to scale the data (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html)
* MLP Parameters: https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html#sklearn.neural_network.MLPClassifier
    * use a *SGD* solver
    * use *tanh* as activation function
    * compare networks with 1, 2 and 3 layers, use different numbers of neurons per layer
    * adjust training parameters *alpha* (regularization) and *learning rate* - how sensitive is the model to these parameters?
    * Hint: do not change all parameters at the same time, split into several experiments
* How hard is it to find the best parameters? How many experiments would you need to find the best parameters?
    


In [18]:
# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=100)

In [65]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
import numpy as np

In [25]:
make_pipeline(StandardScaler(), MLPClassifier())

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('mlpclassifier', MLPClassifier())])

In [27]:
%%time
#use a SGD solver
pipe = make_pipeline(StandardScaler(), MLPClassifier(solver='sgd'))

pipe.fit(X_train, y_train)

In [36]:
%%time
#use a SGD solver
pipe.fit(X_train, y_train)

y_pred = pipe.predict(X_test[:100])#####################

print("Accuracy:",metrics.accuracy_score(y_test[:100], y_pred))######################

Accuracy: 0.44
CPU times: user 1.31 s, sys: 19.5 ms, total: 1.33 s
Wall time: 342 ms




In [38]:
%%time
#use a tanh parameter
pipe = make_pipeline(StandardScaler(), MLPClassifier(activation='tanh',solver='sgd'))

pipe.fit(X_train, y_train)

y_pred = pipe.predict(X_test[:100])#####################

print("Accuracy:",metrics.accuracy_score(y_test[:100], y_pred))######################

Accuracy: 0.48
CPU times: user 1.32 s, sys: 20.7 ms, total: 1.34 s
Wall time: 345 ms




In [57]:
%%time
#use different networks
pipe_layer1 = make_pipeline(StandardScaler(), MLPClassifier(hidden_layer_sizes=(1,5),activation='tanh',solver='sgd'))

pipe_layer1.fit(X_train, y_train)

y_pred1 = pipe_layer1.predict(X_test[:100])#####################

print("Accuracy of layer1:",metrics.accuracy_score(y_test[:100], y_pred1))######################

#----------------------------------------------------------------------------------------------------------------
pipe_layer2 = make_pipeline(StandardScaler(), MLPClassifier(hidden_layer_sizes=(2,10),activation='tanh',solver='sgd'))

pipe_layer2.fit(X_train, y_train)

y_pred2 = pipe_layer2.predict(X_test[:100])#####################

print("Accuracy of layer2:",metrics.accuracy_score(y_test[:100], y_pred2))######################

#----------------------------------------------------------------------------------------------------------------------

pipe_layer3 = make_pipeline(StandardScaler(), MLPClassifier(hidden_layer_sizes=(3,20),activation='tanh',solver='sgd'))

pipe_layer3.fit(X_train, y_train)

y_pred3 = pipe_layer3.predict(X_test[:100])#####################

print("Accuracy of layer3:",metrics.accuracy_score(y_test[:100], y_pred3))######################





Accuracy of layer1: 0.17
Accuracy of layer2: 0.22
Accuracy of layer3: 0.24
CPU times: user 1.48 s, sys: 20.8 ms, total: 1.5 s
Wall time: 386 ms




In [70]:
%%time
#use alpha parameter
alpha_value = np.arange(0.0001,0.0010,0.0001)

for i in range(len(alpha_value)):
    pipe = make_pipeline(StandardScaler(), MLPClassifier(activation='tanh',solver='sgd',alpha=alpha_value[i]))

    pipe.fit(X_train, y_train)

    y_pred = pipe.predict(X_test[:100])#####################

    print("Accuracy with alpha parameter:",metrics.accuracy_score(y_test[:100], y_pred))######################



Accuracy with alpha parameter: 0.57




Accuracy with alpha parameter: 0.41




Accuracy with alpha parameter: 0.47




Accuracy with alpha parameter: 0.5




Accuracy with alpha parameter: 0.49




Accuracy with alpha parameter: 0.45




Accuracy with alpha parameter: 0.47




Accuracy with alpha parameter: 0.43
Accuracy with alpha parameter: 0.49
CPU times: user 11 s, sys: 138 ms, total: 11.1 s
Wall time: 2.82 s




In [77]:
%%time
#use learning rate
Learning = ["constant", "invscaling", "adaptive"]

for i in range(len(Learning)):
    pipe = make_pipeline(StandardScaler(), MLPClassifier(activation='tanh',solver='sgd',learning_rate=Learning[i]))

    pipe.fit(X_train, y_train)

    y_pred = pipe.predict(X_test[:100])#####################

    print("Accuracy with learning_rate different:",metrics.accuracy_score(y_test[:100], y_pred))######################



Accuracy with learning_rate different: 0.48




Accuracy with learning_rate different: 0.08
Accuracy with learning_rate different: 0.45
CPU times: user 3.52 s, sys: 40.8 ms, total: 3.57 s
Wall time: 903 ms


