In [None]:
%matplotlib inline
import numpy as np
import sklearn.datasets as data
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sb; sb.set_style( 'darkgrid' ) # use whitegrid if prefer a white background
import pandas as pd

from numpy.random import SeedSequence, default_rng
# Shared generator used below to draw the `random_state` of every train/test split.
# SeedSequence() is built without an explicit seed, so `rng` is NOT reproducibly
# seeded: each kernel start yields different partitions.
rng = default_rng( SeedSequence().entropy )

import warnings
# NOTE(review): this silences every warning for the whole notebook, not only
# convergence warnings -- remove it while debugging.
warnings.filterwarnings("ignore")

#matplotlib.rcParams.update( { 'font.size': 18 } ) # Use this to setup your preferred font size for plotting

#### 1- Use a Linear SVM to fit a model on the Iris dataset. Try different train/test partitions and $C$ values, and plot the support vectors that are found along with the 0-1 loss. Also compute how many support vectors are used on a given train/test split to generate the separating hyperplane.

+ It will be easier to use only two features at a time from the data and in a binary classification context. Also try out different feature and class combinations to see how the model behaves with different feature options. For example: sepal (width vs length) | petal (width vs length ) | sepal width vs petal width | ...
+ Use the support vectors to extract the corresponding observations from the training set and examine their features (for example, comparing mean values between classes, mean absolute deviation, ... ). Also compare the initial training set with the extracted data.
+ We recommend using the Pandas and Seaborn libraries to do some exploratory data plotting

### Iris data

Number of Instances
150 (50 in each of three classes)

Number of Attributes
4 numeric, predictive attributes and the class

Attribute Information
sepal length in cm

sepal width in cm

petal length in cm

petal width in cm

class:
Iris-Setosa

Iris-Versicolour

Iris-Virginica

In [None]:
# Load Iris and reduce it to a two-feature, two-class problem.
iris = data.load_iris()

# Drop class 0 (Setosa): what remains is Versicolour (1) vs Virginica (2).
keep = iris.target != 0

X = iris.data[keep, :2]  # first two columns: sepal length and sepal width (cm)
y = iris.target[keep]

In [None]:
# Name the two selected columns so the pair-plot axes are readable
# (an unnamed DataFrame would label them just 0 and 1).
sb.pairplot( data = pd.DataFrame( X, columns = iris.feature_names[:2] ) );

In [None]:
from sklearn.svm import SVC # See help( SVC ) for class details
from sklearn.model_selection import train_test_split
from sklearn.metrics import zero_one_loss

# Seeds NumPy's legacy global generator (the np.random.* noise calls further down use it).
# It does NOT influence `rng`, so the partition below still changes from run to run.
np.random.seed( 241542 )

# Draw the split seed once and report it, so an interesting partition can be reproduced
# by passing the printed value as `random_state`.
split_seed = int( rng.integers( 421 ) )
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size = 0.2, random_state = split_seed )

C   = 1.0
clf = SVC( kernel = 'linear', C = C )
clf.fit( X_train, y_train )

test_loss = zero_one_loss( y_test, clf.predict( X_test ) )

print( "Train/test split seed: {0}".format( split_seed ) )
print( "Support vectors for classes {0}: {1} (total {2})".format(
    clf.classes_.tolist(), clf.n_support_.tolist(), len( clf.support_ ) ) )

fig, ax = plt.subplots( figsize = ( 10, 7 ) )

# Evaluate the decision function on a regular grid spanning the data
x_min, x_max = X[:, 0].min(), X[:, 0].max()
y_min, y_max = X[:, 1].min(), X[:, 1].max()

XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
Z = clf.decision_function( np.c_[XX.ravel(), YY.ravel()] ).reshape( XX.shape )

# Background colour = predicted class (sign of the decision function)
ax.pcolormesh( XX, YY, Z > 0, cmap = plt.cm.Paired )

# Solid line: separating hyperplane (decision value 0).
# Dashed lines: the margins, which lie at decision values -1 and +1.
ax.contour( XX, YY, Z, colors = ['k', 'k', 'k'],
            linestyles = ['--', '-', '--'], levels = [-1, 0, 1] )

# Support vectors: red circles
ax.scatter( clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s = 80,
            facecolors = 'none', zorder = 10, edgecolors = 'r' )

# All observations, coloured by true class
ax.scatter( X[:, 0], X[:, 1], c = y, zorder = 10, cmap = plt.cm.Paired,
            edgecolor = 'k', s = 20 )

# Test observations: green circles
ax.scatter( X_test[:, 0], X_test[:, 1], s = 80, facecolors = 'none',
            zorder = 10, edgecolor = 'g' )

ax.axis( 'tight' )
ax.set_xlabel( 'sepal length (cm)' )
ax.set_ylabel( 'sepal width (cm)' )
ax.set_title( 'Linear SVM, C = {0}, 0-1 loss {1}'.format( C, test_loss ) )
plt.show()


In [None]:
# Rows of the training set that the fitted SVM kept as support vectors
# (`clf.support_` holds their indices into X_train).
Xsv = X_train[ clf.support_ ]
# X holds the first two Iris columns, i.e. sepal length then sepal width -- label them in that order.
Xdf = pd.DataFrame( Xsv, columns = [ 'sepal length', 'sepal width' ] )
Xdf.head()

In [None]:
# Pairwise feature distributions of the support vectors only.
sb.pairplot( Xdf );

#### 2- Use a Neural Network to model digit images. Use different layer sizes, activation functions, and regularizations and examine how these affect the resulting weights and train & test scores. What reveals more structure in the weights? Feel free to vary any other settings in the model.
+ Bonus: Experiment with how adding Gaussian and Laplacian noise affects the regularization and the structure of the gradients in the layers.
  + np.random.laplace, np.random.normal

In [None]:
from sklearn.exceptions import ConvergenceWarning
from sklearn.neural_network import MLPClassifier # Use help( MLPClassifier )

# Fetch MNIST (784 pixel columns per image) from OpenML as a (features, labels) pair.
mnist_X_y = data.fetch_openml( name = 'mnist_784', version = 1, return_X_y = True )
Xmnist, Ymnist = mnist_X_y

In [None]:
# Build the network input in a NEW variable: overwriting `Xmnist` would stack another
# layer of noise on the data every time this cell is re-run.
noise_lap     = np.random.laplace( 0, 1, Xmnist.shape )
Xmnist_scaled = np.asarray( Xmnist, dtype = float ) + noise_lap
Xmnist_scaled /= 255.  # rescale before training; previously the scaled copy was never used (name typo)

mlp = MLPClassifier( hidden_layer_sizes = ( 30, ), activation = 'relu', max_iter = 100 )

X_train, X_test, y_train, y_test = train_test_split( Xmnist_scaled, Ymnist, test_size = 0.1, random_state = rng.integers( 521 ) )

# This is for catching and ignoring if fitting produces a convergence issue.
# Comment away if you want verbose warnings.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning,
                            module="sklearn")
    mlp.fit(X_train, y_train)

print( "Training score: {0}".format( mlp.score( X_train, y_train ) ) )
print( "Test score: {0}".format( mlp.score(X_test, y_test ) ) )

# First-layer weights, one 28x28 image per hidden unit. The 4x4 grid holds 16 panels,
# so zip() stops after the first 16 of the 30 hidden units.
fig, axes = plt.subplots(4, 4, figsize = ( 10, 7 ) )
# use global min / max to ensure all weights are shown on the same scale
vmin, vmax = mlp.coefs_[0].min(), mlp.coefs_[0].max()
for coef, ax in zip(mlp.coefs_[0].T, axes.ravel()):
    ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray, vmin=.5 * vmin,
               vmax=.5 * vmax)
    ax.set_xticks(())
    ax.set_yticks(())

plt.show()

#### 3- Experiment with the effects of regularization on SVMs using both the $C$ and $gamma$ parameters. Use the Iris dataset and observe how the regularization parameter affects the model fitting. Mark the best parameter value on the train and test score curves, and report the number of support vectors
+ Add Gaussian noise to the observations to see how regularization affects the results
+ Fix either C or gamma and vary the other parameter
+ Use also different train/test splits
+ Bonus: extract the support vector indices, like in Task 1, and inspect the statistics of the found support vectors

In [None]:
X, Y = data.load_iris( return_X_y = True )

# Additive Gaussian noise on every feature.
# NOTE(review): a standard deviation of 11 looks large for measurements given in cm --
# confirm this is the intended noise level for the experiment.
X += np.random.normal( 0, 11, X.shape )

Xtrain, Xtest, ytrain, ytest = train_test_split( X, Y, test_size = 0.4, random_state = rng.integers( 42 ) )

param_range = np.logspace( -10, 1, 10 ) # gamma parameter
C           = 122.0
# gamma is a kernel coefficient for the 'rbf', 'poly' and 'sigmoid' kernels only; with
# kernel = 'linear' it is ignored and every step of the sweep would fit the same model.
svc         = SVC( C = C, kernel = 'rbf' )
train_e     = []  # train accuracy for each gamma
test_e      = []  # test accuracy for each gamma

for a in param_range:
    svc.set_params( gamma = a )
    svc.fit( Xtrain, ytrain )
    train_e.append( svc.score( Xtrain, ytrain ) )
    test_e.append( svc.score( Xtest, ytest ) )

# Best gamma = highest test accuracy (first one on ties)
i_optim = np.argmax(test_e)
optim   = param_range[i_optim]
print("Optimal regularization parameter : %s" % optim)

# Refit on the full data set with the selected gamma
svc.set_params( gamma = optim)
svc.fit( X, Y )
print( "Number of support vectors per class:")
# Report every class: this is the three-class Iris problem, not a binary one.
print( " | ".join( "Class {0}: {1}".format( label, count )
                   for label, count in zip( svc.classes_, svc.n_support_ ) ) )

##### Plot here the train and test score curves

In [None]:
# Train and test score as a function of the swept parameter (log-scaled x axis).
fig, ax = plt.subplots( figsize = ( 17, 10 ) )

for scores, name in ( ( train_e, 'Train' ), ( test_e, 'Test' ) ):
    ax.semilogx( param_range, scores, label = name )

# Vertical marker at the selected value, from the current lower axis limit up to the best test score
lower_edge = ax.get_ylim()[0]
ax.vlines( optim, lower_edge, np.max( test_e ), color = 'k',
           linewidth = 5, label = 'Optimum on test' )

ax.legend( loc = 'lower left' )
ax.set_ylim( [ min( test_e ) - 0.1, max( train_e ) + 0.1 ] )
ax.set_xlabel( 'Regularization parameter' )
ax.set_ylabel( 'Performance' );

##### Plot here the decision surface

In [None]:
# `svc` was fitted on all four (noisy) features, so it cannot be evaluated on a 2-D grid,
# and `clf` is the binary model from Task 1 trained on different data. For the picture,
# refit a copy of `svc` (identical hyper-parameters) on the two plotted features only.
X_2d   = X[:, :2]
svc_2d = SVC( **svc.get_params() )
svc_2d.fit( X_2d, Y )

fig, ax = plt.subplots( figsize = ( 10, 7 ) )

x_min, x_max = X_2d[:, 0].min(), X_2d[:, 0].max()
y_min, y_max = X_2d[:, 1].min(), X_2d[:, 1].max()

XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]

# Three classes: colour each grid point by its predicted class. A single signed decision
# value (and hence a single pair of margin lines) only exists in the binary case.
Z = svc_2d.predict( np.c_[XX.ravel(), YY.ravel()] ).reshape( XX.shape )
ax.pcolormesh( XX, YY, Z, cmap = plt.cm.Paired )

# Support vectors of the 2-D model: red circles
ax.scatter( svc_2d.support_vectors_[:, 0], svc_2d.support_vectors_[:, 1], s = 80,
            facecolors = 'none', zorder = 10, edgecolors = 'r' )

# All observations, coloured by true class
ax.scatter( X_2d[:, 0], X_2d[:, 1], c = Y, zorder = 10, cmap = plt.cm.Paired,
            edgecolor = 'k', s = 20 )

ax.axis( 'tight' )
ax.set_xlabel( 'feature 0 (noisy)' )
ax.set_ylabel( 'feature 1 (noisy)' )
ax.set_title( '{0} SVM on first two features, C = {1}, gamma = {2}'.format(
    svc_2d.kernel, svc_2d.C, svc_2d.gamma ) )
plt.show()