In [0]:
#MLP
import numpy as np
# Load the dataset from sklearn
from sklearn.datasets  import load_breast_cancer
# Use the MLP class defined in sklearn
from sklearn.neural_network import MLPClassifier
# Load the dataset
cancerDataset  =load_breast_cancer()
# Print a detailed description of the dataset
print(cancerDataset.DESCR)
# Load the attributes and target in X and y
X = cancerDataset.data
y =cancerDataset.target
# Split the data into 80% training and 20% testing
from sklearn.model_selection import train_test_split
# While splitting, make an unbiased splitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size =0.2 ,stratify=y,
random_state=42)
# Create an object (model) for MLP.
mlpClassifier=MLPClassifier(solver='lbfgs', alpha= 1e-5 , hidden_layer_sizes=( 5  ,  2  ),
random_state= 1)
mlpClassifier.fit(X_train, y_train)
score  =  mlpClassifier.score(X_test, y_test)
print(score)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry 
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 3 is Mean Radius, f

In [0]:
#random forest

In [0]:
import numpy as np
# Load the dataset from sklearn
from sklearn.datasets import fetch_covtype
# Use the MLP class defined in sklearn
from sklearn.neural_network import MLPClassifier

# When return_X_y = True, the load function
# return data and target instead of Bunch object.
X, y = fetch_covtype(return_X_y=True)


print(type(X))
# <class 'numpy.ndarray'>

print(X.shape)
# (581012, 54)
print(y.shape)
# (581012,)
# Reduce the number of attributes, consider only first 10 attributes.
X_10 = X[:,:10]

print(X_10.shape)
# (581012, 10)
# Split the data into 90% training and 10% testing
from sklearn.model_selection import train_test_split
# The 10% testing data obtained during this split will be take as our entire database.
# This is because the original dataset is too big.
X10_train, X10_test, y10_train, y10_test = train_test_split(X_10, y, test_size=0.1,
stratify=y, random_state=42)

print(X10_test.shape)
# (58102, 10)

# Handle only the modified 1% dataset. Split that into training and testing.
# X and y are updated with the downsized dataset
X = X10_test
y = y10_test

# Split the data into 80% training and 20% testing
from sklearn.model_selection import train_test_split



# While splitting, make an unbiased splitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
stratify=y, random_state=42)

# Feature scaling using Standardization
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
# Scale both trainign and testing data
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Create an object (model) for MLP.
mlpClassifier = MLPClassifier(solver='lbfgs', alpha=1e-6,
hidden_layer_sizes=(50, 25), random_state=1)

# Train the MLP using 80% training set
mlpClassifier.fit(X_train_std, y_train)
# MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
# beta_2=0.999, early_stopping=False, epsilon=1e-08,
# hidden_layer_sizes=(50, 25), learning_rate='constant',
# learning_rate_init=0.001, max_iter=200, momentum=0.9,
# n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
# random_state=1, shuffle=True, solver='lbfgs', tol=0.0001,
# validation_fraction=0.1, verbose=False, warm_start=False)

# The classification score accuracy obtained.
score = mlpClassifier.score(X_test_std, y_test)
print(score)


<class 'numpy.ndarray'>
(581012, 54)
(581012,)
(581012, 10)
(58102, 10)
0.7542380173823251


In [0]:
#WINE

In [0]:
import numpy as np
# Load the dataset from sklearn
from sklearn.datasets import load_wine
# Use the MLP class defined in sklearn
from sklearn.neural_network import MLPClassifier

# When return_X_y = True, the load function
# return data and target instead of Bunch object.
X, y = load_wine(return_X_y=True)

print(type(X))
# <class 'numpy.ndarray'>
print(X.shape)
# (178, 13)
print(y.shape)
# (178,)

# Split the data into 80% training and 20% testing
from sklearn.model_selection import train_test_split
# While splitting, make an unbiased splitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
stratify=y, random_state=42)
# Create an object (model) for MLP.
mlpClassifier = MLPClassifier(solver='lbfgs', alpha=1e-5,
hidden_layer_sizes=(25, 25), random_state=1)
# Init signature: MLPClassifier(hidden_layer_sizes=(100,), activation='relu',
#solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant',
#learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True,
#random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9,
#nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1,
#beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10)
# Docstring:
# Multi-layer Perceptron classifier.
# This model optimizes the log-loss function using LBFGS or stochastic
# gradient descent.
# Parameters
# ----------
# hidden_layer_sizes : tuple, length = n_layers - 2, default (100,)
# The ith element represents the number of neurons in the ith
# hidden layer.


# solver : {'lbfgs', 'sgd', 'adam'}, default 'adam'
# The solver for weight optimization.
# - 'lbfgs' is an optimizer in the family of quasi-Newton methods.
# - 'sgd' refers to stochastic gradient descent.
# - 'adam' refers to a stochastic gradient-based optimizer proposed
# by Kingma, Diederik, and Jimmy Ba
# Note: The default solver 'adam' works pretty well on relatively
# large datasets (with thousands of training samples or more) in terms of
# both training time and validation score.
# For small datasets, however, 'lbfgs' can converge faster and perform
# better.
# alpha : float, optional, default 0.0001
# L2 penalty (regularization term) parameter.

# Train the MLP using 80% training set
mlpClassifier.fit(X_train, y_train)
# MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
# beta_2=0.999, early_stopping=False, epsilon=1e-08,
# hidden_layer_sizes=(25, 25), learning_rate='constant',
# learning_rate_init=0.001, max_iter=200, momentum=0.9,
# n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
# random_state=1, shuffle=True, solver='lbfgs', tol=0.0001,
# validation_fraction=0.1, verbose=False, warm_start=False)

score = mlpClassifier.score(X_test, y_test)
# The classification score accuracy obtained.
print(score)

<class 'numpy.ndarray'>
(178, 13)
(178,)
0.9444444444444444


In [0]:
#IRIS

In [0]:
import numpy as np
# Load the dataset from sklearn
from sklearn.datasets import load_wine
# Use the MLP class defined in sklearn
from sklearn.neural_network import MLPClassifier

# When return_X_y = True, the load function
# return data and target instead of Bunch object.
X, y = load_wine(return_X_y=True)



print(type(X))
# <class 'numpy.ndarray'>
print(X.shape)
# (178, 13)
print(y.shape)
# (178,)

# Split the data into 80% training and 20% testing
from sklearn.model_selection import train_test_split
# While splitting, make an unbiased splitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
stratify=y, random_state=42)
# Create an object (model) for MLP.
mlpClassifier = MLPClassifier(solver='lbfgs', alpha=1e-5,
hidden_layer_sizes=(25, 25), random_state=1)
# Init signature: MLPClassifier(hidden_layer_sizes=(100,), activation='relu',
#solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant',
#learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True,
#random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9,
#nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1,
#beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10)
# Docstring:
# Multi-layer Perceptron classifier.
# This model optimizes the log-loss function using LBFGS or stochastic
# gradient descent.
# Parameters
# ----------
# hidden_layer_sizes : tuple, length = n_layers - 2, default (100,)
# The ith element represents the number of neurons in the ith
# hidden layer.



# solver : {'lbfgs', 'sgd', 'adam'}, default 'adam'
# The solver for weight optimization.
# - 'lbfgs' is an optimizer in the family of quasi-Newton methods.
# - 'sgd' refers to stochastic gradient descent.
# - 'adam' refers to a stochastic gradient-based optimizer proposed
# by Kingma, Diederik, and Jimmy Ba
# Note: The default solver 'adam' works pretty well on relatively
# large datasets (with thousands of training samples or more) in terms of
# both training time and validation score.
# For small datasets, however, 'lbfgs' can converge faster and perform
# better.
# alpha : float, optional, default 0.0001
# L2 penalty (regularization term) parameter.

# Train the MLP using 80% training set
mlpClassifier.fit(X_train, y_train)
# MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
# beta_2=0.999, early_stopping=False, epsilon=1e-08,
# hidden_layer_sizes=(25, 25), learning_rate='constant',
# learning_rate_init=0.001, max_iter=200, momentum=0.9,
# n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
# random_state=1, shuffle=True, solver='lbfgs', tol=0.0001,
# validation_fraction=0.1, verbose=False, warm_start=False)

score = mlpClassifier.score(X_test, y_test)
# The classification score accuracy obtained.
print(score)


<class 'numpy.ndarray'>
(178, 13)
(178,)
0.9444444444444444
