# ELM usage in Scikit-Learn ecosystem

### 1. Getting data

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [2]:
X, y = load_breast_cancer(return_X_y=True)

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [4]:
from sklearn.preprocessing import RobustScaler

In [5]:
rs = RobustScaler().fit(X_train)
X_train = rs.transform(X_train)
X_test = rs.transform(X_test)

### 2. Run model

In [6]:
from skelm import ELMClassifier

In [7]:
elm = ELMClassifier()

In [8]:
elm.fit(X_train, y_train)

SOLVER forget= False
SOLVER forget= False


ELMClassifier(alpha=1e-07, batch_size=None, classes=None, density=None,
              include_original_features=False, n_neurons=None,
              pairwise_metric=None, random_state=None, ufunc='tanh')

In [9]:
elm.score(X_test, y_test)

0.9230769230769231

### 3. Tune model

In [10]:
elm2 = ELMClassifier(n_neurons=300, alpha=0.01)
elm2.fit(X_train, y_train)
elm2.score(X_test, y_test)

SOLVER forget= False
SOLVER forget= False


0.9790209790209791

In [11]:
elm3 = ELMClassifier(include_original_features=True)
elm3.fit(X_train, y_train)
elm3.score(X_test, y_test)

SOLVER forget= False
SOLVER forget= False


0.9230769230769231

In [12]:
elm4 = ELMClassifier(pairwise_metric='euclidean')
elm4.fit(X_train, y_train)
elm4.score(X_test, y_test)

SOLVER forget= False
SOLVER forget= False


0.951048951048951

### 4. Grid search for best parameters

In [13]:
from sklearn.model_selection import GridSearchCV

In [14]:
parameters = {
    "n_neurons": (None, 300),   # 'None' means automatic number
    "pairwise_metric": (None, "euclidean", "cosine"),
    "include_original_features": (True, False),
    "alpha": (1e-3, 1e-1, 1e+1),
}

In [15]:
gsv = GridSearchCV(ELMClassifier(), parameters, cv=5, iid=False, n_jobs=-1)

In [16]:
gsv.fit(X_train, y_train)

SOLVER forget= False
SOLVER forget= False


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=ELMClassifier(alpha=1e-07, batch_size=None, classes=None,
                                     density=None,
                                     include_original_features=False,
                                     n_neurons=None, pairwise_metric=None,
                                     random_state=None, ufunc='tanh'),
             iid=False, n_jobs=-1,
             param_grid={'alpha': (0.001, 0.1, 10.0),
                         'include_original_features': (True, False),
                         'n_neurons': (None, 300),
                         'pairwise_metric': (None, 'euclidean', 'cosine')},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [17]:
gsv.score(X_test, y_test)

0.972027972027972

In [20]:
gsv.best_estimator_

ELMClassifier(alpha=0.1, batch_size=None, classes=None, density=None,
              include_original_features=True, n_neurons=300,
              pairwise_metric='euclidean', random_state=None, ufunc='tanh')

In [18]:
gsv.best_params_

{'alpha': 0.1,
 'include_original_features': True,
 'n_neurons': 300,
 'pairwise_metric': 'euclidean'}

Get actual number of neurons in ELM model, including original features if used, and a bias term

In [19]:
gsv.best_estimator_.n_neurons_

330