In [None]:
# evaluate an LDA (linear discriminant analysis) model on the dataset
import numpy as np
from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis


In [None]:
# Build a synthetic classification dataset: 1000 samples, 10 features, all of
# them informative (none redundant). The fixed random_state keeps it repeatable.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=10,
    n_redundant=0,
    random_state=1,
)
# Peek at the first ten rows, rounded to whole numbers for readability.
X[:10].round(0)

array([[ 0., -4., -2., -2.,  2.,  0.,  1.,  2.,  1.,  1.],
       [ 1.,  1., -0., -4., -3., -2., -1., -3., -0.,  1.],
       [-2., -2.,  1.,  1.,  0., -4.,  1.,  1.,  3.,  1.],
       [-1., -4., -0.,  0.,  0.,  1., -4.,  0., -2.,  2.],
       [ 3.,  3.,  4.,  2.,  2.,  2.,  1., -3.,  1.,  2.],
       [ 3.,  3.,  3., -0., -0.,  2.,  1.,  0., -2., -1.],
       [ 2., -1.,  1., -1., -0., -2.,  3., -1., -3.,  1.],
       [-5.,  1.,  1.,  1.,  1.,  0., -0., -2.,  1., -0.],
       [ 0.,  0., -3., -1., -1., -2.,  0., -2.,  3., -1.],
       [ 1.,  5., -2., -5., -0.,  4., -1., -0.,  2., -1.]])

In [None]:
# Show the class labels of the first ten samples (same rows as displayed above).
y[:10]

array([0, 1, 0, 0, 0, 1, 1, 0, 0, 0])

In [None]:

# LDA classifier with default settings.
model = LinearDiscriminantAnalysis()
# Stratified 10-fold cross-validation, repeated 10 times with a fixed seed,
# which yields 10 * 10 = 100 accuracy scores.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=10, random_state=1)
# Score the model on every split; n_jobs=-1 requests all available cores.
scores = cross_val_score(
    model,
    X,
    y,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
)
# Report the mean accuracy with its standard deviation in parentheses.
print('Mean Accuracy: %.3f (%.3f)' % (scores.mean(), scores.std()))

Mean Accuracy: 0.894 (0.029)


In [None]:
# Display the individual per-split accuracy scores behind the summary above.
scores

array([0.84, 0.86, 0.9 , 0.89, 0.87, 0.9 , 0.9 , 0.95, 0.88, 0.95, 0.9 ,
       0.87, 0.92, 0.94, 0.9 , 0.92, 0.9 , 0.85, 0.89, 0.82, 0.87, 0.9 ,
       0.84, 0.91, 0.89, 0.88, 0.9 , 0.86, 0.95, 0.94, 0.89, 0.91, 0.9 ,
       0.85, 0.92, 0.89, 0.91, 0.87, 0.91, 0.88, 0.87, 0.94, 0.89, 0.89,
       0.88, 0.87, 0.84, 0.95, 0.94, 0.89, 0.93, 0.84, 0.92, 0.91, 0.92,
       0.87, 0.9 , 0.91, 0.86, 0.88, 0.86, 0.9 , 0.92, 0.87, 0.87, 0.9 ,
       0.91, 0.87, 0.92, 0.89, 0.91, 0.88, 0.85, 0.94, 0.92, 0.92, 0.87,
       0.87, 0.87, 0.91, 0.9 , 0.91, 0.93, 0.9 , 0.88, 0.91, 0.89, 0.86,
       0.91, 0.89, 0.93, 0.91, 0.86, 0.91, 0.83, 0.88, 0.95, 0.89, 0.87,
       0.92])

In [None]:
# Fit the model on the full dataset before using it for prediction.
model.fit(X, y)
# A single new observation with one value per feature (10 in total).
row = [0.12777556,-3.64400522,-2.23268854,-1.82114386,1.75466361,0.1243966,1.03397657,2.35822076,1.01001752,0.56768485]
# predict() takes a 2D collection of samples, so wrap the one row in a list.
yhat = model.predict([row])
# predict() returns an array with one label per input row. Index the single
# element so '%d' receives a scalar: formatting a length-1 array relies on
# implicit array-to-scalar conversion, which NumPy has deprecated.
print('Predicted Class: %d' % yhat[0])

Predicted Class: 1


In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Recreate the same synthetic dataset so this section can run on its own.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=10,
    n_redundant=0,
    random_state=1,
)
# Fresh, unfitted LDA estimator to be tuned.
model = LinearDiscriminantAnalysis()
# Stratified 10-fold cross-validation repeated 3 times with a fixed seed.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# Hyperparameter grid: compare the three LDA solvers.
# NOTE(review): the listing order presumably only influences which solver is
# reported when scores tie -- confirm before reordering.
grid = {'solver': ['eigen', 'svd', 'lsqr']}

In [None]:
# Exhaustive search over `grid`, scoring each candidate by cross-validated
# accuracy; n_jobs=-1 requests all available cores.
search = GridSearchCV(
    estimator=model,
    param_grid=grid,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
)
# Run the search; the returned object exposes the best score and parameters.
results = search.fit(X, y)
# Report the best mean accuracy and the configuration that achieved it.
print('Mean Accuracy: %.3f' % (results.best_score_,))
print('Config: %s' % (results.best_params_,))

Mean Accuracy: 0.893
Config: {'solver': 'eigen'}


In [None]:
from numpy import arange
# Recreate the same synthetic dataset so this section can run on its own.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=10,
    n_redundant=0,
    random_state=1,
)
# Use the 'lsqr' solver: per the scikit-learn docs, shrinkage is only
# supported by the 'lsqr' and 'eigen' solvers, not the default one.
model = LinearDiscriminantAnalysis(solver='lsqr')
# Stratified 10-fold cross-validation repeated 3 times with a fixed seed.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# Candidate shrinkage values from 0 up to (but excluding) 1 in steps of 0.01.
grid = {'shrinkage': arange(0, 1, 0.01)}
# Exhaustive search over the shrinkage values, scored by accuracy.
search = GridSearchCV(
    estimator=model,
    param_grid=grid,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
)
# Run the search; the returned object exposes the best score and parameters.
results = search.fit(X, y)
# Report the best mean accuracy and the shrinkage value that achieved it.
print('Mean Accuracy: %.3f' % (results.best_score_,))
print('Config: %s' % (results.best_params_,))

Mean Accuracy: 0.894
Config: {'shrinkage': 0.02}
