# How to use nearest neighbours for Classification

In [1]:
## How to use nearest neighbours for Classification
def Snippet_155(random_state=None):
    """Tune and evaluate a scale -> PCA -> k-nearest-neighbours pipeline.

    Builds a synthetic 10-class dataset (1000 samples, 20 features) with
    ``make_classification``, grid-searches the number of PCA components and
    the KNN ``n_neighbors`` / ``algorithm`` settings, prints the best
    settings found, and then prints the 3-fold cross-validated accuracy of
    the grid-searched model (per-fold scores, mean, standard deviation).

    Args:
        random_state: Seed forwarded to ``make_classification``. The default
            ``None`` keeps the original behaviour (a different dataset on
            every call); pass an int to make a run reproducible.

    Returns:
        None. All results are written to standard output.
    """
    print()
    print(format('## How to use nearest neighbours for Classification','*^82'))

    import warnings

    # Silence warnings only while this snippet runs; catch_warnings restores
    # the caller's filters on exit instead of leaving them globally disabled.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")

        # load libraries
        from sklearn import decomposition, datasets
        from sklearn import neighbors
        from sklearn.pipeline import Pipeline
        from sklearn.model_selection import GridSearchCV, cross_val_score
        from sklearn.preprocessing import StandardScaler

        # Generate a synthetic multi-class classification dataset
        X, y = datasets.make_classification(
            n_samples=1000, n_features=20, n_informative=5,
            n_redundant=2, n_repeated=0, n_classes=10, n_clusters_per_class=2,
            weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0,
            scale=1.0, shuffle=True, random_state=random_state)

        # Pipeline of three steps: standardize the features, project them
        # with PCA, then classify with k-nearest neighbours.
        pipe = Pipeline(steps=[('sc', StandardScaler()),
                               ('pca', decomposition.PCA()),
                               ('KNN', neighbors.KNeighborsClassifier())])

        # Parameter space. Step parameters are addressed as
        # '<step name>__<parameter name>'.
        parameters = {
            # every component count from 1 up to the number of features in X
            'pca__n_components': list(range(1, X.shape[1] + 1)),
            # candidate settings for KNeighborsClassifier
            'KNN__n_neighbors': [2, 3, 5, 10],
            'KNN__algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
        }

        # Conduct parameter optimization over the whole pipeline
        clf = GridSearchCV(pipe, parameters)
        clf.fit(X, y)

        # View the best parameters
        best_params = clf.best_estimator_.get_params()
        print('Best Number Of Components:', best_params['pca__n_components'])
        print(); print(best_params['KNN'])

        # Evaluate with cross-validation. The GridSearchCV object itself is
        # passed in, so the search is re-fitted on each of the 3 training
        # splits.
        CV_Result = cross_val_score(clf, X, y, cv=3, n_jobs=-1, scoring='accuracy', verbose=1)
        print(); print(CV_Result)
        print(); print(CV_Result.mean())
        print(); print(CV_Result.std())
    
# Run the snippet; it prints its results to standard output.
Snippet_155()


***************## How to use nearest neighbours for Classification****************
[3 6 2 9 0 3 2 3 8 7 7 3 3 1 4 2 9 3 4 1 9 5 0 2 6 5 1 0 3 9 4 8 2 6 5 9 5
 5 6 9 5 8 3 8 5 8 0 8 9 2 2 1 1 3 5 9 7 7 7 5 6 5 8 5 7 9 8 0 7 0 9 6 2 3
 9 4 4 0 8 8 2 1 5 7 0 7 4 5 8 2 0 9 7 2 6 5 9 1 1 8 1 9 3 4 7 2 6 5 2 2 7
 2 3 9 3 5 4 7 5 1 0 9 0 0 5 5 9 9 1 3 5 4 8 6 3 3 7 3 6 7 6 7 3 7 0 7 2 0
 1 9 5 8 1 4 3 9 7 8 5 2 9 2 6 6 9 2 4 9 2 0 6 1 7 9 4 4 8 2 2 8 1 1 1 0 6
 1 1 6 0 5 3 6 9 6 8 0 0 6 4 8 6 3 4 8 0 3 0 6 2 8 8 3 8 5 3 3 4 0 7 8 4 9
 9 9 4 3 9 4 6 8 4 3 7 6 1 3 3 1 5 6 8 2 5 5 2 4 3 0 6 8 4 4 0 6 2 6 7 7 8
 5 9 2 6 5 0 8 5 9 3 2 1 1 6 3 6 4 7 5 3 8 0 8 6 5 5 6 2 9 5 0 4 5 1 9 4 2
 1 9 3 8 8 6 0 0 7 7 8 1 9 9 3 6 7 3 1 2 0 3 9 0 2 4 1 0 3 0 0 2 5 6 0 0 0
 7 4 8 6 8 1 8 9 8 6 4 1 5 0 6 3 8 7 0 5 7 3 1 1 4 2 3 7 2 2 3 5 4 6 6 4 0
 2 2 8 5 5 0 4 1 9 6 5 9 2 1 5 9 4 1 2 4 1 1 9 0 0 2 9 7 6 8 7 1 2 4 8 0 5
 4 7 8 2 7 3 6 0 2 5 3 5 4 1 9 0 8 3 2 2 3 6 2 5 0 2 6 1 6 3 2 4 0 7 3 9 6
 6 5 9 2 3 9 8 2

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.



[0.33827893 0.24550898 0.31914894]

0.3009789499856276

0.03999314267040688


[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   19.5s finished
