In [16]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
from quantbullet.model.neighbors import FeatureScaledKNNRegressor
import numpy as np
import pandas as pd
from quantbullet.model_selection.optimizer import GridSearch

In [18]:
# Seed NumPy's legacy global RNG; the data generators in this notebook draw
# from it via np.random.randn, so this fixes the sampled data.
np.random.seed(seed=42)

def make_mahalanobis_friendly_data(n_samples=500):
    """Generate a regression sample whose three features are strongly correlated.

    Column 0 is standard normal. Column 1 is ``0.9 * col0`` plus noise scaled
    by 0.1, and column 2 is ``-0.8 * col0`` plus noise scaled by 0.2. The
    target is ``sin(col0)`` plus noise scaled by 0.1.

    All draws come from NumPy's global ``np.random`` state, so the result
    depends on whatever seed was set beforehand.

    Parameters
    ----------
    n_samples : int, default 500
        Number of rows to generate.

    Returns
    -------
    tuple of (ndarray, ndarray)
        Feature matrix of shape ``(n_samples, 3)`` and target vector of
        shape ``(n_samples,)``.
    """
    features = np.random.randn(n_samples, 3)
    # View on the first column; the other two columns are rebuilt from it.
    driver = features[:, 0]

    # Overwrite columns 1 and 2 with noisy linear functions of the driver.
    noise_col1 = np.random.randn(n_samples)
    features[:, 1] = 0.9 * driver + 0.1 * noise_col1

    noise_col2 = np.random.randn(n_samples)
    features[:, 2] = -0.8 * driver + 0.2 * noise_col2

    # Non-linear target that depends on the driver column only.
    target_noise = np.random.randn(n_samples)
    target = np.sin(driver) + 0.1 * target_noise
    return features, target

def make_euclidean_friendly_data(n_samples=500):
    """Generate a regression sample with three independent standard-normal features.

    The target is a linear combination of the columns with weights
    2, 0.5 and 0.1, plus standard-normal noise scaled by 0.2.

    All draws come from NumPy's global ``np.random`` state, so the result
    depends on whatever seed was set beforehand.

    Parameters
    ----------
    n_samples : int, default 500
        Number of rows to generate.

    Returns
    -------
    tuple of (ndarray, ndarray)
        Feature matrix of shape ``(n_samples, 3)`` and target vector of
        shape ``(n_samples,)``.
    """
    features = np.random.randn(n_samples, 3)
    noise = np.random.randn(n_samples)

    # Iterating over the transpose yields one view per feature column.
    col0, col1, col2 = features.T
    signal = col0 * 2 + col1 * 0.5 + col2 * 0.1
    target = signal + noise * 0.2
    return features, target

In [19]:
# Choose your dataset (swap which of the two generator lines is commented out).
# Re-seed immediately before sampling so this cell produces the same X, y
# every time it is executed, not only on a fresh top-to-bottom run.
np.random.seed(42)
# X, y = make_mahalanobis_friendly_data()
X, y = make_euclidean_friendly_data()

In [28]:
# Configure a 5-fold cross-validated grid search over the KNN regressor:
# five neighbourhood sizes x two distance metrics = 10 candidates, scored by
# negative mean squared error.
gridSearchCvOptimizer = GridSearch(
    estimator=FeatureScaledKNNRegressor(),
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=1,
    param_grid={
        'n_neighbors': list(range(1, 10, 2)),  # odd k: 1, 3, 5, 7, 9
        'weights': ['uniform'],
        'metrics': ['euclidean', 'mahalanobis'],
        'feature_weights': [None],
    },
)

In [29]:
# Run the grid search on the full sample. The trailing semicolon stops the
# notebook from echoing the returned object's repr, which contains a memory
# address that changes on every run.
gridSearchCvOptimizer.fit(X, y);

Fitting 5 folds for each of 10 candidates, totalling 50 fits


<quantbullet.model_selection.optimizer.GridSearch at 0x1a8917219d0>

In [30]:
# Show the first rows of the search summary table (one row per parameter
# combination, with mean/std test score and rank in the recorded output).
gridSearchCvOptimizer.summary().head()

Unnamed: 0,feature_weights,metrics,n_neighbors,weights,mean_test_score,std_test_score,rank
1,,euclidean,3,uniform,-0.171741,0.019716,1
6,,mahalanobis,3,uniform,-0.178907,0.048498,2
7,,mahalanobis,5,uniform,-0.184177,0.051585,3
2,,euclidean,5,uniform,-0.186842,0.02751,4
8,,mahalanobis,7,uniform,-0.195956,0.062516,5


In [36]:
# Keep a handle on the model the search selected.
# NOTE(review): presumably this is the top-ranked candidate refit on all of
# X, y — confirm against GridSearch.best_model.
best_model = gridSearchCvOptimizer.best_model()

In [37]:
# NOTE(review): X, y are the same arrays that were passed to fit(), so this
# is likely an in-sample score. The recorded mse here is noticeably lower
# than the best cross-validated error in the summary table; do not read it
# as a generalisation estimate without a held-out set.
gridSearchCvOptimizer.evaluate(X, y)

{'mse': 0.06956211023813165}

In [38]:
# Peek at the first sample; it is used as the query point for the neighbour lookup.
X[0]

array([ 0.49671415, -0.1382643 ,  0.64768854])

In [39]:
# Query with the first sample only; X[:1] keeps the 2-D (1, n_features) shape.
best_model.predict_with_neighbors(X[:1])

Unnamed: 0,0,1,2,request_index,prediction,neighbor_index,neighbor_rank,distance,neighbor_y
0,0.496714,-0.138264,0.647689,0,0.91086,0,0,0.0,1.144737
1,0.482472,-0.223463,0.714,0,0.91086,61,1,0.108105,0.775171
2,0.558327,0.076005,0.538756,0,0.91086,420,2,0.24833,0.812672
