In [2]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score

# End-to-end KNN regression demo: build a synthetic dataset, tune the number of
# neighbors with cross-validation, then report train/test error for the tuned
# model.

# Step 1: Generate a simulated regression dataset
# 1000 samples with 2 features; the fixed random_state keeps the run reproducible.
X, y = make_regression(n_samples=1000, n_features=2, noise=0.1, random_state=42)

# Step 2: Split the data into training (80%) and test (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Apply K-Nearest Neighbors with hyperparameter tuning
# 5-fold cross-validation over the candidate k values. scikit-learn scorers
# follow a "higher is better" convention, hence the negated MSE.
param_grid = {'n_neighbors': [3, 5, 7, 9, 11]}
knn = KNeighborsRegressor()
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Best number of neighbors (k) found by the search
best_k = grid_search.best_params_['n_neighbors']

# Step 4: Model validation
# GridSearchCV is constructed with its default refit=True, so fit() has already
# trained a fresh estimator with the best parameters on all of X_train/y_train.
# Reuse that model instead of building and fitting an identical one a second time.
knn_final = grid_search.best_estimator_

# Make predictions on the training and test sets
y_train_pred = knn_final.predict(X_train)
y_test_pred = knn_final.predict(X_test)

# Step 5: Model evaluation
# Mean Squared Error (MSE) and R-squared on both splits; comparing train against
# test values shows how well the tuned model generalizes.
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

# Print the evaluation metrics
print(f"Train Mean Squared Error: {train_mse:.4f}")
print(f"Test Mean Squared Error: {test_mse:.4f}")
print(f"Train R-squared: {train_r2:.4f}")
print(f"Test R-squared: {test_r2:.4f}")


Train Mean Squared Error: 5.6193
Test Mean Squared Error: 9.3193
Train R-squared: 0.9964
Test R-squared: 0.9941
