In [2]:
# Import necessary libraries
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# Step 1: Build a Synthetic 4-Class Dataset
# make_classification is configured so that both features carry signal:
# every one of the 2 features is informative, none is redundant, and each
# of the 4 classes is drawn from a single cluster. The fixed random_state
# makes the 1000 generated samples reproducible across runs.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=4,
    n_clusters_per_class=1,
    random_state=16,
)

# Step 2: Hold Out a Test Set
# Keep 20% of the samples aside for final evaluation and train on the
# remaining 80%; the split is seeded so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3: Apply K-Nearest Neighbors with Hyperparameter Tuning
# Candidate values for the number of neighbors (k) to search over.
param_grid = {'n_neighbors': [3, 5, 7, 9, 11]}

# Base K-Nearest Neighbors classifier handed to the grid search.
knn = KNeighborsClassifier()

# Run a 5-fold cross-validated grid search on the training split only, so
# the test split plays no part in choosing k.
# refit=True is spelled out (it is also the default): once the search is
# done, the winning configuration is retrained on all of X_train / y_train.
grid_search = GridSearchCV(knn, param_grid, cv=5, refit=True)
grid_search.fit(X_train, y_train)

# Best number of neighbors (k) found by the search.
best_k = grid_search.best_params_['n_neighbors']

# Step 4: Model Validation
# Reuse the estimator the search already refit with best_k on the full
# training split, rather than constructing and fitting a second, identical
# KNeighborsClassifier(n_neighbors=best_k) on the same data.
knn_final = grid_search.best_estimator_

# Predict on both splits so training and held-out performance can be compared.
y_train_pred = knn_final.predict(X_train)
y_test_pred = knn_final.predict(X_test)

# Step 5: Classification Reports
# Summarize per-class precision, recall, F1 and support for each split by
# comparing the true labels against the model's predictions.
test_classification_report = classification_report(
    y_true=y_test,
    y_pred=y_test_pred,
)
train_classification_report = classification_report(
    y_true=y_train,
    y_pred=y_train_pred,
)

# Show the training report first, then the held-out test report, so the
# gap between the two is easy to read off.
print("Train Classification Report:\n", train_classification_report)
print("Test Classification Report:\n", test_classification_report)


Train Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.95      0.89       190
           1       0.94      0.93      0.93       204
           2       0.89      0.76      0.82       211
           3       0.88      0.92      0.90       195

    accuracy                           0.89       800
   macro avg       0.89      0.89      0.89       800
weighted avg       0.89      0.89      0.89       800

Test Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.96      0.85        55
           1       0.95      0.81      0.87        47
           2       0.83      0.67      0.74        43
           3       0.91      0.93      0.92        55

    accuracy                           0.85       200
   macro avg       0.86      0.84      0.85       200
weighted avg       0.86      0.85      0.85       200

