In [1]:
# importing libraries
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.impute import SimpleImputer
from joblib import Parallel, delayed

In [2]:
# Loading data
df = pd.read_csv('Grouped.csv')
df.shape


(24016, 2401)

In [3]:
# Initialize SimpleImputer to fill null values with the mean of each column
imputer = SimpleImputer(strategy='mean')

# Perform mean imputation on the entire DataFrame
df_imputed = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)
print("\nDataFrame after mean imputation:")
print(df_imputed)


DataFrame after mean imputation:
            x1      y1  xVel1  yVel1   xA1   yA1     xS1     yS1   xC1   yC1  \
0     -1414.14 -535.22 -17.88  -7.23  0.00  0.00    0.00    0.00  0.00  0.00   
1     -1412.93  597.54 -13.55  -5.48  0.00  0.00    0.00    0.00  0.00  0.00   
2     -1407.38   70.72 -14.37  -5.81  0.00  0.00    0.00    0.00  0.00  0.00   
3     -1407.00 -759.80  -7.59  -1.27 -0.98 -0.20    0.00    0.00  0.91  0.41   
4     -1406.36  698.39 -16.54  -6.95 -1.00  0.00 -944.07 -396.62  0.00  0.00   
...        ...     ...    ...    ...   ...   ...     ...     ...   ...   ...   
24011  1403.71  948.55   4.54  -6.29  0.00  0.00    0.00    0.00 -0.13 -0.29   
24012  1403.72  133.09   9.46  14.33  0.00  1.00    0.00    0.00  0.00  0.00   
24013  1404.38  144.31   6.98   3.89  0.00  0.00    0.00    0.00  0.00  0.00   
24014  1404.61 -315.55   6.50   4.27  0.00  0.00    0.00    0.00  0.00  0.00   
24015  1406.08 -354.52  10.00   1.18  0.00 -0.01    0.00    0.00  0.31  0.07   

     

In [4]:
# Selecting columns
x = df.drop(columns=['Class'])
y = df['Class']

In [5]:
# Train and Split
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.3, random_state=42)

In [6]:
wg = ['uniform', 'distance']  # weights
p_values = list(range(1,3))  # p
n_neighbors_values = list(range(5,10) ) # n_neighbors
algorithm_values = ['auto','brute', 'ball_tree', 'kd_tree']  # algorithm
leaf_size_values = list(range(20, 40, 10))  # leaf_size
results = []


In [7]:
# KNN classifier
for wgh in wg:
    for p in p_values:
        for c in n_neighbors_values:
            for algorithm in algorithm_values:
                for leaf_size in leaf_size_values:
                    # Train KNN model with current hyperparameters
                    knn_model = KNeighborsClassifier(n_neighbors=c, weights=wgh, p=p, algorithm=algorithm, leaf_size=leaf_size)
                    knn_model.fit(x_train, y_train)
                    
                    # Make predictions on the test set
                    y_pred = knn_model.predict(x_test)
                    
                    # Calculate accuracy
                    accuracy = accuracy_score(y_test, y_pred)
                    
                    # Calculate F1 score
                    f1 = f1_score(y_test, y_pred, average='weighted')
                    
                    
                    # Print and store results
                    print(f'n_neighbors:{c} weights:{wgh} p:{p} Algorithm:{algorithm} Leaf size:{leaf_size} Accuracy: {accuracy} F1 Score: {f1}')
                    results.append({'n_neighbor': c,
                                     'weights': wgh, 
                                     'p': p, 
                                     'Algorithm': algorithm,
                                     'Leaf size': leaf_size,
                                     'Accuracy': accuracy,
                                     'F1 Score': f1})
                    
                    

df_results = pd.DataFrame(results,columns=['n_neighbor', 'weights','p', 'Algorithm', 'Leaf size','Accuracy', 'F1 Score'])

# Save the DataFrame to a CSV file
df_results.to_csv('swarm_result_one.csv', index=False)

n_neighbors:5 weights:uniform p:1 Algorithm:auto Leaf size:20 Accuracy: 1.0 F1 Score: 1.0
n_neighbors:5 weights:uniform p:1 Algorithm:auto Leaf size:30 Accuracy: 1.0 F1 Score: 1.0
n_neighbors:5 weights:uniform p:1 Algorithm:brute Leaf size:20 Accuracy: 1.0 F1 Score: 1.0
n_neighbors:5 weights:uniform p:1 Algorithm:brute Leaf size:30 Accuracy: 1.0 F1 Score: 1.0
n_neighbors:5 weights:uniform p:1 Algorithm:ball_tree Leaf size:20 Accuracy: 1.0 F1 Score: 1.0
n_neighbors:5 weights:uniform p:1 Algorithm:ball_tree Leaf size:30 Accuracy: 1.0 F1 Score: 1.0
n_neighbors:5 weights:uniform p:1 Algorithm:kd_tree Leaf size:20 Accuracy: 1.0 F1 Score: 1.0
n_neighbors:5 weights:uniform p:1 Algorithm:kd_tree Leaf size:30 Accuracy: 1.0 F1 Score: 1.0
n_neighbors:6 weights:uniform p:1 Algorithm:auto Leaf size:20 Accuracy: 1.0 F1 Score: 1.0
n_neighbors:6 weights:uniform p:1 Algorithm:auto Leaf size:30 Accuracy: 1.0 F1 Score: 1.0
n_neighbors:6 weights:uniform p:1 Algorithm:brute Leaf size:20 Accuracy: 1.0 F1 Sc