In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier

# ---------------------------------------------------------------------------
# 1. Data: read the CSV and turn the 'gender' column into integer class codes.
# ---------------------------------------------------------------------------
data = pd.read_csv("/content/gender_classification_v7 (1).csv")

# NOTE(review): the fitted LabelEncoder is not kept, so the code -> original
# label mapping cannot be recovered later via inverse_transform.
data = data.assign(gender=LabelEncoder().fit_transform(data['gender']))

# Everything except 'gender' is a feature; 'gender' itself is the target.
y = data['gender']
X = data.drop(columns='gender')

# ---------------------------------------------------------------------------
# 2. Hold-out split: 20% of the rows are reserved for testing. The fixed
#    random_state makes the shuffle (and therefore the scores) repeatable.
# ---------------------------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ---------------------------------------------------------------------------
# 3. Model: k-nearest neighbours with k=5. Minkowski distance with p=2 is
#    the Euclidean distance.
# NOTE(review): features are passed to the distance computation unscaled --
# confirm that is intended for this dataset.
# ---------------------------------------------------------------------------
knn = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
).fit(X_train, y_train)

# Predicted class codes for the held-out rows.
y_pred = knn.predict(X_test)

# ---------------------------------------------------------------------------
# 4. Evaluation: overall accuracy, per-class precision/recall/F1, and the
#    confusion matrix, all computed on the held-out rows.
# ---------------------------------------------------------------------------
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)


Accuracy: 96.20%
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.98      0.96       502
           1       0.98      0.95      0.96       499

    accuracy                           0.96      1001
   macro avg       0.96      0.96      0.96      1001
weighted avg       0.96      0.96      0.96      1001

Confusion Matrix:
 [[491  11]
 [ 27 472]]
