# KNN Classifier

Example 1

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

# --- Step 1: training data ---------------------------------------------
# Six 2-D points, listed per class and then stacked into one feature array.
class0_points = [[1, 2], [2, 3], [3, 3]]
class1_points = [[6, 5], [7, 7], [8, 6]]
X = np.array(class0_points + class1_points)                   # feature values (2D points)
y = np.array([0] * len(class0_points) + [1] * len(class1_points))  # class labels (0 or 1)

# --- Step 2: create and train the model --------------------------------
k = 3  # number of nearest neighbours used for the majority vote
knn = KNeighborsClassifier(n_neighbors=k).fit(X, y)  # fit() returns the estimator itself

# --- Step 3: predict for new samples -----------------------------------
sample1 = np.array([[3, 5]])  # a point that is not in the training set
predicted_class1 = knn.predict(sample1)
print("Predicted class for [3,5]:", predicted_class1)

sample2 = np.array([[6, 5]])  # coincides with a class-1 training point
predicted_class2 = knn.predict(sample2)
print("Predicted class for [6,5]:", predicted_class2)

Predicted class for [3,5]: [0]
Predicted class for [6,5]: [1]


# KNN Example 2

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix 
from sklearn.metrics import precision_score, recall_score, f1_score 
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns  # only needed for the confusion-matrix heatmap below

# Load the dataset and split it column-wise into features and label.
data = pd.read_csv("apndcts.csv")
predictors = data.iloc[:, 0:7]  # first seven columns: predictor variables
target = data.iloc[:, 7]        # eighth column: target / class variable

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
predictors_train, predictors_test, target_train, target_test = train_test_split(
    predictors, target, test_size=0.3, random_state=123
)

# Instantiate the model with 3 neighbours and train it on the training part only.
knn = KNeighborsClassifier(n_neighbors=3)
knn_model = knn.fit(predictors_train, target_train)

# Keep the predictions in their own variable so the feature frame `predictors`
# is not overwritten by the model output.
prediction = knn_model.predict(predictors_test)

# scikit-learn metrics take (y_true, y_pred) in that order. Swapping them
# silently exchanges precision and recall and transposes the confusion matrix.
print("\n--- KNN Results ---")
print("Accuracy:", accuracy_score(target_test, prediction))
# Macro average: unweighted mean of the per-class scores.
print("Precision:", precision_score(target_test, prediction, average='macro', zero_division=0))
print("Recall:", recall_score(target_test, prediction, average='macro', zero_division=0))
print("F1-score:", f1_score(target_test, prediction, average='macro', zero_division=0))

# Confusion matrix: rows are the actual labels, columns the predicted labels,
# which matches the axis labels set on the heatmap below.
cm = confusion_matrix(target_test, prediction)
print(cm)

plt.figure(figsize=(4, 3))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')  # annotated cells, integer format
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'apndcts.csv'

# Decision Tree Example

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
# Load the dataset and separate the feature columns from the label column.
data = pd.read_csv("apndcts.csv")
predictors = data.iloc[:, :7]  # first seven columns: predictor variables
target = data.iloc[:, 7]       # eighth column: target / class variable

# 70/30 train/test partition with a fixed seed.
(predictors_train, predictors_test,
 target_train, target_test) = train_test_split(
    predictors,
    target,
    test_size=0.3,
    random_state=123,
)

# Entropy-criterion tree, limited to depth 3 with at least 5 samples per leaf.
dtree_entropy = DecisionTreeClassifier(
    criterion="entropy",
    random_state=100,
    max_depth=3,
    min_samples_leaf=5,
)

# Train on the training part, then predict the held-out part.
model = dtree_entropy.fit(predictors_train, target_train)
prediction = model.predict(predictors_test)

# Left as the last expression so the notebook displays the accuracy.
accuracy_score(target_test, prediction, normalize=True)

# Random Forest Example

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
# Load the dataset and split it column-wise into features and label.
data = pd.read_csv("apndcts.csv")
predictors = data.iloc[:, 0:7]  # first seven columns: predictor variables
target = data.iloc[:, 7]        # eighth column: target / class variable

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
predictors_train, predictors_test, target_train, target_test = train_test_split(
    predictors, target, test_size=0.3, random_state=123
)

# Random forests are stochastic (bootstrap samples, random feature subsets).
# Without a seed the reported accuracy and confusion matrix change on every
# run, so the estimator is seeded like the other models in this notebook.
rf = RandomForestClassifier(random_state=100)

# Train on the training part, then predict the held-out part.
model = rf.fit(predictors_train, target_train)
prediction = model.predict(predictors_test)

# Fraction of correctly classified test rows, then the confusion matrix
# (rows: actual labels, columns: predicted labels).
print(accuracy_score(target_test, prediction, normalize=True))
print(confusion_matrix(target_test, prediction))

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn import svm
# Load the dataset and split it column-wise into features and label.
data = pd.read_csv("apndcts.csv")
predictors = data.iloc[:, 0:7]  # first seven columns: predictor variables
target = data.iloc[:, 7]        # eighth column: target / class variable

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
predictors_train, predictors_test, target_train, target_test = train_test_split(
    predictors, target, test_size=0.3, random_state=123
)

# Do not name the estimator `svm`: that would rebind the imported `sklearn.svm`
# module name, so any later `svm.SVC(...)` call (or a re-run of this cell
# without its import lines) would fail with AttributeError.
linear_svc = svm.SVC(kernel="linear")  # SVC with linear kernel

# Train on the training part, then predict the held-out part.
model = linear_svc.fit(predictors_train, target_train)
prediction = model.predict(predictors_test)

# Fraction of correctly classified test rows, then the confusion matrix
# (rows: actual labels, columns: predicted labels).
print(accuracy_score(target_test, prediction, normalize=True))
print(confusion_matrix(target_test, prediction))

In [None]:
# Other kernels
# Import SVC directly so this cell does not depend on what the name `svm`
# happens to be bound to in the running kernel (module vs. estimator instance).
from sklearn.svm import SVC

# Unfitted estimators: call .fit(predictors_train, target_train) before predicting.
rbf_svc = SVC(kernel='rbf', gamma=0.7, C=1.0)    # RBF kernel
poly_svc = SVC(kernel='poly', degree=3, C=1.0)   # degree-3 polynomial kernel