In [5]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder,StandardScaler

In [10]:
# Step 1: Load and preprocess the data
data = sns.load_dataset('iris')  # Loading the dataset
X = data.drop('species', axis=1)  # Features (sepal and petal dimensions)
y = data['species']  # Target labels (variety of Iris flowers)
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [11]:
# Encode the categorical target labels into numerical values for the model
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [12]:
# Step 2: Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

In [13]:
X_train

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
22,4.6,3.6,1.0,0.2
15,5.7,4.4,1.5,0.4
65,6.7,3.1,4.4,1.4
11,4.8,3.4,1.6,0.2
42,4.4,3.2,1.3,0.2
...,...,...,...,...
71,6.1,2.8,4.0,1.3
106,4.9,2.5,4.5,1.7
14,5.8,4.0,1.2,0.2
92,5.8,2.6,4.0,1.2


In [14]:
X_test

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
73,6.1,2.8,4.7,1.2
18,5.7,3.8,1.7,0.3
118,7.7,2.6,6.9,2.3
78,6.0,2.9,4.5,1.5
76,6.8,2.8,4.8,1.4
31,5.4,3.4,1.5,0.4
64,5.6,2.9,3.6,1.3
141,6.9,3.1,5.1,2.3
68,6.2,2.2,4.5,1.5
82,5.8,2.7,3.9,1.2


In [15]:
y_train

array([0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0, 0, 1, 2, 2, 1, 2, 1, 2,
       1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 0, 2, 2,
       1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 1, 0, 2, 0, 0, 1, 1, 2, 1, 2, 2, 1,
       0, 0, 2, 2, 0, 0, 0, 1, 2, 0, 2, 2, 0, 1, 1, 2, 1, 2, 0, 2, 1, 2,
       1, 1, 1, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 2, 0, 1, 2, 2, 1, 2,
       1, 1, 2, 2, 0, 1, 2, 0, 1, 2])

In [16]:
y_test

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

In [17]:
# Step 3: Apply K-Nearest Neighbors (KNN) classifier
knn = KNeighborsClassifier(n_neighbors=3)  # k=3 (choosing 3 nearest neighbors)
knn.fit(X_train, y_train)  # Train the model on the training data

# Predict the labels of the test data
y_pred = knn.predict(X_test)

In [18]:
# Step 4: Evaluate the model
conf_matrix = confusion_matrix(y_test, y_pred)  # Confusion matrix
conf_matrix

array([[10,  0,  0],
       [ 0,  9,  0],
       [ 0,  0, 11]])

In [21]:
report_dict = classification_report(y_test, y_pred, target_names=label_encoder.classes_, output_dict=True)

# Convert it into a DataFrame for better formatting
report_df = pd.DataFrame(report_dict).transpose()

# Display the DataFrame in an organized way
print("Classification Report:\n")
print(report_df)

Classification Report:

              precision  recall  f1-score  support
setosa              1.0     1.0       1.0     10.0
versicolor          1.0     1.0       1.0      9.0
virginica           1.0     1.0       1.0     11.0
accuracy            1.0     1.0       1.0      1.0
macro avg           1.0     1.0       1.0     30.0
weighted avg        1.0     1.0       1.0     30.0


In [20]:
# Step 5: Test the model with a new input
new_input_df = pd.DataFrame(new_input, columns=X.columns)

# Predict the class of the new input
new_prediction = knn.predict(new_input_df)

# Decode the predicted label back to the original class name
predicted_class = label_encoder.inverse_transform(new_prediction)

print(predicted_class)

NameError: name 'new_input' is not defined

Example 2

In [22]:
# Step 1: Load the dataset
data = pd.read_csv('./../datafiles/diabetes.csv')
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [23]:

# Step 2: Preprocess the data (e.g., scaling features)
X = data.drop('Outcome', axis=1)  # Assuming 'Outcome' is the target column
y = data['Outcome']  # Target variable

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [24]:
# Step 3: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [25]:
# Step 4: KNN Model Training
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

In [26]:
# Step 5: Predictions and Evaluation
y_pred = knn.predict(X_test)

In [27]:
# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
conf_matrix

array([[80, 19],
       [27, 28]])

In [28]:
# Classification report
report_dict = classification_report(y_test, y_pred, target_names=['class_0', 'class_1'], output_dict=True)

# Convert the dictionary into a pandas DataFrame for better formatting
report_df = pd.DataFrame(report_dict).transpose()

# Display the DataFrame in a more organized format
print("Classification Report:\n")
print(report_df)

Classification Report:

              precision    recall  f1-score     support
class_0        0.747664  0.808081  0.776699   99.000000
class_1        0.595745  0.509091  0.549020   55.000000
accuracy       0.701299  0.701299  0.701299    0.701299
macro avg      0.671704  0.658586  0.662859  154.000000
weighted avg   0.693407  0.701299  0.695385  154.000000


In [29]:
# Step 6: Test with a new input
new_input = [[5, 116, 74, 0, 0, 25.6, 0.201, 30]]  
new_input2 = [[6, 148, 72, 35, 0, 33.6, 0.627, 50]] 

# Convert the new input to a DataFrame with correct feature names
new_input_df = pd.DataFrame(new_input, columns=X.columns)
new_input2_df = pd.DataFrame(new_input2, columns=X.columns)

# Scale the new input using the same scaler fitted on the training data
new_input_scaled = scaler.transform(new_input_df)
new_input2_scaled = scaler.transform(new_input2_df)

# Predict the class of the new input
prediction = knn.predict(new_input_scaled)
prediction2 = knn.predict(new_input2_scaled)

print(f"Predicted class for new input: {prediction}")
print(f"Predicted class for new input: {prediction2}")

Predicted class for new input: [0]
Predicted class for new input: [1]
