**Step1: Import All Necessary Libraries**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

**Step2: Import the Dataset**

In [None]:
# Read the gene-expression CSV into the working DataFrame.
csv_path = '/content/gene_expression.csv'
df = pd.read_csv(csv_path)

**Step3: Data Overview**

In [None]:
# Preview the first five rows to check the column names and sample values.
df.head(n=5)

In [None]:
# Dimensions of the loaded DataFrame as a (rows, columns) tuple.
df.shape

In [None]:
# Print a summary of the DataFrame: column names, non-null counts and dtypes.
df.info()

**Step4: EDA**

In [None]:
# Scatter the two gene columns against each other, coloured by the target
# label, to see how well the classes separate.
# An explicit Axes is used so the figure can be titled; the trailing semicolon
# suppresses the text repr that would otherwise be echoed under the plot.
fig, ax = plt.subplots()
sns.scatterplot(data=df, x='Gene One', y='Gene Two', hue='Cancer Present', ax=ax)
ax.set_title('Gene One vs. Gene Two by Cancer Present');

**Step5: Determine the Features & Target Variable**

In [None]:
# Separate the label column from the predictor columns.
target_col = 'Cancer Present'
y = df[target_col]
X = df.drop(columns=target_col)

**Step6: Split the Data into Train & Test Sets**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

**Step7: Scaling the Features**

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, so the scaling statistics are not
# influenced by the test rows. `fit` returns the scaler itself.
scaler = StandardScaler().fit(X_train)
scaler

In [None]:
# Apply the fitted scaler to both splits (train first, then test).
scaled_X_train, scaled_X_test = (
    scaler.transform(split) for split in (X_train, X_test)
)

**Step8: Train the Model with Default Parameters**

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline classifier with scikit-learn's defaults (n_neighbors=5), trained on
# the scaled training features. `fit` returns the estimator itself.
knn_model = KNeighborsClassifier().fit(scaled_X_train, y_train)
knn_model

**Step9: Predicting Test Data**

In [None]:
# Predict labels for the scaled test split.
y_pred = knn_model.predict(scaled_X_test)

# Side-by-side table of actual vs. predicted labels for the test rows
# (indexed like y_test).
y_test.to_frame(name='Y_Test').assign(Y_Pred=y_pred)

**Step10: Evaluating the Model**

In [None]:
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)

# Fraction of test rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Counts of test samples per (true label, predicted label) pair; rows are true labels.
confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# Draw the confusion matrix as a plot. True labels go first and predictions
# second; swapping them would transpose the matrix.
ConfusionMatrixDisplay.from_predictions(y_true=y_test, y_pred=y_pred)

In [None]:
# Per-class precision, recall and F1 for the test predictions, printed as text.
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

**Elbow Method for Choosing Reasonable K Values**

In [None]:
# Elbow method: record the test-set misclassification rate for k = 1..29.
#
# Each candidate classifier gets its own name so that `knn_model`, the
# default-k model fitted earlier, is NOT overwritten by the sweep. Previously
# `knn_model` ended up as the k=29 classifier, so re-running the prediction or
# evaluation cells afterwards silently scored a different model.
#
# NOTE(review): picking k from test-set error lets the test split influence
# model selection. For a final model, prefer cross-validation on the training
# data and keep the test split for a single final evaluation.
test_error_rate = []

for k in range(1, 30):
    candidate_knn = KNeighborsClassifier(n_neighbors=k)
    candidate_knn.fit(scaled_X_train, y_train)

    # Error rate is the complement of accuracy on the held-out split.
    candidate_pred = candidate_knn.predict(scaled_X_test)
    test_error_rate.append(1 - accuracy_score(y_test, candidate_pred))

In [None]:
# Show the recorded test error rates, one entry per k tried in the sweep (k = 1..29).
test_error_rate

In [None]:
# Plot test error against k to look for the "elbow" where adding neighbours
# stops helping.
# The x values are derived from the number of recorded errors rather than a
# second hard-coded range, so the plot stays in step with the sweep length.
# Assumes the sweep started at k=1, as the loop that fills test_error_rate does.
k_values = range(1, len(test_error_rate) + 1)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(k_values, test_error_rate, label='Test Error')
ax.set(xlabel='K Value', ylabel='Error Rate', title='Test Error Rate vs. K Value')
ax.legend();