In [1]:
import os
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder
import pickle
import cv2
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA

In [2]:
data = []
category = []

In [3]:
from google.colab import drive
drive.mount('/content/drive')

path = '/content/drive/My Drive/brain_tumor_dataset'

Mounted at /content/drive


In [4]:
for filename in os.scandir("/content/drive/My Drive/brain_tumor_dataset/no"):
    if filename.is_file():
        #print(filename.path)
        img = Image.open(filename.path)
        img = img.resize(size=(32,32))
        img = img.convert('L')
        data.append(np.array(img).flatten())
        category.append(0)
        del img

In [5]:
for filename in os.scandir("/content/drive/My Drive/brain_tumor_dataset/yes"):
    if filename.is_file():
        #print(filename.path)
        img = Image.open(filename.path)
        img = img.resize(size=(32,32))
        img = img.convert('L')
        data.append(np.array(img).flatten())
        category.append(1)
        del img

In [6]:
# convert to numpy arrays
x,y = np.array(data),category

In [7]:
x

array([[0, 0, 0, ..., 3, 3, 3],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [2, 2, 3, ..., 3, 3, 3]], dtype=uint8)

In [8]:
x.shape

(4000, 1024)

In [9]:
len(y)

4000

In [10]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=20)

In [11]:
# Standardize data
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [12]:
# Apply PCA
pca = PCA(n_components=0.95)  # Keep 95% of variance
x_train_pca = pca.fit_transform(x_train_scaled)
x_test_pca = pca.transform(x_test_scaled)

In [13]:
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
grid_search.fit(x_train_pca, y_train)

print("Best Parameters:", grid_search.best_params_)
knn_model = grid_search.best_estimator_

Best Parameters: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'distance'}


In [14]:
# Train KNN with best parameters
knn_model.fit(x_train_pca, y_train)

In [33]:
# Accuracy
print("KNN Train Accuracy:", knn_model.score(x_train_pca, y_train))
print("KNN Test Accuracy:", knn_model.score(x_test_pca, y_test))

KNN Train Accuracy: 1.0
KNN Test Accuracy: 0.99375


In [34]:
# Evaluate KNN
y_pred = knn_model.predict(x_test_pca)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.99375
Confusion Matrix:
 [[817   0]
 [ 10 773]]
Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      0.99       817
           1       1.00      0.99      0.99       783

    accuracy                           0.99      1600
   macro avg       0.99      0.99      0.99      1600
weighted avg       0.99      0.99      0.99      1600



In [35]:
with open('brain_tumor1_KNN.pkl', 'wb') as file:
    pickle.dump(knn_model, file)

# Save the scaler
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

# Save the PCA
with open('pca.pkl', 'wb') as file:
    pickle.dump(pca, file)

from google.colab import files
files.download('brain_tumor1_KNN.pkl')
files.download('scaler.pkl')
files.download('pca.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [36]:
def preprocess_image(file_path):
    image = cv2.imread(file_path)
    if image is None:
        raise ValueError(f"Could not read the image from path: {file_path}")
    image = cv2.resize(image, (32, 32))  # Resize to 32*32
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
    # Flattens the 2D grayscale image into a 1D array. This step is necessary because KNN requires a 1D array as input for each sample, and flattening the image reduces its dimensionality.
    image = image.flatten()
    image = scaler.transform([image])  # Standardize
    image = pca.transform(image)  # Apply PCA
    return image

In [37]:
uploaded_image_path = '/content/yes (1).jpg'
uploaded_image = preprocess_image(uploaded_image_path)

In [38]:
prediction = knn_model.predict(uploaded_image)
print(prediction)
# Interpret the prediction
if prediction[0] == 0:
    print("Predicted: No Brain Tumor")
else:
    print("Predicted: Brain Tumor Detected")

[1]
Predicted: Brain Tumor Detected
