In [7]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def _load_flat_grayscale(path, image_size):
    """Load one image file as a flat grayscale vector divided by 255.0."""
    img = load_img(path, target_size=image_size, color_mode='grayscale')
    # Dividing by 255.0 maps 8-bit pixel intensities into the [0, 1] range.
    return img_to_array(img).flatten() / 255.0


def load_images_with_subclasses(folder_normal, folder_pneumonia, image_size=(400, 400)):
    """Load chest X-ray images and label them as normal, bacterial or viral.

    Every entry of ``folder_normal`` gets label 0. Entries of
    ``folder_pneumonia`` are labelled from their filename (case-insensitive):
    1 if it contains "bacteria", otherwise 2 if it contains "virus".
    A pneumonia file matching neither keyword triggers a printed warning and
    is skipped entirely, so the returned lists always have equal length and
    ``images[i]`` always corresponds to ``labels[i]``.

    Args:
        folder_normal: Directory containing the NORMAL images.
        folder_pneumonia: Directory containing the PNEUMONIA images.
        image_size: ``(height, width)`` passed to ``load_img`` as
            ``target_size``; each image is resized to it before flattening.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a list of flattened
        pixel arrays (values divided by 255.0) and ``labels`` is a list of
        ints in ``{0, 1, 2}``, in matching order.
    """
    images = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any seeded train/test split) reproducible.

    # NORMAL images → label 0
    for filename in sorted(os.listdir(folder_normal)):
        path = os.path.join(folder_normal, filename)
        images.append(_load_flat_grayscale(path, image_size))
        labels.append(0)

    # PNEUMONIA images → label 1 or 2
    for filename in sorted(os.listdir(folder_pneumonia)):
        lowered = filename.lower()
        if 'bacteria' in lowered:
            label = 1  # Bacterial Pneumonia
        elif 'virus' in lowered:
            label = 2  # Viral Pneumonia
        else:
            # Decide the label BEFORE loading: appending the image without a
            # label would leave images and labels misaligned.
            print(f"Warning: Unknown pneumonia type in '{filename}'")
            continue

        path = os.path.join(folder_pneumonia, filename)
        images.append(_load_flat_grayscale(path, image_size))
        labels.append(label)

    return images, labels


# Dataset folders: NORMAL scans in one directory, PNEUMONIA scans in the other
# (the loader tells bacterial from viral cases by filename).
folder_normal = '../chest_Xray/test/NORMAL'
folder_pneumonia = '../chest_Xray/test/PNEUMONIA'

# Read every image as a flattened pixel vector together with its class label.
images, labels = load_images_with_subclasses(folder_normal, folder_pneumonia)

# Feature matrix X (one row per image) and target vector y (0 / 1 / 2).
X = np.array(images)
y = np.array(labels)

# 80/20 train/test split; stratify=y keeps the class proportions equal in
# both parts, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Dimensionality reduction: a float n_components asks PCA to keep as many
# components as needed to explain that fraction of the variance.
# The projection is fitted on the training rows only, then reused unchanged
# on the test rows, so no test information leaks into the transform.
n_components = 0.95
pca = PCA(n_components=n_components)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Three-class classifier on the PCA-reduced features.
classifier = LogisticRegression(max_iter=1000)
classifier.fit(X_train_pca, y_train)

# Score the held-out 20%.
y_pred = classifier.predict(X_test_pca)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy with PCA + Logistic Regression: {accuracy:.2f}")


Accuracy with PCA + Logistic Regression: 0.82
