In [9]:
import os
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.preprocessing import LabelEncoder

def load_images_with_subclasses(folder_normal, folder_pneumonia, image_size=(400, 400)):
    images = []
    labels = []

    # NORMAL images → label 0
    for filename in os.listdir(folder_normal):
        path = os.path.join(folder_normal, filename)
        img = load_img(path, target_size=image_size, color_mode='grayscale')
        img_array = img_to_array(img).flatten() / 255.0
        images.append(img_array)
        labels.append(0)

    # PNEUMONIA images → label 1 (bacterial) or 2 (viral)
    for filename in os.listdir(folder_pneumonia):
        path = os.path.join(folder_pneumonia, filename)
        img = load_img(path, target_size=image_size, color_mode='grayscale')
        img_array = img_to_array(img).flatten() / 255.0
        images.append(img_array)

        if 'bacteria' in filename.lower():
            labels.append(1)  # Bacterial Pneumonia
        elif 'virus' in filename.lower():
            labels.append(2)  # Viral Pneumonia
        else:
            print(f"Warning: Unknown pneumonia type in '{filename}'")
    
    return images, labels

# Dossiers
folder_normal = '../chest_Xray/test/NORMAL'
folder_pneumonia = '../chest_Xray/test/PNEUMONIA'

# Chargement des données
images, labels = load_images_with_subclasses(folder_normal, folder_pneumonia)

# Création des features X et des labels y
X = np.array(images)
y = np.array(labels)

# Split des données (80% pour l'entraînement et 20% pour le test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Label Encoding for multi-class classification
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Instantiate Logistic Regression model with multi-class support
model = LogisticRegression(max_iter=1000)

# Fit the model to the training data
model.fit(X_train, y_train_encoded)

# Get predictions on the test data
y_pred_encoded = model.predict(X_test)

# Convert predictions back to original labels
y_pred = label_encoder.inverse_transform(y_pred_encoded)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Map the labels back to the class names
class_names = {0: "Normal", 1: "Bacterial Pneumonia", 2: "Viral Pneumonia"}

# Print classification report with class names
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=[class_names[i] for i in label_encoder.classes_])
print(report)

Accuracy: 0.86

Classification Report:
                     precision    recall  f1-score   support

             Normal       0.86      0.91      0.89        47
Bacterial Pneumonia       0.87      0.94      0.90        48
    Viral Pneumonia       0.87      0.67      0.75        30

           accuracy                           0.86       125
          macro avg       0.86      0.84      0.85       125
       weighted avg       0.86      0.86      0.86       125

