1- Import XRay Images


In [None]:
import os 
import cv2
import numpy as np 
# traitement dataframe
import pandas as pd
# apprentissage automatique
import tensorflow as tf
# permet de faire des graphiques
import matplotlib.pyplot as plt


2- Class XrayLoader

In [60]:
class XrayLoader:
    """Load, hold and visualise the train/val/test splits of the chest X-ray data.

    The data folder is expected to contain one sub-folder per split
    (``train``, ``val``, ``test``), each with one sub-folder per category
    (``NORMAL``, ``PNEUMONIA``).

    Two loaders are available and they do not produce the same representation:

    * ``load_data`` reads the files with OpenCV. Images end up in an array of
      shape ``(n, height, width)`` and labels in a list of category names.
    * ``load_data_faster`` goes through Keras. Images keep a trailing channel
      axis and labels are integer class indices.

    The display helpers accept both label representations.

    Args:
        x_train, y_train, x_val, y_val, x_test, y_test: optional pre-loaded
            images/labels for each split.
        data_folder: root folder holding the split sub-folders.
        image_size: ``(height, width)`` every image is resized to.
        batch_size: batch size used internally by ``make_dataset``.
    """

    # Split sub-folders expected under ``data_folder``.
    SUBFOLDERS = ("train", "val", "test")
    # Category sub-folders expected in every split. Kept in alphabetical order so
    # that position i matches the integer label Keras infers for the folder
    # (Keras sorts class names alphanumerically when labels='inferred').
    CATEGORIES = ("NORMAL", "PNEUMONIA")

    def __init__(self, x_train=None, y_train=None, x_val=None, y_val=None, x_test=None, y_test=None,
                 data_folder='./chest_Xray', image_size=(256, 256), batch_size=5):
        self.x_train = x_train
        self.y_train = y_train
        self.x_val = x_val
        self.y_val = y_val
        self.x_test = x_test
        self.y_test = y_test
        self.data_folder = data_folder
        self.image_size = tuple(image_size)
        self.batch_size = batch_size
        # Paths that load_data() could not decode as an image and therefore skipped.
        self.skipped_files = []

    def load_data(self):
        """Read every image of every split with OpenCV and store it on the instance.

        Images are read as grayscale and resized to ``self.image_size``. Files
        that OpenCV cannot decode are skipped and their paths are recorded in
        ``self.skipped_files``. File names are processed in sorted order so that
        a given index always refers to the same image.

        After the call, ``x_*`` are numpy arrays and ``y_*`` are lists of
        category names (``"NORMAL"`` / ``"PNEUMONIA"``).

        Raises:
            OSError: if an expected split/category folder cannot be listed.
        """
        height, width = self.image_size
        data = {}
        self.skipped_files = []

        for subfolder in self.SUBFOLDERS:
            data[subfolder] = {"x": [], "y": []}
            for category in self.CATEGORIES:
                # One folder per (split, category) pair: 6 folders in total.
                folder_path = os.path.join(self.data_folder, subfolder, category)
                for file_name in sorted(os.listdir(folder_path)):
                    image_path = os.path.join(folder_path, file_name)
                    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
                    if img is None:
                        # imread signals "not a readable image" with None;
                        # passing that to resize would raise.
                        self.skipped_files.append(image_path)
                        continue
                    # cv2.resize takes its target size as (width, height).
                    img = cv2.resize(img, (width, height))
                    data[subfolder]["x"].append(img)
                    data[subfolder]["y"].append(category)

        self.x_train = np.array(data["train"]["x"])
        self.y_train = data["train"]["y"]
        self.x_val = np.array(data["val"]["x"])
        self.y_val = data["val"]["y"]
        self.x_test = np.array(data["test"]["x"])
        self.y_test = data["test"]["y"]

    def load_data_faster(self):
        """Load the three splits through ``make_dataset`` and store them on the instance.

        After the call, ``x_*`` and ``y_*`` are numpy arrays; labels are integer
        class indices rather than category names.
        """
        (self.x_train, self.y_train) = self.make_dataset(
            os.path.join(self.data_folder, 'train'), self.batch_size, self.image_size)
        (self.x_val, self.y_val) = self.make_dataset(
            os.path.join(self.data_folder, 'val'), self.batch_size, self.image_size)
        (self.x_test, self.y_test) = self.make_dataset(
            os.path.join(self.data_folder, 'test'), self.batch_size, self.image_size)

    def make_dataset(self, directory, batch_size, image_size):
        """Load one split folder with Keras and return it as two numpy arrays.

        Args:
            directory: folder containing one sub-folder per category.
            batch_size: batch size used while reading the dataset.
            image_size: ``(height, width)`` the images are resized to.

        Returns:
            ``(images, labels)``: the concatenation of all batches along axis 0.
            ``images[i]`` and ``labels[i]`` always belong to the same file.
        """
        dataset = tf.keras.preprocessing.image_dataset_from_directory(
            directory=directory,
            labels='inferred',
            label_mode='int',
            color_mode='grayscale',
            image_size=image_size,
            batch_size=batch_size
        )
        # The dataset is shuffled by default and two separate passes are not
        # guaranteed to yield the batches in the same order. Images and labels
        # are therefore collected in a single pass so they stay paired.
        image_batches = []
        label_batches = []
        for batch_images, batch_labels in dataset:
            image_batches.append(batch_images.numpy())
            label_batches.append(batch_labels.numpy())
        return np.concatenate(image_batches, axis=0), np.concatenate(label_batches, axis=0)

    def display_image(self, dataset, index):
        """Show one image of a split with its category in the title.

        Args:
            dataset: ``'train'``, ``'val'`` or ``'test'``.
            index: position of the image inside the split.

        Raises:
            ValueError: if ``dataset`` is not a known split or the split has
                not been loaded yet.
        """
        splits = {
            'train': (self.x_train, self.y_train),
            'val': (self.x_val, self.y_val),
            'test': (self.x_test, self.y_test),
        }
        if dataset not in splits:
            raise ValueError('Invalid dataset. Choose either "train", "val" or "test".')
        images, labels = splits[dataset]
        if images is None or labels is None:
            raise ValueError(f'The "{dataset}" split is not loaded. Call load_data() first.')

        image = self._as_2d(images[index])
        label = self._label_name(labels[index])

        plt.imshow(image, cmap="gray")
        plt.title(f'Image {index} - Catégorie: {label}')
        plt.axis('off')
        plt.show()

    def display_category_means(self):
        """Show the pixel-wise mean training image of each category side by side."""
        normal_mean = self.calculate_category_mean(self.x_train, self.y_train, "NORMAL")
        pneumonia_mean = self.calculate_category_mean(self.x_train, self.y_train, "PNEUMONIA")

        plt.figure(figsize=(10, 5))
        plt.subplot(1, 2, 1)
        plt.imshow(self._as_2d(normal_mean), cmap="gray")
        plt.title("Mean NORMAL Image")
        plt.axis("off")

        plt.subplot(1, 2, 2)
        plt.imshow(self._as_2d(pneumonia_mean), cmap="gray")
        plt.title("Mean PNEUMONIA Image")
        plt.axis("off")

        plt.tight_layout()
        plt.show()

    @staticmethod
    def calculate_category_mean(images, labels, category):
        """Return the pixel-wise mean of all images belonging to ``category``.

        Args:
            images: sequence/array of images, one per label.
            labels: category names or integer class indices, one per image.
            category: category to average, e.g. ``"NORMAL"``.

        Returns:
            Array with the shape of a single image.

        Raises:
            ValueError: if the data is not loaded or no image has that category.
        """
        if images is None:
            raise ValueError("Images are not loaded. Call load_data() first.")
        images = np.asarray(images)
        category_images = images[XrayLoader._category_mask(labels, category)]
        if category_images.shape[0] == 0:
            # The mean of an empty selection would be NaN; fail loudly instead.
            raise ValueError(f"No image found for category {category!r}.")
        mean_image = np.mean(category_images, axis=0)
        return mean_image

    def display_xray_distribution(self):
        """Plot, for each split, how many images fall in each category."""
        counts_per_split = (
            ("Train Distribution", self._category_counts(self.y_train)),
            ("Validation Distribution", self._category_counts(self.y_val)),
            ("Test Distribution", self._category_counts(self.y_test)),
        )

        # One bar chart per split.
        fig, ax = plt.subplots(1, 3, figsize=(15, 5))
        for axis, (title, counts) in zip(ax, counts_per_split):
            axis.bar(list(self.CATEGORIES), counts)
            axis.set_title(title)
        plt.show()

    @staticmethod
    def _category_mask(labels, category):
        """Return a boolean array marking the labels that belong to ``category``.

        Works with category names as well as with integer class indices; in the
        latter case a category name is translated to its index in ``CATEGORIES``.
        """
        if labels is None:
            raise ValueError("Labels are not loaded. Call load_data() first.")
        labels = np.asarray(labels)
        if labels.size == 0:
            return np.zeros(labels.shape, dtype=bool)
        if labels.dtype.kind in "iu" and isinstance(category, str):
            if category not in XrayLoader.CATEGORIES:
                raise ValueError(f"Unknown category {category!r}.")
            category = XrayLoader.CATEGORIES.index(category)
        mask = np.asarray(labels == category)
        if mask.shape != labels.shape:
            # Incomparable types can collapse the comparison to a single scalar.
            mask = np.zeros(labels.shape, dtype=bool)
        return mask

    @staticmethod
    def _category_counts(labels):
        """Return the number of labels per category, in ``CATEGORIES`` order."""
        return tuple(
            int(np.count_nonzero(XrayLoader._category_mask(labels, category)))
            for category in XrayLoader.CATEGORIES
        )

    @staticmethod
    def _label_name(label):
        """Translate an integer class index to its category name; pass names through."""
        is_index = isinstance(label, (int, np.integer)) and not isinstance(label, bool)
        if is_index and 0 <= label < len(XrayLoader.CATEGORIES):
            return XrayLoader.CATEGORIES[label]
        return label

    @staticmethod
    def _as_2d(image):
        """Drop a trailing single-channel axis so the image can be shown as grayscale."""
        image = np.asarray(image)
        if image.ndim == 3 and image.shape[-1] == 1:
            return image[..., 0]
        return image
    
    


In [None]:
# Read every split from disk once; the loader keeps the data as attributes,
# so there is no need to build a second XrayLoader from those same attributes.
loader = XrayLoader()
loader.load_data()

# Short aliases for the individual splits.
xtrain, ytrain = loader.x_train, loader.y_train
xtest, ytest = loader.x_test, loader.y_test
xval, yval = loader.x_val, loader.y_val

In [None]:
# Sanity check: show one training image together with its category.
loader.display_image(dataset="train", index=2000)

In [None]:
# loader.display_category_means()

In [None]:
# loader.x_train

In [None]:
# Class balance of each split (the method returns None, so nothing else is echoed).
loader.display_xray_distribution()

In [61]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Baseline: a random forest trained directly on the raw pixels.
# Each image is flattened to one row. Using -1 for the feature axis keeps this
# working for any image size and for images with a trailing channel axis.
x_train_flat = np.asarray(loader.x_train).reshape(len(loader.x_train), -1)
x_test_flat = np.asarray(loader.x_test).reshape(len(loader.x_test), -1)

# A fixed seed makes the reported accuracy reproducible from one run to the next.
model = RandomForestClassifier(random_state=42)
model.fit(x_train_flat, loader.y_train)

y_pred = model.predict(x_test_flat)
accuracy = accuracy_score(loader.y_test, y_pred)

print('Accuracy:', accuracy)

Accuracy: 0.7628205128205128
