In [40]:
import pandas as pd
import numpy as np
import os
from PIL import Image
from io import BytesIO
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [41]:
# def create_image_df(folder_path, is_positive):
#     # List to store file names and images
#     file_data = []
    
#     # Loop through all files in the folder
#     for file in os.listdir(folder_path):
#         # Check if the file is an image file
#         if file.endswith(('.jpg', '.jpeg', '.png', '.gif')):
#             # Open the image file
#             image_path = os.path.join(folder_path, file)
#             if is_positive:
#                 if 'bacteria' in file:
#                     y_val = 'bacteria'
#                 elif 'virus' in file:
#                     y_val = 'virus'
#                 else:
#                     y_val = 'pneumonia'
#             else:
#                 y_val = 'normal'
#             with Image.open(image_path) as image:
#                 # Convert to grayscale
#                 if image.mode != 'L':
#                                 grayscale_image = image.convert('L')
#                 # Save the image to a byte array
#                 img_byte_arr = BytesIO()
#                 image.save(img_byte_arr, format=image.format)
#                 img_byte_arr = img_byte_arr.getvalue()
#                 # Append the file name and image to the list
#                 file_data.append({'File_Name': file, 'Image': img_byte_arr, 'y_val': y_val})
    
#     # Create a DataFrame from the list of dictionaries
#     return pd.DataFrame(file_data)

In [42]:
def create_image_df(folder_path, is_positive, target_size=(256, 256)):
    def resize_with_padding(img, target_size):
        # Resize the image while keeping the aspect ratio
        img.thumbnail(target_size)
        # Create a new image with the target size and a white background
        new_img = Image.new("L", target_size, color=255)  # color for white background
        # Paste the resized image onto the new image, centered
        new_img.paste(img, ((target_size[0] - img.size[0]) // 2, (target_size[1] - img.size[1]) // 2))
        img_arr = np.array(new_img) / 255.0  # Normalize pixel values
        # img_arr = np.expand_dims(img_arr, axis=0)  # Add batch dimension
        img_arr = np.expand_dims(img_arr, axis=-1)  # Add channel dimension for grayscale
        return new_img, img_arr
    
    # List to store file names and images
    file_data = []
    
    # Loop through all files in the folder
    for file in os.listdir(folder_path):
        # Check if the file is an image file
        if file.endswith(('.jpg', '.jpeg', '.png', '.gif')):
            # Open the image file
            image_path = os.path.join(folder_path, file)
            if is_positive:
                if 'bacteria' in file:
                    y_val = 'bacteria'
                elif 'virus' in file:
                    y_val = 'virus'
                else:
                    y_val = 'pneumonia'
            else:
                y_val = 'normal'
            with Image.open(image_path) as image:
                # Convert to grayscale
                if image.mode != 'L':
                    grayscale_image = image.convert('L')
                image, image_arr = resize_with_padding(image, target_size)
                # Save the image to a byte array
                img_byte_arr = BytesIO()
                image.save(img_byte_arr, format='JPEG')
                img_byte_arr = img_byte_arr.getvalue()
                # Append the file name and image to the list
                file_data.append({'File_Name': file, 'Image': img_byte_arr, 'Array': image_arr, 'y_val': y_val})
    
    # Create a DataFrame from the list of dictionaries
    return pd.DataFrame(file_data)



In [43]:
normal_test_df = create_image_df('./test/NORMAL', False)
pneumonia_test_df = create_image_df('./test/PNEUMONIA', True)
normal_train_df = create_image_df('./train/NORMAL', False)
pneumonia_train_df = create_image_df('./train/PNEUMONIA', True)
normal_val_df = create_image_df('./val/NORMAL', False)
pneumonia_val_df = create_image_df('./val/PNEUMONIA', True)

normal_train_df

Unnamed: 0,File_Name,Image,Array,y_val
0,IM-0115-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
1,IM-0117-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
2,IM-0119-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
3,IM-0122-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
4,IM-0125-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
...,...,...,...,...
1336,NORMAL2-IM-1406-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
1337,NORMAL2-IM-1412-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
1338,NORMAL2-IM-1419-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
1339,NORMAL2-IM-1422-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal


In [44]:
pneumonia_train_df

Unnamed: 0,File_Name,Image,Array,y_val
0,person1000_bacteria_2931.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria
1,person1000_virus_1681.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",virus
2,person1001_bacteria_2932.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria
3,person1002_bacteria_2933.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria
4,person1003_bacteria_2934.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria
...,...,...,...,...
3869,person99_virus_183.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",virus
3870,person9_bacteria_38.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria
3871,person9_bacteria_39.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria
3872,person9_bacteria_40.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria


In [45]:
pneumonia_val_df['y_val'].value_counts()

y_val
bacteria    8
virus       1
Name: count, dtype: int64

In [46]:
pneumonia_train_df['y_val'].value_counts()

y_val
bacteria    2530
virus       1344
Name: count, dtype: int64

In [47]:
train_df = pd.concat([normal_train_df, pneumonia_train_df], ignore_index=True)
train_df

Unnamed: 0,File_Name,Image,Array,y_val
0,IM-0115-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
1,IM-0117-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
2,IM-0119-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
3,IM-0122-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
4,IM-0125-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
...,...,...,...,...
5210,person99_virus_183.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",virus
5211,person9_bacteria_38.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria
5212,person9_bacteria_39.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria
5213,person9_bacteria_40.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria


In [48]:
test_df = pd.concat([normal_test_df, pneumonia_test_df], ignore_index=True)
test_df

Unnamed: 0,File_Name,Image,Array,y_val
0,IM-0001-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
1,IM-0003-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
2,IM-0005-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
3,IM-0006-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
4,IM-0007-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
...,...,...,...,...
619,person96_bacteria_465.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria
620,person96_bacteria_466.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria
621,person97_bacteria_468.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria
622,person99_bacteria_473.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria


In [49]:
val_df = pd.concat([normal_val_df, pneumonia_val_df], ignore_index=True)
val_df

Unnamed: 0,File_Name,Image,Array,y_val
0,NORMAL2-IM-1427-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
1,NORMAL2-IM-1430-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
2,NORMAL2-IM-1431-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
3,NORMAL2-IM-1436-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
4,NORMAL2-IM-1437-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
5,NORMAL2-IM-1438-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
6,NORMAL2-IM-1440-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
7,NORMAL2-IM-1442-0001.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",normal
8,person1588_virus_2762.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",virus
9,person1946_bacteria_4874.jpeg,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"[[[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1...",bacteria


In [50]:
# Extract features and labels
X_train = np.stack(train_df['Array'].values)
y_train = train_df['y_val'].values
X_test = np.stack(test_df['Array'].values)
y_test = test_df['y_val'].values
X_val = np.stack(val_df['Array'].values)
y_val = val_df['y_val'].values

In [51]:
y_train

array(['normal', 'normal', 'normal', ..., 'bacteria', 'bacteria',
       'bacteria'], dtype=object)

In [52]:
X_train

array([[[[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]],

        [[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]],

        [[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]],

        ...,

        [[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]],

        [[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]],

        [[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]]],


       [[[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]],

        [[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]],

        [[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]],

        ...,

        [[1.],
 

In [53]:
# Encode labels
encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)
y_test_encoded = encoder.transform(y_test)
y_val_encoded = encoder.transform(y_val)

# Convert labels to one-hot encoding
y_train_one_hot = to_categorical(y_train_encoded)
y_test_one_hot = to_categorical(y_test_encoded)
y_val_one_hot = to_categorical(y_val_encoded)

In [54]:
# Build the CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax')  # 3 classes: bacteria, virus, normal
])

# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

  super().__init__(


In [55]:
# Define early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

In [56]:
y_train_one_hot[0]

array([0., 1., 0.])

In [57]:
y_val_one_hot[0]

array([0., 1., 0.])

In [58]:
# Train the model
history = model.fit(X_train, y_train_one_hot, 
                    validation_data=(X_val, y_val_one_hot), 
                    epochs=20, 
                    batch_size=32, 
                    callbacks=[early_stopping])

Epoch 1/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m152s[0m 919ms/step - accuracy: 0.5883 - loss: 0.9942 - val_accuracy: 0.5294 - val_loss: 0.9823
Epoch 2/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 842ms/step - accuracy: 0.7877 - loss: 0.5371 - val_accuracy: 0.5294 - val_loss: 0.9474
Epoch 3/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 825ms/step - accuracy: 0.7800 - loss: 0.4934 - val_accuracy: 0.5294 - val_loss: 1.0394
Epoch 4/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 861ms/step - accuracy: 0.8056 - loss: 0.4405 - val_accuracy: 0.6471 - val_loss: 1.0285
Epoch 5/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 836ms/step - accuracy: 0.8265 - loss: 0.4124 - val_accuracy: 0.6471 - val_loss: 0.7708
Epoch 6/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 835ms/step - accuracy: 0.8324 - loss: 0.3688 - val_accuracy: 0.6471 - val_loss: 0.8413
Epoc

In [59]:
# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test_one_hot)
print(f'Test accuracy: {test_acc}')

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 154ms/step - accuracy: 0.4857 - loss: 2.7432
Test accuracy: 0.6266025900840759


In [61]:
# Save the model
model.save('pneumonia_classification_model.keras')