<a href="https://colab.research.google.com/github/darshandugar2004/XrayImageClassification-TransferLearning/blob/main/dataset_preperation_lungs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import warnings
warnings.filterwarnings('ignore')
from tensorflow import keras
from keras.layers import Input, Dense, Flatten, GlobalAveragePooling2D, Dropout
from keras.models import Model
from keras.applications.densenet import DenseNet121, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator  # Corrected import
from sklearn.utils import class_weight
from keras.callbacks import EarlyStopping
import numpy as np

In [None]:
import os
import shutil
import pandas as pd
import cv2
from tqdm import tqdm
import random

# Input directories of the source datasets on Kaggle
cpn_path = "/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset"
pn_path = "/kaggle/input/chest-xray-pneumonia/chest_xray/train"
c_path = "/kaggle/input/image-processing/train-20230326T152931Z-001/train"

# Root of the merged dataset; it gets one sub-folder per class label
categories = ["COVID-19", "Pneumonia", "Normal"]
output_base = "/kaggle/working/Cov-Pneum"

for category in categories:
    class_dir = os.path.join(output_base, category)
    os.makedirs(class_dir, exist_ok=True)

# function to copy and resize images
def copy_and_resize_images(src_folder, dest_folder, target_size=(224, 224)):
    """Resize every readable image in ``src_folder`` and save it to ``dest_folder``.

    Each directory entry is loaded with ``cv2.imread``, resized to
    ``target_size`` and written under its original file name.  A file with
    the same name that already exists in ``dest_folder`` is overwritten.
    Problems with individual files are reported and skipped so that one bad
    file never aborts the whole run.

    Args:
        src_folder: Directory whose entries are read.
        dest_folder: Directory receiving the resized copies; created if it
            does not exist yet.
        target_size: Size tuple handed to ``cv2.resize`` (OpenCV interprets
            it as ``(width, height)``).

    Returns:
        int: Number of images that were successfully written.
    """
    # Do not rely on the caller having created the destination beforehand.
    os.makedirs(dest_folder, exist_ok=True)

    written = 0
    for img_name in tqdm(os.listdir(src_folder), desc=f"Processing {dest_folder}"):
        img_path = os.path.join(src_folder, img_name)
        try:
            img = cv2.imread(img_path)
            if img is None:
                # imread returns None instead of raising when the entry is
                # not a decodable image; make the skip visible.
                print(f"Skipping unreadable image {img_path}")
                continue

            img = cv2.resize(img, target_size)
            out_path = os.path.join(dest_folder, img_name)
            # imwrite reports failure through its return value.
            if cv2.imwrite(out_path, img):
                written += 1
            else:
                print(f"Failed to write {out_path}")
        except Exception as e:
            # Best effort: report the problem and continue with the next file.
            print(f"Error processing {img_path}: {e}")

    return written


# (source directory, destination class) pairs, listed in processing order
image_sources = [
    (os.path.join(cpn_path, "COVID/images"), "COVID-19"),
    (os.path.join(cpn_path, "Viral Pneumonia/images"), "Pneumonia"),
    (os.path.join(cpn_path, "Normal/images"), "Normal"),
    (os.path.join(c_path, "COVID"), "COVID-19"),
    (os.path.join(pn_path, "PNEUMONIA"), "Pneumonia"),
    (os.path.join(pn_path, "NORMAL"), "Normal"),
]

for src_dir, label in image_sources:
    copy_and_resize_images(src_dir, os.path.join(output_base, label))





# Destination root and the fraction of each class assigned to every subset
output_split_path = "/kaggle/working/Cov-Pneum-Split"
split_ratios = {"train": 0.57, "validation": 0.15, "test": 0.28}

# Create <split>/<category> for every combination up front
for split in split_ratios:
    split_root = os.path.join(output_split_path, split)
    for category in categories:
        os.makedirs(os.path.join(split_root, category), exist_ok=True)


def split_data(category, seed=None, source_root=None, dest_root=None, ratios=None):
    """Copy one category's images into train / validation / test folders.

    The files found in ``<source_root>/<category>`` are shuffled and then
    partitioned by position: the first ``ratios["train"]`` share goes to
    ``train``, the next ``ratios["validation"]`` share to ``validation`` and
    everything that remains to ``test``.  Share sizes are truncated to whole
    images, so rounding leftovers end up in ``test``.

    Args:
        category: Name of the class sub-folder to split.
        seed: Optional seed for a private random generator, giving a
            reproducible split.  When ``None`` the global ``random`` state
            is used.
        source_root: Directory containing the category folders; defaults to
            the module-level ``output_base``.
        dest_root: Directory receiving ``<split>/<category>`` folders;
            defaults to the module-level ``output_split_path``.
        ratios: Mapping with ``"train"`` and ``"validation"`` fractions;
            defaults to the module-level ``split_ratios``.

    Returns:
        dict: Number of images copied into each of ``"train"``,
        ``"validation"`` and ``"test"``.
    """
    # Resolve defaults at call time so later changes to the globals are seen.
    source_root = output_base if source_root is None else source_root
    dest_root = output_split_path if dest_root is None else dest_root
    ratios = split_ratios if ratios is None else ratios

    category_dir = os.path.join(source_root, category)
    # os.listdir returns entries in arbitrary order; sorting first makes the
    # shuffle outcome depend only on the random state.  Non-file entries are
    # dropped because shutil.copy cannot copy a directory.
    images = sorted(
        name
        for name in os.listdir(category_dir)
        if os.path.isfile(os.path.join(category_dir, name))
    )
    if seed is None:
        random.shuffle(images)
    else:
        random.Random(seed).shuffle(images)

    train_idx = int(len(images) * ratios["train"])
    val_idx = train_idx + int(len(images) * ratios["validation"])

    assignments = {
        "train": images[:train_idx],
        "validation": images[train_idx:val_idx],
        "test": images[val_idx:],
    }

    for split_name, names in assignments.items():
        dest_folder = os.path.join(dest_root, split_name, category)
        # Do not depend on the folders having been created elsewhere.
        os.makedirs(dest_folder, exist_ok=True)
        for img_name in names:
            shutil.copy(
                os.path.join(category_dir, img_name),
                os.path.join(dest_folder, img_name),
            )

    return {split_name: len(names) for split_name, names in assignments.items()}

# Split each class folder in turn, then report completion
for class_name in categories:
    split_data(class_name)

print("✅ Dataset successfully split into train, validation, and test sets!")


Processing /kaggle/working/Cov-Pneum/COVID-19: 100%|██████████| 3616/3616 [00:25<00:00, 144.14it/s]
Processing /kaggle/working/Cov-Pneum/Pneumonia: 100%|██████████| 1345/1345 [00:10<00:00, 134.40it/s]
Processing /kaggle/working/Cov-Pneum/Normal: 100%|██████████| 10192/10192 [01:11<00:00, 141.86it/s]
Processing /kaggle/working/Cov-Pneum/COVID-19: 100%|██████████| 970/970 [00:15<00:00, 60.98it/s]
Processing /kaggle/working/Cov-Pneum/Pneumonia: 100%|██████████| 3875/3875 [00:29<00:00, 130.46it/s]
Processing /kaggle/working/Cov-Pneum/Normal: 100%|██████████| 1341/1341 [00:33<00:00, 40.18it/s]


✅ Dataset successfully split into train, validation, and test sets!


In [None]:
# Recursively archive the split dataset into a single zip file under
# /kaggle/working. The absolute source path is passed, so entries are stored
# with the "kaggle/working/Cov-Pneum-Split/" prefix.
!zip -r /kaggle/working/Cov-Pneum-Split.zip /kaggle/working/Cov-Pneum-Split

  adding: kaggle/working/Cov-Pneum-Split/ (stored 0%)
  adding: kaggle/working/Cov-Pneum-Split/train/ (stored 0%)
  adding: kaggle/working/Cov-Pneum-Split/train/COVID-19/ (stored 0%)
  adding: kaggle/working/Cov-Pneum-Split/train/COVID-19/COVID-19 (267).jpg (deflated 1%)
  adding: kaggle/working/Cov-Pneum-Split/train/COVID-19/COVID-2818.png (deflated 18%)
  adding: kaggle/working/Cov-Pneum-Split/train/COVID-19/COVID-240.png (deflated 19%)
  adding: kaggle/working/Cov-Pneum-Split/train/COVID-19/COVID-3315.png (deflated 18%)
  adding: kaggle/working/Cov-Pneum-Split/train/COVID-19/COVID-1383.png (deflated 20%)
  adding: kaggle/working/Cov-Pneum-Split/train/COVID-19/COVID-2864.png (deflated 17%)
  adding: kaggle/working/Cov-Pneum-Split/train/COVID-19/COVID-2619.png (deflated 19%)
  adding: kaggle/working/Cov-Pneum-Split/train/COVID-19/COVID-1663.png (deflated 19%)
  adding: kaggle/working/Cov-Pneum-Split/train/COVID-19/COVID-2317.png (deflated 19%)
  adding: kaggle/working/Cov-Pneum-Split/