In [None]:
# Install the third-party packages imported by the image-loading and segmentation cells
# (numpy, matplotlib, cv2 via opencv-python, skimage via scikit-image).
%pip install numpy matplotlib opencv-python scikit-image


In [None]:
# Image I/O (cv2) and file-system access (os)
import cv2
import os
# Array math and plotting
import numpy as np
import matplotlib.pyplot as plt
# scikit-image: thresholding, morphological clean-up and region measurement
from skimage import filters , morphology, measure
# Reaching this line means every import above succeeded
print("✅ All libraries imported successfully")


In [None]:
# Root folder of the extracted dataset; one sub-folder per class is expected below it.
base_path = r"D:\PHD\Image Analysing Program\covid19-pneumonia-normal-chest-xraypa-dataset\COVID19_Pneumonia_Normal_Chest_Xray_PA_Dataset"  # Change to your extracted folder path

categories = ['covid', 'pneumonia', 'normal']
image_paths = []  # list of (absolute image path, class label) tuples

for label in categories:
    folder = os.path.join(base_path, label)
    # os.listdir() returns entries in arbitrary order; sorting makes the chosen
    # sample the same on every run and every machine.
    for file in sorted(os.listdir(folder)):
        # Compare extensions case-insensitively so e.g. "scan.PNG" is not skipped.
        if file.lower().endswith((".png", ".jpg", ".jpeg")):
            image_paths.append((os.path.join(folder, file), label))
            break  # just take one example per class for now
    else:
        # Loop finished without hitting `break`: the folder holds no usable image.
        print(f"No image file found in: {folder}")

print(image_paths)


In [None]:
# Display the sample image collected for each class.
for path, label in image_paths:
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread() reports an unreadable/missing file by returning None rather than
    # raising, so check explicitly instead of letting plt.imshow() fail obscurely.
    if img is None:
        print(f"Could not read image: {path}")
        continue
    plt.imshow(img, cmap='gray')
    plt.title(f"{label.upper()} Sample")
    plt.axis("off")
    plt.show()


In [None]:
# Preprocess the first collected sample: load as grayscale, scale to [0, 1], smooth.
if not image_paths:
    raise RuntimeError("image_paths is empty - no sample image was found to preprocess")

image = cv2.imread(image_paths[0][0], cv2.IMREAD_GRAYSCALE)
# cv2.imread() returns None on failure; fail here with a clear message instead of
# a TypeError on the division below.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_paths[0][0]}")

# Normalize and blur
image = image / 255.0
blurred = cv2.GaussianBlur(image, (5, 5), 0)  # 5x5 kernel, sigma derived from kernel size

plt.imshow(blurred, cmap='gray')
plt.title("Blurred Image")
plt.axis("off")
plt.show()


In [None]:
# Segment the blurred image: Otsu's method picks the global threshold, pixels above
# it become foreground, and connected components smaller than 500 pixels are dropped.
thresh = filters.threshold_otsu(blurred)
binary = morphology.remove_small_objects(blurred > thresh, 500)

# Show the resulting mask
plt.imshow(binary, cmap='gray')
plt.title("Segmented Region")
plt.axis("off")
plt.show()


In [None]:
# Assign an integer label to each connected foreground component of the mask,
# then compute per-region measurements for every labelled component.
labels = measure.label(binary)
props = measure.regionprops(labels)

# Print area and centroid of each region; regions are numbered from 1 in the output.
for i, region in enumerate(props):
    print(f"Region {i+1}: Area = {region.area}, Centroid = {region.centroid}")


In [None]:
# Install the packages needed by the CNN and classical-ML cells below.
# matplotlib and opencv-python are also listed in the first install cell of this notebook.
%pip install tensorflow
%pip install scikit-learn matplotlib opencv-python


In [25]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam


In [26]:
# Path to extracted folders
base_path = r"D:\PHD\Image Analysing Program\covid19-pneumonia-normal-chest-xraypa-dataset\COVID19_Pneumonia_Normal_Chest_Xray_PA_Dataset"  # Change to your extracted folder path

categories = ['COVID', 'PNEUMONIA', 'NORMAL']

data = []    # resized grayscale images, one (img_size, img_size) array per file
labels = []  # class-folder name for each entry in `data`
img_size = 128  # Resize all images to 128x128

for category in categories:
    folder = os.path.join(base_path, category)
    # Sorted listing keeps the sample order (and therefore the seeded split) reproducible.
    for file in sorted(os.listdir(folder)):
        # Case-insensitive extension check so e.g. "scan.PNG" is not skipped.
        if not file.lower().endswith((".png", ".jpg", ".jpeg")):
            continue

        img_path = os.path.join(folder, file)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread() returns None for unreadable files instead of raising.
        if img is None:
            print(f"Failed to process: {img_path}")
            continue

        try:
            img = cv2.resize(img, (img_size, img_size))
        except cv2.error as exc:
            # Catch only OpenCV failures; a bare `except:` would also swallow
            # KeyboardInterrupt and hide unrelated programming errors.
            print(f"Failed to process: {img_path} ({exc})")
            continue

        data.append(img)
        labels.append(category)

print(f"Loaded {len(data)} images from {len(set(labels))} classes")


In [27]:
# Stack the images into a (N, img_size, img_size, 1) array and scale pixel values by 1/255
X = np.array(data).reshape(-1, img_size, img_size, 1)
X = X / 255.0  # Normalize

# Map class names to integer ids, then expand the ids to one-hot rows
le = LabelEncoder()
le.fit(labels)
y = to_categorical(le.transform(labels))  # One-hot encoding


In [28]:
# Hold out 20% of the samples for validation; the split is seeded and stratified on y.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [None]:
# Size the softmax layer from the fitted label encoder so it always matches the
# number of one-hot columns in `y`, instead of a hard-coded 3.
num_classes = len(le.classes_)

# Small CNN: two conv/max-pool stages followed by a dense classifier head.
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(img_size, img_size, 1)),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),  # drop half of the dense activations during training
    Dense(num_classes, activation='softmax')  # one output unit per class
])

# categorical_crossentropy matches the one-hot encoded targets.
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


In [None]:
# Train for 10 epochs with batches of 32. validation_data is supplied so validation
# metrics are recorded next to the training metrics in `history` (plotted in a later cell).
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32)


In [33]:
# Persist the trained model to the current working directory.
# NOTE(review): the ".h5" extension selects Keras' HDF5 format - confirm this is still
# the desired format for the installed Keras version.
model.save("xray_cnn_model.h5")


In [None]:
# Learning curves: one line per recorded accuracy metric, in train-then-validation order
for metric_key, legend_text in (('accuracy', 'Train Accuracy'), ('val_accuracy', 'Val Accuracy')):
    plt.plot(history.history[metric_key], label=legend_text)
plt.legend()
plt.title('Accuracy Over Epochs')
plt.show()


In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Convert predicted probabilities and one-hot targets to integer class ids
y_pred = model.predict(X_val)
y_pred_classes = y_pred.argmax(axis=1)
y_true = y_val.argmax(axis=1)

# Per-class precision / recall / F1, labelled with the encoder's class names
print(classification_report(y_true, y_pred_classes, target_names=le.classes_))

# Confusion matrix rendered as an annotated heatmap
cm = confusion_matrix(y_true, y_pred_classes)
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=le.classes_,
    yticklabels=le.classes_,
)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()


In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

In [17]:
# Build a hand-crafted feature table: segment each image, take its largest
# foreground region, and record shape descriptors plus global intensity statistics.
base_path = r"D:\PHD\Image Analysing Program\covid19-pneumonia-normal-chest-xraypa-dataset\COVID19_Pneumonia_Normal_Chest_Xray_PA_Dataset"  # Change to your extracted folder path

categories = ['COVID', 'PNEUMONIA', 'NORMAL']

X_features = []  # one 7-element feature list per successfully processed image
y_labels = []    # class-folder name for each row of X_features
img_size = 256

for label in categories:
    folder = os.path.join(base_path, label)
    # Filter by extension BEFORE applying the cap, otherwise non-image entries among the
    # first 50 directory entries silently reduce the number of images used. Sorting makes
    # the selected subset reproducible; the extension check is case-insensitive.
    image_files = [
        name for name in sorted(os.listdir(folder))
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    ]
    for file in image_files[:50]:  # limit to 50 per class for now
        path = os.path.join(folder, file)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread() returns None for unreadable files; skip instead of crashing in resize.
        if img is None:
            print(f"Could not read image: {path}")
            continue
        img = cv2.resize(img, (img_size, img_size))
        norm = img / 255.0

        # Segmentation
        blur = cv2.GaussianBlur(norm, (5,5), 0)
        thresh = filters.threshold_otsu(blur)
        binary = blur > thresh
        binary = morphology.remove_small_objects(binary, 500)

        # Feature extraction
        label_img = measure.label(binary)
        props = measure.regionprops(label_img)

        # Images whose mask has no region left after clean-up contribute no row.
        if props:
            largest = max(props, key=lambda p: p.area)
            features = [
                largest.area,
                largest.perimeter,
                largest.eccentricity,
                largest.solidity,
                largest.extent,
                np.mean(img),  # intensity stats use the resized, un-normalized image
                np.std(img)
            ]
            X_features.append(features)
            y_labels.append(label)


In [18]:
from sklearn.preprocessing import LabelEncoder

# Feature matrix and integer-encoded class labels for the classical models
X = np.array(X_features)
y = LabelEncoder().fit_transform(y_labels)

# Stratify the split so every class keeps the same proportion in train and test;
# with only a few dozen samples per class an unstratified split can be badly skewed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Fit a seeded 100-tree random forest on the hand-crafted features and score the test split
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print("Random Forest Classification Report:\n", classification_report(y_test, y_pred_rf))


In [None]:
# Fit a linear-kernel SVM on the same features and score the test split
svm = SVC(kernel='linear').fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)
print("SVM Classification Report:\n", classification_report(y_test, y_pred_svm))


In [None]:
import seaborn as sns

# LabelEncoder assigns ids in sorted order of the unique labels, so the sorted unique
# names from y_labels line up with the encoded ids 0..n-1 used in y_test / y_pred_rf.
class_names = np.unique(y_labels)

# Pin the matrix to the full class set so its shape always matches the tick labels,
# even if some class is absent from both y_test and the predictions.
cm = confusion_matrix(y_test, y_pred_rf, labels=np.arange(len(class_names)))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names)
plt.title("Random Forest Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()


In [None]:
import seaborn as sns
import pandas as pd

# Bar chart of how many feature rows each class contributed
df = pd.DataFrame({'Label': y_labels})
sns.countplot(data=df, x='Label')
plt.title("Image Count per Class")
plt.show()


In [None]:
# Assemble the feature table: seven numeric columns in extraction order, then the class label
df_features = pd.DataFrame(
    X_features,
    columns=[
        "Area",
        "Perimeter",
        "Eccentricity",
        "Solidity",
        "Extent",
        "Mean Intensity",
        "Std Intensity",
    ],
)
df_features["Label"] = y_labels

# Compare the largest-region area across classes
plt.figure(figsize=(12, 6))
sns.boxplot(data=df_features, x="Label", y="Area")
plt.title("Area Distribution by Class")
plt.show()


In [24]:
# Export the feature table (feature columns plus Label) to a CSV file in the working
# directory; index=False leaves the DataFrame index out of the file.
df_features.to_csv("xray_feature_report.csv", index=False)
