In [None]:
from google.colab import drive

# Attach Google Drive under /content/drive so later cells can read the dataset from it.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Location of the chest X-ray dataset folder inside the mounted Google Drive.
dataset_path = "/content/drive/My Drive/chest_xray"

In [None]:
import os

# Sanity check: confirm the dataset folder is reachable before doing any work on it.
dataset_path = "/content/drive/My Drive/chest_xray"
if os.path.exists(dataset_path):
    print("Dataset folder found!")
else:
    print("Error: Dataset folder not found! Check your path.")

Dataset folder found!


In [None]:
import os

# Show what sits directly under the dataset root
# (the loading cell expects the folders 'Normal', 'Pneumonia' and 'Covid 19').
dataset_entries = os.listdir(dataset_path)
print("Dataset Folders:", dataset_entries)

# Report how many entries each class folder holds.
for category in dataset_entries:
    folder = os.path.join(dataset_path, category)
    # A stray file in the dataset root (e.g. a hidden metadata file) is not a
    # class folder; calling os.listdir on it would raise NotADirectoryError.
    if not os.path.isdir(folder):
        continue
    print(f"Category: {category} - {len(os.listdir(folder))} images")

Dataset Folders: ['Pneumonia', 'Normal', 'Covid 19']
Category: Pneumonia - 4283 images
Category: Normal - 1587 images
Category: Covid 19 - 357 images


In [None]:
import numpy as np
import os
import cv2
from tensorflow.keras.applications.vgg19 import preprocess_input
from sklearn.model_selection import train_test_split

# Root of the dataset on the mounted Google Drive.
dataset_path = "/content/drive/My Drive/chest_xray"

# Class folders. The position in this list is the integer label
# (0 = Normal, 1 = Pneumonia, 2 = Covid 19), so the names must match the
# folder names on disk exactly (note the space in "Covid 19").
categories = ["Normal", "Pneumonia", "Covid 19"]
image_size = (224, 224)
X, y = [], []

# Load images and labels
for label, category in enumerate(categories):
    folder_path = os.path.join(dataset_path, category)

    # Skip classes whose folder is missing instead of crashing in os.listdir.
    if not os.path.isdir(folder_path):
        print(f"Warning: Folder '{folder_path}' not found. Skipping.")
        continue

    # os.listdir order is arbitrary; sorting keeps the sample order (and thus
    # the seeded train/test split below) identical from run to run.
    for img_name in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # Not a readable image (e.g. a non-image file) - ignore it.
            continue
        # cv2.imread returns BGR, while the VGG19 preprocess_input expects RGB
        # input (it performs the RGB->BGR swap itself). Convert first so the
        # channels are not swapped twice.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, image_size)
        img = preprocess_input(img)  # Preprocess for VGG19
        X.append(img)
        y.append(label)

if not X:
    raise ValueError(f"No readable images found under '{dataset_path}'.")

# Convert to numpy arrays
X = np.array(X)
y = np.array(y)

# Split dataset. The classes are heavily imbalanced, so stratify on the labels
# to keep the class proportions the same in the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Dataset Loaded Successfully!")
print(f"Training Data: {X_train.shape}, Testing Data: {X_test.shape}")

Dataset Loaded Successfully!
Training Data: (4981, 224, 224, 3), Testing Data: (1246, 224, 224, 3)


In [None]:
from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Model
import numpy as np
import tensorflow as tf

# ImageNet-pretrained VGG19 with the dense classification head removed.
base_model = VGG19(
    include_top=False,
    weights="imagenet",
    input_shape=(224, 224, 3),
)
# Expose the output of the "block5_pool" layer as the feature-extraction model.
feature_extractor = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer("block5_pool").output,
)

# Function to extract features in mini-batches
def extract_features(model, data, batch_size=32):
    """Run ``model.predict`` over ``data`` in mini-batches and flatten the results.

    Args:
        model: Object exposing ``predict(batch, verbose=...)`` that returns an
            array whose first axis is the batch axis.
        data: Sliceable, sized collection of samples (first axis = sample).
        batch_size: Number of samples passed to ``model.predict`` per call.

    Returns:
        2-D ``numpy.ndarray`` with one row per sample; every per-sample output
        is flattened into a single feature vector.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``data`` is empty.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
    if len(data) == 0:
        raise ValueError("extract_features received no samples to process")

    features = []
    for start in range(0, len(data), batch_size):
        batch = data[start:start + batch_size]
        # verbose=0: predict is called once per mini-batch, so the default
        # progress bar would print one line per batch and flood the output.
        batch_features = model.predict(batch, verbose=0)
        # Collapse everything except the batch axis into one feature vector.
        features.append(batch_features.reshape(batch_features.shape[0], -1))
    return np.vstack(features)  # Combine all extracted features

# Push the train and test images through the extractor 32 at a time so the
# whole dataset never has to go through the network in a single call.
X_train_features, X_test_features = [
    extract_features(feature_extractor, split, batch_size=32)
    for split in (X_train, X_test)
]

print("✅ Feature Extraction Completed!")
# NOTE(review): the second dimension is the flattened "block5_pool" output; for
# 224x224 inputs that is normally 7*7*512 = 25088 columns - confirm against the
# value printed below.
print(f"Feature Shape: {X_train_features.shape}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 876ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 37s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 44s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step   
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step   
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step   
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 464ms/step

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

# Rank the deep features by importance with an Extra Trees ensemble.
# A fixed random_state keeps the ranking (and so the selected columns)
# identical from run to run.
selector = ExtraTreesClassifier(n_estimators=100, random_state=42)
selector.fit(X_train_features, y_train)

# `selector` is already fitted and `sfm.fit` is never called, so the selector
# must be declared as prefit; otherwise transform() depends on an unfitted
# SelectFromModel. Features are kept if their importance is at least the mean
# importance, capped at the 10 most important ones (max_features=10).
sfm = SelectFromModel(selector, threshold="mean", max_features=10, prefit=True)
X_train_selected = sfm.transform(X_train_features)
X_test_selected = sfm.transform(X_test_features)

print("Feature Selection Completed!")
print(f"Reduced Feature Shape: {X_train_selected.shape}")


Feature Selection Completed!
Reduced Feature Shape: (4981, 10)


In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Train SVM. probability=True must be set before fit() so predict_proba is
# available for the ensemble; the probability calibration uses internal
# randomised cross-validation, so fix the seed for reproducible results.
svm_model = SVC(probability=True, random_state=42)
svm_model.fit(X_train_selected, y_train)

# Train Random Forest (seeded for the same reason: bootstrap sampling and
# feature sub-sampling are random).
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_selected, y_train)

print("SVM and Random Forest Training Completed!")

SVM and Random Forest Training Completed!


In [None]:
!pip install catboost



In [None]:
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
import numpy as np

# Objects exposing .toarray() (sparse matrices) are converted to dense arrays;
# anything else is passed through unchanged.
if hasattr(X_train_selected, 'toarray'):
    X_train_selected_dense = X_train_selected.toarray()
else:
    X_train_selected_dense = X_train_selected

if hasattr(X_test_selected, 'toarray'):
    X_test_selected_dense = X_test_selected.toarray()
else:
    X_test_selected_dense = X_test_selected

# Gradient-boosted trees with XGBoost (100 boosting rounds).
xgb_model = XGBClassifier(n_estimators=100)
xgb_model.fit(X_train_selected_dense, y_train)

# Gradient-boosted trees with CatBoost (100 iterations, training log silenced).
catboost_model = CatBoostClassifier(verbose=0, iterations=100)
catboost_model.fit(X_train_selected_dense, y_train)

print("XGBoost and CatBoost Training Completed!")


XGBoost and CatBoost Training Completed!


In [None]:
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
from datetime import datetime
import pytz

# Timestamp the evaluation run in Indian Standard Time.
india_timezone = pytz.timezone('Asia/Kolkata')
india_time = datetime.now(india_timezone)
print("Current Date and Time in India:", india_time.strftime("%Y-%m-%d %H:%M:%S"))
print()

# Class probabilities from each classifier. The SVM was constructed with
# probability=True before fitting, which is what makes predict_proba
# available; toggling the parameter after fit() has no useful effect, so it is
# not touched here.
svm_probs = svm_model.predict_proba(X_test_selected)
rf_probs = rf_model.predict_proba(X_test_selected)
xgb_probs = xgb_model.predict_proba(X_test_selected)
catboost_probs = catboost_model.predict_proba(X_test_selected)

# Check the shape of each model's predicted probabilities
print(f"SVM Probability Shape: {svm_probs.shape}")
print(f"Random Forest Probability Shape: {rf_probs.shape}")
print(f"XGBoost Probability Shape: {xgb_probs.shape}")
print(f"CatBoost Probability Shape: {catboost_probs.shape}")

# Soft voting: average the four probability matrices with equal weight.
final_probs = (svm_probs + rf_probs + xgb_probs + catboost_probs) / 4

# The predicted class is the column with the highest averaged probability.
final_predictions = np.argmax(final_probs, axis=1)

# Evaluate model
accuracy = accuracy_score(y_test, final_predictions)
print("Final Model Accuracy:", accuracy)

# Labels are positions in `categories` (set in the data-loading cell), so use
# the folder names as row labels instead of bare integers.
class_labels = list(range(len(categories)))
print(
    "Classification Report:\n",
    classification_report(
        y_test,
        final_predictions,
        labels=class_labels,
        target_names=categories,
    ),
)


Current Date and Time in India: 2025-04-24 11:32:37

SVM Probability Shape: (1246, 3)
Random Forest Probability Shape: (1246, 3)
XGBoost Probability Shape: (1246, 3)
CatBoost Probability Shape: (1246, 3)
Final Model Accuracy: 0.8691813804173355
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.77      0.82       349
           1       0.88      0.93      0.90       828
           2       0.74      0.57      0.64        69

    accuracy                           0.87      1246
   macro avg       0.83      0.76      0.79      1246
weighted avg       0.87      0.87      0.87      1246

