In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import os
import zipfile
import cv2
import numpy as np
from glob import glob
from skimage import exposure, restoration
from collections import Counter

: 

In [None]:
# Define dataset path.
# The Google Drive location below is only the default: set the MRI_DATASET_PATH
# environment variable to point the notebook at another copy of the data
# without editing this cell.
dataset_path = os.environ.get(
    "MRI_DATASET_PATH",
    "/content/drive/MyDrive/University /Spring 2025/Data Mining and Machine Learning /Project/GAN-Traning Images",
)


In [None]:
# Verify that the dataset directory is reachable before any cell reads from it
if not os.path.exists(dataset_path):
    print("❌ Dataset path does not exist. Check the path.")
else:
    print("✅ Dataset path exists.")
    print("Contents of dataset folder:", os.listdir(dataset_path))

In [None]:
# Get sample images: the first six image files found directly in the dataset folder.
# Suffixes are compared case-insensitively and include .jpeg; the listing is
# sorted because os.listdir() returns entries in arbitrary order.
image_files = sorted(
    f for f in os.listdir(dataset_path)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)[:6]
image_files

In [None]:
# Get sample images for initial visualization (grayscale).
# Suffixes are compared case-insensitively (and include .jpeg); sorting makes
# the preview reproducible because os.listdir() order is arbitrary.
image_files = sorted(
    f for f in os.listdir(dataset_path)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)

# Plot initial sample images (grayscale) on a fixed 2x3 grid
fig, axes = plt.subplots(2, 3, figsize=(12, 8))
num_images_to_plot = min(6, len(image_files))  # never index past the available images
for ax in axes.flat:
    ax.axis("off")  # hide every panel up front so unused ones show no empty frame
for ax, img_name in zip(axes.flat, image_files[:num_images_to_plot]):
    img = Image.open(os.path.join(dataset_path, img_name)).convert("L")  # convert to grayscale
    ax.imshow(img, cmap="gray")
    ax.set_title(img_name)
plt.show()


In [None]:
# 1. Inspect dataset structure
# Expected layout: dataset_path/<class_name>/image_files..., so every
# sub-directory name found here is treated as a class label.
classes = list(filter(
    lambda entry: os.path.isdir(os.path.join(dataset_path, entry)),
    os.listdir(dataset_path),
))
print("Detected classes:", classes)

In [None]:
# Inspect dataset structure and get class information
classes = [d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]
print("Detected classes:", classes)

# Get list of image paths and their labels.
# Assumes the dataset is structured like: dataset_path/class_name/image.jpg
image_paths = []
labels = []
for cls in classes:
    cls_folder = os.path.join(dataset_path, cls)
    # A character-class glob such as '*.[jp][pn]g' can only match three-letter
    # extensions, so .jpeg files would be skipped; filter on the suffix instead
    # (case-insensitively) and sort for a stable order.
    imgs = sorted(
        p for p in glob(os.path.join(cls_folder, '*'))
        if p.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    image_paths.extend(imgs)
    labels.extend([cls] * len(imgs))  # one label per image, in the same order
print("Total images loaded:", len(image_paths))
print(f"Labels: {labels[:5]}")  # Print first 5 labels
print("Class distribution:", Counter(labels))

In [None]:
# Get list of image paths and their labels (one label per image, same order)
image_paths = []
labels = []
for cls in classes:
    cls_folder = os.path.join(dataset_path, cls)
    # A character-class glob such as '*.[jp][pn]g' can only match three-letter
    # extensions, so .jpeg files would be skipped; filter on the suffix instead
    # (case-insensitively) and sort for a stable order.
    imgs = sorted(
        p for p in glob(os.path.join(cls_folder, '*'))
        if p.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    image_paths.extend(imgs)
    labels.extend([cls] * len(imgs))

print("Total images loaded:", len(image_paths))
print("Class distribution:", Counter(labels))

In [None]:
# List all image files located directly in the dataset folder (.jpg, .jpeg, .png).
# A character-class glob such as "*.[jp][pn]g" cannot match .jpeg, so the
# suffix is checked explicitly (case-insensitively) and the result is sorted.
# NOTE(review): this rebinds image_paths, so it no longer lines up with the
# `labels` list built by the per-class cells.
image_paths = sorted(
    p for p in glob(os.path.join(dataset_path, "*"))
    if p.lower().endswith(('.jpg', '.jpeg', '.png'))
)
print("Total images found:", len(image_paths))

In [None]:
# 2. Load and visualize sample images
def load_image(image_path):
    """Read an image from disk and return it in RGB channel order.

    Parameters
    ----------
    image_path : str
        Path handed to ``cv2.imread``.

    Returns
    -------
    The image converted from OpenCV's BGR order to RGB, or ``None`` when
    ``cv2.imread`` could not read the file (an error line is printed).
    """
    # Read the image in color first (for visualization)
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error loading {image_path}")
        # cv2.cvtColor raises on a None input, so stop here
        return None
    # Convert from BGR (OpenCV default) to RGB
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [None]:
# Load an image for display: OpenCV reads BGR, matplotlib expects RGB
def load_image(image_path):
    """Return the image at ``image_path`` as RGB, or ``None`` if unreadable.

    When ``cv2.imread`` yields ``None`` an error line is printed and ``None``
    is returned; otherwise the BGR result is converted to RGB.
    """
    bgr = cv2.imread(image_path)
    if bgr is not None:
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    print(f"Error loading {image_path}")
    return None

In [None]:
# Visualize a few sample images
num_samples = 6  # Number of images to display (you can adjust this)

if len(image_paths) > 0:
    cols = 3
    rows = (num_samples + cols - 1) // cols  # ceiling division
    # squeeze=False always yields a 2-D array of axes, so flattening is safe
    fig, axes = plt.subplots(rows, cols, figsize=(15, 5 * rows), squeeze=False)
    axes = axes.flatten()

    # Hide every panel first: slots without an image (unused, or a file that
    # failed to load) should not be drawn as empty tick frames.
    for ax in axes:
        ax.axis('off')

    for i, path in enumerate(image_paths[:num_samples]):
        img = load_image(path)
        if img is not None:
            axes[i].imshow(img)
            axes[i].set_title(f"Image {i+1}")
    plt.tight_layout()
    plt.show()
else:
    print("No images found in the dataset.")

In [None]:
# Preprocessing function
def preprocess_image(image_path):
    """Load a grayscale image, stretch its contrast and smooth it.

    The file is read with ``cv2.IMREAD_GRAYSCALE``, its intensities are
    rescaled using the 2nd and 98th percentiles as the input range, and a
    5x5 Gaussian blur is applied. No region-of-interest cropping is done;
    the blurred full frame is returned.

    Raises
    ------
    ValueError
        If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise ValueError(f"Image at {image_path} cannot be loaded.")

    # Contrast stretching between the 2nd and 98th intensity percentiles
    low, high = np.percentile(gray, (2, 98))
    stretched = exposure.rescale_intensity(gray, in_range=(low, high))

    # Gaussian smoothing; the full frame stands in for the ROI (placeholder)
    return cv2.GaussianBlur(stretched, (5, 5), 0)

In [None]:
# 3. Preprocessing functions
def preprocess_image(image_path):
    """Load a grayscale image, stretch its contrast and smooth it.

    The file is read with ``cv2.IMREAD_GRAYSCALE``, its intensities are
    rescaled using the 2nd and 98th percentiles as the input range, and a
    5x5 Gaussian blur is applied. ROI extraction is a placeholder: the
    blurred full frame is returned as-is.

    Raises
    ------
    ValueError
        If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise ValueError(f"Image at {image_path} cannot be loaded.")

    # Intensity normalization: percentile-based contrast stretching
    low, high = np.percentile(gray, (2, 98))
    stretched = exposure.rescale_intensity(gray, in_range=(low, high))

    # Noise reduction; add real ROI logic here if the application needs it
    return cv2.GaussianBlur(stretched, (5, 5), 0)

In [None]:
# Show the first image next to its preprocessed version
if not image_paths:
    print("No images found in the dataset. Skipping preprocessing visualization.")
else:
    sample_path = image_paths[0]
    original_img = cv2.imread(sample_path, cv2.IMREAD_GRAYSCALE)

    if original_img is None:
        # Unreadable file: report it instead of plotting
        print(f"Error: Could not load image at {sample_path}")
    else:
        processed_img = preprocess_image(sample_path)

        fig, axes = plt.subplots(1, 2, figsize=(10, 5))
        panels = (
            (original_img, "Original (Grayscale)"),
            (processed_img, "Preprocessed"),
        )
        for ax, (panel_img, panel_title) in zip(axes, panels):
            ax.imshow(panel_img, cmap='gray')
            ax.set_title(panel_title)
            ax.axis('off')
        plt.show()

In [None]:
# Show the first image next to its preprocessed version
if not image_paths:
    print("No images found in the dataset. Skipping preprocessing visualization.")
else:
    sample_path = image_paths[0]
    original_img = cv2.imread(sample_path, cv2.IMREAD_GRAYSCALE)

    if original_img is None:
        # Unreadable file: report it instead of plotting
        print(f"Error: Could not load image at {sample_path}")
    else:
        processed_img = preprocess_image(sample_path)

        fig, axes = plt.subplots(1, 2, figsize=(10, 5))
        panels = (
            (original_img, "Original (Grayscale)"),
            (processed_img, "Preprocessed"),
        )
        for ax, (panel_img, panel_title) in zip(axes, panels):
            ax.imshow(panel_img, cmap='gray')
            ax.set_title(panel_title)
            ax.axis('off')
        plt.show()

In [None]:
#Phase 2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from skimage.feature import graycomatrix, graycoprops # Changed 'greycomatrix' to 'graycomatrix' and 'greycoprops' to 'graycoprops'
from skimage.measure import shannon_entropy
from scipy.stats import skew, kurtosis

In [None]:
# Generate synthetic grayscale image data (simulating MRI images)
def generate_synthetic_images(num_images=100, img_size=(128, 128), seed=None):
    """Create a list of uniformly random uint8 images.

    Parameters
    ----------
    num_images : int
        How many images to generate.
    img_size : tuple
        Shape of each image array.
    seed : int or None
        When given, a dedicated ``np.random.RandomState(seed)`` is used so the
        images are reproducible. When ``None`` the global ``np.random`` state
        is used, exactly as before.

    Returns
    -------
    list of ``np.ndarray`` with dtype ``uint8`` and values in ``[0, 255]``.
    """
    sampler = np.random if seed is None else np.random.RandomState(seed)
    return [sampler.randint(0, 256, img_size, dtype=np.uint8) for _ in range(num_images)]


In [None]:
# Statistical Analysis
def compute_statistics(images):
    """Compute five first-order intensity statistics per image.

    Parameters
    ----------
    images : iterable of np.ndarray
        Images to summarise.

    Returns
    -------
    np.ndarray of shape ``(len(images), 5)`` with columns
    mean, variance, Shannon entropy, skewness, kurtosis
    (``scipy.stats`` defaults for the last two).
    """
    stats = []
    for img in images:
        pixels = img.flatten()  # flatten once; skew and kurtosis both need 1-D data
        stats.append([
            np.mean(img),
            np.var(img),
            shannon_entropy(img),
            skew(pixels),
            kurtosis(pixels),
        ])
    # reshape keeps the result two-dimensional even when `images` is empty,
    # so it can still be stacked column-wise with other feature blocks
    return np.array(stats, dtype=float).reshape(-1, 5)

In [None]:
# Visualization - PCA & t-SNE
def visualize_dim_reduction(features):
    """Plot 2-D PCA and t-SNE projections of a feature matrix side by side.

    Parameters
    ----------
    features : array-like
        One row per sample, one column per feature.

    Returns
    -------
    None. The figure is shown with ``plt.show()``.
    """
    n_samples = len(features)

    pca = PCA(n_components=2)
    reduced_pca = pca.fit_transform(features)

    # t-SNE requires perplexity < n_samples; the library default of 30 would
    # raise for small feature sets, so cap it while keeping 30 for larger ones
    perplexity = min(30.0, max(1.0, n_samples - 1))
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    reduced_tsne = tsne.fit_transform(features)

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    axes[0].scatter(reduced_pca[:, 0], reduced_pca[:, 1], alpha=0.7)
    axes[0].set_title("PCA Projection")

    axes[1].scatter(reduced_tsne[:, 0], reduced_tsne[:, 1], alpha=0.7)
    axes[1].set_title("t-SNE Projection")

    plt.show()


In [None]:
# Feature Extraction
def extract_texture_features(images):
    """Compute GLCM contrast and energy for every image.

    A gray-level co-occurrence matrix is built per image for distance 1 and
    angle 0 (symmetric, normalised) and its 'contrast' and 'energy'
    properties are read.

    Returns
    -------
    np.ndarray of shape ``(len(images), 2)`` with columns contrast, energy.
    """
    features = []
    for img in images:
        glcm = graycomatrix(img, [1], [0], symmetric=True, normed=True)
        contrast = graycoprops(glcm, 'contrast')[0, 0]
        energy = graycoprops(glcm, 'energy')[0, 0]
        features.append([contrast, energy])
    # reshape keeps the result two-dimensional even when `images` is empty,
    # so it can still be stacked column-wise with other feature blocks
    return np.array(features, dtype=float).reshape(-1, 2)

In [None]:
# Main execution: build the synthetic image set, derive both feature
# families and persist the combined matrix to 'features.npy'.
images = generate_synthetic_images()
stats, texture_features = compute_statistics(images), extract_texture_features(images)
# Column-wise join: statistics first, texture features after
all_features = np.concatenate([stats, texture_features], axis=1)
np.save("features.npy", all_features)

In [None]:

# Visualizations
# Correlation between the feature columns, followed by the 2-D projections.
corr_matrix = np.corrcoef(all_features, rowvar=False)
fig_corr, ax_corr = plt.subplots()
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap='coolwarm', ax=ax_corr)
ax_corr.set_title("Feature Correlation Heatmap")
plt.show()

visualize_dim_reduction(all_features)

In [None]:
#Phase 3
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV
import os


In [None]:
# Load Dataset
# Features must exist: the fallback below needs their row count, so load them
# first instead of touching X_features before it has been assigned.
if not os.path.exists("features.npy"):
    raise FileNotFoundError("features.npy not found! Run the feature-extraction cells first.")
X_features = np.load("features.npy")

if os.path.exists("labels.npy"):
    y_labels = np.load("labels.npy")
    print("✅ Features and labels loaded successfully.")
else:
    # No real labels are available, so create placeholder binary labels
    # (0 or 1), one per feature row, and save them for future runs.
    y_labels = np.random.randint(0, 2, size=X_features.shape[0])
    np.save("labels.npy", y_labels)
    print("⚠️ labels.npy not found. Created sample labels and saved to file.")

# A stale labels.npy from an earlier run would otherwise fail later, inside train_test_split
if len(y_labels) != X_features.shape[0]:
    raise ValueError(
        f"labels.npy has {len(y_labels)} entries but features.npy has {X_features.shape[0]} rows."
    )

In [None]:
# Split Data: 80% train / 20% test with a fixed seed for a repeatable split
split_parts = train_test_split(X_features, y_labels, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import os
import zipfile
import cv2
import numpy as np
from glob import glob
from skimage import exposure, restoration
from collections import Counter

In [None]:
# Define dataset path.
# The Google Drive location below is only the default: set the MRI_DATASET_PATH
# environment variable to point the notebook at another copy of the data
# without editing this cell.
dataset_path = os.environ.get(
    "MRI_DATASET_PATH",
    "/content/drive/MyDrive/University /Spring 2025/Data Mining and Machine Learning /Project/GAN-Traning Images",
)


In [None]:
# Verify that the dataset directory is reachable before any cell reads from it
if not os.path.exists(dataset_path):
    print("❌ Dataset path does not exist. Check the path.")
else:
    print("✅ Dataset path exists.")
    print("Contents of dataset folder:", os.listdir(dataset_path))

In [None]:
# Get sample images: the first six image files found directly in the dataset folder.
# Suffixes are compared case-insensitively and include .jpeg; the listing is
# sorted because os.listdir() returns entries in arbitrary order.
image_files = sorted(
    f for f in os.listdir(dataset_path)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)[:6]
image_files

In [None]:
# Get sample images for initial visualization (grayscale).
# Suffixes are compared case-insensitively (and include .jpeg); sorting makes
# the preview reproducible because os.listdir() order is arbitrary.
image_files = sorted(
    f for f in os.listdir(dataset_path)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)

# Plot initial sample images (grayscale) on a fixed 2x3 grid
fig, axes = plt.subplots(2, 3, figsize=(12, 8))
num_images_to_plot = min(6, len(image_files))  # never index past the available images
for ax in axes.flat:
    ax.axis("off")  # hide every panel up front so unused ones show no empty frame
for ax, img_name in zip(axes.flat, image_files[:num_images_to_plot]):
    img = Image.open(os.path.join(dataset_path, img_name)).convert("L")  # convert to grayscale
    ax.imshow(img, cmap="gray")
    ax.set_title(img_name)
plt.show()


In [None]:
# 1. Inspect dataset structure
# Expected layout: dataset_path/<class_name>/image_files..., so every
# sub-directory name found here is treated as a class label.
classes = list(filter(
    lambda entry: os.path.isdir(os.path.join(dataset_path, entry)),
    os.listdir(dataset_path),
))
print("Detected classes:", classes)

In [None]:
# Inspect dataset structure and get class information
classes = [d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]
print("Detected classes:", classes)

# Get list of image paths and their labels.
# Assumes the dataset is structured like: dataset_path/class_name/image.jpg
image_paths = []
labels = []
for cls in classes:
    cls_folder = os.path.join(dataset_path, cls)
    # A character-class glob such as '*.[jp][pn]g' can only match three-letter
    # extensions, so .jpeg files would be skipped; filter on the suffix instead
    # (case-insensitively) and sort for a stable order.
    imgs = sorted(
        p for p in glob(os.path.join(cls_folder, '*'))
        if p.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    image_paths.extend(imgs)
    labels.extend([cls] * len(imgs))  # one label per image, in the same order
print("Total images loaded:", len(image_paths))
print(f"Labels: {labels[:5]}")  # Print first 5 labels
print("Class distribution:", Counter(labels))

In [None]:
# Get list of image paths and their labels (one label per image, same order)
image_paths = []
labels = []
for cls in classes:
    cls_folder = os.path.join(dataset_path, cls)
    # A character-class glob such as '*.[jp][pn]g' can only match three-letter
    # extensions, so .jpeg files would be skipped; filter on the suffix instead
    # (case-insensitively) and sort for a stable order.
    imgs = sorted(
        p for p in glob(os.path.join(cls_folder, '*'))
        if p.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    image_paths.extend(imgs)
    labels.extend([cls] * len(imgs))

print("Total images loaded:", len(image_paths))
print("Class distribution:", Counter(labels))

In [None]:
# List all image files located directly in the dataset folder (.jpg, .jpeg, .png).
# A character-class glob such as "*.[jp][pn]g" cannot match .jpeg, so the
# suffix is checked explicitly (case-insensitively) and the result is sorted.
# NOTE(review): this rebinds image_paths, so it no longer lines up with the
# `labels` list built by the per-class cells.
image_paths = sorted(
    p for p in glob(os.path.join(dataset_path, "*"))
    if p.lower().endswith(('.jpg', '.jpeg', '.png'))
)
print("Total images found:", len(image_paths))

In [None]:
# 2. Load and visualize sample images
def load_image(image_path):
    """Read an image from disk and return it in RGB channel order.

    Parameters
    ----------
    image_path : str
        Path handed to ``cv2.imread``.

    Returns
    -------
    The image converted from OpenCV's BGR order to RGB, or ``None`` when
    ``cv2.imread`` could not read the file (an error line is printed).
    """
    # Read the image in color first (for visualization)
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error loading {image_path}")
        # cv2.cvtColor raises on a None input, so stop here
        return None
    # Convert from BGR (OpenCV default) to RGB
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [None]:
# Load an image for display: OpenCV reads BGR, matplotlib expects RGB
def load_image(image_path):
    """Return the image at ``image_path`` as RGB, or ``None`` if unreadable.

    When ``cv2.imread`` yields ``None`` an error line is printed and ``None``
    is returned; otherwise the BGR result is converted to RGB.
    """
    bgr = cv2.imread(image_path)
    if bgr is not None:
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    print(f"Error loading {image_path}")
    return None

In [None]:
# Visualize a few sample images
num_samples = 6  # Number of images to display (you can adjust this)

if len(image_paths) > 0:
    cols = 3
    rows = (num_samples + cols - 1) // cols  # ceiling division
    # squeeze=False always yields a 2-D array of axes, so flattening is safe
    fig, axes = plt.subplots(rows, cols, figsize=(15, 5 * rows), squeeze=False)
    axes = axes.flatten()

    # Hide every panel first: slots without an image (unused, or a file that
    # failed to load) should not be drawn as empty tick frames.
    for ax in axes:
        ax.axis('off')

    for i, path in enumerate(image_paths[:num_samples]):
        img = load_image(path)
        if img is not None:
            axes[i].imshow(img)
            axes[i].set_title(f"Image {i+1}")
    plt.tight_layout()
    plt.show()
else:
    print("No images found in the dataset.")

In [None]:
# Preprocessing function
def preprocess_image(image_path):
    """Load a grayscale image, stretch its contrast and smooth it.

    The file is read with ``cv2.IMREAD_GRAYSCALE``, its intensities are
    rescaled using the 2nd and 98th percentiles as the input range, and a
    5x5 Gaussian blur is applied. No region-of-interest cropping is done;
    the blurred full frame is returned.

    Raises
    ------
    ValueError
        If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise ValueError(f"Image at {image_path} cannot be loaded.")

    # Contrast stretching between the 2nd and 98th intensity percentiles
    low, high = np.percentile(gray, (2, 98))
    stretched = exposure.rescale_intensity(gray, in_range=(low, high))

    # Gaussian smoothing; the full frame stands in for the ROI (placeholder)
    return cv2.GaussianBlur(stretched, (5, 5), 0)

: 

In [None]:
# 3. Preprocessing functions
def preprocess_image(image_path):
    """Load a grayscale image, stretch its contrast and smooth it.

    The file is read with ``cv2.IMREAD_GRAYSCALE``, its intensities are
    rescaled using the 2nd and 98th percentiles as the input range, and a
    5x5 Gaussian blur is applied. ROI extraction is a placeholder: the
    blurred full frame is returned as-is.

    Raises
    ------
    ValueError
        If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise ValueError(f"Image at {image_path} cannot be loaded.")

    # Intensity normalization: percentile-based contrast stretching
    low, high = np.percentile(gray, (2, 98))
    stretched = exposure.rescale_intensity(gray, in_range=(low, high))

    # Noise reduction; add real ROI logic here if the application needs it
    return cv2.GaussianBlur(stretched, (5, 5), 0)

In [None]:
# Show the first image next to its preprocessed version
if not image_paths:
    print("No images found in the dataset. Skipping preprocessing visualization.")
else:
    sample_path = image_paths[0]
    original_img = cv2.imread(sample_path, cv2.IMREAD_GRAYSCALE)

    if original_img is None:
        # Unreadable file: report it instead of plotting
        print(f"Error: Could not load image at {sample_path}")
    else:
        processed_img = preprocess_image(sample_path)

        fig, axes = plt.subplots(1, 2, figsize=(10, 5))
        panels = (
            (original_img, "Original (Grayscale)"),
            (processed_img, "Preprocessed"),
        )
        for ax, (panel_img, panel_title) in zip(axes, panels):
            ax.imshow(panel_img, cmap='gray')
            ax.set_title(panel_title)
            ax.axis('off')
        plt.show()

In [None]:
# Show the first image next to its preprocessed version
if not image_paths:
    print("No images found in the dataset. Skipping preprocessing visualization.")
else:
    sample_path = image_paths[0]
    original_img = cv2.imread(sample_path, cv2.IMREAD_GRAYSCALE)

    if original_img is None:
        # Unreadable file: report it instead of plotting
        print(f"Error: Could not load image at {sample_path}")
    else:
        processed_img = preprocess_image(sample_path)

        fig, axes = plt.subplots(1, 2, figsize=(10, 5))
        panels = (
            (original_img, "Original (Grayscale)"),
            (processed_img, "Preprocessed"),
        )
        for ax, (panel_img, panel_title) in zip(axes, panels):
            ax.imshow(panel_img, cmap='gray')
            ax.set_title(panel_title)
            ax.axis('off')
        plt.show()

In [None]:
#Phase 2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from skimage.feature import graycomatrix, graycoprops # Changed 'greycomatrix' to 'graycomatrix' and 'greycoprops' to 'graycoprops'
from skimage.measure import shannon_entropy
from scipy.stats import skew, kurtosis

In [None]:
# Generate synthetic grayscale image data (simulating MRI images)
def generate_synthetic_images(num_images=100, img_size=(128, 128), seed=None):
    """Create a list of uniformly random uint8 images.

    Parameters
    ----------
    num_images : int
        How many images to generate.
    img_size : tuple
        Shape of each image array.
    seed : int or None
        When given, a dedicated ``np.random.RandomState(seed)`` is used so the
        images are reproducible. When ``None`` the global ``np.random`` state
        is used, exactly as before.

    Returns
    -------
    list of ``np.ndarray`` with dtype ``uint8`` and values in ``[0, 255]``.
    """
    sampler = np.random if seed is None else np.random.RandomState(seed)
    return [sampler.randint(0, 256, img_size, dtype=np.uint8) for _ in range(num_images)]


In [None]:
# Statistical Analysis
def compute_statistics(images):
    """Compute five first-order intensity statistics per image.

    Parameters
    ----------
    images : iterable of np.ndarray
        Images to summarise.

    Returns
    -------
    np.ndarray of shape ``(len(images), 5)`` with columns
    mean, variance, Shannon entropy, skewness, kurtosis
    (``scipy.stats`` defaults for the last two).
    """
    stats = []
    for img in images:
        pixels = img.flatten()  # flatten once; skew and kurtosis both need 1-D data
        stats.append([
            np.mean(img),
            np.var(img),
            shannon_entropy(img),
            skew(pixels),
            kurtosis(pixels),
        ])
    # reshape keeps the result two-dimensional even when `images` is empty,
    # so it can still be stacked column-wise with other feature blocks
    return np.array(stats, dtype=float).reshape(-1, 5)

In [None]:
# Visualization - PCA & t-SNE
def visualize_dim_reduction(features):
    """Plot 2-D PCA and t-SNE projections of a feature matrix side by side.

    Parameters
    ----------
    features : array-like
        One row per sample, one column per feature.

    Returns
    -------
    None. The figure is shown with ``plt.show()``.
    """
    n_samples = len(features)

    pca = PCA(n_components=2)
    reduced_pca = pca.fit_transform(features)

    # t-SNE requires perplexity < n_samples; the library default of 30 would
    # raise for small feature sets, so cap it while keeping 30 for larger ones
    perplexity = min(30.0, max(1.0, n_samples - 1))
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    reduced_tsne = tsne.fit_transform(features)

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    axes[0].scatter(reduced_pca[:, 0], reduced_pca[:, 1], alpha=0.7)
    axes[0].set_title("PCA Projection")

    axes[1].scatter(reduced_tsne[:, 0], reduced_tsne[:, 1], alpha=0.7)
    axes[1].set_title("t-SNE Projection")

    plt.show()


In [None]:
# Feature Extraction
def extract_texture_features(images):
    """Compute GLCM contrast and energy for every image.

    A gray-level co-occurrence matrix is built per image for distance 1 and
    angle 0 (symmetric, normalised) and its 'contrast' and 'energy'
    properties are read.

    Returns
    -------
    np.ndarray of shape ``(len(images), 2)`` with columns contrast, energy.
    """
    features = []
    for img in images:
        glcm = graycomatrix(img, [1], [0], symmetric=True, normed=True)
        contrast = graycoprops(glcm, 'contrast')[0, 0]
        energy = graycoprops(glcm, 'energy')[0, 0]
        features.append([contrast, energy])
    # reshape keeps the result two-dimensional even when `images` is empty,
    # so it can still be stacked column-wise with other feature blocks
    return np.array(features, dtype=float).reshape(-1, 2)

In [None]:
# Main execution: build the synthetic image set, derive both feature
# families and persist the combined matrix to 'features.npy'.
images = generate_synthetic_images()
stats, texture_features = compute_statistics(images), extract_texture_features(images)
# Column-wise join: statistics first, texture features after
all_features = np.concatenate([stats, texture_features], axis=1)
np.save("features.npy", all_features)

In [None]:

# Visualizations
# Correlation between the feature columns, followed by the 2-D projections.
corr_matrix = np.corrcoef(all_features, rowvar=False)
fig_corr, ax_corr = plt.subplots()
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap='coolwarm', ax=ax_corr)
ax_corr.set_title("Feature Correlation Heatmap")
plt.show()

visualize_dim_reduction(all_features)

In [None]:
#Phase 3
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV
import os


In [None]:
# Load Dataset: both arrays must already be on disk, otherwise stop here
if not (os.path.exists("features.npy") and os.path.exists("labels.npy")):
    raise FileNotFoundError("features.npy or labels.npy not found!")

X_features = np.load("features.npy")
y_labels = np.load("labels.npy")
print("✅ Features and labels loaded successfully.")

In [None]:
# Split Data: 80% train / 20% test with a fixed seed for a repeatable split
split_parts = train_test_split(X_features, y_labels, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [None]:
# --- Implement baseline models (Random Forest, SVM, XGBoost) ---
print("\n--- Baseline Model Training ---")
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    # probability=True adds an internally cross-validated calibration step
    # that shuffles data, so the seed is pinned for repeatable scores
    "SVM": SVC(kernel='linear', probability=True, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# The loop variable is deliberately not named `model`: that name is used for
# the Keras network elsewhere in this notebook and would be overwritten.
for name, clf in models.items():
    print(f"\nTraining {name}:")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    # Column 1 of predict_proba is used as the score, i.e. this assumes a
    # binary problem whose positive class is the second one.
    auc = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    print(f"{name} Accuracy: {acc:.4f}, AUC-ROC: {auc:.4f}")
    print(classification_report(y_test, y_pred))


In [None]:
import os

# Default dataset location; the MRI_DATASET_PATH environment variable takes
# precedence so the notebook can run against another copy of the data.
dataset_path = os.environ.get(
    "MRI_DATASET_PATH",
    "/content/drive/MyDrive/University /Spring 2025/Data Mining and Machine Learning /Project/GAN-Traning Images",
)

# Check if the dataset path exists
if os.path.exists(dataset_path):
    print(f"✅ Dataset path exists: {dataset_path}")
else:
    print(f"❌ Dataset path does NOT exist: {dataset_path}")


In [None]:

# Show what is inside the dataset folder (files and sub-directories alike)
print("Contents of dataset folder:")
folder_contents = os.listdir(dataset_path)
print(folder_contents)

In [None]:
# (Optional) Class folders, if any: keep only the entries that are directories
subdirectories = list(filter(
    lambda entry: os.path.isdir(os.path.join(dataset_path, entry)),
    os.listdir(dataset_path),
))
print("Detected subdirectories:", subdirectories)

In [None]:
# (Optional) If you expect image files directly in the dataset path (not recommended for flow_from_directory), list a few.
# Suffixes are compared case-insensitively so files such as "scan.JPG" are
# listed too; sorting makes the sample stable across runs.
image_files = sorted(
    f for f in os.listdir(dataset_path)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)[:10]
print("Sample image files (if any) in the main directory:", image_files)


In [None]:
# --- Deep Learning models (CNNs like EfficientNet, ResNet, Transformer-based models) ---
print("\n--- Deep Learning Model Training (ResNet50) ---")
# Default dataset location; MRI_DATASET_PATH overrides it when set.
dataset_path = os.environ.get(
    "MRI_DATASET_PATH",
    "/content/drive/MyDrive/University /Spring 2025/Data Mining and Machine Learning /Project/GAN-Traning Images",
)

# flow_from_directory treats every sub-directory of the given folder as one
# class, so the expected layout is:
#   dataset_path/class1/*.jpg
#   dataset_path/class2/*.jpg
# Do NOT add train/ and validation/ folders here: they would be read as the two
# classes. The train/validation split is taken from the same folders through
# validation_split below.
# NOTE(review): pixels are only rescaled to [0, 1]; confirm this is the intended
# input scaling for the ImageNet-pretrained backbone.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Settings shared by both subsets, kept in one place so they cannot drift apart
flow_kwargs = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',  # Or 'categorical' for multi-class
)

train_data = train_datagen.flow_from_directory(
    dataset_path,  # Main dataset directory
    subset='training',  # Use the training subset
    **flow_kwargs
)

val_data = train_datagen.flow_from_directory(
    dataset_path,  # Main dataset directory
    subset='validation',  # Use the validation subset
    **flow_kwargs
)

In [None]:
# Define CNN Model (ResNet50): frozen ImageNet backbone plus a small binary head
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # Freeze base layers

# Stack the head layers on top of the backbone's output, one after another
x = base_model.output
for head_layer in (
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dropout(0.5),
):
    x = head_layer(x)
output = Dense(1, activation='sigmoid')(x)  # single sigmoid unit
model = Model(inputs=base_model.input, outputs=output)

In [None]:
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix, precision_recall_fscore_support
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Function to evaluate and display metrics
def evaluate_model(model, X_test, y_test, model_name):
    """Print binary-classification metrics and plot the confusion matrix.

    Parameters
    ----------
    model : object
        Anything with ``predict``. If it also has ``predict_proba`` the second
        column is used as the positive-class score; otherwise the output of
        ``predict`` is treated as positive-class probabilities.
    X_test, y_test :
        Evaluation inputs and true binary labels.
    model_name : str
        Name used in the printed report and the plot title.

    Returns
    -------
    None. Metrics are printed and the heatmap is shown.
    """
    y_pred = model.predict(X_test)

    if hasattr(model, "predict_proba"):  # For ML models with probability outputs
        y_pred_proba = model.predict_proba(X_test)[:, 1]
    else:
        # For CNNs, predictions are already probabilities. The label-based
        # metrics below reject continuous values, so flatten the scores and
        # threshold them at 0.5 to get hard labels.
        # NOTE(review): assumes a single sigmoid output per sample - confirm.
        y_pred_proba = np.ravel(y_pred)
        y_pred = (y_pred_proba > 0.5).astype(int)

    precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')
    auc = roc_auc_score(y_test, y_pred_proba)

    print(f"Model: {model_name}")
    print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}, AUC-ROC: {auc:.4f}")
    print("Classification Report:\n", classification_report(y_test, y_pred))

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(5, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["Class 0", "Class 1"], yticklabels=["Class 0", "Class 1"])
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title(f"Confusion Matrix for {model_name}")
    plt.show()

In [None]:
# --- Implement baseline models (Random Forest, SVM, XGBoost) ---
print("\n--- Baseline Model Training ---")
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    # probability=True adds an internally cross-validated calibration step
    # that shuffles data, so the seed is pinned for repeatable scores
    "SVM": SVC(kernel='linear', probability=True, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
}

# Handles to the individual estimators (they are fitted in the loop below)
rf = models["Random Forest"]
svm = models["SVM"]
xgb = models["XGBoost"]

# The loop variable is deliberately not named `model`: that name holds the
# Keras network defined earlier in this notebook and would be overwritten.
for name, clf in models.items():
    print(f"\nTraining {name}:")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    # Column 1 of predict_proba is used as the score, i.e. this assumes a
    # binary problem whose positive class is the second one.
    auc = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    print(f"{name} Accuracy: {acc:.4f}, AUC-ROC: {auc:.4f}")
    print(classification_report(y_test, y_pred))

# Evaluate ML Models
evaluate_model(rf, X_test, y_test, "Random Forest")
evaluate_model(svm, X_test, y_test, "SVM")
evaluate_model(xgb, X_test, y_test, "XGBoost")

In [None]:
import numpy as np

def classify_risk(glcm_contrast, entropy, skewness, ml_confidence):
    """
    Assign a patient risk level from image features and a model score.

    The rules are checked in order and the first match wins:
      1. ml_confidence below 0.5                       -> "Low"
      2. glcm_contrast above 0.6 and entropy above 5.0 -> "High"
      3. skewness outside [-0.5, 0.5]                  -> "Medium"
      4. ml_confidence of at least 0.7                 -> "High"
      5. anything else                                 -> "Medium"

    Parameters:
    - glcm_contrast: Texture contrast measured on the MRI
    - entropy: Image entropy
    - skewness: Asymmetry of the image feature distribution
    - ml_confidence: Probability score from the ML model (0 to 1)

    Returns:
    - Risk level as a string ("Low", "Medium", "High")
    """
    if ml_confidence < 0.5:
        return "Low"
    if glcm_contrast > 0.6 and entropy > 5.0:
        return "High"
    if abs(skewness) > 0.5:
        return "Medium"
    return "High" if ml_confidence >= 0.7 else "Medium"


In [None]:

# Example usage with test data.
# Each tuple is (glcm_contrast, entropy, skewness, ml_confidence).
example_patients = [
    (0.5, 4.2, 0.3, 0.8),  # High risk (ML confidence high)
    (0.3, 3.8, -0.2, 0.4), # Low risk (ML confidence low)
    (0.7, 5.5, 0.6, 0.6),  # High risk (high contrast & entropy)
    (0.4, 4.0, -0.6, 0.6)  # Medium risk (skewness out of range)
]

# Patients are numbered from 1 in the printed report
for patient_number, patient in enumerate(example_patients, start=1):
    risk_level = classify_risk(*patient)
    print(f"Patient {patient_number} Risk Level: {risk_level}")


In [None]:
#Phase 4
import dash
from dash import dcc, html, Input, Output
import plotly.express as px
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import base64
from io import BytesIO
import streamlit as st
# NOTE(review): presumably a leftover smoke test for the Streamlit install.
# It sits in the same cell as the Dash imports; confirm whether this notebook
# is meant to drive Dash, Streamlit, or both.
st.title("Test Dashboard")
st.write("If you see this, Streamlit is working!")


In [None]:
# Sample MRI Data & Risk Classification (Replace with real data)
# One tuple per patient: (Patient ID, Risk Score, Risk Category)
_patient_rows = [
    (101, 0.2, 'Low'),
    (102, 0.6, 'Medium'),
    (103, 0.8, 'High'),
    (104, 0.4, 'Medium'),
    (105, 0.9, 'High'),
]
data = pd.DataFrame(_patient_rows, columns=['Patient ID', 'Risk Score', 'Risk Category'])


In [None]:
# Convert MRI Image to Base64 (Replace with actual MRI paths)
def encode_image(image_path):
    """Return the file at ``image_path`` as a base64 ``data:`` URI.

    The MIME type is guessed from the file extension and falls back to
    ``image/png`` when the extension is unknown or not an image type.

    Parameters
    ----------
    image_path : str
        Path of the image file to embed.

    Returns
    -------
    str or None
        ``"data:<mime>;base64,<payload>"``, or ``None`` when ``image_path``
        is not an existing regular file.
    """
    import mimetypes  # local import: only this helper needs it

    # isfile (rather than exists) so a directory path yields None instead of
    # raising inside open()
    if not os.path.isfile(image_path):
        return None

    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/png"

    with open(image_path, "rb") as img_file:
        payload = base64.b64encode(img_file.read()).decode()
    return f"data:{mime_type};base64,{payload}"

# Sample MRI Image Path (Replace with actual images)
mri_image_path = "sample_mri.png"
encoded_image = encode_image(mri_image_path)  # None when the file is missing

# Dash App Setup
app = dash.Dash(__name__)

# The dropdown starts on 'All', so 'All' must be a real option: otherwise the
# control opens with an invalid value, and because it is not clearable the
# user could never return to the unfiltered view after picking a category.
risk_options = [{'label': 'All', 'value': 'All'}] + [
    {'label': cat, 'value': cat} for cat in data['Risk Category'].unique()
]

app.layout = html.Div([
    html.H1("MRI Clinical Decision Support Dashboard"),

    # Risk Category Filter
    html.Label("Filter by Risk Category:"),
    dcc.Dropdown(
        id='risk-filter',
        options=risk_options,
        value='All',
        clearable=False
    ),

    # Risk Distribution Chart
    dcc.Graph(id='risk-chart'),

    # MRI Image Display
    html.H3("MRI Scan"),
    html.Img(id='mri-image', src=encoded_image, style={'width': '50%'}),

    # Patient Risk Table
    html.H3("Patient Risk Profiles"),
    html.Div(id='risk-table')
])

@app.callback(
    Output('risk-chart', 'figure'),
    Output('risk-table', 'children'),
    Input('risk-filter', 'value')
)
def update_dashboard(selected_risk):
    """Rebuild the histogram and the patient table for the chosen category.

    ``selected_risk`` is the dropdown value; 'All' keeps every row of
    ``data``, any other value keeps only rows whose 'Risk Category' equals it.
    Returns the Plotly figure and the HTML table, in that order.
    """
    if selected_risk == 'All':
        filtered_data = data
    else:
        filtered_data = data[data['Risk Category'] == selected_risk]

    fig = px.histogram(filtered_data, x='Risk Category', title='Risk Category Distribution', color='Risk Category')

    # Header row from the column names, then one table row per patient
    header_row = html.Tr([html.Th(col) for col in filtered_data.columns])
    body_rows = [
        html.Tr([html.Td(record[col]) for col in filtered_data.columns])
        for _, record in filtered_data.iterrows()
    ]
    table = html.Table([header_row] + body_rows)

    return fig, table

if __name__ == '__main__':
    app.run(debug=True)

In [None]:
import streamlit as st
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from PIL import Image
import cv2
import os

In [None]:
# Load pre-trained model
MODEL_PATH = "mri_model.h5"  # Ensure the model path is correct
if os.path.exists(MODEL_PATH):
    model = load_model(MODEL_PATH)
else:
    # No model file on disk: report it in the Streamlit page and stop the script,
    # so the prediction code further down is not reached without `model`.
    st.error("Model file not found! Please upload the trained model.")
    st.stop()

# Function to preprocess image
def preprocess_image(image):
    """Turn a PIL image into a normalized batch of one for the model.

    Parameters:
    - image: PIL image in any mode (e.g. grayscale "L", "RGB" or "RGBA").

    Returns:
    - Float array of shape (1, 224, 224, 3) with values scaled to [0, 1].
    """
    # Grayscale scans and PNGs with an alpha channel would otherwise produce a
    # batch without exactly three channels. Assumes the saved model takes
    # 3-channel input like the ResNet50 defined in this notebook - TODO confirm.
    image = image.convert("RGB")
    image = image.resize((224, 224))  # Resize to model's expected input size
    image = np.array(image) / 255.0  # Normalize pixel values
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    return image

In [None]:

# Streamlit UI
# Page title and a one-line instruction for the user.
st.title("AI-Powered MRI Analysis System")
st.write("Upload an MRI scan to analyze and get predictions.")


In [None]:

# Upload image
uploaded_file = st.file_uploader("Choose an MRI image...", type=["jpg", "png", "jpeg"])

# Nothing is predicted until a file has been provided.
if uploaded_file is not None:
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded MRI Image", use_column_width=True)

    # Preprocess and predict
    processed_image = preprocess_image(image)
    prediction = model.predict(processed_image)
    probability = prediction[0][0]  # Assuming binary classification

    # Display results
    # `probability` is read as the positive-class score; the negative branch
    # reports its complement (1 - probability) as the confidence.
    st.subheader("Prediction Result")
    if probability > 0.5:
        st.success(f"Positive MRI scan with {probability * 100:.2f}% confidence")
    else:
        st.warning(f"Negative MRI scan with {(1 - probability) * 100:.2f}% confidence")


In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import os
import zipfile
import cv2
import numpy as np
from glob import glob
from skimage import exposure, restoration
from collections import Counter

In [None]:
# Define dataset path
# NOTE(review): the spaces before some '/' separators and the spelling "Traning"
# are part of the literal path; presumably they mirror the real Drive folder
# names - verify before "correcting" them.
dataset_path = "/content/drive/MyDrive/University /Spring 2025/Data Mining and Machine Learning /Project/GAN-Traning Images"


In [None]:
# Check if path exists
# Only reports the outcome; a missing path does not stop the notebook here.
if os.path.exists(dataset_path):
    print("✅ Dataset path exists.")
    print("Contents of dataset folder:", os.listdir(dataset_path))
else:
    print("❌ Dataset path does not exist. Check the path.")

In [None]:
# Get sample images
# Match extensions case-insensitively and include .jpeg (as the later listing
# cell does); sort because os.listdir returns entries in arbitrary order.
image_files = sorted(
    f for f in os.listdir(dataset_path)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)[:6]
image_files

In [None]:
# Get sample images for initial visualization (grayscale)
# Match extensions case-insensitively and include .jpeg; sort because os.listdir
# returns entries in arbitrary order.
image_files = sorted(
    f for f in os.listdir(dataset_path)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)

# Plot initial sample images (grayscale)
fig, axes = plt.subplots(2, 3, figsize=(12, 8))
num_images_to_plot = min(6, len(image_files)) # Ensure we don't try to plot more than 6 images
for i, ax in enumerate(axes.flat):
    # Turn every panel's axis off, so panels left empty when fewer than six
    # images exist do not show bare ticks.
    ax.axis("off")
    if i >= num_images_to_plot:
        continue
    img_name = image_files[i]
    # Close the file handle once the grayscale copy has been made.
    with Image.open(os.path.join(dataset_path, img_name)) as handle:
        img = handle.convert("L") # Convert to grayscale
    ax.imshow(img, cmap="gray")
    ax.set_title(img_name)
plt.show()


In [None]:
# 1. Inspect dataset structure
# We assume the dataset has a structure like: dataset_path/<class_name>/image_files...
# Every immediate sub-directory of dataset_path is taken as one class name.
classes = [d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]
print("Detected classes:", classes)

In [None]:
# Inspect dataset structure and get class information
classes = [d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]
print("Detected classes:", classes)
# Get list of image paths and their labels
image_paths = []
labels = []

# Assuming your dataset is structured like: dataset_path/class_name/image.jpg
for cls in classes:
    cls_folder = os.path.join(dataset_path, cls)
    # A single character-class pattern such as '*.[jp][pn]g' only matches
    # three-letter extensions and therefore misses .jpeg, so each extension is
    # globbed explicitly. Sorted for a reproducible order.
    imgs = sorted(
        path
        for pattern in ('*.jpg', '*.jpeg', '*.png')
        for path in glob(os.path.join(cls_folder, pattern))
    )
    image_paths.extend(imgs)
    labels.extend([cls]*len(imgs))  # one label per image, in the same order
print("Total images loaded:", len(image_paths))
print(f"Labels: {labels[:5]}") # Print first 5 labels
print("Class distribution:", Counter(labels))

In [None]:
# Get list of image paths and their labels
image_paths = []
labels = []
for cls in classes:
    cls_folder = os.path.join(dataset_path, cls)
    # A single character-class pattern such as '*.[jp][pn]g' only matches
    # three-letter extensions and therefore misses .jpeg, so each extension is
    # globbed explicitly. Sorted for a reproducible order.
    imgs = sorted(
        path
        for pattern in ('*.jpg', '*.jpeg', '*.png')
        for path in glob(os.path.join(cls_folder, pattern))
    )
    image_paths.extend(imgs)
    labels.extend([cls]*len(imgs))  # one label per image, in the same order

print("Total images loaded:", len(image_paths))
print("Class distribution:", Counter(labels))

In [None]:
# List all image files directly inside dataset_path (jpg, jpeg and png images)
# NOTE: this rebinds image_paths to the flat, top-level listing.
# Each extension is globbed explicitly because a character-class pattern such as
# '*.[jp][pn]g' cannot match the four-letter .jpeg extension.
image_paths = sorted(
    path
    for pattern in ('*.jpg', '*.jpeg', '*.png')
    for path in glob(os.path.join(dataset_path, pattern))
)
print("Total images found:", len(image_paths))

In [None]:
# 2. Load and visualize sample images
def load_image(image_path):
    """Read an image with OpenCV and return it in RGB channel order.

    Parameters:
    - image_path: Path of the image file to read.

    Returns:
    - The image converted from BGR to RGB, or None when it cannot be read
      (an error line is printed in that case).
    """
    # Read the image in color first (for visualization)
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error loading {image_path}")
        # Stop here: passing None on to cvtColor would raise instead of letting
        # the caller skip the unreadable file.
        return None
    # Convert from BGR (OpenCV default) to RGB
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [None]:
# Function to load images in color (BGR to RGB conversion for visualization)
def load_image(image_path):
    """Read an image with OpenCV and return it in RGB order, or None on failure."""
    bgr_pixels = cv2.imread(image_path)
    if bgr_pixels is not None:
        # OpenCV reads BGR; matplotlib expects RGB.
        return cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    print(f"Error loading {image_path}")
    return None

In [None]:
# Visualize a few sample images
num_samples = 6  # Number of images to display (you can adjust this)

if len(image_paths) > 0:
    # Create subplots (adjusting layout based on number of samples)
    cols = 3
    rows = (num_samples + cols - 1) // cols  # Calculate required rows (ceiling division)
    fig, axes = plt.subplots(rows, cols, figsize=(15, 5 * rows))

    # Flatten axes for easy iteration if it's a 2D array
    axes = axes.flatten() if isinstance(axes, (list, np.ndarray)) else [axes]

    for i in range(num_samples):
        if i < len(image_paths):
            img = load_image(image_paths[i])
            # An unreadable image is skipped; its panel is left untouched.
            if img is not None:
                axes[i].imshow(img)
                axes[i].axis('off')
                axes[i].set_title(f"Image {i+1}")
        else:
            # Hide unused subplots
            axes[i].axis('off')
    plt.tight_layout()
    plt.show()
else:
    print("No images found in the dataset.")

In [None]:
# Preprocessing function
def preprocess_image(image_path):
    """Load an image as grayscale, stretch its contrast and smooth it.

    Parameters:
    - image_path: Path of the image file to read.

    Returns:
    - The contrast-stretched, Gaussian-blurred image (the whole image is used
      as the region of interest).

    Raises:
    - ValueError: when the file cannot be read as an image.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise ValueError(f"Image at {image_path} cannot be loaded.")

    # Contrast stretching between the 2nd and 98th intensity percentiles.
    low, high = np.percentile(gray, (2, 98))
    stretched = exposure.rescale_intensity(gray, in_range=(low, high))

    # 5x5 Gaussian blur for noise reduction; no ROI cropping is applied yet.
    return cv2.GaussianBlur(stretched, (5, 5), 0)

In [None]:
# 3. Preprocessing functions
def preprocess_image(image_path):
    """Return the grayscale image at ``image_path`` after normalization and denoising.

    Parameters:
    - image_path: Path of the image file to read.

    Returns:
    - The processed image; ROI extraction is a placeholder, so the full image
      is returned.

    Raises:
    - ValueError: when the file cannot be read as an image.
    """
    source = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if source is None:
        raise ValueError(f"Image at {image_path} cannot be loaded.")

    # Intensity normalization: rescale using the 2nd-98th percentile window.
    percentile_window = tuple(np.percentile(source, (2, 98)))
    normalized = exposure.rescale_intensity(source, in_range=percentile_window)

    # Noise reduction with a 5x5 Gaussian kernel.
    denoised = cv2.GaussianBlur(normalized, (5, 5), 0)

    # Add application-specific ROI extraction here when needed.
    return denoised

In [None]:
# Process a sample image and visualize before & after preprocessing
if image_paths:  # Check if image_paths is not empty
    sample_path = image_paths[0]
    original_img = cv2.imread(sample_path, cv2.IMREAD_GRAYSCALE)

    # Check if the image was loaded successfully
    if original_img is not None:
        # preprocess_image reads the file again from sample_path.
        processed_img = preprocess_image(sample_path)  # Assuming you have defined this function

        # Left panel: raw grayscale image; right panel: preprocessed result.
        fig, axes = plt.subplots(1, 2, figsize=(10, 5))
        axes[0].imshow(original_img, cmap='gray')
        axes[0].set_title("Original (Grayscale)")
        axes[0].axis('off')
        axes[1].imshow(processed_img, cmap='gray')
        axes[1].set_title("Preprocessed")
        axes[1].axis('off')
        plt.show()
    else:
        print(f"Error: Could not load image at {sample_path}")
else:
    print("No images found in the dataset. Skipping preprocessing visualization.")

In [None]:
# Process a sample image and visualize before & after preprocessing
if image_paths:  # Check if image_paths is not empty
    sample_path = image_paths[0]
    original_img = cv2.imread(sample_path, cv2.IMREAD_GRAYSCALE)

    # Check if the image was loaded successfully
    if original_img is not None:
        # preprocess_image reads the file again from sample_path.
        processed_img = preprocess_image(sample_path)  # Assuming you have defined this function

        # Left panel: raw grayscale image; right panel: preprocessed result.
        fig, axes = plt.subplots(1, 2, figsize=(10, 5))
        axes[0].imshow(original_img, cmap='gray')
        axes[0].set_title("Original (Grayscale)")
        axes[0].axis('off')
        axes[1].imshow(processed_img, cmap='gray')
        axes[1].set_title("Preprocessed")
        axes[1].axis('off')
        plt.show()
    else:
        print(f"Error: Could not load image at {sample_path}")
else:
    print("No images found in the dataset. Skipping preprocessing visualization.")

In [None]:
#Phase 2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from skimage.feature import graycomatrix, graycoprops # Changed 'greycomatrix' to 'graycomatrix' and 'greycoprops' to 'graycoprops'
from skimage.measure import shannon_entropy
from scipy.stats import skew, kurtosis

In [None]:
# Generate synthetic grayscale image data (simulating MRI images)
def generate_synthetic_images(num_images=100, img_size=(128, 128), seed=None):
    """Generate random uint8 noise images that stand in for MRI scans.

    Parameters:
    - num_images: Number of images to generate.
    - img_size: Shape of each image array.
    - seed: Optional integer seed; when given, the same images are produced on
      every call. When None, NumPy's global random state is used as before.

    Returns:
    - List of ``num_images`` arrays of shape ``img_size`` and dtype uint8 with
      values in [0, 255].
    """
    # A private RandomState makes seeded calls repeatable without touching the
    # global generator that unseeded calls keep using.
    rng = np.random if seed is None else np.random.RandomState(seed)
    return [rng.randint(0, 256, img_size, dtype=np.uint8) for _ in range(num_images)]


In [None]:
# Statistical Analysis
def compute_statistics(images):
    """Compute first-order intensity statistics for every image.

    Parameters:
    - images: Iterable of image arrays.

    Returns:
    - Array with one row per image and the columns
      [mean, variance, Shannon entropy, skewness, kurtosis].
    """
    rows = []
    for image in images:
        pixels = image.flatten()  # skew/kurtosis are taken over all pixels at once
        rows.append([
            np.mean(image),
            np.var(image),
            shannon_entropy(image),
            skew(pixels),
            kurtosis(pixels),
        ])
    return np.array(rows)

In [None]:
# Visualization - PCA & t-SNE
def visualize_dim_reduction(features):
    """Plot 2-D PCA and t-SNE projections of ``features`` side by side.

    Parameters:
    - features: Feature matrix passed to PCA/TSNE ``fit_transform``
      (one row per sample).

    Returns:
    - None. The figure is shown with ``plt.show()``.
    """
    pca = PCA(n_components=2)
    reduced_pca = pca.fit_transform(features)

    # t-SNE needs perplexity < number of samples. Cap the default of 30 so small
    # feature sets do not raise; larger sets keep the default value.
    n_samples = len(features)
    perplexity = min(30.0, max(n_samples - 1, 1))
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    reduced_tsne = tsne.fit_transform(features)

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    axes[0].scatter(reduced_pca[:, 0], reduced_pca[:, 1], alpha=0.7)
    axes[0].set_title("PCA Projection")

    axes[1].scatter(reduced_tsne[:, 0], reduced_tsne[:, 1], alpha=0.7)
    axes[1].set_title("t-SNE Projection")

    plt.show()


In [None]:
# Feature Extraction
def extract_texture_features(images):
    """Compute GLCM contrast and energy for every image.

    Parameters:
    - images: Iterable of image arrays accepted by ``graycomatrix``.

    Returns:
    - Array with one row per image and the columns [contrast, energy].
    """
    def _contrast_and_energy(image):
        # Symmetric, normalized co-occurrence matrix for distance 1, angle 0.
        glcm = graycomatrix(image, [1], [0], symmetric=True, normed=True)
        return [graycoprops(glcm, prop)[0, 0] for prop in ('contrast', 'energy')]

    return np.array([_contrast_and_energy(image) for image in images])

In [None]:
# Main execution
# The features are computed from randomly generated images, not from the dataset.
images = generate_synthetic_images()
stats = compute_statistics(images)
texture_features = extract_texture_features(images)
# Statistics columns first, then the texture columns, one row per image.
all_features = np.hstack((stats, texture_features))
# Save the features to 'features.npy'
# NOTE(review): only features.npy is written here. The Phase 3 loading cell also
# requires labels.npy, which none of the visible cells creates - confirm its origin.
np.save("features.npy", all_features)

In [None]:

# Visualizations
# Correlation between feature columns (the matrix is transposed so that each
# feature is one row for np.corrcoef).
sns.heatmap(np.corrcoef(all_features.T), annot=True, fmt=".2f", cmap='coolwarm')
plt.title("Feature Correlation Heatmap")
plt.show()

# 2-D PCA and t-SNE projections of the same feature matrix.
visualize_dim_reduction(all_features)

In [None]:
#Phase 3
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.model_selection import train_test_split, GridSearchCV
import os


In [None]:
# Load Dataset
# Both arrays are read from the current working directory; if either file is
# missing the cell stops with FileNotFoundError.
if os.path.exists("features.npy") and os.path.exists("labels.npy"):
    X_features = np.load("features.npy")
    y_labels = np.load("labels.npy")
    print("✅ Features and labels loaded successfully.")
else:
    raise FileNotFoundError("features.npy or labels.npy not found!")

In [None]:
# Split Data
# Hold out 20% of the samples for testing; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_features, y_labels, test_size=0.2, random_state=42)


In [None]:
# --- Implement baseline models (Random Forest, SVM, XGBoost) ---
print("\n--- Baseline Model Training ---")
# Estimators are kept in a dict keyed by display name so that they can be
# trained and reported in a single loop.
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(kernel='linear', probability=True),  # probability=True enables predict_proba
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss')
}

# NOTE: the loop variable is the global name `model`; after this cell it refers
# to the last estimator in the dict.
for name, model in models.items():
    print(f"\nTraining {name}:")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    # Column 1 of predict_proba is used as the score, which assumes binary labels.
    auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    print(f"{name} Accuracy: {acc:.4f}, AUC-ROC: {auc:.4f}")
    print(classification_report(y_test, y_pred))


In [None]:
import os

# Dataset location, redefined for this phase.
# NOTE(review): the spaces before some '/' separators and the spelling "Traning"
# are part of the literal path - verify against the real Drive folder names.
dataset_path = "/content/drive/MyDrive/University /Spring 2025/Data Mining and Machine Learning /Project/GAN-Traning Images"

# Check if the dataset path exists
# Only reports the outcome; a missing path does not stop the notebook here.
if os.path.exists(dataset_path):
    print(f"✅ Dataset path exists: {dataset_path}")
else:
    print(f"❌ Dataset path does NOT exist: {dataset_path}")


In [None]:

# List the contents of the dataset folder
# os.listdir raises if dataset_path does not exist.
print("Contents of dataset folder:")
print(os.listdir(dataset_path))

In [None]:
# (Optional) If you expect subdirectories for classes, check for them
# Keeps only the entries of dataset_path that are directories.
subdirectories = [d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]
print("Detected subdirectories:", subdirectories)

In [None]:
# (Optional) If you expect image files directly in the dataset path (not recommended for flow_from_directory), list a few
# At most the first 10 matching names are kept; the extension test is case-sensitive.
image_files = [f for f in os.listdir(dataset_path) if f.endswith(('.jpg', '.jpeg', '.png'))][:10]
print("Sample image files (if any) in the main directory:", image_files)


In [None]:
# --- Deep Learning models (CNNs like EfficientNet, ResNet, Transformer-based models) ---
print("\n--- Deep Learning Model Training (ResNet50) ---")
dataset_path = "/content/drive/MyDrive/University /Spring 2025/Data Mining and Machine Learning /Project/GAN-Traning Images" # Update with actual path

# Both generators below read dataset_path itself and the train/validation split
# comes from validation_split, so the expected layout is one sub-folder per class
# directly under dataset_path:
# dataset_path/class1/*.jpg
# dataset_path/class2/*.jpg
# (separate train/ and validation/ folders would themselves be read as classes)

# Pixel values are scaled to [0, 1]; 20% of the images are reserved for validation.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_data = train_datagen.flow_from_directory(
    dataset_path,  # Main dataset directory
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',  # Or 'categorical' for multi-class
    subset='training'  # Use the training subset
)

val_data = train_datagen.flow_from_directory(
    dataset_path,  # Main dataset directory
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',  # Or 'categorical' for multi-class
    subset='validation'  # Use the validation subset
)

In [None]:
# Define CNN Model (ResNet50)
# ImageNet-pretrained backbone without its classification head.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False # Freeze base layers
# New head: global average pooling -> Dense(128, relu) -> Dropout(0.5) -> one sigmoid unit.
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)
# NOTE(review): this rebinds the global `model`. No compile()/fit() call for it
# appears in the cells visible here - confirm that training happens elsewhere.
model = Model(inputs=base_model.input, outputs=output)

In [None]:
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix, precision_recall_fscore_support
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Function to evaluate and display metrics
def evaluate_model(model, X_test, y_test, model_name):
    """Print binary-classification metrics for ``model`` and plot its confusion matrix.

    Parameters:
    - model: Fitted model with ``predict``; ``predict_proba`` is used when present.
    - X_test: Test inputs passed to the model.
    - y_test: True binary labels.
    - model_name: Name used in the printed report and the plot title.

    Returns:
    - None. Metrics are printed and the confusion matrix is shown as a heatmap.
    """
    raw_pred = model.predict(X_test)

    if hasattr(model, "predict_proba"):  # For ML models with probability outputs
        y_pred = raw_pred
        y_pred_proba = model.predict_proba(X_test)[:, 1]
    else:
        # For CNNs, predict() already returns probabilities. Keep them as the
        # score for AUC and threshold at 0.5 to get the hard labels that the
        # label-based metrics below require (continuous values make them raise).
        y_pred_proba = np.asarray(raw_pred).ravel()
        y_pred = (y_pred_proba > 0.5).astype(int)

    precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')
    auc = roc_auc_score(y_test, y_pred_proba)

    print(f"Model: {model_name}")
    print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}, AUC-ROC: {auc:.4f}")
    print("Classification Report:\n", classification_report(y_test, y_pred))

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(5, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["Class 0", "Class 1"], yticklabels=["Class 0", "Class 1"])
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title(f"Confusion Matrix for {model_name}")
    plt.show()

In [None]:
# Evaluate ML Models
# The fitted estimators live in the `models` dict built by the baseline-training
# cell, keyed by display name; no rf / svm / xgb variables are defined in the
# visible notebook cells.
for model_name, fitted_model in models.items():
    evaluate_model(fitted_model, X_test, y_test, model_name)

In [None]:
import numpy as np

def classify_risk(glcm_contrast, entropy, skewness, ml_confidence):
    """
    Assign a patient risk level from texture features and the model score.

    The rules are checked in order and the first match wins:
    1. ml_confidence below 0.5                      -> "Low"
    2. glcm_contrast above 0.6 and entropy above 5  -> "High"
    3. skewness outside [-0.5, 0.5]                 -> "Medium"
    4. ml_confidence of at least 0.7                -> "High"
    5. anything else                                -> "Medium"

    Parameters:
    - glcm_contrast: Measure of texture contrast from MRI
    - entropy: Image entropy, indicating disorder
    - skewness: Statistical measure of asymmetry in image features
    - ml_confidence: Probability score from ML model (0 to 1)

    Returns:
    - Risk level as a string ("Low", "Medium", "High")
    """
    if ml_confidence < 0.5:
        return "Low"
    if glcm_contrast > 0.6 and entropy > 5.0:
        return "High"
    if abs(skewness) > 0.5:
        return "Medium"
    return "High" if ml_confidence >= 0.7 else "Medium"


In [None]:

# Example usage with test data
# Each tuple is (glcm_contrast, entropy, skewness, ml_confidence), i.e. the
# positional parameters of classify_risk in order.
example_patients = [
    (0.5, 4.2, 0.3, 0.8),  # High risk (ML confidence high)
    (0.3, 3.8, -0.2, 0.4), # Low risk (ML confidence low)
    (0.7, 5.5, 0.6, 0.6),  # High risk (high contrast & entropy)
    (0.4, 4.0, -0.6, 0.6)  # Medium risk (skewness out of range)
]

# Print the result for each patient with a 1-based patient number.
for i, patient in enumerate(example_patients):
    risk_level = classify_risk(*patient)
    print(f"Patient {i+1} Risk Level: {risk_level}")


In [None]:
#Phase 4
import dash
from dash import dcc, html, Input, Output
import plotly.express as px
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import base64
from io import BytesIO
import streamlit as st
st.title("Test Dashboard")
st.write("If you see this, Streamlit is working!")


In [None]:
# Sample MRI Data & Risk Classification (Replace with real data)
# Hard-coded placeholder table with five patients; the columns are
# 'Patient ID', 'Risk Score' and 'Risk Category'.
data = pd.DataFrame({
    'Patient ID': [101, 102, 103, 104, 105],
    'Risk Score': [0.2, 0.6, 0.8, 0.4, 0.9],
    'Risk Category': ['Low', 'Medium', 'High', 'Medium', 'High']
})


In [None]:
# Convert MRI Image to Base64 (Replace with actual MRI paths)
def encode_image(image_path):
    """Return the image file at ``image_path`` as a base64 ``data:`` URI.

    Parameters:
    - image_path: Path of the image file to embed.

    Returns:
    - A string of the form ``data:<mime>;base64,<payload>``, or None when
      ``image_path`` is not an existing regular file.
    """
    import mimetypes  # local import keeps this cell self-contained

    # isfile (not exists): a directory path must yield None instead of making
    # open() raise.
    if not os.path.isfile(image_path):
        return None

    # Label the payload with the actual image type (e.g. image/jpeg); keep the
    # PNG label as the fallback when the extension is unknown or not an image.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"

    with open(image_path, "rb") as img_file:
        payload = base64.b64encode(img_file.read()).decode()
    return f"data:{mime_type};base64,{payload}"

# Sample MRI Image Path (Replace with actual images)
mri_image_path = "sample_mri.png"
encoded_image = encode_image(mri_image_path)  # None when the file is missing

# Dash App Setup
app = dash.Dash(__name__)
app.layout = html.Div([
    html.H1("MRI Clinical Decision Support Dashboard"),

    # Risk Category Filter
    html.Label("Filter by Risk Category:"),
    dcc.Dropdown(
        id='risk-filter',
        # 'All' has to be a real option: it is the default value and the value the
        # callback treats as "no filter". Without it the user could never get back
        # to the unfiltered view after picking a category.
        options=[{'label': 'All', 'value': 'All'}] + [
            {'label': cat, 'value': cat} for cat in data['Risk Category'].unique()
        ],
        value='All',
        clearable=False
    ),

    # Risk Distribution Chart
    dcc.Graph(id='risk-chart'),

    # MRI Image Display
    html.H3("MRI Scan"),
    html.Img(id='mri-image', src=encoded_image, style={'width': '50%'}),

    # Patient Risk Table
    html.H3("Patient Risk Profiles"),
    html.Div(id='risk-table')
])

@app.callback(
    Output('risk-chart', 'figure'),
    Output('risk-table', 'children'),
    Input('risk-filter', 'value')
)
def update_dashboard(selected_risk):
    """Rebuild the histogram and the patient table for the chosen risk category.

    Parameters:
    - selected_risk: Dropdown value; 'All' means no filtering.

    Returns:
    - (figure, table): the Plotly histogram and an html.Table of the shown rows.
    """
    if selected_risk == 'All':
        subset = data
    else:
        subset = data[data['Risk Category'] == selected_risk]

    chart = px.histogram(subset, x='Risk Category', title='Risk Category Distribution', color='Risk Category')

    # One header row followed by one table row per remaining patient.
    column_names = list(subset.columns)
    header_row = html.Tr([html.Th(name) for name in column_names])
    body_rows = []
    for position in range(len(subset)):
        record = subset.iloc[position]
        body_rows.append(html.Tr([html.Td(record[name]) for name in column_names]))

    return chart, html.Table([header_row] + body_rows)


# Start the Dash development server when this code runs as the main module.
if __name__ == '__main__':
    app.run(debug=True)

In [None]:
import streamlit as st
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from PIL import Image
import cv2
import os

In [None]:
# Load pre-trained model
MODEL_PATH = "mri_model.h5"  # Ensure the model path is correct
if os.path.exists(MODEL_PATH):
    model = load_model(MODEL_PATH)
else:
    # No model file on disk: report it in the Streamlit page and stop the script,
    # so the prediction code further down is not reached without `model`.
    st.error("Model file not found! Please upload the trained model.")
    st.stop()

# Function to preprocess image
def preprocess_image(image):
    """Turn a PIL image into a normalized batch of one for the model.

    Parameters:
    - image: PIL image in any mode (e.g. grayscale "L", "RGB" or "RGBA").

    Returns:
    - Float array of shape (1, 224, 224, 3) with values scaled to [0, 1].
    """
    # Grayscale scans and PNGs with an alpha channel would otherwise produce a
    # batch without exactly three channels. Assumes the saved model takes
    # 3-channel input like the ResNet50 defined in this notebook - TODO confirm.
    image = image.convert("RGB")
    image = image.resize((224, 224))  # Resize to model's expected input size
    image = np.array(image) / 255.0  # Normalize pixel values
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    return image

In [None]:

# Streamlit UI
# Page title and a one-line instruction for the user.
st.title("AI-Powered MRI Analysis System")
st.write("Upload an MRI scan to analyze and get predictions.")


In [None]:

# Upload image
uploaded_file = st.file_uploader("Choose an MRI image...", type=["jpg", "png", "jpeg"])

# Nothing is predicted until a file has been provided.
if uploaded_file is not None:
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded MRI Image", use_column_width=True)

    # Preprocess and predict
    processed_image = preprocess_image(image)
    prediction = model.predict(processed_image)
    probability = prediction[0][0]  # Assuming binary classification

    # Display results
    # `probability` is read as the positive-class score; the negative branch
    # reports its complement (1 - probability) as the confidence.
    st.subheader("Prediction Result")
    if probability > 0.5:
        st.success(f"Positive MRI scan with {probability * 100:.2f}% confidence")
    else:
        st.warning(f"Negative MRI scan with {(1 - probability) * 100:.2f}% confidence")


In [None]:
# --- Implement baseline models (Random Forest, SVM, XGBoost) ---
print("\n--- Baseline Model Training ---")
# Estimators are kept in a dict keyed by display name so that they can be
# trained and reported in a single loop.
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(kernel='linear', probability=True),  # probability=True enables predict_proba
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss')
}

# NOTE: the loop variable is the global name `model`; after this cell it refers
# to the last estimator in the dict.
for name, model in models.items():
    print(f"\nTraining {name}:")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    # Column 1 of predict_proba is used as the score, which assumes binary labels.
    auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    print(f"{name} Accuracy: {acc:.4f}, AUC-ROC: {auc:.4f}")
    print(classification_report(y_test, y_pred))


In [None]:
import os

# Dataset location, redefined for this phase.
# NOTE(review): the spaces before some '/' separators and the spelling "Traning"
# are part of the literal path - verify against the real Drive folder names.
dataset_path = "/content/drive/MyDrive/University /Spring 2025/Data Mining and Machine Learning /Project/GAN-Traning Images"

# Check if the dataset path exists
# Only reports the outcome; a missing path does not stop the notebook here.
if os.path.exists(dataset_path):
    print(f"✅ Dataset path exists: {dataset_path}")
else:
    print(f"❌ Dataset path does NOT exist: {dataset_path}")


In [None]:

# List the contents of the dataset folder
# os.listdir raises if dataset_path does not exist.
print("Contents of dataset folder:")
print(os.listdir(dataset_path))

In [None]:
# (Optional) If you expect subdirectories for classes, check for them
# Keeps only the entries of dataset_path that are directories.
subdirectories = [d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]
print("Detected subdirectories:", subdirectories)

In [None]:
# (Optional) If you expect image files directly in the dataset path (not recommended for flow_from_directory), list a few
# At most the first 10 matching names are kept; the extension test is case-sensitive.
image_files = [f for f in os.listdir(dataset_path) if f.endswith(('.jpg', '.jpeg', '.png'))][:10]
print("Sample image files (if any) in the main directory:", image_files)


In [None]:
# --- Deep Learning models (CNNs like EfficientNet, ResNet, Transformer-based models) ---
print("\n--- Deep Learning Model Training (ResNet50) ---")
dataset_path = "/content/drive/MyDrive/University /Spring 2025/Data Mining and Machine Learning /Project/GAN-Traning Images" # Update with actual path

# Both generators below read dataset_path itself and the train/validation split
# comes from validation_split, so the expected layout is one sub-folder per class
# directly under dataset_path:
# dataset_path/class1/*.jpg
# dataset_path/class2/*.jpg
# (separate train/ and validation/ folders would themselves be read as classes)

# Pixel values are scaled to [0, 1]; 20% of the images are reserved for validation.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_data = train_datagen.flow_from_directory(
    dataset_path,  # Main dataset directory
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',  # Or 'categorical' for multi-class
    subset='training'  # Use the training subset
)

val_data = train_datagen.flow_from_directory(
    dataset_path,  # Main dataset directory
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',  # Or 'categorical' for multi-class
    subset='validation'  # Use the validation subset
)

In [None]:
# Define CNN Model (ResNet50)
# ImageNet-pretrained backbone without its classification head.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False # Freeze base layers
# New head: global average pooling -> Dense(128, relu) -> Dropout(0.5) -> one sigmoid unit.
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)
# NOTE(review): this rebinds the global `model`. No compile()/fit() call for it
# appears in the cells visible here - confirm that training happens elsewhere.
model = Model(inputs=base_model.input, outputs=output)

In [None]:
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix, precision_recall_fscore_support
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Function to evaluate and display metrics
def evaluate_model(model, X_test, y_test, model_name):
    """Print binary-classification metrics for ``model`` and plot its confusion matrix.

    Parameters:
    - model: Fitted model with ``predict``; ``predict_proba`` is used when present.
    - X_test: Test inputs passed to the model.
    - y_test: True binary labels.
    - model_name: Name used in the printed report and the plot title.

    Returns:
    - None. Metrics are printed and the confusion matrix is shown as a heatmap.
    """
    raw_pred = model.predict(X_test)

    if hasattr(model, "predict_proba"):  # For ML models with probability outputs
        y_pred = raw_pred
        y_pred_proba = model.predict_proba(X_test)[:, 1]
    else:
        # For CNNs, predict() already returns probabilities. Keep them as the
        # score for AUC and threshold at 0.5 to get the hard labels that the
        # label-based metrics below require (continuous values make them raise).
        y_pred_proba = np.asarray(raw_pred).ravel()
        y_pred = (y_pred_proba > 0.5).astype(int)

    precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')
    auc = roc_auc_score(y_test, y_pred_proba)

    print(f"Model: {model_name}")
    print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}, AUC-ROC: {auc:.4f}")
    print("Classification Report:\n", classification_report(y_test, y_pred))

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(5, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["Class 0", "Class 1"], yticklabels=["Class 0", "Class 1"])
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title(f"Confusion Matrix for {model_name}")
    plt.show()

In [None]:
# Evaluate ML Models
# The fitted estimators live in the `models` dict built by the baseline-training
# cell, keyed by display name; no rf / svm / xgb variables are defined in the
# visible notebook cells.
for model_name, fitted_model in models.items():
    evaluate_model(fitted_model, X_test, y_test, model_name)

In [None]:
import numpy as np

def classify_risk(glcm_contrast, entropy, skewness, ml_confidence):
    """
    Assign a patient risk level from texture features and the model score.

    The rules are checked in order and the first match wins:
    1. ml_confidence below 0.5                      -> "Low"
    2. glcm_contrast above 0.6 and entropy above 5  -> "High"
    3. skewness outside [-0.5, 0.5]                 -> "Medium"
    4. ml_confidence of at least 0.7                -> "High"
    5. anything else                                -> "Medium"

    Parameters:
    - glcm_contrast: Measure of texture contrast from MRI
    - entropy: Image entropy, indicating disorder
    - skewness: Statistical measure of asymmetry in image features
    - ml_confidence: Probability score from ML model (0 to 1)

    Returns:
    - Risk level as a string ("Low", "Medium", "High")
    """
    if ml_confidence < 0.5:
        return "Low"
    if glcm_contrast > 0.6 and entropy > 5.0:
        return "High"
    if abs(skewness) > 0.5:
        return "Medium"
    return "High" if ml_confidence >= 0.7 else "Medium"


In [None]:

# Example usage with test data
# Each tuple is (glcm_contrast, entropy, skewness, ml_confidence), i.e. the
# positional parameters of classify_risk in order.
example_patients = [
    (0.5, 4.2, 0.3, 0.8),  # High risk (ML confidence high)
    (0.3, 3.8, -0.2, 0.4), # Low risk (ML confidence low)
    (0.7, 5.5, 0.6, 0.6),  # High risk (high contrast & entropy)
    (0.4, 4.0, -0.6, 0.6)  # Medium risk (skewness out of range)
]

# Print the result for each patient with a 1-based patient number.
for i, patient in enumerate(example_patients):
    risk_level = classify_risk(*patient)
    print(f"Patient {i+1} Risk Level: {risk_level}")


In [None]:
#Phase 4
import dash
from dash import dcc, html, Input, Output
import plotly.express as px
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import base64
from io import BytesIO
import streamlit as st
st.title("Test Dashboard")
st.write("If you see this, Streamlit is working!")


In [None]:
# Sample MRI Data & Risk Classification (Replace with real data)
# Hard-coded placeholder table with five patients; the columns are
# 'Patient ID', 'Risk Score' and 'Risk Category'.
data = pd.DataFrame({
    'Patient ID': [101, 102, 103, 104, 105],
    'Risk Score': [0.2, 0.6, 0.8, 0.4, 0.9],
    'Risk Category': ['Low', 'Medium', 'High', 'Medium', 'High']
})


In [None]:
# Convert MRI Image to Base64 (Replace with actual MRI paths)
def encode_image(image_path):
    """Return the image file at ``image_path`` as a base64 ``data:`` URI.

    Parameters:
    - image_path: Path of the image file to embed.

    Returns:
    - A string of the form ``data:<mime>;base64,<payload>``, or None when
      ``image_path`` is not an existing regular file.
    """
    import mimetypes  # local import keeps this cell self-contained

    # isfile (not exists): a directory path must yield None instead of making
    # open() raise.
    if not os.path.isfile(image_path):
        return None

    # Label the payload with the actual image type (e.g. image/jpeg); keep the
    # PNG label as the fallback when the extension is unknown or not an image.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"

    with open(image_path, "rb") as img_file:
        payload = base64.b64encode(img_file.read()).decode()
    return f"data:{mime_type};base64,{payload}"

# Sample MRI Image Path (Replace with actual images)
mri_image_path = "sample_mri.png"
encoded_image = encode_image(mri_image_path)  # None when the file is missing

# Dash App Setup
app = dash.Dash(__name__)
app.layout = html.Div([
    html.H1("MRI Clinical Decision Support Dashboard"),

    # Risk Category Filter
    html.Label("Filter by Risk Category:"),
    dcc.Dropdown(
        id='risk-filter',
        # 'All' has to be a real option: it is the default value and the value the
        # callback treats as "no filter". Without it the user could never get back
        # to the unfiltered view after picking a category.
        options=[{'label': 'All', 'value': 'All'}] + [
            {'label': cat, 'value': cat} for cat in data['Risk Category'].unique()
        ],
        value='All',
        clearable=False
    ),

    # Risk Distribution Chart
    dcc.Graph(id='risk-chart'),

    # MRI Image Display
    html.H3("MRI Scan"),
    html.Img(id='mri-image', src=encoded_image, style={'width': '50%'}),

    # Patient Risk Table
    html.H3("Patient Risk Profiles"),
    html.Div(id='risk-table')
])

@app.callback(
    Output('risk-chart', 'figure'),
    Output('risk-table', 'children'),
    Input('risk-filter', 'value')
)
def update_dashboard(selected_risk):
    """Rebuild the histogram and the patient table for the chosen risk category.

    Parameters:
    - selected_risk: Dropdown value; 'All' means no filtering.

    Returns:
    - (figure, table): the Plotly histogram and an html.Table of the shown rows.
    """
    if selected_risk == 'All':
        subset = data
    else:
        subset = data[data['Risk Category'] == selected_risk]

    chart = px.histogram(subset, x='Risk Category', title='Risk Category Distribution', color='Risk Category')

    # One header row followed by one table row per remaining patient.
    column_names = list(subset.columns)
    header_row = html.Tr([html.Th(name) for name in column_names])
    body_rows = []
    for position in range(len(subset)):
        record = subset.iloc[position]
        body_rows.append(html.Tr([html.Td(record[name]) for name in column_names]))

    return chart, html.Table([header_row] + body_rows)


# Start the Dash development server when this code runs as the main module.
if __name__ == '__main__':
    app.run(debug=True)