# In [2]:
from sklearn.ensemble import RandomForestClassifier
import cv2
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from PIL import Image  # For handling .webp images

# Class index -> human-readable diagnosis. The position in the list is the
# integer label the classifiers are expected to emit.
HEART_CONDITIONS = dict(enumerate([
    "Normal Heart",
    "Ventricular Septal Defect (VSD)",
    "Atrial Septal Defect (ASD)",
    "Arrhythmia",
    "Cardiomyopathy",
]))

# Diagnosis name -> advice text shown to the user. Keys must match the values
# of HEART_CONDITIONS exactly, since lookups go index -> name -> advice.
TREATMENT_RECOMMENDATIONS = {
    "Normal Heart": (
        "No immediate treatment required, "
        "but regular check-ups are recommended."
    ),
    "Ventricular Septal Defect (VSD)": (
        "Surgical repair or catheter-based intervention may be required. "
        "Consult a cardiologist."
    ),
    "Atrial Septal Defect (ASD)": (
        "Closure via catheter-based procedure or surgery may be required. "
        "Consult a specialist."
    ),
    "Arrhythmia": (
        "Medication or an implanted device (pacemaker) may be needed. "
        "Consult a cardiologist."
    ),
    "Cardiomyopathy": (
        "Lifestyle changes, medication, or surgery may be required. "
        "Seek medical advice."
    ),
}

# Convert WebP images to JPG (Optional if OpenCV can't read them)
def convert_webp_to_jpg(image_dir):
    """Replace every ``.webp`` file directly inside *image_dir* with a JPEG.

    Each matching file is decoded, converted to RGB, written next to the
    original with a ``.jpg`` extension, and the original is then deleted.
    Sub-directories are not descended into, and only names ending in the
    lowercase suffix ``.webp`` are considered.

    Args:
        image_dir: Path of the directory to scan.

    Returns:
        None. The directory is modified in place.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be
            read, written, or removed.
    """
    suffix = ".webp"
    for img_name in os.listdir(image_dir):
        if not img_name.endswith(suffix):
            continue
        img_path = os.path.join(image_dir, img_name)
        # Swap only the trailing extension. str.replace would also rewrite
        # ".webp" appearing earlier in the path (e.g. in a directory name),
        # sending the JPEG to the wrong location.
        new_path = img_path[:-len(suffix)] + ".jpg"
        # Close the source file explicitly before deleting it; an open
        # handle can make os.remove fail on some platforms.
        with Image.open(img_path) as img:
            img.convert("RGB").save(new_path, "JPEG")
        os.remove(img_path)  # Remove the original WebP file

# Directory that holds the X-ray images; reused below when loading features.
image_dir = "heart_xrays/"
# Executed as soon as this cell/script runs and modifies the directory in
# place: each .webp file is saved as .jpg and the original is deleted.
convert_webp_to_jpg(image_dir)  # Convert before processing

# Load and preprocess images (convert to feature vectors)
def load_images_as_features(image_dir, image_size=(128, 128), num_classes=5):
    """Read every image in *image_dir* into a flat grayscale feature vector.

    Files are visited in sorted name order so that the returned sample order
    is reproducible (``os.listdir`` makes no ordering guarantee). Entries
    that OpenCV cannot decode are reported with a warning and skipped.

    NOTE: the labels are *simulated* - each image gets a uniformly random
    class index that has no relation to its content. Replace this with real
    annotations before drawing any conclusion from a trained model.

    Args:
        image_dir: Directory to scan (not recursive).
        image_size: Target size handed to ``cv2.resize``; every feature
            vector has ``image_size[0] * image_size[1]`` entries.
        num_classes: Number of distinct simulated labels; labels are drawn
            from ``0 .. num_classes - 1``.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays with one entry per
        successfully loaded image. Both arrays are empty when nothing could
        be loaded.
    """
    images = []
    labels = []
    for img_name in sorted(os.listdir(image_dir)):
        img_path = os.path.join(image_dir, img_name)
        # Deliberately best-effort: one bad file must not abort the whole load.
        try:
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                print(f"Warning: Could not read {img_path}")
                continue
            img = cv2.resize(img, image_size)
            images.append(img.flatten())  # Flatten image
            # Simulating multi-class labels (random placeholder, see docstring)
            labels.append(np.random.randint(0, num_classes))
        except Exception as e:
            print(f"Error processing {img_path}: {e}")

    return np.array(images), np.array(labels)

# Load images
# One flattened feature row and one label per readable file in image_dir.
X_images, y_images = load_images_as_features(image_dir)

# Ensure images are loaded
# Stop here when nothing was loaded, before any splitting or training is attempted.
if len(X_images) == 0:
    raise ValueError("No images found! Check the image directory and format.")

# Split dataset
# 80/20 train/test split with a fixed seed. The held-out X_test_img /
# y_test_img are not used again in this section.
X_train_img, X_test_img, y_train_img, y_test_img = train_test_split(
    X_images, y_images, test_size=0.2, random_state=42
)

# Train Random Forest for image classification (Multi-Class)
# NOTE(review): the labels returned by load_images_as_features are randomly
# generated, so this model cannot learn a real image -> condition mapping
# until true labels are supplied.
image_model = RandomForestClassifier(n_estimators=100, random_state=42)
image_model.fit(X_train_img, y_train_img)

# Simulate structured data model (Ensure this is trained before use)
# The model's own seed is fixed, but the simulated data below comes from
# NumPy's global RNG, which is not seeded here - results vary between runs.
structured_model = RandomForestClassifier(n_estimators=50, random_state=42)
X_train_structured = np.random.rand(100, 5)  # Simulated structured data (replace with real data): 100 rows x 5 features
y_train_structured = np.random.randint(0, 5, 100)  # Simulated multi-class labels in 0..4
structured_model.fit(X_train_structured, y_train_structured)

# Function to predict heart condition and provide recommendation
def predict_and_recommend(patient_data, image_path, image_size=(128, 128)):
    """Predict a heart condition from tabular data plus an image.

    The module-level ``structured_model`` classifies *patient_data* and
    ``image_model`` classifies the image; the larger of the two class
    indices is used as the final diagnosis.

    Args:
        patient_data: One patient's feature values, in any row-like form
            ``pandas.DataFrame([patient_data])`` accepts. It must supply the
            same number of features ``structured_model`` was fitted on.
        image_path: Path of the image to classify.
        image_size: Size the image is resized to before flattening. It must
            match the size used for the images ``image_model`` was fitted on.

    Returns:
        A ``(condition, treatment)`` tuple of strings taken from
        ``HEART_CONDITIONS`` and ``TREATMENT_RECOMMENDATIONS``.

    Raises:
        ValueError: If the image at *image_path* cannot be read.
        KeyError: If the predicted class has no entry in the lookup tables.
    """
    # structured_model was fitted on a bare ndarray. Build the row through
    # pandas (keeps dict/Series/array inputs working) but pass only the
    # values, so column labels cannot trigger a feature-name mismatch.
    structured_features = pd.DataFrame([patient_data]).to_numpy()
    structured_pred = structured_model.predict(structured_features)[0]

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError(f"Could not read the image: {image_path}")
    # Same preprocessing as training: resize, then flatten into one sample row.
    img = cv2.resize(img, image_size).flatten().reshape(1, -1)

    image_pred = image_model.predict(img)[0]

    # Combine predictions (if they differ, prioritize the more severe condition)
    # NOTE(review): this treats a higher class index as "more severe", which
    # the HEART_CONDITIONS ordering does not obviously guarantee - confirm
    # with a clinician or replace with an explicit severity ranking.
    # int() turns the NumPy scalar into a plain key for the lookup.
    final_prediction = int(max(structured_pred, image_pred))
    condition = HEART_CONDITIONS[final_prediction]
    treatment = TREATMENT_RECOMMENDATIONS[condition]

    return condition, treatment

# Example usage
# Demo run: five random feature values stand in for a real patient record,
# matching the five features the simulated structured model was fitted on.
test_patient_data = np.random.rand(5)  # Simulated structured patient data
test_image_path = "heart_xrays/heartimg1.jpg"  # Replace with actual image path

# Top-level boundary of the demo: any failure (e.g. an unreadable image) is
# reported as a one-line message instead of a traceback.
try:
    condition, treatment = predict_and_recommend(test_patient_data, test_image_path)
    print("Detected Condition:", condition)
    print("Recommended Treatment:", treatment)
except Exception as e:
    print("Error:", e)


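# Notebook output: ModuleNotFoundError: No module named 'sklearn'
# (the `sklearn` module is provided by the `scikit-learn` package, e.g. `pip install scikit-learn`)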