In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the extended dataset and discard every row that contains a missing value
data = pd.read_csv('expanded_vsd_dataset.csv').dropna()

# Cleaning Dataset
# NOTE(review): dropna() above has already removed all rows with missing
# values, so the two fill steps below currently have nothing left to fill.
# If imputation is the intended strategy, the dropna() call would have to be
# removed or narrowed -- confirm which behaviour is wanted.

## Categorical Columns: any missing text value becomes the literal 'Unknown'
categorical_columns = data.select_dtypes(include=['object']).columns
categorical_part = data[categorical_columns]
data[categorical_columns] = categorical_part.fillna('Unknown')

## Numeric Columns: any missing number becomes that column's mean
numeric_columns = data.select_dtypes(include=['number']).columns
numeric_part = data[numeric_columns]
data[numeric_columns] = numeric_part.fillna(numeric_part.mean())

# Define features and labels for heart disease prediction
# The three label columns are pulled out as separate targets:
#   y_vsd       -> 'Has VSD'          (VSD detection)
#   y_condition -> 'Other Condition'  (other conditions)
#   y_severity  -> 'Severity'         (VSD severity)
label_columns = ['Has VSD', 'Other Condition', 'Severity']
y_vsd, y_condition, y_severity = (data[name] for name in label_columns)

# Everything that is not a label is a feature. 'Cholesterol' is one-hot
# encoded, dropping its first category level.
feature_frame = data.drop(columns=label_columns)
X = pd.get_dummies(feature_frame, columns=['Cholesterol'], drop_first=True)

# Split data into training and testing sets
# A single call splits the features and all three label series with the same
# shuffled row indices, so every y_*_train / y_*_test is guaranteed to stay
# row-aligned with X_train / X_test (80% train, 20% test, fixed seed).
(
    X_train, X_test,
    y_vsd_train, y_vsd_test,
    y_condition_train, y_condition_test,
    y_severity_train, y_severity_test,
) = train_test_split(
    X, y_vsd, y_condition, y_severity, test_size=0.2, random_state=42
)

# Train Models
# All three estimators use a fixed random_state of 42. fit() returns the
# fitted estimator itself, so construction and training are chained.

## VSD Prediction: a single decision tree
vsd_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_vsd_train)

## Condition Prediction: random forest with 100 trees
condition_model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
).fit(X_train, y_condition_train)

## Severity Prediction: random forest with 100 trees
severity_model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
).fit(X_train, y_severity_train)

# Example patient data
# One row of 13 values, matched positionally to the columns of X.
# NOTE(review): the row only lines up if X has exactly these 13 columns in
# this order -- verify against the dataset whenever the features change.
patient_data = [[170, 50.93, 110.15, 2.76, 99.59, 51.79, 76, 0, 0, 90, 75, 1, 1]]
patient_df = pd.DataFrame(data=patient_data, columns=X.columns)

# Predict VSD: a predicted label of 1 is reported as "Has VSD"
vsd_prediction = vsd_model.predict(patient_df)[0]
if vsd_prediction == 1:
    vsd_result = "Has VSD"
else:
    vsd_result = "VSD absent"
print(f"VSD Prediction: {vsd_result}")

# Predict Condition: the predicted label is printed as-is
condition_prediction = condition_model.predict(patient_df)[0]
print(f"Condition Prediction: {condition_prediction}")

# Predict Severity: the predicted label is printed as-is
severity_prediction = severity_model.predict(patient_df)[0]
print(f"Severity Prediction: {severity_prediction}")

# Accuracy Evaluation
# Score every trained model on the held-out test split. accuracy_score is the
# fraction of test rows whose predicted label matches the true label.
# The condition model is scored too, so all three trained models are covered.
vsd_accuracy = accuracy_score(y_vsd_test, vsd_model.predict(X_test))
condition_accuracy = accuracy_score(y_condition_test, condition_model.predict(X_test))
severity_accuracy = accuracy_score(y_severity_test, severity_model.predict(X_test))

print(f"VSD Prediction Accuracy: {vsd_accuracy:.2f}")
print(f"Condition Prediction Accuracy: {condition_accuracy:.2f}")
print(f"Severity Prediction Accuracy: {severity_accuracy:.2f}")

ModuleNotFoundError: No module named 'flask_cors'