In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the heart-disease dataset into a DataFrame.
# The CSV location is resolved in this order so the notebook is not tied to a
# single machine:
#   1. the HEART_DATA_PATH environment variable, when it is set;
#   2. heart_cleveland_upload.csv in the current working directory;
#   3. the original absolute path (kept so the existing setup keeps working).
# If none of these exists, pd.read_csv raises FileNotFoundError as before.
import os
from pathlib import Path

DATA_FILENAME = 'heart_cleveland_upload.csv'
LEGACY_DATA_PATH = 'C:\\Users\\Hardik\\Desktop\\ML_Pipeline\\03-supervised-models\\heart_cleveland_upload.csv'

data_path_override = os.environ.get('HEART_DATA_PATH')
if data_path_override:
    # An explicit override always wins, even if it points at a missing file,
    # so that a misconfiguration fails loudly instead of being silently ignored.
    data_path = Path(data_path_override)
elif Path(DATA_FILENAME).is_file():
    data_path = Path(DATA_FILENAME)
else:
    data_path = Path(LEGACY_DATA_PATH)

df = pd.read_csv(data_path)

In [None]:
# Separate the predictors from the label: 'condition' is the column to predict,
# every remaining column of df is used as a feature.
target_column = 'condition'
X = df.drop(columns=[target_column])
y = df[target_column]

In [None]:
# Hold out 20% of the rows as a test set; the seed is fixed so the same split
# is produced on every run.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:
# Standardize the features. The scaler is fitted on the training split only and
# then applied to both splits, so no test-set statistics leak into training.
# NOTE(review): tree-based models are generally insensitive to feature scaling,
# so this step is optional for the decision tree / random forest fitted below.
# After this cell X_train and X_test are NumPy arrays, not DataFrames.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Baseline model: a single decision tree with default hyperparameters
# (only the random seed is pinned), trained on the scaled training split.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X=X_train, y=y_train)

In [None]:
# Score the decision tree on the held-out test split: overall accuracy first,
# then the per-class classification report.
y_pred_dt = dt.predict(X_test)
dt_accuracy = accuracy_score(y_test, y_pred_dt)
dt_report = classification_report(y_test, y_pred_dt)
print("Decision Tree Accuracy:", dt_accuracy)
print(dt_report)

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Ensemble model: a random forest of 100 trees with a pinned random seed,
# trained on the same scaled training split as the decision tree.
forest_params = {'n_estimators': 100, 'random_state': 42}
rf = RandomForestClassifier(**forest_params)
rf.fit(X=X_train, y=y_train)


In [None]:
# Score the random forest on the held-out test split: overall accuracy first,
# then the per-class classification report.
y_pred_rf = rf.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_report = classification_report(y_test, y_pred_rf)
print("Random Forest Accuracy:", rf_accuracy)
print(rf_report)

In [None]:
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Draw the fitted decision tree and save the figure as a PNG.
# Feature labels are taken from X (the frame the train/test split was made from)
# instead of df.columns[:-1], so they stay correct even if 'condition' is not
# the last column of df. They are passed as a plain list because some
# scikit-learn releases reject a pandas Index for feature_names.
feature_names = list(X.columns)

fig, ax = plt.subplots(figsize=(20, 10))
plot_tree(
    dt,
    filled=True,
    feature_names=feature_names,
    class_names=["No Disease", "Disease"],
    ax=ax,
)
ax.set_title("Decision Tree Structure")
fig.savefig("decision_tree_visualization.png", dpi=300, bbox_inches='tight')  # Save the figure
plt.show()


In [None]:
# Draw the first individual tree of the fitted random forest and save it as a PNG.
# Feature labels are taken from X (the frame the train/test split was made from)
# instead of df.columns[:-1], so they stay correct even if 'condition' is not
# the last column of df. They are passed as a plain list because some
# scikit-learn releases reject a pandas Index for feature_names.
feature_names = list(X.columns)
first_forest_tree = rf.estimators_[0]

fig, ax = plt.subplots(figsize=(20, 10))
plot_tree(
    first_forest_tree,
    filled=True,
    feature_names=feature_names,
    class_names=["No Disease", "Disease"],
    ax=ax,
)
ax.set_title("First Tree from Random Forest")
fig.savefig("random_forest_tree_0.png", dpi=300, bbox_inches='tight')  # Save the figure
plt.show()


In [None]:
import seaborn as sns
import pandas as pd

# Plot the ten largest feature importances reported by the random forest and
# save the chart as a PNG.
# The importances are labelled with X.columns (the feature frame the split was
# made from) instead of df.columns[:-1]; the positional slice would silently
# attach the wrong names if 'condition' were not the last column of df.
importances = pd.Series(rf.feature_importances_, index=X.columns)

fig, ax = plt.subplots(figsize=(10, 6))
importances.nlargest(10).plot(kind='barh', ax=ax)
ax.set_title("Top 10 Important Features - Random Forest")
ax.set_xlabel("Feature Importance")
ax.set_ylabel("Features")
fig.savefig("rf_feature_importance.png", dpi=300, bbox_inches='tight')  # Save the figure
plt.show()
