In [1]:
!apt-get install -y graphviz
!pip install graphviz

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
graphviz is already the newest version (2.42.2-6ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import graphviz

# Read the dataset from the notebook's working directory.
df = pd.read_csv("heart.csv")

# Integer-encode every object-dtype column, each with its own LabelEncoder.
# The columns are collected first so the frame is not inspected while it is
# being rewritten.
object_columns = [name for name in df.columns if df[name].dtype == 'object']
for column in object_columns:
    df[column] = LabelEncoder().fit_transform(df[column])

# Separate the label column from the predictor columns.
y = df['target']
X = df.drop(columns='target')

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the scores recorded in this notebook are near-perfect for the
# unconstrained tree and the forest while the depth-4 tree reaches 0.8. One
# possible cause is duplicated rows ending up on both sides of the split --
# check `df.duplicated().sum()` before trusting these numbers.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Baseline: a decision tree with no depth limit, seeded for reproducibility.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

# Export the fitted tree as Graphviz DOT source (out_file=None returns a string).
# Class labels come from the fitted estimator instead of a hard-coded
# ['0', '1'], so the diagram stays correct if the target has different or more
# labels; `classes_` is already in the ascending order export_graphviz expects.
# Feature names are passed as a plain list rather than a pandas Index.
dot_data = export_graphviz(
    dt,
    out_file=None,
    feature_names=list(X.columns),
    class_names=[str(label) for label in dt.classes_],
    filled=True,
)
graph = graphviz.Source(dot_data)
# Writes the rendered tree to disk under the base name "decision_tree".
graph.render("decision_tree")

# Accuracy of the baseline tree on the held-out rows.
y_pred_dt = dt.predict(X_test)
accuracy_dt = accuracy_score(y_test, y_pred_dt)

# Depth-limited tree (max_depth=4) for comparison with the unconstrained one.
# The limit is applied while the tree is grown; no post-hoc pruning is done.
dt_pruned = DecisionTreeClassifier(random_state=42, max_depth=4).fit(X_train, y_train)
y_pred_pruned = dt_pruned.predict(X_test)
accuracy_pruned = accuracy_score(y_true=y_test, y_pred=y_pred_pruned)

# Random forest with default hyperparameters, seeded for reproducibility.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
accuracy_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)

# Importance of each feature in the fitted forest, largest first.
feature_scores = pd.Series(data=rf.feature_importances_, index=X.columns)
importances = feature_scores.sort_values(ascending=False)

# 5-fold cross-validation over the full dataset (X, y), not just the training
# split. cross_val_score fits a fresh copy of the estimator on each fold, so
# the forest fitted above is not reused.
cv_scores = cross_val_score(estimator=rf, X=X, y=y, cv=5)


In [3]:
# Held-out accuracy of each model, in the order the models were trained.
for label, score in (
    ("Decision Tree Accuracy:", accuracy_dt),
    ("Pruned Decision Tree Accuracy:", accuracy_pruned),
    ("Random Forest Accuracy:", accuracy_rf),
):
    print(label, score)

# Random-forest feature importances, already sorted largest first.
print("\nFeature Importances (Random Forest):")
print(importances)

# Per-fold cross-validation scores followed by their mean.
print("\nCross-Validation Scores (Random Forest):", cv_scores)
print("Mean CV Score:", cv_scores.mean())


Decision Tree Accuracy: 0.9853658536585366
Pruned Decision Tree Accuracy: 0.8
Random Forest Accuracy: 0.9853658536585366

Feature Importances (Random Forest):
cp          0.135072
ca          0.127327
thalach     0.122169
oldpeak     0.121905
thal        0.110518
age         0.077908
chol        0.074822
trestbps    0.071171
exang       0.057594
slope       0.045782
sex         0.028731
restecg     0.018557
fbs         0.008444
dtype: float64

Cross-Validation Scores (Random Forest): [1.         1.         1.         1.         0.98536585]
Mean CV Score: 0.9970731707317073
