# **Import necessary libraries**

In [9]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# **Load and Explore the Dataset**

In [10]:
# Location of the dataset and the column names to apply to it.
# `names=` is passed to read_csv, so pandas treats every line of the file as data.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
column_names = ["age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang", "oldpeak", "slope", "ca", "thal", "target"]

# Entries written as "?" are parsed as NaN; comma is read_csv's default separator.
data = pd.read_csv(url, names=column_names, na_values="?")

# Drop rows with missing values
data = data.dropna()

# Explore the dataset
print(data.head())
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it must not be wrapped in print() (that would emit a stray "None").
data.info()

    age  sex   cp  trestbps   chol  fbs  restecg  thalach  exang  oldpeak  \
0  63.0  1.0  1.0     145.0  233.0  1.0      2.0    150.0    0.0      2.3   
1  67.0  1.0  4.0     160.0  286.0  0.0      2.0    108.0    1.0      1.5   
2  67.0  1.0  4.0     120.0  229.0  0.0      2.0    129.0    1.0      2.6   
3  37.0  1.0  3.0     130.0  250.0  0.0      0.0    187.0    0.0      3.5   
4  41.0  0.0  2.0     130.0  204.0  0.0      2.0    172.0    0.0      1.4   

   slope   ca  thal  target  
0    3.0  0.0   6.0       0  
1    2.0  3.0   3.0       2  
2    2.0  2.0   7.0       1  
3    3.0  0.0   3.0       0  
4    1.0  0.0   3.0       0  
<class 'pandas.core.frame.DataFrame'>
Int64Index: 297 entries, 0 to 301
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       297 non-null    float64
 1   sex       297 non-null    float64
 2   cp        297 non-null    float64
 3   trestbps  297 non-null    float64
 4   chol      297

# **Data Preprocessing**

In [11]:
# One-hot encode the multi-level categorical columns into a NEW frame.
# `data` is deliberately left untouched: reassigning it here would make this
# cell fail with a KeyError on a second run, because the columns being encoded
# ("cp", "restecg", "slope", "thal") would already have been replaced.
data_encoded = pd.get_dummies(data, columns=["cp", "restecg", "slope", "thal"])

# Split data into features (X) and labels (y)
X = data_encoded.drop(columns='target')
y = data_encoded['target']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# **Create and Train the Classification Model**

In [12]:
# Random forest with a fixed seed so repeated runs build the same ensemble
model = RandomForestClassifier(random_state=42)

# Train on the training split only; the test split stays unseen until evaluation
model.fit(X=X_train, y=y_train)

# **Make Predictions and Evaluate the Model**

In [13]:
# Class predictions for the held-out test rows
y_pred = model.predict(X_test)

# Overall fraction of test rows whose predicted class matches the true class
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class diagnostics: each entry is a heading plus the metric printed under it
for heading, metric in (
    ("\nConfusion Matrix:", confusion_matrix),
    ("\nClassification Report:", classification_report),
):
    print(heading)
    print(metric(y_test, y_pred))

Accuracy: 0.6166666666666667

Confusion Matrix:
[[36  0  0  0  0]
 [ 7  0  1  1  0]
 [ 2  1  1  1  0]
 [ 1  3  2  0  1]
 [ 2  1  0  0  0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.75      1.00      0.86        36
           1       0.00      0.00      0.00         9
           2       0.25      0.20      0.22         5
           3       0.00      0.00      0.00         7
           4       0.00      0.00      0.00         3

    accuracy                           0.62        60
   macro avg       0.20      0.24      0.22        60
weighted avg       0.47      0.62      0.53        60



# **Plot: Age, Max Heart Rate, and Cholesterol Colored by Target**

In [14]:
# Human-readable titles for the three plotted columns, shared by the
# scatter labels and the scene axis titles below.
axis_titles = {'age': 'Age', 'thalach': 'Max Heart Rate', 'chol': 'Cholesterol'}

# 3D scatter of the three columns, with each point colored by its target value
fig = px.scatter_3d(
    data,
    x='age',
    y='thalach',
    z='chol',
    color='target',
    opacity=0.7,
    labels={**axis_titles, 'target': 'Target'},
)

# Set the scene's axis titles explicitly
fig.update_layout(
    scene={
        'xaxis_title': axis_titles['age'],
        'yaxis_title': axis_titles['thalach'],
        'zaxis_title': axis_titles['chol'],
    }
)

fig.show()