In [None]:
!pip install pandas numpy matplotlib seaborn scikit-learn joblib


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import GridSearchCV
import joblib

In [None]:
# Read heart.csv (path is relative to the current working directory) into a DataFrame
df = pd.read_csv('heart.csv')

# Preview the first five records as the cell output
df.head(5)


In [None]:
# Pairwise correlations between the columns of df, drawn as an annotated heatmap.
# numeric_only=True restricts the computation to numeric columns; without it,
# pandas >= 2.0 raises if the frame contains any non-numeric column.
corr_matrix = df.corr(numeric_only=True)

# Explicit figure/axes so the plot does not depend on pyplot's "current figure" state.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Correlation Matrix')
plt.show()

In [None]:
# Bar chart of how many rows fall under each value of the 'target' column.
fig, ax = plt.subplots()
sns.countplot(data=df, x='target', ax=ax)
ax.set_title('Distribution of Heart Disease Cases')
plt.show()

In [None]:
# Number of missing values per column (isna is the same check as isnull).
df.isna().sum()

In [None]:
# Keep only the first occurrence of each fully identical row.
is_repeat = df.duplicated()
df = df[~is_repeat]

In [None]:
# Separate predictors from the label.
# 'age_group' is not created in any of the cells shown here, so it is dropped only
# if present: errors='ignore' prevents a KeyError on a fresh Restart & Run All
# while still removing the column when the frame does carry it.
X = df.drop(columns=['target', 'age_group'], errors='ignore')  # Features
y = df['target']  # Target variable

# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test information enters the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Train a random forest on the scaled training features; the seed fixes the
# estimator's randomness so repeated runs give the same model.
rf = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)
rf  # show the fitted estimator as the cell output

In [None]:
# Predicted labels for the held-out (scaled) test features.
y_pred = rf.predict(X=X_test_scaled)

In [None]:
# Fraction of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

In [None]:
# Text summary of per-class metrics for the test predictions.
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Confusion matrix of test predictions, shown as an annotated heatmap.
# NOTE(review): the tick labels assume the matrix is ordered "no disease" first,
# "disease" second — confirm against the encoding of 'target'.
class_names = ['No Disease', 'Disease']
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(6, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()