<a href="https://colab.research.google.com/github/jorden17/Prediction_Sysytem/blob/main/Heart_Disease2_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
# Importing the Dependencies
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

In [20]:
# Load the Dataset
# Replace this with your actual dataset (e.g., heart disease dataset)
# Example: Using the UCI Heart Disease dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
columns = [
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang",
    "oldpeak", "slope", "ca", "thal", "target"
]
data = pd.read_csv(url, header=None, names=columns, na_values="?")


In [21]:
# Drop rows with missing values
data = data.dropna()

In [22]:
# Features and Target
X = data.drop("target", axis=1)
y = (data["target"] > 0).astype(int)  # Binary classification: 1 for heart disease, 0 for no heart disease


In [23]:

# Splitting the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [24]:
# Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [25]:
# Hyperparameter Tuning for Random Forest
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='accuracy', verbose=2)
grid_search.fit(X_train, y_train)


Fitting 5 folds for each of 108 candidates, totalling 540 fits
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.2s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.2s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.4s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.2s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.2s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END max_de

In [26]:

# Best Parameters and Model
best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)


Best Parameters: {'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 200}


In [27]:
# Model Evaluation
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy on Test Data:", accuracy)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Accuracy on Test Data: 0.8333333333333334

Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.91      0.85        32
           1       0.88      0.75      0.81        28

    accuracy                           0.83        60
   macro avg       0.84      0.83      0.83        60
weighted avg       0.84      0.83      0.83        60


Confusion Matrix:
[[29  3]
 [ 7 21]]


In [28]:
# Save the Model and Scaler
joblib.dump(best_model, "heart_disease_model.sav")
joblib.dump(scaler, "scaler.sav")
print("Model and scaler saved as 'heart_disease_model.sav' and 'scaler.sav'.")


Model and scaler saved as 'heart_disease_model.sav' and 'scaler.sav'.


In [29]:
# Download the Model and Scaler
from google.colab import files
files.download("heart_disease_model.sav")
files.download("scaler.sav")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [30]:
import joblib
import numpy as np

# Load the model and scaler
model = joblib.load("heart_disease_model.sav")
scaler = joblib.load("scaler.sav")

# Example input
input_data = (63, 1, 3, 145, 233, 1, 0, 150, 0, 2.3, 0, 0, 1)  # Replace with actual input
input_data_as_numpy_array = np.asarray(input_data).reshape(1, -1)

# Scale the input data
input_data_scaled = scaler.transform(input_data_as_numpy_array)

# Make a prediction
prediction = model.predict(input_data_scaled)

# Output the result
if prediction[0] == 0:
    print("The person does not have heart disease")
else:
    print("The person has heart disease")

The person does not have heart disease


