# Heart Disease Prediction using Logistic Regression
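This notebook trains a logistic regression classifier that predicts, from a set of medical features, whether an individual is likely to have heart disease. It evaluates the model on a held-out test split and then makes a prediction for a single sample record.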

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

## Load the Dataset

In [None]:
# Location of the heart-disease CSV. The default looks like a Google Colab
# path -- point it at your own copy when running elsewhere.
data_path = "/content/data.csv"

# Read the whole file into the DataFrame that the following cells work on.
df = pd.read_csv(filepath_or_buffer=data_path)

## Dataset Overview

In [None]:
# Quick inspection of the loaded data before any modelling.
print("First 5 rows of the dataset:")
print(df.head())

print("\nDataset Summary:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- that would emit a stray "None" line.
df.info()

# Per-column count of missing entries.
print("\nMissing Values in Each Column:")
print(df.isnull().sum())

# Number of rows per value of the 'target' column (class balance check).
print("\nTarget Class Distribution:")
print(df['target'].value_counts())

## Feature-Target Separation

In [None]:
# The 'target' column is the label to predict; every remaining column is
# used as an input feature.
labels = df['target']
features = df.drop(columns='target')

## Train-Test Split

In [None]:
# Hold out 20% of the rows for testing. Stratifying on the labels keeps the
# class proportions comparable in both splits, and the fixed seed makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Report the (rows, columns) of the full feature set and of each split.
print(f"Feature Set Shape: {features.shape}")
print(f"Training Set Shape: {X_train.shape}")
print(f"Testing Set Shape: {X_test.shape}")

## Model Training

In [None]:
# Fit a logistic regression model on the training split.
#
# The features are fed in unscaled (the sample record used later contains raw
# values such as 140, 268 and 160), and on data like that the default solver
# budget of max_iter=100 is often exhausted before convergence, producing a
# ConvergenceWarning and a slightly under-fitted model. A larger budget only
# lets the solver keep going; if it already converged within 100 iterations
# the fitted coefficients are unchanged.
classifier = LogisticRegression(max_iter=1000)
classifier.fit(X_train, y_train)

## Model Evaluation

In [None]:
# Accuracy on the data the model was trained on ...
train_preds = classifier.predict(X_train)
train_accuracy = accuracy_score(y_true=y_train, y_pred=train_preds)

# ... and on the held-out split it has never seen. A large gap between the
# two numbers would point to over-fitting.
test_preds = classifier.predict(X_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=test_preds)

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

## Make a Prediction on Sample Data

In [None]:
# A single hand-entered record with 13 values.
# NOTE(review): the values are assumed to follow the column order of
# `features` -- confirm against the dataset header.
sample_input = (62, 0, 0, 140, 268, 0, 0, 160, 0, 3.6, 0, 2, 2)

# predict() expects a 2-D input, so reshape the record to (1, n_features).
sample_array = np.array(sample_input).reshape(1, -1)

# The classifier was fitted on a DataFrame, so it remembers the column names.
# Predicting on a bare ndarray makes scikit-learn warn that X does not have
# valid feature names; wrapping the row in a DataFrame with the training
# columns avoids the warning and ties each value to a named column.
sample_frame = pd.DataFrame(sample_array, columns=features.columns)

# predict() returns one label per row; take the only one.
prediction = classifier.predict(sample_frame)[0]

# The code treats label 1 as "heart disease" and anything else as "no disease".
if prediction == 1:
    print("Result: The individual is likely to have heart disease.")
else:
    print("Result: The individual is unlikely to have heart disease.")