In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


import warnings
warnings.simplefilter("ignore", category=FutureWarning)
warnings.simplefilter("ignore", category=UserWarning)

In [None]:
# Read the Framingham CSV from the Kaggle input directory into `df`
# and preview its first rows.
data_path = '/kaggle/input/heart-disease-prediction-using-logistic-regression/framingham.csv'
df = pd.read_csv(data_path)
df.head()

In [None]:
# Print a summary of the loaded frame: columns, non-null counts, dtypes and memory usage.
df.info()

In [None]:
# Count missing values per column to see which columns need to be filled.
df.isna().sum()

In [None]:
from sklearn.impute import SimpleImputer

# Fill missing 'education' values with the column's most frequent value.
# fit_transform returns a 2-D (n_rows, 1) array, so it is flattened to 1-D
# before being assigned back to the single column.
imputer = SimpleImputer(strategy='most_frequent')
df['education'] = imputer.fit_transform(df[['education']]).ravel()

In [None]:
# Re-count missing values per column after filling 'education'.
df.isna().sum()

In [None]:
# Fill missing 'cigsPerDay' values according to 'currentSmoker':
#   - rows with currentSmoker == 1 receive the mean cigsPerDay of all such rows
#   - rows with currentSmoker == 0 receive 0.0
is_smoker = df['currentSmoker'] == 1
cigs_missing = df['cigsPerDay'].isna()

smoker_mean = df.loc[is_smoker, 'cigsPerDay'].mean()
df.loc[is_smoker & cigs_missing, 'cigsPerDay'] = smoker_mean
df.loc[(df['currentSmoker'] == 0) & cigs_missing, 'cigsPerDay'] = 0.0

In [None]:
# Re-count missing values per column after filling 'cigsPerDay'.
df.isna().sum()

In [None]:
# Fill missing 'BPMeds' values with the column's most frequent value.
# SimpleImputer is already imported by an earlier cell, so it is not
# re-imported here. fit_transform returns a 2-D (n_rows, 1) array, which is
# flattened to 1-D before being assigned back to the single column.
imputer = SimpleImputer(strategy='most_frequent')
df['BPMeds'] = imputer.fit_transform(df[['BPMeds']]).ravel()

In [None]:
# Fill missing 'totChol' values with the column median.
# SimpleImputer is already imported by an earlier cell, so it is not
# re-imported here. fit_transform returns a 2-D (n_rows, 1) array, which is
# flattened to 1-D before being assigned back to the single column.
imputer = SimpleImputer(strategy='median')
df['totChol'] = imputer.fit_transform(df[['totChol']]).ravel()

In [None]:
# Fill missing 'BMI' values with the column median.
# SimpleImputer is already imported by an earlier cell, so it is not
# re-imported here. fit_transform returns a 2-D (n_rows, 1) array, which is
# flattened to 1-D before being assigned back to the single column.
imputer = SimpleImputer(strategy='median')
df['BMI'] = imputer.fit_transform(df[['BMI']]).ravel()

In [None]:
# 'glucose' is used as the regression target later in this notebook, so rows
# where it is missing are dropped instead of being filled in. A median-filled
# target would put fabricated labels into both the training and the test data
# and distort the reported MSE / R².
# The index is reset so the remaining rows are numbered contiguously.
df = df.dropna(subset=['glucose']).reset_index(drop=True)

In [None]:
# Fill missing 'heartRate' values with the column median.
# SimpleImputer is already imported by an earlier cell, so it is not
# re-imported here. fit_transform returns a 2-D (n_rows, 1) array, which is
# flattened to 1-D before being assigned back to the single column.
imputer = SimpleImputer(strategy='median')
df['heartRate'] = imputer.fit_transform(df[['heartRate']]).ravel()

In [None]:
# Final check of missing values per column after all the fill steps above.
df.isna().sum()

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of `df`.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm', ax=ax)
ax.set_title("Correlation Matrix")
plt.show()

In [None]:
# Histogram (30 bins) of the glucose column with a KDE curve overlaid.
ax = sns.histplot(df["glucose"], kde=True, bins=30)
ax.set(
    title="Glucose Level Distribution",
    xlabel="Glucose",
    ylabel="Frequency",
)
plt.show()

In [None]:
# Scatter plot of glucose against BMI, one point per row of `df`.
ax = sns.scatterplot(data=df, x="BMI", y="glucose")
ax.set(
    title="BMI vs Glucose",
    xlabel="BMI",
    ylabel="Glucose",
)
plt.show()

In [None]:
# Box plot of glucose grouped by the 'male' column; the tick at position 0 is
# labelled "Female" and the tick at position 1 is labelled "Male".
ax = sns.boxplot(data=df, x="male", y="glucose")
ax.set(
    title="Glucose by Gender",
    xlabel="Gender",
    ylabel="Glucose",
)
ax.set_xticks([0, 1])
ax.set_xticklabels(["Female", "Male"])
plt.show()

In [None]:
# Separate the regression target (glucose) from the remaining columns,
# which are all used as features.
target = "glucose"
y = df[target]
X = df.drop(target, axis="columns")

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares model on the training split; the fitted
# estimator is left as the last expression so the notebook displays it.
model = LinearRegression().fit(X_train, y_train)
model

In [None]:
# Predict the target for the held-out test features with the fitted linear model.
y_pred = model.predict(X_test)

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the linear model's test-set predictions against the true values.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("Mean Squared Error (MSE):", mse)
print("R² Score:", r2)

In [None]:
# Scatter of the linear model's predictions against the actual test values.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred, alpha=0.5)
ax.set(
    xlabel="Actual Glucose",
    ylabel="Predicted Glucose",
    title="Actual vs Predicted Glucose Values",
)
ax.grid(True)
plt.show()

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Fit a decision-tree regressor (fixed random_state) on the training split;
# the fitted estimator is left as the last expression so the notebook displays it.
dt_model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
dt_model

In [None]:
# Predict the target for the held-out test features with the fitted decision tree.
y_pred_dt = dt_model.predict(X_test)

In [None]:
# Score the decision tree's test-set predictions against the true values,
# using the same two metrics as for the linear model.
mse_dt = mean_squared_error(y_true=y_test, y_pred=y_pred_dt)
r2_dt = r2_score(y_true=y_test, y_pred=y_pred_dt)

print("Decision Tree MSE:", mse_dt)
print("Decision Tree R² Score:", r2_dt)

In [None]:
# Scatter of the decision tree's predictions against the actual test values.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred_dt, alpha=0.5, color="green")
ax.set(
    xlabel="Actual Glucose",
    ylabel="Predicted Glucose",
    title="Decision Tree - Actual vs Predicted Glucose",
)
ax.grid(True)
plt.show()

In [None]:
# Report the size of the fitted tree: its depth and its number of leaves.
tree_depth = dt_model.get_depth()
leaf_count = dt_model.get_n_leaves()

print("Tree Depth:", tree_depth)
print("Number of Leaves:", leaf_count)

In [None]:
# Collect both models' test metrics into one table, one row per model.
results = pd.DataFrame(
    [
        {"Model": "Linear Regression", "MSE": mse, "R²": r2},
        {"Model": "Decision Tree", "MSE": mse_dt, "R²": r2_dt},
    ]
)
results

In [None]:
# Compare the two models metric by metric.
# MSE and R² live on very different scales, so each metric gets its own panel
# with an independent y-axis; on a single shared axis the R² bars would be
# squashed next to the MSE bars.
metrics = results.set_index("Model")
axes = metrics.plot(
    kind="bar",
    subplots=True,
    layout=(1, 2),
    sharey=False,
    legend=False,
    rot=0,
    figsize=(10, 5),
    title=["MSE (lower is better)", "R² (higher is better)"],
)
for metric_ax, metric_name in zip(axes.ravel(), metrics.columns):
    metric_ax.set_ylabel(metric_name)
    metric_ax.grid(True)

comparison_fig = axes.ravel()[0].figure
comparison_fig.suptitle("Model Comparison")
comparison_fig.tight_layout()
plt.show()

In [None]:
# Horizontal bar chart of the fitted decision tree's feature importances,
# one bar per feature column of X.
features = X.columns
importances = dt_model.feature_importances_

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=importances, y=features, ax=ax)
ax.set(
    title="Feature Importances (Decision Tree)",
    xlabel="Importance",
    ylabel="Feature",
)
plt.show()