In [None]:
# Import Libraries

In [None]:
# Standard library
import os
from pathlib import Path
import warnings
# Suppress all warnings globally. This call runs before the remaining imports
# below, so keep it in this position if the import list is reorganized.
warnings.filterwarnings('ignore')
# Numerics, dataframes and plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Set seaborn's global plotting style to 'whitegrid'.
sns.set(style='whitegrid')

# Preprocessing & modeling
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.impute import SimpleImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, brier_score_loss
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, IsolationForest
import xgboost as xgb
import lightgbm as lgb
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
import joblib

# Unsupervised & explainability
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, AgglomerativeClustering
import umap
import shap

# Deep learning autoencoder
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Project layout, all relative to the current working directory:
#   data/         - input datasets
#   models/save/  - persisted model artifacts
PROJECT_DIR = Path('.')
DATA_DIR = PROJECT_DIR.joinpath('data')
MODEL_DIR = PROJECT_DIR.joinpath('models', 'save')

# Make sure both directories exist up front; intermediate folders are created
# as needed and already-existing directories are left untouched.
DATA_DIR.mkdir(exist_ok=True, parents=True)
MODEL_DIR.mkdir(exist_ok=True, parents=True)

# Report the absolute location where models will be written.
print('Model dir:', MODEL_DIR.resolve())

In [None]:
# Load dataset

In [None]:
# Candidate file names for the Framingham dataset, tried in order inside DATA_DIR.
possible_files = ['framingham.csv', 'Framingham.csv', 'framingham_heart.csv', 'framingham_heart_study.csv']

# Load the first candidate that exists. The `else` belongs to the `for` loop:
# it runs only when the loop finishes without hitting `break`, i.e. when none
# of the candidate files was found, so `df` is always defined past this point.
for f in possible_files:
    p = DATA_DIR / f
    if p.exists():
        df = pd.read_csv(p)
        print(f'Loaded {p.name} — shape:', df.shape)
        break
else:
    raise FileNotFoundError(f"No framingham csv found in {DATA_DIR}. Place the dataset file there with one of names: {possible_files}")

# quick overview
print(df.columns.tolist())
df.head()