# GP Initial Implementation: pyro

In [4]:
# Import needed libraries and modules
from codecarbon import EmissionsTracker
import numpy as np
import torch
import pyro
import pyro.contrib.gp as gp
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

# Fetch dataset from UCI Repository
from ucimlrepo import fetch_ucirepo
heart_disease = fetch_ucirepo(id=45)
df = heart_disease.data.original

In [5]:
# ---------------------------------------------------------------------------- #
#                                PRE-PROCESSING                                #
# ---------------------------------------------------------------------------- #

# --------------------------------- SETTINGS --------------------------------- #

##### SETTINGS #####
PC_Features = True
Random_Seed = 82024
K_Folds = 10
Max_Iterations = 200
####################

# Drop missing values
df = df.dropna()
df = df.reset_index(drop=True)

# Binarize target
df.loc[df['num'] != 0, 'num'] = 1

# Define features and target vectors
X = df.iloc[:,:-1]
y = df['num']

# Separate integer from categorical features
int_features, cat_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak'],\
['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']

# Define preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('int', StandardScaler(), int_features),
        ('cat', OneHotEncoder(), cat_features)
    ])

# Define pipeline depending on whether PCA is requested or not
if PC_Features:
    preprocessor = Pipeline([
        ('preprocessor', preprocessor),
        ('pca', PCA(n_components=12))
    ])

In [6]:
# ---------------------------------------------------------------------------- #
#                                     MODEL                                    #
# ---------------------------------------------------------------------------- #

# Initiate CodeCarbon to track emissions
tracker = EmissionsTracker('GP pyro model', log_level='warning')
tracker.start()

# Set random seed for reproducibility
pyro.set_rng_seed(Random_Seed)

kfold = KFold(n_splits=K_Folds, shuffle=True, random_state=Random_Seed)
roc_aucs, accs = [], []

for train_idx, test_idx in kfold.split(X):
    # Split data into training and testing sets
    train_X, test_X = X.iloc[train_idx], X.iloc[test_idx]
    train_y, test_y = y.iloc[train_idx], y.iloc[test_idx]
    
    # Preprocess data
    train_X = preprocessor.fit_transform(train_X)
    test_X = preprocessor.transform(test_X)
    
    # Convert to PyTorch tensors
    train_X = torch.tensor(train_X, dtype=torch.float32)
    train_y = torch.tensor(train_y.values, dtype=torch.float32)
    test_X = torch.tensor(test_X, dtype=torch.float32)
    test_y = torch.tensor(test_y.values, dtype=torch.float32)
    
    # Clear Pyro parameters for each fold
    pyro.clear_param_store()
    
    # Define model
    kernel = gp.kernels.RBF(input_dim=train_X.shape[-1])
    likelihood = gp.likelihoods.Binary()
    model = gp.models.VariationalGP(
        train_X,
        train_y, 
        kernel,
        likelihood=likelihood,
        whiten=True,
        jitter=1e-04
    )
    
    # Train model
    gp.util.train(model, num_steps=Max_Iterations)
    
    # Evaluate model
    model.eval()
    with torch.no_grad():
        mean, var = model(test_X)
        test_pred = model.likelihood(mean, var)
        pred_probs = test_pred.detach().numpy()
        roc_aucs.append(roc_auc_score(test_y.numpy(), pred_probs))
        accs.append(accuracy_score(test_y.numpy(), (pred_probs > 0.5).astype(int)))

# Calculate and display results
acc = np.mean(accs)
acc_std = np.std(accs)
roc_auc = np.mean(roc_aucs)
roc_auc_std = np.std(roc_aucs)

print(f"Accuracy: {acc:.4f} ± {acc_std:.4f}")
print(f"AUC-ROC: {roc_auc:.4f} ± {roc_auc_std:.4f}")

# Stop emission tracking
_ = tracker.stop()

[codecarbon ERROR @ 19:28:56] Error: Another instance of codecarbon is already running. Turn off the other instance to be able to run this one. Exiting.




Accuracy: 0.7309 ± 0.0657
AUC-ROC: 0.7383 ± 0.0670
