# GP Initial Implementation: pyro

In [1]:
# Import needed libraries and modules
from codecarbon import EmissionsTracker
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import roc_auc_score, accuracy_score
import torch
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
import pyro
import pyro.contrib.gp as gp

# Fetch dataset from UCI Repository
from ucimlrepo import fetch_ucirepo
heart_disease = fetch_ucirepo(id=45)
df = heart_disease.data.original

In [2]:
# ---------------------------------------------------------------------------- #
#                                PRE-PROCESSING                                #
# ---------------------------------------------------------------------------- #

# --------------------------------- SETTINGS --------------------------------- #
Normalize = True
PC_Features = True
Test_Size = 0.2
Random_Seed = 82024
Torch = True
Num_iterations = 500
Cross_Validation = True

# Drop missing values
df = df.dropna()
df = df.reset_index(drop=True)

# Binarize target
df.loc[df['num'] != 0, 'num'] = 1

# Define features and target vectors
X = df.iloc[:,:-1]
y = df['num']

# Normalize if requested
if (Normalize) or (PC_Features):
    int_features, cat_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak'],\
    ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']
    
    preprocessor = ColumnTransformer(
    transformers=[
        ('int', StandardScaler(), int_features),
        ('cat', OneHotEncoder(), cat_features)
    ])
    X = preprocessor.fit_transform(X)
else:
    X = X.values

# Apply PCA if requested
if PC_Features:
    pca = PCA(n_components=12)
    X = pca.fit_transform(X)

# Convert to torch tensor if requested
if Torch:
    X = torch.tensor(X)
    y = torch.tensor(y).double()

if not Cross_Validation:
    # Split train and test data
    index = list(range(y.size))
    train_index, test_index = train_test_split(index, test_size=Test_Size, random_state=Random_Seed)

    train_X = X[train_index]
    train_y = y[train_index]

    test_X = X[test_index]
    test_y = y[test_index]

In [3]:
# ---------------------------------------------------------------------------- #
#                                     MODEL                                    #
# ---------------------------------------------------------------------------- #
# Initiate CodeCarbon to track emissions
tracker = EmissionsTracker('GP pyro model', log_level='warning')
tracker.start()

if Cross_Validation:
    kf = KFold(shuffle=True, random_state=Random_Seed)
    roc_aucs = []
    accuracies = []

    for train_index, test_index in kf.split(X):
        train_X, test_X = X[train_index], X[test_index]
        train_y, test_y = y[train_index], y[test_index]
        
        # Create model
        kernel = gp.kernels.RBF(input_dim=X.shape[-1])
        pyro.clear_param_store()
        likelihood = gp.likelihoods.Binary()
        model = gp.models.VariationalGP(
            train_X,
            train_y, 
            kernel,
            likelihood=likelihood,
            whiten=True,
            jitter=1e-04
        )
        
        # Train the model
        loss = gp.util.train(model, num_steps=Num_iterations)

        # Test model
        mean, var = model(test_X)
        y_pred = model.likelihood(mean, var)
        pred_probs = y_pred.detach().numpy()
        roc_auc = roc_auc_score(test_y.numpy(), pred_probs)
        accuracy = accuracy_score(test_y.numpy(), (pred_probs > 0.5).astype(int))
        
        roc_aucs.append(roc_auc)
        accuracies.append(accuracy)

    # Calculate mean metrics
    roc_auc = np.mean(roc_aucs)
    acc = np.mean(accuracies)
else:
    # Create model
    kernel = gp.kernels.RBF(input_dim=X.shape[1])
    pyro.clear_param_store()
    likelihood = gp.likelihoods.Gaussian # Binary()
    model = gp.models.VariationalGP(
        train_X,
        train_y, 
        kernel,
        likelihood=likelihood,
        whiten=True,
        jitter=1e-03
    )

    # Train model
    loss = gp.util.train(model, num_steps=Num_iterations)

    # Plot loss values
    plt.plot(loss)
    plt.xlabel("Iterations")
    plt.ylabel("Loss")
    plt.show()

    # Test model
    mean, var = model(test_X)
    y_pred = model.likelihood(mean, var)
    pred_probs = y_pred.detach().numpy()

    # Evaluation
    acc = accuracy_score(test_y, y_pred)
    roc_auc = roc_auc_score(test_y.numpy(), pred_probs)

print(f"Accuracy: {acc:.4f}")
print(f"ROC AUC: {roc_auc:.4f}")

tracker.stop()

 Linux OS detected: Please ensure RAPL files exist at \sys\class\powercap\intel-rapl to measure CPU



Accuracy: 0.7911
ROC AUC: 0.7957


1.6384453468841678e-05