In [None]:
!pip install tensorflow
!pip install tensorflow-probability
!pip install numpy



In [None]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the heart-disease table from the local CSV file.
# (A remote copy was referenced here previously and is left disabled.)
#data = pd.read_csv('https://raw.githubusercontent.com/harpreetSinghGuller/Data-Science-R/master/SAHeart.csv')
df = pd.read_csv("SAHeart.csv").assign(
    # Encode the categorical family-history column as 0 / 1
    famhist=lambda frame: frame["famhist"].map({"Absent": 0, "Present": 1})
)

# The target is the 'chd' column; every remaining column is used as a feature
y = df["chd"]
X = df.drop(columns="chd")

# Note: no missing-value removal and no normalisation happens in this cell.



In [None]:
# Hold out 20% of the rows for testing (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the standardisation statistics from the training rows only,
# then apply the same transform to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Re-bind all four splits as float32 tensors.
# NOTE(review): these are the *unscaled* splits; X_train_scaled / X_test_scaled
# are left untouched, and the Keras model further down is fitted on X_train —
# confirm that training on unscaled features is intended.
X_train, y_train, X_test, y_test = (
    tf.cast(split, tf.float32) for split in (X_train, y_train, X_test, y_test)
)

In [None]:
# Short aliases for the TensorFlow Probability submodules used below
tfd, tfb = tfp.distributions, tfp.bijectors

# Fix the global TensorFlow and NumPy seeds
tf.random.set_seed(0)
np.random.seed(0)

In [None]:
num_layers = 10
my_bijects = []
ndim = 11  # Adjust this to match the number of features in your dataset

# Base (latent) distribution: ndim-dimensional normal with zero mean
zdist = tfd.MultivariateNormalDiag(loc=[0.0] * ndim)

# Stack `num_layers` masked autoregressive flows, each followed by a permutation
for i in range(num_layers):
    # MaskedAutoregressiveFlow unstacks the network output along its last axis
    # into exactly two tensors (shift, log_scale). The network must therefore
    # emit 2 parameters per event dimension; the event size itself is given
    # via event_shape. (params=ndim only works when ndim happens to be 2.)
    anet = tfb.AutoregressiveNetwork(
        params=2, event_shape=[ndim], hidden_units=[128, 128], activation="relu"
    )
    ab = tfb.MaskedAutoregressiveFlow(anet)
    # Add bijector to list
    my_bijects.append(ab)
    # Now permute (!important!): reverse the dimension order between layers so
    # the autoregressive ordering differs from one MAF layer to the next
    permute = tfb.Permute(list(range(ndim - 1, -1, -1)))
    my_bijects.append(permute)

# Combine all bijectors into a single chain bijector that behaves like one
big_bijector = tfb.Chain(my_bijects)

# Transformed distribution: base distribution pushed through the chain
td = tfd.TransformedDistribution(zdist, bijector=big_bijector)

In [None]:
import tensorflow as tf

def negloglik(y, rv_y):
    """Loss: negative log-likelihood of the targets `y` under the predicted distribution `rv_y`."""
    return -rv_y.log_prob(y)


# Define the model: nine identical 256-unit ReLU layers, followed by a 2-unit
# linear layer that parameterises a Normal distribution per row.
# The head must emit exactly 2 values (1 for loc, 1 for scale). With more
# units, `t[..., 1:]` yields several scale columns that broadcast against the
# single loc column, so every prediction row has several entries and the
# per-row 0.5 threshold applied later no longer sees a single value.
model = tf.keras.models.Sequential(
    [tf.keras.layers.Dense(256, activation='relu') for _ in range(9)]
    + [
        tf.keras.layers.Dense(2),
        tfp.layers.DistributionLambda(
            lambda t: tfd.Normal(
                loc=t[..., :1],
                # softplus keeps the scale positive; 1e-5 keeps it away from zero
                scale=1e-5 + tf.math.softplus(0.05 * t[..., 1:]),
            )
        ),
    ]
)

# Compile the model
model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik)

# Train the model
model.fit(X_train, y_train, epochs=100, verbose=False)


<keras.src.callbacks.History at 0x7d5f022424d0>

In [None]:
# Report the compiled loss (negative log-likelihood) on the held-out split
model.evaluate(x=X_test, y=y_test)



0.6896783709526062

In [None]:
# Make predictions.
# The model's last layer is a DistributionLambda, so `model.predict` would
# return one random *sample* from the predicted distribution per row.
# Calling the model directly yields the distribution object; its mean is used
# as the deterministic point prediction.
y_pred = model(X_test).mean().numpy()



In [None]:
from sklearn.metrics import classification_report

# Convert the predictions to binary labels by thresholding at 0.5.
# Flattening first lets the comparison run on the whole array at once, instead
# of relying on the truth value of each one-element prediction row.
y_pred_binary = (np.ravel(y_pred) > 0.5).astype(int).tolist()

# Print the classification report
print(classification_report(y_test, y_pred_binary))

              precision    recall  f1-score   support

         0.0       0.73      0.86      0.79        59
         1.0       0.65      0.44      0.53        34

    accuracy                           0.71        93
   macro avg       0.69      0.65      0.66        93
weighted avg       0.70      0.71      0.69        93



In [None]:
# All baselines are fitted on the standardised training features.
# The fitting order is kept as-is.

# Gaussian Mixture Model (GMM) — unsupervised, so it is fitted without labels
gmm = GaussianMixture(n_components=5).fit(X_train_scaled)

# Support Vector Machine (SVM)
svm = SVC().fit(X_train_scaled, y_train)

# Logistic Regression (LR)
lr = LogisticRegression().fit(X_train_scaled, y_train)

# Random Forest (RF)
rf = RandomForestClassifier().fit(X_train_scaled, y_train)

# MLP (Multi-Layer Perceptron)
mlp = MLPClassifier().fit(X_train_scaled, y_train)

# XGBoost
xgb_model = xgb.XGBClassifier().fit(X_train_scaled, y_train)


In [None]:
# Evaluate accuracy of each fitted baseline on the standardised test features
models = {
    "GMM": gmm,
    "SVM": svm,
    "LR": lr,
    "RF": rf,
    "MLP": mlp,
    "XGBoost": xgb_model
}

# NOTE(review): GaussianMixture.predict returns mixture-component indices
# (0 .. n_components-1), not chd labels, so the "GMM" accuracy is not
# comparable with the supervised models — confirm whether it should be kept.

# Built with a comprehension so that the notebook-level names `model` (the
# Keras network) and `y_pred` (its predictions) are not overwritten by this cell.
accuracies = {
    model_name: accuracy_score(y_test, estimator.predict(X_test_scaled))
    for model_name, estimator in models.items()
}

# Print accuracies
for model_name, accuracy in accuracies.items():
    print(f"{model_name}: {accuracy}")

GMM: 0.6129032258064516
SVM: 0.7526881720430108
LR: 0.7849462365591398
RF: 0.7096774193548387
MLP: 0.7419354838709677
XGBoost: 0.7096774193548387
