In [7]:
import pymc as pm
import numpy as np
import pandas as pd

# Read the diabetes dataset from the working directory
data = pd.read_csv('diabetes.csv')

# Split the frame: 'Outcome' is taken to be the binary target, and every
# remaining column is used as a predictor
y = data['Outcome'].values
X = data.drop(columns='Outcome').values

# n = number of observations (rows of X), p = number of features (columns of X)
n, p = X.shape

with pm.Model() as logistic_model:
    # Normal(0, 10) priors: one regression coefficient per feature, plus an intercept
    betas = pm.Normal('betas', mu=0, sigma=10, shape=p)
    intercept = pm.Normal('intercept', mu=0, sigma=10)

    # Linear predictor on the log-odds scale
    logits = pm.math.dot(X, betas) + intercept

    # Bernoulli likelihood for the binary outcomes, parameterised directly by the
    # log-odds. Passing `logit_p` avoids an explicit sigmoid (which can saturate to
    # exactly 0 or 1 for large |logits|) and no longer rebinds `p` -- the feature
    # count -- to a probability tensor.
    y_obs = pm.Bernoulli('y_obs', logit_p=logits, observed=y)

    # Run MCMC exactly once; a second identical pm.sample call would only discard
    # these draws and double the runtime.
    # return_inferencedata=False keeps the result indexable by variable name
    # (trace['betas']), which the extraction code below relies on.
    # random_seed pins the draws so the printed posterior means are reproducible.
    trace = pm.sample(2000, tune=1000, return_inferencedata=False, random_seed=42)

# Pull the posterior draws for the coefficients and the intercept out of the trace
betas_posterior, intercept_posterior = trace['betas'], trace['intercept']

# Collapse the draws into point estimates: averaging over axis 0 leaves one mean
# per coefficient, while the intercept reduces to a single scalar
betas_mean = np.mean(betas_posterior, axis=0)
intercept_mean = np.mean(intercept_posterior)

for label, value in (
    ('Posterior means of betas:', betas_mean),
    ('Posterior mean of intercept:', intercept_mean),
):
    print(label, value)

# Plug-in predictions: push the posterior-mean linear predictor through the
# logistic function to get one expected probability per observation.
# (This uses only the point estimates, not the full posterior.)
expected_probabilities = 1 / (1 + np.exp(-(X.dot(betas_mean) + intercept_mean)))

# Classify as positive when the expected probability reaches the 0.5 threshold
predicted_classes = expected_probabilities >= 0.5

# Posterior analysis
# Note: no convergence diagnostics are computed in this cell. In practice, check
# trace plots, autocorrelation, and the R-hat statistic before trusting the
# estimates printed above.




Posterior means of betas: [-0.29160427  0.62069775  0.07978702 -0.74919934 -0.23571071  0.83360848
 -0.33064008  0.32011737]
Posterior mean of intercept: -0.11595224118186646


In [8]:
import pymc as pm
import numpy as np
import pandas as pd

# Read the diabetes dataset from the working directory
data = pd.read_csv('diabetes.csv')

# Split the frame: 'Outcome' is taken to be the binary target, and every
# remaining column is used as a predictor
y = data['Outcome'].values
X = data.drop(columns='Outcome').values

# n = number of observations (rows of X), p = number of features (columns of X)
n, p = X.shape

with pm.Model() as robust_regression_model:
    # Normal(0, 10) priors: one regression coefficient per feature, plus an intercept
    betas = pm.Normal('betas', mu=0, sigma=10, shape=p)
    intercept = pm.Normal('intercept', mu=0, sigma=10)

    # Prior for the scale of the errors (sigma is the StudentT scale parameter,
    # not a variance)
    sigma = pm.HalfCauchy('sigma', beta=1)

    # Prior for the degrees of freedom of the t-distribution
    # (Gamma with alpha=2, beta=0.1, i.e. prior mean alpha / beta = 20)
    nu = pm.Gamma('nu', alpha=2, beta=0.1)

    # Expected value of the outcome: linear in the predictors
    mu = pm.math.dot(X, betas) + intercept

    # Likelihood (sampling distribution) of the observations.
    # NOTE(review): y is the 'Outcome' column, which the data-loading code assumes
    # is binary, so this fits a continuous StudentT to 0/1 values -- confirm that
    # a linear model on the binary target is intended here.
    y_obs = pm.StudentT('y_obs', nu=nu, mu=mu, sigma=sigma, observed=y)

    # Perform MCMC. return_inferencedata=False keeps the trace object that the
    # trace.get_values(...) calls below rely on; random_seed pins the draws so
    # the printed summary statistics are reproducible.
    trace = pm.sample(2000, tune=1000, return_inferencedata=False, random_seed=42)
# Pull the posterior draws of every sampled parameter out of the trace
betas_trace, intercept_trace, sigma_trace, nu_trace = (
    trace.get_values(name) for name in ('betas', 'intercept', 'sigma', 'nu')
)

# Posterior means: averaging betas over axis 0 leaves one value per coefficient;
# the remaining parameters each reduce to a single scalar
betas_mean = np.mean(betas_trace, axis=0)
intercept_mean, sigma_mean, nu_mean = (
    np.mean(draws) for draws in (intercept_trace, sigma_trace, nu_trace)
)

# Posterior standard deviations, computed the same way as the means
betas_std = np.std(betas_trace, axis=0)
intercept_std, sigma_std, nu_std = (
    np.std(draws) for draws in (intercept_trace, sigma_trace, nu_trace)
)

# Report mean then standard deviation for each parameter, in model order
report = [
    ('Posterior means of betas:', betas_mean),
    ('Posterior standard deviation of betas:', betas_std),
    ('Posterior mean of intercept:', intercept_mean),
    ('Posterior standard deviation of intercept:', intercept_std),
    ('Posterior mean of sigma:', sigma_mean),
    ('Posterior standard deviation of sigma:', sigma_std),
    ('Posterior mean of nu:', nu_mean),
    ('Posterior standard deviation of nu:', nu_std),
]
for row in report:
    print(*row)


Posterior means of betas: [ 2.08422191e-02  6.13065643e-03 -2.37538397e-03  9.75741251e-05
 -1.79293496e-04  1.32545427e-02  1.53349166e-01  2.60393656e-03]
Posterior standard deviation of betas: [0.00538671 0.00052435 0.00079154 0.00112344 0.00015211 0.00208509
 0.04659565 0.00157752]
Posterior mean of intercept: -0.8826710589240888
Posterior standard deviation of intercept: 0.08715451363396334
Posterior mean of sigma: 0.39386327353214684
Posterior standard deviation of sigma: 0.010684959802735344
Posterior mean of nu: 46.928173635929234
Posterior standard deviation of nu: 17.946738566248687
