In [1]:
import pymc as pm
import numpy as np
import arviz as az
import pandas as pd



In [2]:
data = pd.read_csv("CleanDataset.csv")

In [None]:
features = data[['Age', 'MonthlyIncome']].values
features = (features - np.mean(features, axis=0)) / np.std(features, axis=0)

In [5]:
K = 3  # Number of clusters
D = features.shape[1]  # Number of features

In [6]:
with pm.Model() as model:
    # Hyperparameters for the DP
    alpha = pm.Gamma('alpha', 1., 1.)
    # Stick-breaking process to define weights
    beta = pm.Beta('beta', 1., alpha, shape=len(features))
    w = pm.Deterministic('w', pm.math.concatenate([[beta[0]], beta[1:] * (1 - beta[:-1]).cumprod()]))
    
    # Priors for cluster centers and spreads
    mu = pm.Normal('mu', mu=0, sigma=1, shape=(K, D))
    sigma = pm.HalfCauchy('sigma', beta=1, shape=(K, D))
    
    # Observations model, mixture of Gaussians
    y_obs = pm.NormalMixture('y_obs', w=w, mu=mu, sigma=sigma, observed=features)
    
    # Draw posterior samples using variational inference for faster execution
    trace = pm.fit(10000, method='advi').sample(500)


ValueError: Alloc static input type and target shape are incompatible: Vector(float64, shape=(5180,)) vs (3, 2)

In [None]:
# Visualize the results
az.plot_trace(trace)
az.summary(trace)