# In [6]:
import pyro
import torch
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam

# Load your spam dataset.
# The dataset is a list of (text, label) tuples; going by the examples,
# label 1 marks spam and 0 marks a normal message.
# NOTE: these entries are a tiny illustrative sample -- replace them with your
# real data. A literal `...` placeholder must not be left in the list: it is
# an Ellipsis object, and `zip(*data)` below fails on it with a TypeError.
data = [
    ("Free money!!!", 1),
    ("How are you?", 0),
    ("Win a free prize now", 1),
    ("Are we still meeting for lunch today?", 0),
    ("Claim your cash reward today", 1),
    ("Please review the attached report", 0),
    ("Cheap loans, apply now!!!", 1),
    ("See you at the gym tomorrow", 0),
    ("You have been selected for a free gift", 1),
    ("Can you call me back later?", 0),
]

# Convert text to numerical data: one row of token counts per message.
vectorizer = CountVectorizer()
texts, labels = zip(*data)
X = vectorizer.fit_transform(texts).toarray()
y = torch.tensor(labels)

# Split data into training and testing sets (80% / 20%, fixed seed so the
# split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


# Define the model and guide
def model(data):
    """Beta-Bernoulli model over a sequence of binary observations.

    A single latent probability ``latent_fairness`` is drawn from a uniform
    Beta(1, 1) prior, and every observation is modelled as an independent
    Bernoulli draw with that probability.

    Args:
        data: 1-D collection of 0/1 observations (tensor or anything
            ``torch.as_tensor`` accepts).
    """
    # Bernoulli.log_prob needs floating-point observations; integer class
    # labels are converted here so callers can pass them directly.
    observations = torch.as_tensor(data, dtype=torch.float)

    # Prior. The hyperparameters are scalars so that the latent site has the
    # same (empty) batch shape as the matching site in the guide, and so that
    # the success probability broadcasts over the data plate.
    alpha0 = torch.tensor(1.0)
    beta0 = torch.tensor(1.0)
    f = pyro.sample("latent_fairness", pyro.distributions.Beta(alpha0, beta0))

    # Likelihood: observations are conditionally independent given f.
    with pyro.plate("data", len(observations)):
        pyro.sample("obs", pyro.distributions.Bernoulli(f), obs=observations)


def guide(data):
    """Variational family for ``latent_fairness``: a Beta with learnable parameters.

    Args:
        data: Unused. SVI calls the guide with the same arguments as the
            model, so the signature has to accept it.
    """
    # Constraints live in torch.distributions (re-exported by
    # pyro.distributions); there is no `pyro.constraints` module.
    positive = torch.distributions.constraints.positive

    # Variational parameters, both initialised at 15.0 and kept positive as
    # required for Beta concentration parameters.
    alpha_q = pyro.param("alpha_q", torch.tensor(15.0), constraint=positive)
    beta_q = pyro.param("beta_q", torch.tensor(15.0), constraint=positive)

    pyro.sample("latent_fairness", pyro.distributions.Beta(alpha_q, beta_q))


# Train the model
# Pyro keeps parameters in a global store; clear it so that re-running this
# cell starts from the initial values instead of resuming a previous fit.
pyro.clear_param_store()
svi = SVI(model, guide, Adam({"lr": 0.005}), loss=Trace_ELBO())

num_iterations = 1000
for j in range(num_iterations):
    # One gradient step on the ELBO; svi.step returns the loss estimate.
    loss = svi.step(y_train)
    if j % 100 == 0:
        # Report the loss per training example every 100 iterations.
        print("[iteration %04d] loss: %.4f" % (j + 1, loss / len(y_train)))

# Test the model
# For each test row, draw one sample of "latent_fairness" from the guide and
# round it to a 0/1 prediction.
# NOTE(review): the guide does not use its argument, so these predictions do
# not depend on the message features -- confirm this is the intended baseline.
y_pred = []
for features in X_test:
    guide_trace = pyro.poutine.trace(guide).get_trace(features)
    y_pred.append(round(guide_trace.nodes["latent_fairness"]["value"].item()))

# Calculate accuracy
# Compare tensor with tensor: comparing a tensor against a plain Python list
# yields a single bool, which has no .sum().
y_pred_tensor = torch.as_tensor(y_pred, dtype=y_test.dtype)
accuracy = (y_test == y_pred_tensor).sum().item() / len(y_test)
print("Accuracy: ", accuracy)