# Import libraries

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import laplace, norm
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import OneClassSVM
from hmmlearn import hmm
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# Data generation (realistic synthetic)

In [2]:
np.random.seed(42)
T, N_train, N_test = 50, 200, 100
alpha_true = 0.8
mu0, sigma0 = 0.0, 1.5
mu1, sigma1 = 1.0, 1.5

def generate_sequence(mu, sigma, alpha, T):
    x = np.zeros(T)
    for t in range(1, T):
        mean_t = mu + alpha * x[t-1]
        x[t] = mean_t + np.random.laplace(0, sigma/np.sqrt(2))
    return x

def generate_dataset(mu, sigma, alpha, T, N):
    return np.array([generate_sequence(mu, sigma, alpha, T) for _ in range(N)])

# gera treino + teste
X0 = generate_dataset(mu0, sigma0, alpha_true, T, N_train+N_test)
X1 = generate_dataset(mu1, sigma1, alpha_true, T, N_train+N_test)
y0 = np.zeros(N_train+N_test); y1 = np.ones(N_train+N_test)

X = np.vstack((X0, X1))
y = np.hstack((y0, y1))
perm = np.random.permutation(len(y))
X, y = X[perm], y[perm]

X_train = np.vstack((X0[:N_train], X1[:N_train]))
y_train = np.hstack((y0[:N_train], y1[:N_train]))
X_test  = np.vstack((X0[N_train:], X1[N_train:]))
y_test  = np.hstack((y0[N_train:], y1[N_train:]))


# Estima parâmetros TNB
def estimate_parameters(X, n_iter=5):
    mu = np.mean(X[:,1:])
    alpha = 0.0
    for _ in range(n_iter):
        num = np.sum((X[:,1:]-mu)*X[:,:-1])
        den = np.sum((X[:,:-1]-mu)**2)
        alpha = 0.0 if den==0 else num/den
        mu = np.mean(X[:,1:] - alpha*X[:,:-1])
    residuals = X[:,1:] - (mu + alpha*X[:,:-1])
    sigma = np.sqrt(np.mean(residuals**2))
    return mu, sigma, alpha

mu0_hat, sigma0_hat, alpha0_hat = estimate_parameters(X_train[y_train==0])
mu1_hat, sigma1_hat, alpha1_hat = estimate_parameters(X_train[y_train==1])

# Define models

In [4]:
# Classic NB (Gaussian)
gnb = GaussianNB().fit(X_train, y_train)
y_nb = gnb.predict(X_test)
p_nb = gnb.predict_proba(X_test)[:,1]

# HMM
models_hmm = {}
for cls, (m_hat, s_hat) in enumerate([(mu0_hat, sigma0_hat),
                                       (mu1_hat, sigma1_hat)]):
    model = hmm.GaussianHMM(n_components=2,
                            covariance_type='diag',
                            n_iter=100,
                            random_state=42)
    # treina concatenando as janelas da classe
    Xc = X_train[y_train == cls]
    Xc_cat = Xc.reshape(-1, 1)
    lengths = [T] * len(Xc)
    model.fit(Xc_cat, lengths)
    models_hmm[cls] = model

# pontua cada janela de teste separadamente
scores0 = []
scores1 = []
for x in X_test:
    x_cat = x.reshape(-1, 1)
    scores0.append(models_hmm[0].score(x_cat, [T]))
    scores1.append(models_hmm[1].score(x_cat, [T]))
scores0 = np.array(scores0)
scores1 = np.array(scores1)

# predições e probabilidades (proxy)
y_hmm = (scores1 > scores0).astype(int)
p_hmm = scores1

# One-Class SVM
oc0 = OneClassSVM(gamma='auto').fit(X_train[y_train==0])
oc1 = OneClassSVM(gamma='auto').fit(X_train[y_train==1])
s0 = oc0.decision_function(X_test)
s1 = oc1.decision_function(X_test)
y_svm = (s1 > s0).astype(int)
p_svm = s1

# Temporal Naive Bayes
def tnb_predict(X):
    preds, scores = [], []
    for x in X:
        ll0 = np.log(0.5); ll1 = np.log(0.5)
        for t in range(1, len(x)):
            ll0 += norm.logpdf(x[t], loc=mu0_hat + alpha0_hat*x[t-1], scale=sigma0_hat)
            ll1 += norm.logpdf(x[t], loc=mu1_hat + alpha1_hat*x[t-1], scale=sigma1_hat)
        preds.append(int(ll1>ll0))
        scores.append(ll1 - ll0)
    return np.array(preds), np.array(scores)

y_tnb, p_tnb = tnb_predict(X_test)

# Monta resultados e salva CSV
models = ["GaussianNB","HMM","OneClassSVM","TNB (Laplace)"]
res = []
for name, y_pred, p in zip(models,[y_nb,y_hmm,y_svm,y_tnb],[p_nb,p_hmm,p_svm,p_tnb]):
    res.append({
        "model":   name,
        "accuracy": accuracy_score(y_test, y_pred),
        "f1":       f1_score(y_test, y_pred),
        "auc":      roc_auc_score(y_test, p)
    })

df = pd.DataFrame(res)
df.to_csv("realistic_benchmark_results.csv", index=False)
print(df)

           model  accuracy        f1     auc
0     GaussianNB     1.000  1.000000  1.0000
1            HMM     0.985  0.985222  0.9720
2    OneClassSVM     0.815  0.843882  0.9935
3  TNB (Laplace)     0.960  0.961165  0.9981
