# Import libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import OneClassSVM
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from hmmlearn import hmm
from scipy.stats import norm

# Data generation (ideal synthetic)

In [2]:
np.random.seed(42)
T = 50
# parâmetros verdadeiros usados na geração
alpha_true = 0.8
mu0, sigma0 = 0.0, 1.0
mu1, sigma1 = 2.0, 1.0
N_train, N_test = 200, 100

# Funções para gerar dados AR(1)
def generate_sequence(mu, sigma, alpha, T, laplace=False):
    x = np.zeros(T)
    for t in range(1, T):
        mean_t = mu + alpha * x[t-1]
        if laplace:
            x[t] = mean_t + np.random.laplace(0, sigma/np.sqrt(2))
        else:
            x[t] = mean_t + np.random.normal(0, sigma)
    return x

def generate_dataset(mu, sigma, alpha, T, N, laplace=False):
    return np.array([generate_sequence(mu, sigma, alpha, T, laplace)
                     for _ in range(N)])

# Gera conjuntos de treino/teste
X0 = generate_dataset(mu0, sigma0, alpha_true, T, N_train + N_test)
X1 = generate_dataset(mu1, sigma1, alpha_true, T, N_train + N_test)
y0 = np.zeros(N_train + N_test)
y1 = np.ones (N_train + N_test)

# embaralha e separa
perm0 = np.random.permutation(N_train + N_test)
perm1 = np.random.permutation(N_train + N_test)
X0, y0 = X0[perm0], y0[perm0]
X1, y1 = X1[perm1], y1[perm1]

X_train = np.vstack((X0[:N_train], X1[:N_train]))
y_train = np.hstack((y0[:N_train], y1[:N_train]))
X_test  = np.vstack((X0[N_train:], X1[N_train:]))
y_test  = np.hstack((y0[N_train:], y1[N_train:]))

# Estimativas de parâmetros
mu0_hat, sigma0_hat, alpha0_hat = mu0, sigma0, alpha_true
mu1_hat, sigma1_hat, alpha1_hat = mu1, sigma1, alpha_true

# Define models

In [4]:
# Classic NB (Gaussian)
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred_nb = gnb.predict(X_test)
proba_nb  = gnb.predict_proba(X_test)[:,1]

# Hidden Markov Model (HMM)
models_hmm = {}
for cls, (mu_hat, sigma_hat) in enumerate([
        (mu0_hat, sigma0_hat),
        (mu1_hat, sigma1_hat)
    ]):
    model = hmm.GaussianHMM(n_components=2,
                            covariance_type='diag',
                            n_iter=100,
                            random_state=42)
    model.means_  = np.array([[mu_hat], [mu_hat + 0.5]])
    model.covars_ = np.array([[sigma_hat**2], [sigma_hat**2]])
    Xc = X_train[y_train == cls]
    Xc_concat = Xc.reshape(-1, 1)
    lengths   = [T] * Xc.shape[0]
    model.fit(Xc_concat, lengths)
    models_hmm[cls] = model

scores_hmm = np.zeros((len(X_test), 2))
for cls in [0, 1]:
    model = models_hmm[cls]
    # para cada janela em X_test, calculamos o log-likelihood
    scores_hmm[:, cls] = np.array([
        model.score(x.reshape(-1, 1))
        for x in X_test
    ])

y_pred_hmm = np.argmax(scores_hmm, axis=1)
proba_hmm  = scores_hmm[:, 1]  # proxy de “probabilidade”

# One-Class SVM
ocsvm0 = OneClassSVM(gamma='auto').fit(X_train[y_train==0])
ocsvm1 = OneClassSVM(gamma='auto').fit(X_train[y_train==1])
scores_svm = np.vstack([
    ocsvm0.decision_function(X_test),
    ocsvm1.decision_function(X_test)
]).T
y_pred_svm = np.argmax(scores_svm, axis=1)
proba_svm  = scores_svm[:,1]

# Temporal Naive Bayes (TNB)
def tnb_predict(X, params0, params1, prior=0.5):
    mu0, sigma0, alpha0 = params0
    mu1, sigma1, alpha1 = params1
    preds, scores = [], []
    for x in X:
        ll0 = np.log(prior); ll1 = np.log(prior)
        for t in range(1, len(x)):
            ll0 += norm.logpdf(x[t], loc=mu0+alpha0*x[t-1], scale=sigma0)
            ll1 += norm.logpdf(x[t], loc=mu1+alpha1*x[t-1], scale=sigma1)
        preds.append(0 if ll0>ll1 else 1)
        scores.append(ll1-ll0)
    return np.array(preds), np.array(scores)

params0 = (mu0_hat, sigma0_hat, alpha0_hat)
params1 = (mu1_hat, sigma1_hat, alpha1_hat)
y_pred_tnb, scores_tnb = tnb_predict(X_test, params0, params1)

# Agrupa métricas num DataFrame e salva em CSV
results = []
results.append({
    "model": "GaussianNB",
    "accuracy": accuracy_score(y_test, y_pred_nb),
    "f1":       f1_score(y_test, y_pred_nb),
    "auc":      roc_auc_score(y_test, proba_nb)
})
results.append({
    "model": "HMM",
    "accuracy": accuracy_score(y_test, y_pred_hmm),
    "f1":       f1_score(y_test, y_pred_hmm),
    "auc":      roc_auc_score(y_test, proba_hmm)
})
results.append({
    "model": "OneClassSVM",
    "accuracy": accuracy_score(y_test, y_pred_svm),
    "f1":       f1_score(y_test, y_pred_svm),
    "auc":      roc_auc_score(y_test, proba_svm)
})
results.append({
    "model": "TemporalNB",
    "accuracy": accuracy_score(y_test, y_pred_tnb),
    "f1":       f1_score(y_test, y_pred_tnb),
    "auc":      roc_auc_score(y_test, scores_tnb)
})

df = pd.DataFrame(results, columns=["model", "accuracy", "f1", "auc"])
df.to_csv("ideal_benchmark_results.csv", index=False)

print("Benchmark results saved to benchmark_results.csv")
print(df)



Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Even though the 'covars_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'c'
Even though the 'means_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'm'
Even though the 'covars_' attribute is set, it will be overwritten during initialization because 'init_params' contains 'c'


Benchmark results saved to benchmark_results.csv
         model  accuracy   f1  auc
0   GaussianNB       1.0  1.0  1.0
1          HMM       1.0  1.0  1.0
2  OneClassSVM       1.0  1.0  1.0
3   TemporalNB       1.0  1.0  1.0
