# 04 - GMM Baseline Comparison

Compare Gaussian Mixture Model (GMM) clustering with Hidden Markov Model (HMM) regime detection.

In [None]:
import sys
# Prepend the parent directory to the module search path.
# NOTE(review): presumably this lets the `src.*` imports below resolve when
# the notebook is run from a directory one level below the project root —
# confirm against the repository layout.
sys.path.insert(0, '..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.models.gmm_baseline import train_gmm_model
from src.evaluation.compare_models import compare_models

# Use seaborn's 'whitegrid' style for plots.
sns.set_style('whitegrid')
# IPython magic: display matplotlib figures inline in the notebook output.
%matplotlib inline

## Train GMM

In [None]:
# Load the scaled feature table and convert its 'date' column to datetimes.
SCALED_FEATURES_CSV = '../data/processed/features_scaled.csv'
df_scaled = pd.read_csv(SCALED_FEATURES_CSV)
df_scaled = df_scaled.assign(date=pd.to_datetime(df_scaled['date']))

# Run the project's GMM training helper with n_components=4 and unpack the
# two values it returns.
gmm_result = train_gmm_model(df_scaled, n_components=4)
gmm_model, gmm_labels = gmm_result

## Compare with HMM

In [None]:
# Load the cleaned BTC table and convert its 'date' column to datetimes.
BTC_CLEAN_CSV = '../data/processed/btc_clean.csv'
df_clean = pd.read_csv(BTC_CLEAN_CSV)
df_clean = df_clean.assign(date=pd.to_datetime(df_clean['date']))

# Read the label tables stored on disk for each model.
# NOTE(review): the GMM labels are re-read from disk rather than taken from
# the `gmm_labels` variable of the training cell — presumably
# `train_gmm_model` writes this file; verify.
hmm_labels_df = pd.read_csv('../results/tables/hmm_labels.csv')
gmm_labels_df = pd.read_csv('../results/tables/gmm_labels.csv')

# NOTE(review): the two label arrays are passed positionally with no date
# join; this assumes both files (and `df_clean`) are row-aligned — confirm.
hmm_states = hmm_labels_df['state'].values
gmm_clusters = gmm_labels_df['cluster'].values
comparison = compare_models(hmm_states, gmm_clusters, df_clean)

## Results Analysis

In [None]:
# Print the metrics stored in `comparison`, one per line, under a header.
print('Model Comparison Results:')

# Each row: (line prefix, key in `comparison`, format spec, line suffix).
# A leading '\n' in the prefix inserts a blank line before that metric.
_report_rows = [
    ('Adjusted Rand Index', 'adjusted_rand_index', '.4f', ''),
    ('Normalized Mutual Info', 'normalized_mutual_info', '.4f', ''),
    ('\nHMM Transitions', 'hmm_transitions', '', ''),
    ('GMM Transitions', 'gmm_transitions', '', ''),
    ('\nHMM Avg Duration', 'hmm_avg_duration', '.1f', ' days'),
    ('GMM Avg Duration', 'gmm_avg_duration', '.1f', ' days'),
]
for _prefix, _key, _spec, _suffix in _report_rows:
    print(f'{_prefix}: {comparison[_key]:{_spec}}{_suffix}')