In [5]:
import numpy as np
from hmmlearn import hmm
from sklearn_crfsuite import CRF
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# Toy dataset for sequence tagging
X = [['walk', 'in', 'the', 'park'],
 ['eat', 'apple'],
 ['eat', 'apple', 'in', 'the', 'morning']]
y = [['V', 'P', 'D', 'N'],
 ['V', 'N'],
 ['V', 'N', 'P', 'D', 'N']]
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Hidden Markov Model (HMM)
hmm_model = hmm.MultinomialHMM(n_components=3) # Number of states
hmm_model.fit(np.concatenate(X_train), [len(seq) for seq in X_train], [item for sublist in y_train for item in sublist])
# Conditional Random Fields (CRF)
crf_model = CRF()
crf_model.fit(X_train, y_train)
# Evaluation
print("HMM Results:")
hmm_pred = hmm_model.predict(np.concatenate(X_test), [len(seq) for seq in X_test])
print(classification_report([item for sublist in y_test for item in sublist], [item for sublist in
hmm_pred for item in sublist]))
print("\nCRF Results:")
crf_pred = crf_model.predict(X_test)
print(classification_report([item for sublist in y_test for item in sublist], [item for sublist in crf_pred for item in sublist]))

MultinomialHMM has undergone major changes. The previous version was implementing a CategoricalHMM (a special case of MultinomialHMM). This new implementation follows the standard definition for a Multinomial distribution (e.g. as in https://en.wikipedia.org/wiki/Multinomial_distribution). See these issues for details:
https://github.com/hmmlearn/hmmlearn/issues/335
https://github.com/hmmlearn/hmmlearn/issues/340


TypeError: _AbstractHMM.fit() takes from 2 to 3 positional arguments but 4 were given

In [2]:

pip install hmmlearn

Collecting hmmlearn
  Downloading hmmlearn-0.3.3-cp311-cp311-win_amd64.whl.metadata (3.1 kB)
Downloading hmmlearn-0.3.3-cp311-cp311-win_amd64.whl (127 kB)
Installing collected packages: hmmlearn
Successfully installed hmmlearn-0.3.3
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
pip install sklearn-crfsuite

Note: you may need to restart the kernel to use updated packages.Collecting sklearn-crfsuite
  Downloading sklearn_crfsuite-0.5.0-py2.py3-none-any.whl.metadata (4.9 kB)
Collecting python-crfsuite>=0.9.7 (from sklearn-crfsuite)
  Downloading python_crfsuite-0.9.11-cp311-cp311-win_amd64.whl.metadata (4.4 kB)
Collecting tabulate>=0.4.2 (from sklearn-crfsuite)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading sklearn_crfsuite-0.5.0-py2.py3-none-any.whl (10 kB)
Downloading python_crfsuite-0.9.11-cp311-cp311-win_amd64.whl (301 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate, python-crfsuite, sklearn-crfsuite
Successfully installed python-crfsuite-0.9.11 sklearn-crfsuite-0.5.0 tabulate-0.9.0




[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import numpy as np
from hmmlearn import hmm
from sklearn_crfsuite import CRF
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

# Data
x = [['walk', 'in', 'the', 'park'], ['eat', 'apple'], ['eat', 'apple', 'in', 'the', 'morning']]
y = [['V', 'P', 'D', 'N'], ['V', 'N'], ['V', 'N', 'P', 'D', 'N']]

# Encode for HMM
we, le = LabelEncoder(), LabelEncoder()
x_flat, y_flat = [w for s in x for w in s], [t for s in y for t in s]
x_enc, y_enc = [we.fit_transform(x_flat)], [le.fit_transform(y_flat)]

# Split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
xh_train = np.concatenate([we.transform(s) for s in x_train]).reshape(-1, 1)
lengths = [len(s) for s in x_train]

# HMM
model_hmm = hmm.MultinomialHMM(n_components=3, n_iter=100)
model_hmm.fit(xh_train, lengths)
xh_test = np.concatenate([we.transform(s) for s in x_test]).reshape(-1, 1)
pred_hmm = model_hmm.predict(xh_test)

# Flatten true/pred
y_test_flat = le.transform([t for s in y_test for t in s])

y_test_labels = le.inverse_transform(y_test_flat)
pred_labels = le.inverse_transform(pred_hmm)

# Print the classification report with label names
print("HMM:\n", classification_report(y_test_labels, pred_labels, labels=le.classes_))






MultinomialHMM has undergone major changes. The previous version was implementing a CategoricalHMM (a special case of MultinomialHMM). This new implementation follows the standard definition for a Multinomial distribution (e.g. as in https://en.wikipedia.org/wiki/Multinomial_distribution). See these issues for details:
https://github.com/hmmlearn/hmmlearn/issues/335
https://github.com/hmmlearn/hmmlearn/issues/340
Fitting a model with 8 free scalar parameters with only 7 data points will result in a degenerate solution.


HMM:
               precision    recall  f1-score   support

           D       0.00      0.00      0.00         1
           N       0.50      1.00      0.67         1
           P       0.00      0.00      0.00         1
           V       0.00      0.00      0.00         1

    accuracy                           0.25         4
   macro avg       0.12      0.25      0.17         4
weighted avg       0.12      0.25      0.17         4



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [3]:
# CRF
def feats(sent): 
    return [{'word': w} for w in sent]

xf_train = [feats(s) for s in x_train]
xf_test = [feats(s) for s in x_test]
crf = CRF()
crf.fit(xf_train, y_train)
pred_crf = crf.predict(xf_test)

print("CRF:\n", classification_report(
    [t for s in y_test for t in s],
    [t for s in pred_crf for t in s]
))

CRF:
               precision    recall  f1-score   support

           D       1.00      1.00      1.00         1
           N       0.50      1.00      0.67         1
           P       1.00      1.00      1.00         1
           V       0.00      0.00      0.00         1

    accuracy                           0.75         4
   macro avg       0.62      0.75      0.67         4
weighted avg       0.62      0.75      0.67         4



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
