In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine, load_breast_cancer

wine1 = load_wine()
i = wine1.feature_names
wine_Y = wine1.target
wine = pd.DataFrame(data=wine1.data, columns=wine1.feature_names)

breast_cancer = load_breast_cancer()
b = breast_cancer.feature_names
breast_cancer_Y = breast_cancer.target
breast_cancer = pd.DataFrame(data=breast_cancer.data, columns=breast_cancer.feature_names)

ionosphere=pd.read_csv('https://raw.githubusercontent.com/hargurjeet/MachineLearning/Ionosphere/ionosphere_data.csv')
ionosphere.rename(columns={'column_ai':'target'}, inplace=True)
# print(ionosphere)

wine_X = wine[i]
breast_cancer_X = breast_cancer[b]
ionosphere_X = ionosphere.drop(['target'], axis=1)
ionosphere_Y = ionosphere.target

In [3]:
print(wine_X.shape)
print(np.array(wine_X).ravel().shape)
print(wine_Y.shape)

(178, 13)
(2314,)
(178,)


## Hyperparameter tuning for Wine dataset

In [4]:
from hmmlearn.hmm import GMMHMM
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from itertools import product
import numpy as np

state_range_values = [3]
algorithm_values = ['viterbi', 'map']  # Choose the appropriate algorithm
random_state_values = [None]  # Set to None or an integer value for reproducibility
n_iter_values = [150, 200, 300, 500]  # Adjust the number of iterations as needed
tol_values = [1e-3, 1e-4, 1e-5, 1e-6]  # Tolerance threshold for convergence
covariance_type_values = ['spherical']  # Adjust covariance types
init_params_values = ['s', 'm', 'sm']      # Vary initialization parameters
implementation_values = ['log','scaling'] #Choose the forward-backward algorithm
test_size_values = [0.1,0.2,0.3,0.4]


best_model = None
best_score = float('-inf')
best_train_test_ratio = 0.0
best_param_list = []
best_pred_y = 0
best_test_y = 0

for num_of_states,algo_type,random_state,n_iter,covariance_type,tol,test_size,init_params,implementation in product(state_range_values,algorithm_values, random_state_values, n_iter_values, covariance_type_values,tol_values,test_size_values, init_params_values,implementation_values):

    train_X, test_X, train_y, test_y = train_test_split(wine_X,wine_Y,random_state=None,test_size=test_size)
    params = set(np.array(wine_X).ravel())
    model = GMMHMM(n_components=num_of_states,algorithm=algo_type,random_state=random_state,n_iter=n_iter, covariance_type=covariance_type, tol=tol,params=params,implementation=implementation)
    model.fit(train_X)
    pred_y = model.predict(test_y)
    score = accuracy_score(test_y,pred_y)

    if score > best_score:
        best_score = score
        best_train_test_ratio = test_size
        best_model = model
        best_param_list = [num_of_states,algo_type,random_state,n_iter,covariance_type,tol]
        best_pred_y = pred_y
        best_test_y = test_y

KeyError: 0.25

In [5]:
cm = confusion_matrix(best_test_y, best_pred_y)
print("Best Parameter Values: \n")
print(best_param_list)
print("\n")
print("Best Ratio:" + str(best_train_test_ratio))
print("\n")
print(classification_report(best_test_y,best_pred_y))
print("Confusion Matrix: \n")
print(cm)
print("\n")

fig, ax = plt.subplots(figsize=(8, 6))
cm_display = (ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0,1,2]))
cm_display.plot(ax=ax, cmap='pink_r', values_format='d')
  # Add a title and labels
ax.set_title('Confusion Matrix for best model in wine dataset classification using GMMHMM')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')

plt.tight_layout()
  # Show the Confusion Matrix
plt.show()

TypeError: Expected sequence or array-like, got <class 'int'>

## Hyperparameter tuning for Breast Cancer dataset

In [None]:
state_range_values = [2]
algorithm_values = ['viterbi', 'map']  # Choose the appropriate algorithm
random_state_values = [None]  # Set to None or an integer value for reproducibility
n_iter_values = [150, 200, 300, 500]  # Adjust the number of iterations as needed
tol_values = [1e-3, 1e-4, 1e-5, 1e-6]  # Tolerance threshold for convergence
covariance_type_values = ['spherical']  # Adjust covariance types
init_params_values = ['s', 'm', 'sm']      # Vary initialization parameters
implementation_values = ['log','scaling'] #Choose the forward-backward algorithm
test_size_values = [0.1,0.2,0.3,0.4]


best_model = None
best_score = float('-inf')
best_train_test_ratio = 0.0
best_param_list = []
best_pred_y = 0
best_test_y = 0

for num_of_states,algo_type,random_state,n_iter,covariance_type,tol,test_size,init_params,implementation in product(state_range_values,algorithm_values, random_state_values, n_iter_values, covariance_type_values,tol_values,test_size_values, init_params_values,implementation_values):

    train_X, test_X, train_y, test_y = train_test_split(wine_X,wine_Y,random_state=None,test_size=test_size)
    params = set(np.array(train_X).ravel())
    model = GMMHMM(n_components=num_of_states,algorithm=algo_type,random_state=random_state,n_iter=n_iter, covariance_type=covariance_type, tol=tol,params=params,implementation=implementation)
    model.fit(train_X)
    pred_y = model.predict(test_y)
    score = accuracy_score(test_y,pred_y)

    if score > best_score:
        best_score = score
        best_train_test_ratio = test_size
        best_model = model
        best_param_list = [num_of_states,algo_type,random_state,n_iter,covariance_type,tol]
        best_pred_y = pred_y
        best_test_y = test_y