In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from hmmlearn.hmm import GaussianHMM
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Load the dataset
data_path = "../data/preprocessed_gnss_data.csv"
# df = pd.read_csv(data_path, sep=";").head(1500)
df = pd.read_csv(data_path, sep=";")

# Select numeric columns
numeric_columns = ['AGC', 'SNR', 'latitude', 'longitude', 'height', 'num_satellites']
X = df[numeric_columns]
y = df['class']

# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)  # Map 'legitimate' -> 0, 'jammed' -> 1

# Handle missing values
X.fillna(0, inplace=True)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.3, random_state=42, stratify=y_encoded)

# Separate sequences by class for HMM training
train_sequences = {label: [] for label in np.unique(y_train)}

# Group data by class for training HMMs
for label in np.unique(y_train):
    train_sequences[label] = X_train[y_train == label]

# Train a Gaussian HMM for each class
hmm_models = {}
for label in np.unique(y_train):
    model = GaussianHMM(n_components=3, covariance_type="diag", n_iter=100, random_state=42)
    model.fit(train_sequences[label])
    hmm_models[label] = model
    print(f"HMM trained for class {label}: {label_encoder.inverse_transform([label])[0]}")

# Predict the class of test sequences
def predict_hmm(models, X):
    predictions = []
    for sample in X:
        log_likelihoods = {label: models[label].score(sample.reshape(-1, 1)) for label in models}
        predicted_label = max(log_likelihoods, key=log_likelihoods.get)
        predictions.append(predicted_label)
    return np.array(predictions)

# Predict on the test set
y_pred = predict_hmm(hmm_models, X_test)

# Evaluate the model
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.fillna(0, inplace=True)


HMM trained for class 0: jammed


Model is not converging.  Current: 2020.4263088527498 is not greater than 2020.4641495873902. Delta is -0.03784073464044013


HMM trained for class 1: legitimate
Classification Report:
              precision    recall  f1-score   support

      jammed       0.24      1.00      0.39       327
  legitimate       0.00      0.00      0.00      1011

    accuracy                           0.24      1338
   macro avg       0.12      0.50      0.20      1338
weighted avg       0.06      0.24      0.10      1338

Confusion Matrix:
[[ 327    0]
 [1011    0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
