In [None]:
import json
import os
import pickle
import subprocess
import time

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as scipy
from scipy import stats
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, balanced_accuracy_score, make_scorer
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.svm import SVC

In [None]:
def run_bash_command(command):
    """Run ``command`` through the shell and print its captured output.

    stderr is merged into stdout. On success the text is printed with an
    ``Output:`` prefix; when the command exits with a non-zero status the
    captured text is printed with an ``Error:`` prefix instead. The function
    never raises ``CalledProcessError``.

    Args:
        command: Command line passed to the shell (``shell=True``), so it must
            come from a trusted source.

    Returns:
        Always ``None``; the output is only printed.
    """
    try:
        output = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        # errors='replace' keeps non-UTF-8 bytes from raising UnicodeDecodeError
        # while reporting the failure.
        print("Error:", e.output.decode('utf-8', errors='replace'))
        return None

    # Same tolerant decoding on the success path: arbitrary commands are not
    # guaranteed to emit valid UTF-8.
    print("Output:", output.decode('utf-8', errors='replace'))
    return None

In [None]:
class EvaluateSVM(object):
  """Load features, fit or load an SVM, and save its train/test metrics.

  The full pipeline is run by :meth:`evaluate_model`: features and labels are
  read from ``SOURCEPATH/data/gmm/<dataset>``, split once into a stratified
  80/20 train/test partition, a saved model is loaded from
  ``SOURCEPATH/models/single_dataset/<dataset>/svm`` (or trained and saved
  there when loading fails), and metrics, confusion matrices and the split
  arrays are written to ``SOURCEPATH/metrics/single_dataset/<dataset>``.

  ``<dataset>`` is the ``dataset`` argument with every ``'.npy'`` occurrence
  removed.
  """

  # Root directory under which data/, models/ and metrics/ are resolved.
  SOURCEPATH = '..'
  # OFFSET and MAXITER are not referenced by any method of this class; they
  # are kept because external code may read them.
  OFFSET = 1e-64
  MAXITER = 1000
  # Seed used for the stratified train/test split.
  RANDOMSTATE = 42

  # Hyper-parameters forwarded to sklearn.svm.SVC in train_model().
  KERNEL = 'poly'
  GAMMA = 10
  DEGREE = 3
  COEF0 = 10
  C = 1


  def __init__(self, method, dataset):
    """Store the configuration; no data is read until a load method is called.

    Args:
      method: Value passed as ``average=`` to precision/recall/F1 scoring.
      dataset: Dataset name; any ``'.npy'`` in it is stripped when building
        paths.
    """
    self.features = None
    self.labels = None
    self.train_indices = []
    self.test_indices = []
    # Split arrays are filled by partition_data(); initialised here so the
    # attributes always exist.
    self.X_train = None
    self.X_test = None
    self.y_train = None
    self.y_test = None
    self.model = None
    self.method = method
    self.dataset = dataset

  def _dataset_name(self):
    """Return the dataset name with every ``'.npy'`` occurrence removed."""
    return self.dataset.replace('.npy', '')

  def _model_dir(self):
    """Return the directory where the SVM model for this dataset is stored."""
    return f"{self.SOURCEPATH}/models/single_dataset/{self._dataset_name()}/svm"

  def _metrics_dir(self):
    """Return the directory where metrics for this dataset are written."""
    return f"{self.SOURCEPATH}/metrics/single_dataset/{self._dataset_name()}"

  @staticmethod
  def _ensure_dir(path):
    """Create ``path`` (and parents) if missing, without spawning a shell."""
    os.makedirs(path, exist_ok=True)

  @staticmethod
  def _show_confusion_matrix(cm, title):
    """Display one confusion matrix with the given title."""
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(cmap=plt.cm.Blues)
    plt.title(title)
    plt.show()

  def load_features(self):
    """Read ``input_features.npy`` and ``labels.npy`` for this dataset."""
    features_path = f"{self.SOURCEPATH}/data/gmm/{self._dataset_name()}"
    self.features = np.load(f'{features_path}/input_features.npy')
    self.labels = np.load(f'{features_path}/labels.npy')
    print('"get_features" function - features and labels read with success')
    print('\n')

  def partition_data(self):
    """Split features/labels once into stratified 80% train / 20% test sets.

    The index arrays of the split are kept in ``train_indices`` and
    ``test_indices``; the sliced arrays in ``X_train``, ``X_test``,
    ``y_train`` and ``y_test``.
    """
    # Seeded from the class constant so the split is reproducible and can be
    # changed in a single place.
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=self.RANDOMSTATE)

    # n_splits=1, so the first (and only) pair of index arrays is the split.
    train_index, test_index = next(sss.split(self.features, self.labels))
    self.train_indices, self.test_indices = train_index, test_index

    self.X_train, self.X_test = self.features[train_index], self.features[test_index]
    self.y_train, self.y_test = self.labels[train_index], self.labels[test_index]

  def load_model(self):
    """Load the saved model, or train a new one if it cannot be loaded.

    A missing file and any other loading failure both fall back to
    :meth:`train_model`, but they are reported differently so a corrupt or
    incompatible file is not mistaken for a missing one.
    """
    model_path = self._model_dir()
    model_file = f'{model_path}/svm_model.pkl'

    try:
      # NOTE: joblib.load unpickles the file, which can execute arbitrary
      # code; only point SOURCEPATH at trusted model directories.
      self.model = joblib.load(model_file)
    except FileNotFoundError:
      print("Model Not Found!")
    except Exception as exc:
      # Still best-effort (retrain), but say what actually went wrong.
      print(f"Could not load model from {model_file}: {exc!r}")
    else:
      print(f'Model loaded from {model_path}')
      return

    print('Training Model!')
    self.train_model()

  def train_model(self):
    """Fit an SVC on the training split and save it as ``svm_model.pkl``.

    Requires ``X_train`` / ``y_train`` to be set (see :meth:`partition_data`).
    """
    self.model = SVC(
        kernel=self.KERNEL,
        C=self.C,
        gamma=self.GAMMA,
        degree=self.DEGREE,
        coef0=self.COEF0
    )
    self.model.fit(self.X_train, self.y_train)

    # Save the model
    model_path = self._model_dir()
    self._ensure_dir(model_path)

    print(f"Saving model on {model_path}")
    joblib.dump(self.model, f'{model_path}/svm_model.pkl')

  def calc_metrics(self, metrics_features, metrics_labels):
    """Score the current model on the given samples.

    Args:
      metrics_features: Samples to predict.
      metrics_labels: True labels for those samples.

    Returns:
      Tuple ``(metrics_dict, cm)``: a dict with the keys ``Accuracy``,
      ``Precision``, ``Recall``, ``F1-Score`` and ``Balanced Accuracy``, and
      the confusion matrix of true vs. predicted labels.
    """
    # Predict labels for the samples being scored (train or test).
    predictions = self.model.predict(metrics_features)

    # Precision, recall and F1 are averaged according to self.method.
    accuracy = accuracy_score(metrics_labels, predictions)
    precision = precision_score(metrics_labels, predictions, average=self.method)
    recall = recall_score(metrics_labels, predictions, average=self.method)
    f1 = f1_score(metrics_labels, predictions, average=self.method)
    balanced_accuracy = balanced_accuracy_score(metrics_labels, predictions)

    metrics_dict = {"Accuracy":accuracy,
                 "Precision":precision,
                 "Recall":recall,
                 "F1-Score":f1,
                 "Balanced Accuracy":balanced_accuracy
                 }
    # Only computed here; plotting happens in save_metrics().
    cm = confusion_matrix(metrics_labels, predictions)

    return metrics_dict, cm

  def save_metrics(self):
    """Compute, display and save train/test metrics and the split arrays.

    Shows both confusion matrices, prints both metric dicts, then writes the
    confusion matrices and the ``X``/``y`` split arrays as ``.npy`` files and
    the metric dicts as JSON into the dataset's metrics directory.
    """
    train_metrics, train_cm = self.calc_metrics(self.X_train, self.y_train)
    test_metrics, test_cm = self.calc_metrics(self.X_test, self.y_test)

    self._show_confusion_matrix(train_cm, "Confusion Matrix for Training Phase")
    self._show_confusion_matrix(test_cm, "Confusion Matrix for Testing Phase")

    print(train_metrics)

    print(test_metrics)

    metrics_path = self._metrics_dir()
    self._ensure_dir(metrics_path)

    np.save(f"{metrics_path}/svm_train_cm.npy", train_cm)
    np.save(f"{metrics_path}/svm_X_train.npy", self.X_train)
    np.save(f"{metrics_path}/svm_test_cm.npy", test_cm)
    np.save(f"{metrics_path}/svm_X_test.npy", self.X_test)
    np.save(f"{metrics_path}/svm_y_train.npy", self.y_train)
    np.save(f"{metrics_path}/svm_y_test.npy", self.y_test)

    with open(f"{metrics_path}/svm_metrics_train.json", "w") as outfile:
        json.dump(train_metrics, outfile, indent=4)

    with open(f"{metrics_path}/svm_metrics_test.json", "w") as outfile:
        json.dump(test_metrics, outfile, indent=4)

  def evaluate_model(self):
    """Run the whole pipeline: load data, split, load/train model, save metrics."""
    self.load_features()
    self.partition_data()
    self.load_model()
    self.save_metrics()

In [None]:
# Averaging mode handed to precision/recall/F1; also read by the next cell.
method = 'weighted'
# First dataset to evaluate.
dataset = 'window_4000_overlap_1000_hzdr_norm.npy'

# Run the full load -> split -> load/train -> metrics pipeline.
EVAL = EvaluateSVM(method=method, dataset=dataset)
EVAL.evaluate_model()

In [None]:
# Second dataset; `method` is the value assigned in the previous cell.
dataset = 'window_4000_overlap_1000_tud_norm.npy'

# Same pipeline as above, on the second dataset.
EVAL = EvaluateSVM(method=method, dataset=dataset)
EVAL.evaluate_model()