# MSMDF Evaluation Notebook

In [1]:
from FingerprintExtractor import FingerprintExtractor, FingerprintConfig, FingerprintSetting, FingerprintSensor, FingerprintDataStream, FingerprintFeature
from ClassifierTrainer import ClassifierTrainer, EvaluationConfig, Classifiers, TrainingMethod, EvaluationMethod

In [None]:
# --- Fingerprint extraction: configure -> extract -> summarize -> save ---
#
# Dataset layout expected under RAW_DATA_DIR. Sadly the data cannot be shipped
# with this notebook because it is not mine to share, so provide your own
# dataset organized like this:
#   1. One directory per experimental setting (e.g. 'Setting 1', 'Setting 2', ...).
#   2. Inside each setting, one folder per recording.
#      - Each folder name includes the device name and a recording identifier
#        (e.g. 'Device 1 - 1', 'Device 1 - 2', ...).
RAW_DATA_DIR = "../../../Data/raw data/separated by setting"

# Where the extracted fingerprints are written at the end of this cell.
FINGERPRINT_OUTPUT_DIR = '../../Fingerprints'

# NOTE(review): 5.0 * 100 = 500, which matches the "required 500" samples in the
# extractor's log output - presumably seconds and Hz; confirm in FingerprintConfig.
FINGERPRINT_LENGTH = 5.0
SAMPLING_RATE = 100

# Nothing is filtered out: each enabled_* argument receives the full set obtained
# by iterating the corresponding class.
fingerprint_config = FingerprintConfig(
    data_location=RAW_DATA_DIR,
    fingerprint_length=FINGERPRINT_LENGTH,
    sampling_rate=SAMPLING_RATE,
    enabled_settings=set(FingerprintSetting),
    enabled_sensors=set(FingerprintSensor),
    enabled_streams=set(FingerprintDataStream),
    enabled_features=set(FingerprintFeature),
)

# Build the extractor and run the extraction.
extractor = FingerprintExtractor(fingerprint_config, log_level='INFO')
fingerprints = extractor.extract_fingerprints()

# The summary call returns the device and fingerprint totals as a pair.
num_total_devices, num_total_fingerprints = extractor.print_extraction_summary(fingerprints)
print("Fingerprint extraction complete.")

# Save the extracted fingerprints in pickle format.
extractor.save_extracted_fingerprints(
    fingerprints,
    output_dir=FINGERPRINT_OUTPUT_DIR,
    format='pickle',
)

Extracting fingerprints for setting 'on hand'...




Extracting fingerprints for setting 'on desk'...




Extracting fingerprints for setting 'on desk audio'...
Extracting fingerprints for setting 'on hand audio'...
Extracting fingerprints for setting 'walking'...


INFO - Insufficient samples for full fingerprint extraction due to walking recording. Using available data length 460 instead of required 500. [FingerprintExtractor.py:294]



Extraction Summary:
Total number of devices: 2
Total fingerprints extracted: 177

Setting: on hand
  Total fingerprints in setting: 36
  Number of devices in setting: 2
  All devices have the same number of fingerprints: 18
  All devices have the same fingerprint shape: (936,)
Setting: on desk
  Total fingerprints in setting: 36
  Number of devices in setting: 2
  All devices have the same number of fingerprints: 18
  All devices have the same fingerprint shape: (936,)
Setting: on desk audio
  Total fingerprints in setting: 36
  Number of devices in setting: 2
  All devices have the same number of fingerprints: 18
  All devices have the same fingerprint shape: (936,)
Setting: on hand audio
  Total fingerprints in setting: 35
  Number of devices in setting: 2
  Devices have different numbers of fingerprints:
    Device '2109119DG_cf845c21-9cc0-4562-8df4-bd63fc9c7a03': 17 fingerprints
    Device '22101316UG_ac0b3c9f-d699-446c-9f1b-bee63bd76cb3': 18 fingerprints
  All devices have the sa

In [8]:
# --- Classifier training and evaluation: configure -> split -> train -> evaluate ---

# NOTE(review): the recorded run of this notebook only contains 2 devices, so 100
# presumably acts as an upper bound - confirm against EvaluationConfig.
NUM_DEVICES = 100
TRAINING_SET_RATIO = 0.8
# NOTE(review): with 1.0 the recorded log reports "Known devices: 2, Unknown devices: 0";
# presumably this is the share of devices treated as known - verify.
KNOWN_UNKNOWN_RATIO = 1.0
CV_FOLDS = 5
RANDOM_STATE = 42

# Every member obtained by iterating Classifiers is enabled.
# NOTE(review): in the recorded output xgboost failed to train because `y` held
# device-name strings instead of [0 1]; the fix presumably belongs in
# ClassifierTrainer - verify.
classification_config = EvaluationConfig(
    num_devices=NUM_DEVICES,
    training_set_ratio=TRAINING_SET_RATIO,
    known_unknown_ratio=KNOWN_UNKNOWN_RATIO,
    cv_folds=CV_FOLDS,
    random_state=RANDOM_STATE,
    classifiers=set(Classifiers),
)

trainer = ClassifierTrainer(classification_config, log_level='INFO')

# Turn the extracted fingerprints into train/test splits plus the label encoding.
X_train, X_test, y_train, y_test, label_encoding = trainer.load_and_preprocess_data(data=fingerprints)

# Fit the classifiers on the training split using the classic method.
trained_classifiers = trainer.train_classifiers(
    X_train,
    y_train,
    method=TrainingMethod.CLASSIC,
)

# Evaluate on the test split using the classic method.
evaluation_results = trainer.evaluate_classifiers(
    X_test,
    y_test,
    method=EvaluationMethod.CLASSIC,
)

# Print the evaluation summary.
trainer.print_evaluation_summary()

INFO - Training set shape: (141, 936) [ClassifierTrainer.py:360]
INFO - Test set shape: (36, 936) [ClassifierTrainer.py:361]
INFO - Number of classes: 2 [ClassifierTrainer.py:362]
INFO - Known devices: 2, Unknown devices: 0
 [ClassifierTrainer.py:363]
INFO - Training classifiers using 'classic' method... [ClassifierTrainer.py:384]
INFO - Training stochastic_gradient_descent ... [ClassifierTrainer.py:400]
INFO - Trained stochastic_gradient_descent successfully. [ClassifierTrainer.py:439]
INFO - Training k_nearest_neighbors ... [ClassifierTrainer.py:400]
INFO - Trained k_nearest_neighbors successfully. [ClassifierTrainer.py:439]
INFO - Training gaussian_naive_bayes ... [ClassifierTrainer.py:400]
INFO - Trained gaussian_naive_bayes successfully. [ClassifierTrainer.py:439]
INFO - Training xgboost ... [ClassifierTrainer.py:400]
ERROR - Failed to train xgboost: Invalid classes inferred from unique values of `y`.  Expected: [0 1], got ['2109119DG_cf845c21-9cc0-4562-8df4-bd63fc9c7a03'
 '221013


Evaluation Summary:
Number of Classifiers: 11
Number of Classes: 2

Classifier: stochastic_gradient_descent
--------------------------------------------------
Overall Metrics:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000
Per-Class Metrics:
  Class 2109119DG_cf845c21-9cc0-4562-8df4-bd63fc9c7a03:
    Precision: 1.0000
    Recall: 1.0000
    F1 Score: 1.0000
    Support: 18.0
  Class 22101316UG_ac0b3c9f-d699-446c-9f1b-bee63bd76cb3:
    Precision: 1.0000
    Recall: 1.0000
    F1 Score: 1.0000
    Support: 18.0
Confusion Matrix:
           Predicted 0 Predicted 1
  Actual 0         18        0
Actual 1          0       18
--------------------------------------------------

Classifier: k_nearest_neighbors
--------------------------------------------------
Overall Metrics:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000
Per-Class Metrics:
  Class 2109119DG_cf845c21-9cc0-4562-8df4-bd63fc9c7a03:
    Precision: 1.0000
    Recall: 1.0000
