## Results of classification

In [1]:
# Confusion-matrix counts of the evaluated classifier
TN = 0    # True Negatives
FP = 167  # False Positives
FN = 0    # False Negatives
TP = 433  # True Positives


def classification_metrics(tp, tn, fp, fn):
    """Return ``(accuracy, precision, recall, f1)`` for confusion-matrix counts.

    Every ratio whose denominator is zero is reported as 0 instead of raising
    ``ZeroDivisionError``; this covers accuracy as well, so an empty confusion
    matrix yields all zeros.
    """
    def safe_ratio(numerator, denominator):
        # Shared guard so all four metrics treat an empty denominator alike.
        return numerator / denominator if denominator != 0 else 0

    acc = safe_ratio(tp + tn, tp + tn + fp + fn)
    prec = safe_ratio(tp, tp + fp)
    rec = safe_ratio(tp, tp + fn)
    # Harmonic mean of precision and recall.
    f1 = safe_ratio(2 * (prec * rec), prec + rec)
    return acc, prec, rec, f1


accuracy, precision, recall, f1_score = classification_metrics(TP, TN, FP, FN)

# Report the metrics rounded to two decimals
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1_score:.2f}")


Accuracy: 0.72
Precision: 0.72
Recall: 1.00
F1-Score: 0.84


In [2]:
# Mean of each classification metric over the twelve recorded runs


def _mean(scores):
    """Arithmetic mean of a non-empty sequence of numbers."""
    return sum(scores) / len(scores)


# Per-run scores, one list per metric (same run order in every list)
data = {
    "Accuracy": [
        0.72, 0.72, 0.88, 0.72, 1.00, 0.72,
        1.00, 0.92, 1.00, 1.00, 1.00, 1.00,
    ],
    "Precision": [
        0.72, 0.72, 0.86, 0.72, 1.00, 0.72,
        1.00, 0.91, 1.00, 1.00, 1.00, 1.00,
    ],
    "Recall": [
        1.00, 1.00, 1.00, 1.00, 1.00, 0.99,
        1.00, 1.00, 1.00, 1.00, 1.00, 1.00,
    ],
    "F1-Score": [
        0.84, 0.84, 0.93, 0.84, 1.00, 0.84,
        1.00, 0.95, 1.00, 1.00, 1.00, 1.00,
    ],
}

# Metric name -> average score; left as the last expression so the cell displays it
averages = {name: _mean(scores) for name, scores in data.items()}
averages


{'Accuracy': 0.89,
 'Precision': 0.8874999999999998,
 'Recall': 0.9991666666666666,
 'F1-Score': 0.9366666666666665}

## Analysis of classification results

In [5]:
import os
import numpy as np
import matplotlib.pyplot as plt
from src.utils import save_figure

In [14]:
# Directories of the two simulated datasets being compared
dataset1 = 'simulated_data/DS_20_10_10'
dataset2 = 'simulated_data/DS_20_80_10'

# Each directory stores its samples in "X.npy"
X1 = np.load(os.path.join(dataset1, "X.npy"))
X2 = np.load(os.path.join(dataset2, "X.npy"))

# Overlay the same sample index (2005) from both datasets; the title below
# describes it as a sample with activity.
sample_idx = 2005
plt.figure(figsize=(12, 6))
for samples, trace_label in (
    (X1, 'White SNR = 20 and ME SNR = 10'),
    (X2, 'White SNR = 20 and ME SNR = 80'),
):
    plt.plot(samples[sample_idx], label=trace_label, alpha=0.7)

plt.title("Comparison of sample with activity from two datasets")
plt.xlabel("Time Index")
plt.ylabel("Voltage (uV)")
# Fixed axis ranges: first 1000 time indices, +/-1000 on the voltage axis
plt.xlim(0, 1000)
plt.ylim(-1000, 1000)
plt.legend(loc='upper right')
plt.grid(True)
save_figure(name='Classification_act', figdir='./plots', width=3, height=2)


In [13]:
# Overlay the same sample index (5) from both datasets; the title below
# describes it as a sample without activity.
sample_idx = 5
plt.figure(figsize=(12, 6))
for samples, trace_label in (
    (X1, 'White SNR = 20 and ME SNR = 10'),
    (X2, 'White SNR = 20 and ME SNR = 80'),
):
    plt.plot(samples[sample_idx], label=trace_label, alpha=0.7)

plt.title("Comparison of sample without activity from two datasets")
plt.xlabel("Time Index")
plt.ylabel("Voltage (uV)")
# Fixed axis ranges: first 1000 time indices, +/-1000 on the voltage axis
plt.xlim(0, 1000)
plt.ylim(-1000, 1000)
plt.legend(loc='upper right')
plt.grid(True)
save_figure(name='Classification_no_act', figdir='./plots', width=3, height=2)

In [12]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import r2_score, root_mean_squared_error, accuracy_score, mean_absolute_percentage_error, recall_score, precision_score, f1_score
from sklearn.model_selection import train_test_split

# Dataset the classifier is trained and evaluated on
dataset1 = 'simulated_data/DS_20_10_10'

# Seed the forest with the same value as the splits below: an unseeded
# RandomForestClassifier is refit differently on every run, so the reported
# accuracies, probabilities and feature importances would not be reproducible.
classifier = RandomForestClassifier(random_state=42)
X = np.load(os.path.join(dataset1, "X.npy"))
y_reg = np.load(os.path.join(dataset1, "y_reg.npy"))
# Binarise the regression target: values below 5 become class 0, all others class 1
y_class = [0 if value < 5 else 1 for value in y_reg]

# 70 % train, then the remaining 30 % is split evenly into validation and test
X_train, X_temp, y_class_train, y_class_temp, y_reg_train, y_reg_temp = train_test_split(
    X, y_class, y_reg, test_size=0.3, random_state=42)
X_val, X_test, y_class_val, y_class_test, y_reg_val, y_reg_test = train_test_split(
    X_temp, y_class_temp, y_reg_temp, test_size=0.5, random_state=42)

classifier.fit(X_train, np.array(y_class_train))

# Predictions on every split (only train and test accuracy are reported below)
y_class_train_pred = classifier.predict(X_train)
y_class_val_pred = classifier.predict(X_val)
y_class_test_pred = classifier.predict(X_test)

accuracy_train = accuracy_score(y_class_train, y_class_train_pred)
accuracy_test = accuracy_score(y_class_test, y_class_test_pred)

print("Train and test Classifier Accuracy:", accuracy_train, accuracy_test)




Train and test Classifier Accuracy: 1.0 1.0


In [9]:
# predict_proba returns one column per class; keep only the second column
# (index 1), reported below as the class-1 probability.
test_probabilities = classifier.predict_proba(X_test)
y_class_prob = test_probabilities[:, 1]

# Show the first ten values as a quick check that probabilities are produced
first_ten = y_class_prob[:10]
print(f"Predicted probabilities for class 1: {first_ten}")


Predicted probabilities for class 1: [0.84 0.96 0.75 0.85 0.97 0.43 0.72 0.8  0.78 0.77]


In [18]:
# Per-feature importance of the fitted classifier (one value per input column)
feature_importances = classifier.feature_importances_

# NOTE(review): the title and file name below refer to ME SNR = 80 / "DS_80_10",
# while the classifier fit shown in this notebook loads
# 'simulated_data/DS_20_10_10' — confirm which dataset this figure represents.
line_style = dict(label="Feature Importance", color='magenta')

plt.figure(figsize=(12, 6))
plt.plot(feature_importances, **line_style)
plt.title("DS: White SNR = 20 ME SNR = 80 - Feature Importance")
plt.xlabel("Time Point Index")
plt.ylabel("Importance")
plt.legend()
save_figure(name='Classificaton_FI_DS_80_10', figdir='./plots', width=3, height=2)


In [29]:
# Overlay the feature importances on sample 5 of X1. The importances are
# multiplied by a fixed factor so both curves are visible on one axis, which
# means the y-axis values are not raw importances.
importance_scale = 15000
scaled_importances = feature_importances * importance_scale

plt.figure(figsize=(12, 6))
plt.plot(scaled_importances, label="Feature Importance", color='magenta', alpha=1)
plt.title("Comparison of feature importance and sample without activity")
plt.xlabel("Time Point Index")
plt.ylabel("Importance")
plt.plot(X1[5], label='DS: White SNR = 20 and ME SNR = 10', alpha=0.5)
plt.legend()
save_figure(name='FI_overlay', figdir='./plots', height=4, width=6)

## Plots of simulated data

In [9]:
# Directories of the twelve simulated datasets, listed in pairs that share the
# first number of the directory name and differ in the second (10 vs 80).
# NOTE: this rebinds dataset1/dataset2 and X1/X2 used by earlier cells.
dataset1 = "simulated_data/DS_-10_10_10"
dataset2 = "simulated_data/DS_-10_80_10"

dataset3 = "simulated_data/DS_0_10_10"
dataset4 = "simulated_data/DS_0_80_10"

dataset5 = "simulated_data/DS_10_10_10"
dataset6 = "simulated_data/DS_10_80_10"

dataset7 = "simulated_data/DS_20_10_10"
dataset8 = "simulated_data/DS_20_80_10"

dataset9 = "simulated_data/DS_50_10_10"
dataset10 = "simulated_data/DS_50_80_10"

dataset11 = "simulated_data/DS_80_10_10"
dataset12 = "simulated_data/DS_80_80_10"


def _load_samples(dataset_dir):
    """Load the "X.npy" array stored inside ``dataset_dir``."""
    return np.load(os.path.join(dataset_dir, "X.npy"))


# One sample array per dataset, numbered like the directories above
X1 = _load_samples(dataset1)
X2 = _load_samples(dataset2)
X3 = _load_samples(dataset3)
X4 = _load_samples(dataset4)
X5 = _load_samples(dataset5)
X6 = _load_samples(dataset6)
X7 = _load_samples(dataset7)
X8 = _load_samples(dataset8)
X9 = _load_samples(dataset9)
X10 = _load_samples(dataset10)
X11 = _load_samples(dataset11)
X12 = _load_samples(dataset12)

In [22]:
# 3x2 grid of panels; each panel overlays sample 2005 from a pair of datasets
fig, axs = plt.subplots(3, 2, figsize=(12, 9), constrained_layout=True)

# One entry per panel in row-major order:
# (data labelled ME SNR = 10, data labelled ME SNR = 80, white SNR shown in the legend)
panel_specs = [
    (X1, X2, -10),
    (X3, X4, 0),
    (X5, X6, 10),
    (X7, X8, 20),
    (X9, X10, 50),
    (X11, X12, 80),
]
sample_idx = 2005

for panel_idx, (ax, (me10_data, me80_data, white_snr)) in enumerate(zip(axs.flat, panel_specs)):
    ax.plot(me10_data[sample_idx], label=f'White SNR = {white_snr} and ME SNR = 10', alpha=0.7)
    ax.plot(me80_data[sample_idx], label=f'White SNR = {white_snr} and ME SNR = 80', alpha=0.7)
    ax.set_xlabel("Time Index")
    ax.set_ylabel("Voltage (uV)")
    if panel_idx == 0:
        # NOTE(review): only the first panel gets an explicit x-range; the
        # other panels keep matplotlib's automatic limits. Confirm this
        # asymmetry is intended.
        ax.set_xlim(0, 2700)
    ax.set_ylim(-5000, 5000)
    ax.legend(loc='upper right')
    ax.grid(True)

plt.suptitle("Comparison of Simulated Datasets with varying White and ME SNRs")

# Hand the finished figure to the project's save_figure helper
save_figure(name='sim_data_plot', figdir='./plots', width=6, height=5)
