# Random Forest

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import torch
from scipy.signal import iirfilter, filtfilt
#from data_analyzer import DataAnalyzer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import dill
import joblib

"""
Date: 2024-06-20 15:47:56,,,,,,
Calibration: XL_ODR: 6667Hz, XL_FS: 16g, GY_ODR: 6667Hz, GY_FS: 2000dps,,,
Time(s),Acceleration X (g),Acceleration Y (g),Acceleration Z (g),Angular Momentum X (dps),Angular Momentum Y (dps),Angular Momentum Z (dps)
0.000083,0.693359,-0.720215,0.14209,-3.540039,0.366211,-3.051758
0.00098,0.70752,-0.755371,0.086914,1.953125,0.732422,-1.159668
"""

#dill.load_session('rf_env.db')

class TimeSeriesDataset(torch.utils.data.Dataset):
    """Strided sliding-window view over a time series.

    Item ``idx`` is the window made of rows
    ``idx, idx + step, ..., idx + (seq_len - 1) * step`` of ``data``,
    paired with the label found at the position of the window's last row.
    """

    def __init__(self, data, labels, seq_len, step):
        """Store the series and the window geometry.

        Args:
            data: Container supporting ``len()`` and indexing with a list of
                integer positions (e.g. a NumPy array or tensor).
            labels: Container indexable with a single integer position.
            seq_len: Number of rows in each window.
            step: Row stride between consecutive rows of one window.
        """
        self.data = data
        self.labels = labels
        self.seq_len = seq_len
        self.step = step

    def __len__(self):
        """Return the number of complete windows that fit into ``data``."""
        # Clamp at zero: a series shorter than one window holds no samples,
        # and a negative value would make len() raise ValueError.
        return max(0, len(self.data) - (self.seq_len - 1) * self.step)

    def __getitem__(self, idx):
        """Return ``(x_seq, y_val)`` for window ``idx``.

        Negative indices count from the end, as for built-in sequences.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        length = len(self)
        original_idx = idx
        if idx < 0:
            # Resolve the negative index before building the window;
            # otherwise the window would mix rows from both ends of the
            # series.
            idx += length
        if not 0 <= idx < length:
            raise IndexError(
                f"index {original_idx} is out of range for dataset of length {length}"
            )

        indices = [idx + i * self.step for i in range(self.seq_len)]
        x_seq = self.data[indices]
        y_val = self.labels[indices[-1]]
        return x_seq, y_val

In [None]:
# Load the training recording (the column header is on the third line of the CSV)
sensor_columns = [
    "Acceleration X (g)",
    "Acceleration Y (g)",
    "Acceleration Z (g)",
    "Angular Momentum X (dps)",
    "Angular Momentum Y (dps)",
    "Angular Momentum Z (dps)",
]
train_data = pd.read_csv(r"../measurement/processed_data/train_wo_around.csv", header=2)
X_train = train_data[["Time(s)", *sensor_columns]]
y_train = train_data["Stand detected"]

# Estimate the sampling rate from the mean spacing between time stamps
mean_sample_period = train_data["Time(s)"].diff().mean()
sampling_frequency = 1 / mean_sample_period
cutoff_frequency = 100

# 4th-order Butterworth low-pass, applied forward-backward to every sensor
# channel; the time column (column 0) is left untouched
b, a = iirfilter(4, Wn=cutoff_frequency, fs=sampling_frequency, btype="low", ftype="butter")
X_train_filtered = X_train.copy()
X_train_filtered.iloc[:, 1:] = filtfilt(b, a, X_train.iloc[:, 1:], axis=0)

# Standardize the sensor channels; the fitted scaler is reused for the test data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_filtered.iloc[:, 1:])

# Per-channel multipliers. All entries are 1, so no channel is emphasised at
# the moment; raise individual entries to weight a channel more strongly.
feature_weights = np.array([1, 1, 1, 1, 1, 1])
X_train_weighted = (X_train_scaled * feature_weights).astype(np.float32)

# Window geometry: 10 rows per window, taking every 10th row
sequence_length = 10
step = 10
# NOTE: despite its name, test_dataset wraps the *training* data at this point
test_dataset = TimeSeriesDataset(X_train_weighted, y_train, sequence_length, step)

X_seq_all, y_seq_all = [], []

In [None]:
# Rebuild the flattened windows and their labels from scratch. Appending to
# lists created in an earlier cell would duplicate every training sample each
# time this cell is re-run.
windows = [test_dataset[i] for i in range(len(test_dataset))]
X_seq_all = [x_seq.flatten() for x_seq, _ in windows]
y_seq_all = [y_val.item() for _, y_val in windows]

# Build RandomForest model
model = RandomForestClassifier(n_estimators=11, random_state=24)
model.fit(X_seq_all, y_seq_all)

# The target directory 'model/' must already exist
joblib.dump(model, 'model/random_forest_model.pkl')

In [None]:
model = joblib.load('model/random_forest_model.pkl')

# Load test data
test_data = pd.read_csv(r"../measurement/processed_data/moving/backward.csv", header=2)

# Re-reference time to the first sample and keep only the first 50 s.
# .copy() makes the filtered frame independent, so the column assignments
# below do not trigger pandas' SettingWithCopyWarning. The first row stays at
# t = 0 after filtering, so no second re-referencing is needed.
test_data["Time(s)"] = test_data["Time(s)"] - test_data.iloc[0]["Time(s)"]
test_data = test_data[test_data["Time(s)"] < 50].copy()

X_test = test_data[["Time(s)", "Acceleration X (g)", "Acceleration Y (g)", "Acceleration Z (g)", "Angular Momentum X (dps)", "Angular Momentum Y (dps)", "Angular Momentum Z (dps)"]]

# Apply the IIR filter designed on the training data (time column excluded)
X_test_filtered = X_test.copy()
X_test_filtered.iloc[:, 1:] = filtfilt(b, a, X_test.iloc[:, 1:], axis=0)

# Standardize the test data with the scaler fitted on the training data
X_test_scaled = scaler.transform(X_test_filtered.iloc[:, 1:])

# Mirror the training pipeline: same feature weights, same float32 cast.
# X_test_scaled itself is kept unweighted because the plotting cell uses it.
X_test_weighted = (X_test_scaled * feature_weights).astype(np.float32)

# The labels are unused at inference time; zeros act as placeholders
test_dataset = TimeSeriesDataset(X_test_weighted, torch.zeros(len(X_test_weighted)), sequence_length, step)

X_test_all = [test_dataset[i][0].flatten() for i in range(len(test_dataset))]

predictions = model.predict(X_test_all)

# Window i ends at row i + (sequence_length - 1) * step, so its prediction
# belongs to that row; rows before the first complete window stay NaN.
window_span = (sequence_length - 1) * step
pred_array = np.full(len(test_data), np.nan)
pred_array[window_span:window_span + len(predictions)] = predictions

test_data["predicted_heel_button"] = pred_array

In [None]:
# Plot all six filtered & scaled sensor channels together with the
# "Heel Button", "Stand detected" and predicted signals over time
fig = go.Figure()
time_axis = test_data["Time(s)"]

# Column order of X_test_scaled: three acceleration axes, then three angular axes
channel_labels = ["AC X", "AC Y", "AC Z", "AM X", "AM Y", "AM Z"]
for column, label in enumerate(channel_labels):
    fig.add_trace(go.Scatter(x=time_axis, y=X_test_scaled[:, column], mode="lines", name=f"{label} (Filtered & Scaled)"))

# (data-frame column, legend name) for the reference and predicted traces
label_traces = [
    ("Heel Button", "Heel Button"),
    ("Stand detected", "Analytical"),
    ("predicted_heel_button", "Predicted Heel Button"),
]
for column_name, legend_name in label_traces:
    fig.add_trace(go.Scatter(x=time_axis, y=test_data[column_name], mode="lines", name=legend_name))

fig.show()

In [None]:
test_data["Time(s)"] = test_data["Time(s)"] - test_data.iloc[0]["Time(s)"]
#test_data.to_csv("../measurement/processed_data/comparison/rf_backward.csv")

# Drop the first 0.3 s and re-reference time to the first remaining sample.
# NOTE(review): presumably this skips the start-up rows that have no
# prediction (NaN) -- confirm the intended cut-off.
# .copy() makes the filtered frame independent, so the assignment below does
# not trigger pandas' SettingWithCopyWarning.
test_data = test_data[test_data["Time(s)"] > 0.3].copy()
test_data["Time(s)"] = test_data["Time(s)"] - test_data.iloc[0]["Time(s)"]

# Rows whose prediction is NaN never compare equal and count as mismatches
equal_values = (test_data["predicted_heel_button"] == test_data["Stand detected"]).sum()
print(f"{equal_values} out of {test_data.shape[0]} entries are equal, which is {equal_values / test_data.shape[0] * 100:.2f}%")

def count_artifacts(pred_signal, time_vector, threshold_0=0.05, threshold_1=0.15):
    """
    Count too-short constant runs ("artifacts") in a binary step signal.

    The signal is split into maximal runs of equal consecutive values. A run's
    duration is the time of its last sample minus the time of its first
    sample, so a single-sample run has duration 0. Runs whose value is
    neither 0 nor 1 are ignored.

    Args:
        pred_signal (np.array): Array of predicted values (0 or 1).
        time_vector (np.array): Array of corresponding time values.
        threshold_0 (float): Minimum duration in seconds for a valid 0 segment.
        threshold_1 (float): Minimum duration in seconds for a valid 1 segment.

    Returns:
        Tuple (count_zero, count_one): Number of artifact segments for 0 and 1.
    """
    total = len(pred_signal)
    short_zero_runs = 0
    short_one_runs = 0
    run_start = 0

    # Visit every position once; pos == total acts as a sentinel that closes
    # the final run.
    for pos in range(1, total + 1):
        if pos < total and pred_signal[pos] == pred_signal[run_start]:
            continue  # still inside the current run

        run_value = pred_signal[run_start]
        run_duration = time_vector[pos - 1] - time_vector[run_start]

        if run_value == 0 and run_duration < threshold_0:
            short_zero_runs += 1
        elif run_value == 1 and run_duration < threshold_1:
            short_one_runs += 1

        run_start = pos

    return short_zero_runs, short_one_runs

# Count implausibly short 0/1 runs in the predicted signal
predicted_signal = test_data["predicted_heel_button"].values
time_values = test_data["Time(s)"].values
count_zero, count_one = count_artifacts(predicted_signal, time_values)

print(f"Number of short 0 segments (artifacts): {count_zero}")
print(f"Number of short 1 segments (artifacts): {count_one}")

# One-line summary: accuracy in percent; short 0 runs; short 1 runs
accuracy_percent = equal_values / test_data.shape[0] * 100
print(f" {accuracy_percent:.2f}; {count_zero}; {count_one};")