In [31]:
from Autolabeler import workout
import pandas as pd
import os
import warnings
import numpy as np
import joblib 
from pycaret.classification import *

# Load Test Data

In [72]:
# Collect the labeled recordings that live in the LABELED folder.
folder_path = "LABELED"
os.makedirs(folder_path, exist_ok=True)
# os.listdir returns entries in arbitrary order, so sort them first: the
# positional slice used below would otherwise be free to pick a different
# file on another machine or run.
file_names = [f"{folder_path}/{name}" for name in sorted(os.listdir(folder_path))]

# Sensor channels passed to the autolabeler as features.
signals = ["accX", "accY", "accZ", "gyroX", "gyroY", "gyroZ", "magnX", "magnY", "magnZ", "linAccX", "linAccY", "linAccZ"]
# Only the file at position 3 of the sorted listing is used as test data.
data = workout(filelist=file_names[3:4], signals=signals)
# presumably `workout` returns an iterable of DataFrames (one per file) — TODO confirm
df = pd.concat(data)

In [73]:
# Sanity check: (rows, columns) of the concatenated workout frame.
df.shape

(6960, 31)

In [74]:
# Columns produced by the labeling step that must not be fed to the models.
label_columns = [
    'peaks',
    'first_sample',
    'last_sample',
    'first_sample_closest_peak',
    'last_sample_closest_peak',
    'exercise',
    'exercising_periods',
]
# X keeps every remaining column of df (see the column listing displayed below).
X = df.drop(columns=label_columns)
X.columns

Index(['accX', 'accY', 'accZ', 'gyroX', 'gyroY', 'gyroZ', 'magnX', 'magnY',
       'magnZ', 'linAccX', 'linAccY', 'linAccZ', 'accX_mod', 'accY_mod',
       'accZ_mod', 'gyroX_mod', 'gyroY_mod', 'gyroZ_mod', 'magnX_mod',
       'magnY_mod', 'magnZ_mod', 'linAccX_mod', 'linAccY_mod', 'linAccZ_mod'],
      dtype='object')

# Extract SQUAT TIME RANGE

In [75]:
# Count how many rows carry each distinct `exercising_periods` value.
# presumably 0 marks samples outside any repetition — TODO confirm against Autolabeler
np.unique(df.exercising_periods, return_counts=True)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25]),
 array([4459,   97,  100,  108,  105,   99,  100,   97,  100,   99,   98,
         100,   95,   98,   97,  101,  103,  106,  100,  101,   99,   98,
         103,   93,  103,  101], dtype=int64))

In [53]:
def extract_squat_time_range(df_workout):
    """Pair the labeled start and end index values of each repetition.

    Parameters
    ----------
    df_workout : pandas.DataFrame
        Frame with boolean-like ``first_sample`` and ``last_sample`` columns.
        Its index values are what gets reported as start / end.

    Returns
    -------
    pandas.DataFrame
        One row per repetition with columns ``start`` and ``end``. The i-th
        start (in sorted index order) is paired with the i-th end. If the
        number of starts and ends differ, the surplus entries are dropped
        silently because ``zip`` stops at the shorter sequence.
    """
    # `== True` treats missing / NaN flags as False.
    is_start = (df_workout['first_sample'] == True).to_numpy()
    is_end = (df_workout['last_sample'] == True).to_numpy()

    # Apply each mask directly to the index. Concatenating the two filtered
    # frames first would duplicate any row flagged as both first and last
    # sample and therefore emit the same pair twice.
    start_times = df_workout.index[is_start].sort_values()
    end_times = df_workout.index[is_end].sort_values()

    # Assumes each start is followed by its matching end.
    paired_times = list(zip(start_times, end_times))

    return pd.DataFrame(paired_times, columns=['start', 'end'])

In [76]:
# Display the labeled (start, end) pairs found in the test recording.
extract_squat_time_range(df)

Unnamed: 0,start,end
0,1970-01-01 00:00:00.380,1970-01-01 00:00:01.293
1,1970-01-01 00:00:02.945,1970-01-01 00:00:03.889
2,1970-01-01 00:00:05.487,1970-01-01 00:00:06.503
3,1970-01-01 00:00:08.262,1970-01-01 00:00:09.252
4,1970-01-01 00:00:10.984,1970-01-01 00:00:11.915
5,1970-01-01 00:00:13.504,1970-01-01 00:00:14.444
6,1970-01-01 00:00:15.957,1970-01-01 00:00:16.870
7,1970-01-01 00:00:18.356,1970-01-01 00:00:19.296
8,1970-01-01 00:00:20.970,1970-01-01 00:00:21.901
9,1970-01-01 00:00:23.575,1970-01-01 00:00:24.501


# Auxiliary Functions

In [27]:
def prepare_data(chunk_df):
    """Summarise one window of sensor samples as a single-row feature frame.

    For each of the twelve sensor channels the mean, standard deviation and
    median over ``chunk_df`` are computed and stored under the column names
    ``<channel>_mean``, ``<channel>_std`` and ``<channel>_median`` (in that
    order, channel by channel).

    Parameters
    ----------
    chunk_df : pandas.DataFrame
        Window of samples; must contain all twelve channel columns.

    Returns
    -------
    pandas.DataFrame
        One row, 36 columns.
    """
    channels = (
        "accX", "accY", "accZ",
        "gyroX", "gyroY", "gyroZ",
        "magnX", "magnY", "magnZ",
        "linAccX", "linAccY", "linAccZ",
    )
    statistics = ("mean", "std", "median")

    # Select up front so a missing channel fails before any statistic is computed.
    selected = chunk_df[list(channels)]

    features = {}
    for channel in selected.columns:
        values = selected[channel]
        for statistic in statistics:
            features[f"{channel}_{statistic}"] = getattr(values, statistic)()

    # Keys become the index, then transpose to get a single row of features.
    return pd.DataFrame.from_dict(features, orient="index").T

In [28]:
def predict_pycaret(pipeline_ml, ventana_df):
    """Classify one window with a PyCaret pipeline.

    The window is reduced to summary features with ``prepare_data``, scored
    with ``predict_model`` and the predicted label of the single resulting
    row is returned together with the first and last index value of the window.

    Parameters
    ----------
    pipeline_ml : object
        Pipeline passed as the first argument to ``predict_model``.
    ventana_df : pandas.DataFrame
        Window of samples; its first and last index values are reported as
        ``start`` and ``end``.

    Returns
    -------
    pandas.DataFrame
        One row with columns ``start``, ``end`` and ``prediction``.
    """
    features = prepare_data(ventana_df)
    scored = predict_model(pipeline_ml, features, raw_score=True)
    label = scored.prediction_label.iloc[0]

    annotated = features.assign(
        start=ventana_df.index[0],
        end=ventana_df.index[-1],
        prediction=label,
    )
    return annotated[['start', 'end', 'prediction']]

# Load Model

In [10]:
# Load the saved model named "squats_traditional" into `pipeline`.
# NOTE(review): `load_model` is not imported by name; presumably it comes from
# the `from pycaret.classification import *` star import — confirm.
pipeline = load_model(model_name="squats_traditional")

Transformation Pipeline and Model Successfully Loaded


# Check Data

In [16]:
# Classify X in consecutive, non-overlapping windows of `tamano_ventana` rows.
# Trailing rows that do not fill a whole window are not evaluated.
tamano_ventana = 170

# Register the warning filter once, not on every loop iteration.
warnings.filterwarnings("ignore")

# Collect the one-row results and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop copies it on every iteration.
predicciones_por_ventana = []
for i in range(0, len(X) - tamano_ventana + 1, tamano_ventana):
    print(f"Ventana: {i} - {i + tamano_ventana}")
    ventana = X.iloc[i:i + tamano_ventana]

    X1 = predict_pycaret(pipeline, ventana)
    predicciones_por_ventana.append(X1)

# pd.concat raises on an empty list, so fall back to an empty frame when X is
# shorter than one window.
if predicciones_por_ventana:
    nuevo_df_predicciones = pd.concat(predicciones_por_ventana, ignore_index=True)
else:
    nuevo_df_predicciones = pd.DataFrame()

Ventana: 0 - 170
Ventana: 170 - 340
Ventana: 340 - 510
Ventana: 510 - 680
Ventana: 680 - 850
Ventana: 850 - 1020
Ventana: 1020 - 1190
Ventana: 1190 - 1360
Ventana: 1360 - 1530
Ventana: 1530 - 1700
Ventana: 1700 - 1870
Ventana: 1870 - 2040
Ventana: 2040 - 2210
Ventana: 2210 - 2380
Ventana: 2380 - 2550
Ventana: 2550 - 2720
Ventana: 2720 - 2890
Ventana: 2890 - 3060
Ventana: 3060 - 3230
Ventana: 3230 - 3400
Ventana: 3400 - 3570
Ventana: 3570 - 3740
Ventana: 3740 - 3910
Ventana: 3910 - 4080
Ventana: 4080 - 4250
Ventana: 4250 - 4420
Ventana: 4420 - 4590
Ventana: 4590 - 4760
Ventana: 4760 - 4930
Ventana: 4930 - 5100
Ventana: 5100 - 5270
Ventana: 5270 - 5440
Ventana: 5440 - 5610
Ventana: 5610 - 5780
Ventana: 5780 - 5950
Ventana: 5950 - 6120
Ventana: 6120 - 6290
Ventana: 6290 - 6460
Ventana: 6460 - 6630
Ventana: 6630 - 6800
Ventana: 6800 - 6970
Ventana: 6970 - 7140
Ventana: 7140 - 7310
Ventana: 7310 - 7480
Ventana: 7480 - 7650
Ventana: 7650 - 7820
Ventana: 7820 - 7990
Ventana: 7990 - 8160


In [17]:
# (number of evaluated windows, number of result columns)
nuevo_df_predicciones.shape

(48, 3)

In [18]:
# Count how many windows received each predicted label.
np.unique(nuevo_df_predicciones.prediction,return_counts=True)

(array(['NO_EXERCISE', 'SQUAT'], dtype=object), array([45,  3], dtype=int64))

In [19]:
# Keep only the windows whose predicted label is SQUAT.
is_squat = nuevo_df_predicciones.prediction == 'SQUAT'
squats_detection = nuevo_df_predicciones.loc[is_squat, ['start', 'end', 'prediction']]
squats_detection

Unnamed: 0,start,end,prediction
5,1970-01-01 00:00:08.083,1970-01-01 00:00:09.690,SQUAT
7,1970-01-01 00:00:11.315,1970-01-01 00:00:12.922,SQUAT
12,1970-01-01 00:00:19.402,1970-01-01 00:00:21.009,SQUAT


In [20]:
# Keep only the windows whose predicted label is NO_EXERCISE.
# Note: this reuses (and overwrites) the `squats_detection` name.
is_no_exercise = nuevo_df_predicciones.prediction == 'NO_EXERCISE'
squats_detection = nuevo_df_predicciones.loc[is_no_exercise, ['start', 'end', 'prediction']]
squats_detection

Unnamed: 0,start,end,prediction
0,1970-01-01 00:00:00.000,1970-01-01 00:00:01.607,NO_EXERCISE
1,1970-01-01 00:00:01.616,1970-01-01 00:00:03.223,NO_EXERCISE
2,1970-01-01 00:00:03.232,1970-01-01 00:00:04.838,NO_EXERCISE
3,1970-01-01 00:00:04.847,1970-01-01 00:00:06.454,NO_EXERCISE
4,1970-01-01 00:00:06.467,1970-01-01 00:00:08.074,NO_EXERCISE
6,1970-01-01 00:00:09.699,1970-01-01 00:00:11.306,NO_EXERCISE
8,1970-01-01 00:00:12.935,1970-01-01 00:00:14.542,NO_EXERCISE
9,1970-01-01 00:00:14.551,1970-01-01 00:00:16.157,NO_EXERCISE
10,1970-01-01 00:00:16.166,1970-01-01 00:00:17.773,NO_EXERCISE
11,1970-01-01 00:00:17.782,1970-01-01 00:00:19.389,NO_EXERCISE


# Check Data with a Moving Window (XGBoost)

In [55]:
def predict_xgboost(modelo_input, ventana_df):
    """Classify one window with a fitted model exposing ``predict``.

    The mean, standard deviation and median of every column of ``ventana_df``
    are computed and named ``mean_<col>``, ``std_<col>`` and ``median_<col>``.
    Only the columns listed in ``features`` are passed to the model.

    Parameters
    ----------
    modelo_input : object
        Model whose ``predict`` accepts a one-row DataFrame holding the
        ``features`` columns and returns an indexable of predictions.
    ventana_df : pandas.DataFrame
        Window of samples; its first and last index values are reported as
        ``start`` and ``end``.

    Returns
    -------
    pandas.DataFrame
        One row with columns ``start``, ``end`` and ``prediction``.
    """
    features = ['mean_accX', 'mean_accZ', 'mean_linAccZ', 'mean_accZ_mod',
       'mean_gyroX_mod', 'mean_linAccX_mod', 'std_accZ', 'std_gyroX', 'std_gyroZ',
       'std_magnX', 'std_linAccX', 'std_accZ_mod', 'std_linAccX_mod',
       'median_accX', 'median_accZ', 'median_linAccZ', 'median_accZ_mod',
       'median_gyroX_mod', 'median_linAccX_mod']

    window_stats = pd.concat([
        ventana_df.mean().add_prefix('mean_'),
        ventana_df.std().add_prefix('std_'),
        ventana_df.median().add_prefix('median_'),
    ])
    # DataFrame.append was removed in pandas 2.0; turn the Series of
    # statistics into a single-row frame instead.
    nuevo_df = window_stats.to_frame().T.reset_index(drop=True)

    X1 = nuevo_df[features].copy()
    y_pred = modelo_input.predict(X1)
    X1['start'] = ventana_df.index[0]
    X1['end'] = ventana_df.index[-1]
    X1['prediction'] = y_pred[0]
    return X1[['start', 'end', 'prediction']]

In [77]:
# Moving-window detection: each window spans `wing_size` rows on either side
# of `pos`. After a positive prediction the position jumps a full window
# ahead (2 * wing_size); otherwise it advances by a single row.
wing_size = 70
test_size = X.shape[0]
pos = wing_size
limit = test_size - wing_size
squats = 0
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load a 'modeloXGB.pkl' that comes from a trusted source.
modelo = joblib.load('modeloXGB.pkl')

# Collect the one-row results and concatenate once at the end; calling
# pd.concat on a growing frame for every step is quadratic in the step count.
window_rows = []
while pos <= limit:
    start = pos - wing_size
    end = pos + wing_size
    window = X.iloc[start:end]
    X1 = predict_xgboost(modelo, window)
    window_rows.append(X1)
    if X1.prediction.iloc[0] == 1:
        pos += 2*wing_size
        squats += 1
    else:
        pos += 1
    print(f"Step {pos+1}/{test_size}. Squats: {squats}")

# Keep the expected columns even when X is too short for a single window, so
# later cells that filter on `prediction` still work.
if window_rows:
    window_predictions = pd.concat(window_rows, ignore_index=True)
else:
    window_predictions = pd.DataFrame(columns=['start', 'end', 'prediction'])

Step 72/6960. Squats: 0
Step 73/6960. Squats: 0
Step 74/6960. Squats: 0
Step 75/6960. Squats: 0
Step 76/6960. Squats: 0
Step 77/6960. Squats: 0
Step 78/6960. Squats: 0
Step 79/6960. Squats: 0
Step 80/6960. Squats: 0
Step 81/6960. Squats: 0
Step 82/6960. Squats: 0
Step 83/6960. Squats: 0
Step 84/6960. Squats: 0
Step 85/6960. Squats: 0
Step 86/6960. Squats: 0
Step 87/6960. Squats: 0
Step 88/6960. Squats: 0
Step 89/6960. Squats: 0
Step 90/6960. Squats: 0
Step 91/6960. Squats: 0
Step 92/6960. Squats: 0
Step 93/6960. Squats: 0
Step 94/6960. Squats: 0
Step 95/6960. Squats: 0
Step 96/6960. Squats: 0
Step 97/6960. Squats: 0
Step 98/6960. Squats: 0
Step 99/6960. Squats: 0
Step 100/6960. Squats: 0
Step 101/6960. Squats: 0
Step 102/6960. Squats: 0
Step 103/6960. Squats: 0
Step 104/6960. Squats: 0
Step 105/6960. Squats: 0
Step 106/6960. Squats: 0
Step 107/6960. Squats: 0
Step 108/6960. Squats: 0
Step 109/6960. Squats: 0
Step 110/6960. Squats: 0
Step 111/6960. Squats: 0
Step 112/6960. Squats: 0
Ste

In [78]:
# Show only the windows for which the model predicted class 1.
window_predictions[window_predictions.prediction == 1]

Unnamed: 0,start,end,prediction
46,1970-01-01 00:00:00.434,1970-01-01 00:00:01.759,1
103,1970-01-01 00:00:02.300,1970-01-01 00:00:03.621,1
237,1970-01-01 00:00:04.896,1970-01-01 00:00:06.217,1
387,1970-01-01 00:00:07.645,1970-01-01 00:00:08.965,1
531,1970-01-01 00:00:10.335,1970-01-01 00:00:11.659,1
657,1970-01-01 00:00:12.859,1970-01-01 00:00:14.180,1
783,1970-01-01 00:00:15.379,1970-01-01 00:00:16.700,1
891,1970-01-01 00:00:17.729,1970-01-01 00:00:19.049,1
1027,1970-01-01 00:00:20.343,1970-01-01 00:00:21.664,1
1161,1970-01-01 00:00:22.939,1970-01-01 00:00:24.260,1
