# Importing Libraries

In [None]:
from fitizens_libraries.load_and_process_training_data import load_training_data
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pycaret.classification import *

# Creating Auxiliary Functions
### Min Max Paper: [Evaluating squat performance with a single inertial measurement unit](https://drive.google.com/file/d/1E2-rFIW66C2mi87BNUPmUFCgFs8VFoRs/view?usp=sharing)
### Descriptive Statistics Paper: [Technology in Rehabilitation: Evaluating the Single Leg Squat Exercise with Wearable Inertial Measurement Units](https://drive.google.com/file/d/1OsCXUDf5jiAYZUt5Wwcr00iw7utEVx38/view)
### Traditional vs Deep Learning Paper: [Automatic Classification of Squat Posture Using Inertial Sensors: Deep Learning Approach](https://www.mdpi.com/1424-8220/20/2/361)

In [None]:
def create_custom_dataframe(series):
    """Return only the nine channels used for feature extraction.

    The selected columns are, in this order: ``linAccX/Y/Z``, ``gyroX/Y/Z``
    and ``magnX/Y/Z``. Any other column of ``series`` is left out.

    Args:
        series: Object indexable by a list of column labels (a pandas
            DataFrame in this notebook) containing at least those columns.

    Returns:
        The result of indexing ``series`` with the nine column labels.
    """
    wanted = [
        f"{sensor}{axis}"
        for sensor in ("linAcc", "gyro", "magn")
        for axis in ("X", "Y", "Z")
    ]
    return series[wanted]

In [None]:
def create_training_data_min_max(df, target):
    """Summarise one recording by the max, min and range of each channel.

    Args:
        df: Recording to summarise; it is first reduced to the channels
            selected by ``create_custom_dataframe``.
        target: Label stored unchanged under the ``"target"`` key.

    Returns:
        dict with ``<column>_max``, ``<column>_min`` and ``<column>_range``
        (max minus min) for every selected column, plus ``"target"``.
    """
    channels = create_custom_dataframe(df)

    features = {}
    for name in channels.columns:
        values = channels[name]
        highest, lowest = values.max(), values.min()
        features.update({
            f"{name}_max": highest,
            f"{name}_min": lowest,
            f"{name}_range": highest - lowest,
        })

    features["target"] = target
    return features

In [None]:
def create_training_data_mean_std_med(df, target):
    """Summarise one recording by the mean, std and median of each channel.

    Args:
        df: Recording to summarise; it is first reduced to the channels
            selected by ``create_custom_dataframe``.
        target: Label stored unchanged under the ``"target"`` key.

    Returns:
        dict with ``<column>_mean``, ``<column>_std`` and ``<column>_median``
        for every selected column, plus ``"target"``.
    """
    channels = create_custom_dataframe(df)

    features = {}
    for name in channels.columns:
        values = channels[name]
        # The statistic name is both the key suffix and the method to call.
        for stat in ("mean", "std", "median"):
            features[f"{name}_{stat}"] = getattr(values, stat)()

    features["target"] = target
    return features

# Loading Data

In [None]:
# Folder with the labeled recordings; it is created when missing so that
# listing it below cannot fail.
folder_path = "labeled"
os.makedirs(folder_path, exist_ok=True)
# os.listdir() returns entries in arbitrary order and also lists
# sub-directories (e.g. Jupyter's .ipynb_checkpoints). Keep regular files only
# and sort them so the dataset is assembled in the same order on every run.
file_names = [
    f"{folder_path}/{name}"
    for name in sorted(os.listdir(folder_path))
    if os.path.isfile(os.path.join(folder_path, name))
]
signals = ["accX", "accY", "accZ", "gyroX", "gyroY", "gyroZ", "magnX", "magnY", "magnZ", "linAccX", "linAccY", "linAccZ"]

data, wk = load_training_data(
    filelist=file_names,
    signals=signals,
    target_exercise="SQUAT",
    other_exercises=[],
    is_peak_minima=True,
)
# Show the first loaded entry as a sanity check.
data[0]

# Creating new data

In [None]:
# Build one feature dict (per-channel mean/std/median plus the label) for
# every entry of `data`, reading its "series" and "target" fields.
data_info = [create_training_data_mean_std_med(info["series"], info["target"]) for info in data]
# One row per dict; the dict keys become the columns.
data_custom = pd.DataFrame(data_info)
data_custom.head()

# EDA

In [None]:
# (rows, columns) of the feature table: one row per entry of `data`.
data_custom.shape

In [None]:
# Number of missing values in each column.
data_custom.isnull().sum()

In [None]:
# Number of rows per label, to check the class balance.
data_custom.target.value_counts()

In [None]:
# Pairwise correlation between the features; the label column is excluded.
corr_matrix = data_custom.drop(columns="target").corr()
corr_matrix

In [None]:
# One horizontal box plot per feature, stacked vertically in a single figure.
df_box = data_custom.drop(columns=["target"])
n_features = len(df_box.columns)

# squeeze=False makes subplots() always return a 2-D array of Axes; without it
# a single feature column yields a bare Axes object and indexing it fails.
# ravel() flattens the (n_features, 1) grid back to the 1-D layout used before.
fig, axes_grid = plt.subplots(
    nrows=n_features,
    ncols=1,
    figsize=(8, 4 * n_features),
    squeeze=False,
)
axes = axes_grid.ravel()

# Create a box plot for each column
for ax, col in zip(axes, df_box.columns):
    sns.boxplot(x=df_box[col], ax=ax)

# Add a title (optional)
fig.suptitle('Box Plots', fontsize=16)

# Adjust the layout
plt.tight_layout()

# Show the plot
plt.show()

# ML with Pycaret

In [None]:
# Keep a seeded random 95% of the rows for modelling; the remaining rows are
# set aside to simulate unseen production data.
data_dev = data_custom.sample(frac=0.95, random_state=786)
data_prod = data_custom.drop(index=data_dev.index)

# Renumber both subsets from 0, discarding the original row labels.
data_dev = data_dev.reset_index(drop=True)
data_prod = data_prod.reset_index(drop=True)

print(f"Data for Modeling: {data_dev.shape}")
print(f"Simulated data For Production {data_prod.shape}")

In [None]:
# Configure the PyCaret classification experiment on the modelling subset.
model = setup(
    # Basic options
    data = data_dev,
    target = "target",
    train_size = 0.7,
    preprocess = True,

    # Reproducibility: fix the experiment seed (same value as the seeded
    # dev/prod split) so that the train/test split, cross-validation folds and
    # tuning give the same results on every run.
    session_id = 786,

    # Dealing with multicollinearity
    remove_multicollinearity = True,
    multicollinearity_threshold = 0.9,

    # Feature normalization with outliers
    normalize = True,
    normalize_method = 'robust',

    # Parallelization options
    n_jobs = -1,
    use_gpu = False,

    # Imbalanced dataset
    fix_imbalance = True,

    # Feature selection
    feature_selection = True,
    n_features_to_select = 10,
)

In [None]:
# Compare the candidate classifiers with 2-fold cross-validation, ranked by F1.
models = compare_models(sort="F1", fold=2)
models

In [None]:
# NOTE(review): pull() presumably returns the score grid of the previous
# PyCaret call (the model comparison) as a DataFrame — confirm.
cnt_models_df = pull()
cnt_models_df

In [None]:
# Train the 'et' estimator with 2-fold cross-validation.
# NOTE(review): 'et' is presumably PyCaret's Extra Trees ID; it is hard-coded
# rather than taken from the comparison result — confirm it is still the best.
clf = create_model('et', fold = 2)

In [None]:
# Tune the hyperparameters of the model above, optimizing F1 over 2 folds.
tuned_clf = tune_model(clf, optimize = 'F1', fold = 2)

In [None]:
# One importance value per input feature, so the length is the feature count
# seen by the tuned estimator.
print("Total of features: ", len(tuned_clf.feature_importances_))

In [None]:
# Show PyCaret's evaluation output for the tuned model.
evaluate_model(tuned_clf)

# Save Model

In [None]:
# Finalize the tuned model, i.e. refit it on the whole data set before export.
model_final = finalize_model(tuned_clf)

In [None]:
# Export the finalized PyCaret model under the name 'squats_traditional'.
save_model(model_final, 'squats_traditional')

# Use in Production

In [None]:
# Load the model that was saved as "squats_traditional".
pipeline = load_model(model_name="squats_traditional")

In [None]:
# Use the model: predict on the held-out rows. raw_score=True is requested
# because the per-class score column "prediction_score_SQUAT" is read later.
prediction = predict_model(pipeline, data_prod, raw_score=True)

In [None]:
# Display the prediction table.
prediction

# Scores

In [None]:
# Ground-truth labels of the held-out rows.
true_labels = prediction["target"]
# Labels predicted by the model for the same rows.
predictions = prediction["prediction_label"]

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score the held-out predictions; "SQUAT" is treated as the positive class for
# the three class-specific metrics.
positive_class_label = "SQUAT"
accuracy = accuracy_score(true_labels, predictions)
precision, recall, f1 = (
    metric(true_labels, predictions, pos_label=positive_class_label)
    for metric in (precision_score, recall_score, f1_score)
)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1: {f1}")

In [None]:
from sklearn.metrics import roc_auc_score

# Score the model assigned to the "SQUAT" class for each held-out row.
predictions_prob = prediction["prediction_score_SQUAT"]
# Encode the truth as 1 for "SQUAT" and 0 for every other label. Comparing
# against the positive label (instead of mapping a fixed pair of label names)
# avoids NaN entries, and the resulting roc_auc_score failure, when the
# negative class is not spelled exactly "NO_EXERCISE".
binary_true_labels = (true_labels == "SQUAT").astype(int)
auc_score = roc_auc_score(binary_true_labels, predictions_prob)

print(f"AUC: {auc_score}")