In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.signal import find_peaks
import warnings
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, recall_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, cross_val_score
from joblib import Parallel, delayed
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import xgboost as xgb
from sklearn.ensemble import VotingClassifier
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

### Leer archivos

In [None]:
# Load the labelled train/test recordings and order them chronologically.
data_train = pd.read_csv('C:/Users/carlo/Desktop/Tesis/Predicciones/Machine_Learning/Archivos_CSV_con_etiquetas/Tortu_Erika_Completo_ordenado_train_Balanceado_QuietaComiendoCaminando.csv')
data_test = pd.read_csv('C:/Users/carlo/Desktop/Tesis/Predicciones/Machine_Learning/Archivos_CSV_con_etiquetas/Tortu_Erika_Completo_ordenado_test_Balanceado_QuietaComiendoCaminando.csv')

# Parse the timestamps; anything that cannot be parsed becomes NaT (errors='coerce').
data_train['dateTime_UTC'] = pd.to_datetime(data_train['dateTime_UTC'], errors='coerce')
data_test['dateTime_UTC'] = pd.to_datetime(data_test['dateTime_UTC'], errors='coerce')

# Keep only rows with a valid timestamp, then sort by time with a fresh 0..n-1 index.
df_train = (
    data_train
    .loc[data_train['dateTime_UTC'].notna()]
    .sort_values(by=['dateTime_UTC'], ignore_index=True)
)
df_test = (
    data_test
    .loc[data_test['dateTime_UTC'].notna()]
    .sort_values(by=['dateTime_UTC'], ignore_index=True)
)

### Funciones de creación de ventanas y cálculo de features

In [12]:
import numpy as np
import pandas as pd
from scipy import stats
from scipy.signal import find_peaks


def compute_features(x_df, y_df, z_df, gx_df, gy_df, gz_df, window_size):
    """Build one row of summary features per window from six sensor-axis frames.

    Every input frame holds one window per row and one sample per column; all six
    frames are expected to share the same shape and index.

    Parameters
    ----------
    x_df, y_df, z_df : pandas.DataFrame
        Windows of the three axes combined into ``acc_mag``.
    gx_df, gy_df, gz_df : pandas.DataFrame
        Windows of the three axes combined into ``gyro_mag``.
    window_size : int
        Divisor used to normalise the energy and SMA features.

    Returns
    -------
    pandas.DataFrame
        One row per window. Columns, in order: time-domain statistics for the six
        axes and the two magnitudes, ``sma``/``sma_gyro``, FFT statistics, Shannon
        entropy, pairwise x/y/z correlations and zero-crossing counts. Infinite
        and missing values are replaced by 0.
    """
    X = pd.DataFrame()
    acc_magnitude = np.sqrt(x_df**2 + y_df**2 + z_df**2)
    gyro_magnitude = np.sqrt(gx_df**2 + gy_df**2 + gz_df**2)

    axis_names = ['x', 'y', 'z', 'gx', 'gy', 'gz']
    axis_frames = [x_df, y_df, z_df, gx_df, gy_df, gz_df]

    # Time-domain statistics for every axis plus the two magnitude signals.
    for name, df in zip(axis_names + ['acc_mag', 'gyro_mag'],
                        axis_frames + [acc_magnitude, gyro_magnitude]):
        values = df.to_numpy()
        X[f'{name}_mean'] = df.mean(axis=1)
        X[f'{name}_std'] = df.std(axis=1)
        X[f'{name}_median'] = df.median(axis=1)
        # Median absolute deviation around each window's own median.
        row_median = np.median(values, axis=1, keepdims=True)
        X[f'{name}_mad'] = np.median(np.abs(values - row_median), axis=1)
        # Both percentiles are computed for all windows in a single call.
        q75, q25 = np.percentile(values, [75, 25], axis=1)
        X[f'{name}_iqr'] = q75 - q25
        X[f'{name}_peak_count'] = [len(find_peaks(row)[0]) for row in values]
        X[f'{name}_energy'] = (df**2).sum(axis=1) / window_size

    # Signal magnitude area: summed absolute values of the three axes.
    X['sma'] = (x_df.abs().sum(axis=1) + y_df.abs().sum(axis=1) + z_df.abs().sum(axis=1)) / window_size
    X['sma_gyro'] = (gx_df.abs().sum(axis=1) + gy_df.abs().sum(axis=1) + gz_df.abs().sum(axis=1)) / window_size

    # FFT features: statistics of the magnitude spectrum of each window.
    def fft_basic_features(df):
        fft_vals = np.abs(np.fft.rfft(df.to_numpy(), axis=1))
        return {
            'fft_mean': fft_vals.mean(axis=1),
            'fft_std': fft_vals.std(axis=1),
            'fft_max': fft_vals.max(axis=1),
            'fft_energy': np.sum(fft_vals**2, axis=1)
        }
    for name, df in zip(axis_names, axis_frames):
        for k, v in fft_basic_features(df).items():
            X[f'{name}_{k}'] = v

    # Entropy
    def shannon_entropy(signal):
        # Shannon entropy needs bin *probabilities* (counts / total). The density
        # returned by np.histogram(density=True) is scaled by 1 / bin_width, does
        # not sum to 1 and yields bin-width-dependent (even negative) values.
        signal = np.asarray(signal, dtype=float)
        signal = signal[np.isfinite(signal)]  # np.histogram raises on NaN/inf
        if signal.size == 0:
            return 0.0
        counts, _ = np.histogram(signal, bins=10)
        probs = counts[counts > 0] / counts.sum()
        return float(-np.sum(probs * np.log2(probs)))
    for name, df in zip(axis_names, axis_frames):
        X[f'{name}_entropy'] = [shannon_entropy(row) for row in df.to_numpy()]

    # Correlation features. A constant window has zero variance, so corrcoef
    # yields NaN (replaced by 0 below); silence the accompanying warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        X['corr_xy'] = [np.corrcoef(x, y)[0, 1] for x, y in zip(x_df.values, y_df.values)]
        X['corr_xz'] = [np.corrcoef(x, z)[0, 1] for x, z in zip(x_df.values, z_df.values)]
        X['corr_yz'] = [np.corrcoef(y, z)[0, 1] for y, z in zip(y_df.values, z_df.values)]

    # Zero crossings: number of consecutive sample pairs with opposite sign.
    def zero_crossings(signal):
        return ((signal[:, :-1] * signal[:, 1:]) < 0).sum(axis=1)
    X['x_zero_cross'] = zero_crossings(x_df.values)
    X['y_zero_cross'] = zero_crossings(y_df.values)
    X['z_zero_cross'] = zero_crossings(z_df.values)

    # Downstream scaling and models cannot handle inf/NaN, so neutralise them.
    X = X.replace([np.inf, -np.inf], 0).fillna(0)
    return X

In [None]:
def create_windows(df, window_size, step_size):
    """Slice a recording into fixed-length windows, one label per window.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'ax', 'ay', 'az', 'gx', 'gy', 'gz' and 'Actividades'.
    window_size : int
        Number of consecutive rows in each window.
    step_size : int
        Offset in rows between the starts of consecutive windows.

    Returns
    -------
    tuple
        ``(x_list, y_list, z_list, gx_list, gy_list, gz_list, labels)``. The first
        six are lists of arrays of length ``window_size`` (one per window);
        ``labels`` holds the most frequent 'Actividades' value of each window.
        All lists are empty when ``df`` has fewer than ``window_size`` rows.
    """
    # Pull the raw arrays once instead of on every iteration.
    channels = [df[col].values for col in ('ax', 'ay', 'az', 'gx', 'gy', 'gz')]
    activity = df['Actividades']

    per_channel = [[] for _ in channels]
    labels = []

    # "+ 1" keeps the final window when it ends exactly on the last row; without
    # it that full window was silently dropped.
    last_start = df.shape[0] - window_size
    for start in range(0, last_start + 1, step_size):
        stop = start + window_size
        for bucket, signal in zip(per_channel, channels):
            bucket.append(signal[start:stop])
        # Positional slice, so the label rows match the signal rows whatever the index is.
        labels.append(activity.iloc[start:stop].mode()[0])

    x_list, y_list, z_list, gx_list, gy_list, gz_list = per_channel
    return x_list, y_list, z_list, gx_list, gy_list, gz_list, labels

def remove_correlated_features(X_train, X_test, threshold=0.9):
    """Drop features that are highly correlated with an earlier feature.

    Correlations are measured on ``X_train`` only. A column is dropped when its
    absolute correlation with any column to its left exceeds ``threshold``; the
    same columns are then removed from ``X_test``.

    Returns ``(X_train_reduced, X_test_reduced, to_drop)``.
    """
    abs_corr = X_train.corr().abs()

    # Keep the strict upper triangle so every pair is inspected exactly once and
    # the diagonal (self-correlation) is ignored.
    above_diagonal = np.triu(np.ones(abs_corr.shape), k=1).astype(bool)
    pairwise = abs_corr.where(above_diagonal)

    to_drop = []
    for col in pairwise.columns:
        if (pairwise[col] > threshold).any():
            to_drop.append(col)

    return X_train.drop(columns=to_drop), X_test.drop(columns=to_drop), to_drop

def smote(X,y):
    """Oversample (X, y) with SMOTE (random_state=21) and return the resampled pair."""
    oversampler = SMOTE(random_state=21)
    balanced_X, balanced_y = oversampler.fit_resample(X, y)
    return balanced_X, balanced_y

def tune_logistic_regression(X, y):
    """Grid-search a LogisticRegression over C, solver and max_iter.

    Candidates are scored by accuracy with cv=5. Returns the best estimator found
    and the parameter combination that produced it.
    """
    search = GridSearchCV(
        estimator=LogisticRegression(random_state=21),
        param_grid={
            'C': [0.01, 0.1, 1, 10, 100],
            'solver': ['lbfgs', 'liblinear'],
            'max_iter': [100, 200, 300],
        },
        cv=5,
        scoring='accuracy',
        n_jobs=-1,
    )
    search.fit(X, y)
    return search.best_estimator_, search.best_params_

def tune_random_forest(X, y):
    """Grid-search a RandomForestClassifier over n_estimators and max_depth.

    Candidates are scored by accuracy with cv=5. Returns the best estimator found
    and the parameter combination that produced it.
    """
    search = GridSearchCV(
        estimator=RandomForestClassifier(random_state=21),
        param_grid={
            'n_estimators': [100, 200],
            'max_depth': [None, 10, 20],
        },
        cv=5,
        scoring='accuracy',
        n_jobs=-1,
    )
    search.fit(X, y)
    return search.best_estimator_, search.best_params_

def tune_svm(X, y):
    """Grid-search an RBF-kernel SVC over C and gamma.

    The SVC is built with probability=True. Candidates are scored by accuracy
    with cv=5. Returns the best estimator found and its parameter combination.
    """
    search = GridSearchCV(
        estimator=SVC(kernel='rbf', random_state=21, probability=True),
        param_grid={
            'C': [0.1, 1, 10],
            'gamma': ['scale', 0.01, 0.1],
        },
        cv=5,
        scoring='accuracy',
        n_jobs=-1,
    )
    search.fit(X, y)
    return search.best_estimator_, search.best_params_

def tune_xgb(X, y):
    """Grid-search an XGBClassifier over tree count, depth, learning rate and sampling.

    Candidates are scored by accuracy with cv=3 (verbose=2). Returns the best
    estimator found and the parameter combination that produced it.
    """
    search_space = {
        'n_estimators': [100, 200],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.2],
        'subsample': [0.8, 1.0],
        'colsample_bytree': [0.8, 1.0],
    }
    booster = xgb.XGBClassifier(eval_metric='mlogloss', random_state=42)
    search = GridSearchCV(
        estimator=booster,
        param_grid=search_space,
        cv=3,
        scoring='accuracy',
        n_jobs=-1,
        verbose=2,
    )
    search.fit(X, y)
    return search.best_estimator_, search.best_params_

def tune_voting(X, y, base_models, weights=(3, 10, 3, 8), voting='hard'):
    """Fit a weighted VotingClassifier built from the four base models.

    Parameters
    ----------
    X, y
        Training features and labels passed to ``VotingClassifier.fit``.
    base_models : dict
        Must provide estimators under the keys 'lr', 'rf', 'svm' and 'xgb'.
    weights : sequence of 4 numbers or None, optional
        Vote weights in the order (lr, rf, svm, xgb). The default keeps the
        values that were previously hard-coded; ``None`` means equal weights.
    voting : str, optional
        Forwarded to ``VotingClassifier`` ('hard' by default, as before).

    Returns
    -------
    VotingClassifier
        The fitted ensemble.

    Raises
    ------
    KeyError
        If ``base_models`` lacks one of the four required keys.
    """
    member_keys = ('lr', 'rf', 'svm', 'xgb')
    missing = [key for key in member_keys if key not in base_models]
    if missing:
        raise KeyError(f"base_models is missing required estimators: {missing}")

    voting_clf = VotingClassifier(
        estimators=[(key, base_models[key]) for key in member_keys],
        voting=voting,
        weights=None if weights is None else list(weights),
    )
    voting_clf.fit(X, y)
    return voting_clf

def calcular_static_dynamic(df, freq=14.45354719, window_seconds=10):
    """Split each sensor axis into a rolling mean and an absolute residual.

    For each of 'ax', 'ay', 'az', 'gx', 'gy', 'gz' a centred rolling mean over
    ``int(freq * window_seconds)`` rows (min_periods=1) is stored in a new column
    named 's' + axis, and the axis column is then overwritten with the absolute
    difference between the original values and that rolling mean.

    ``df`` is modified in place and also returned.
    """
    window_size = int(freq * window_seconds)
    axes = ('ax', 'ay', 'az', 'gx', 'gy', 'gz')

    # First pass: add every smoothed column while the raw signals are still intact.
    for axis in axes:
        df['s' + axis] = df[axis].rolling(window=window_size, center=True, min_periods=1).mean()

    # Second pass: replace each raw signal by its absolute deviation from the smooth one.
    for axis in axes:
        df[axis] = np.abs(df[axis] - df['s' + axis])

    return df

def encode_labels(labels, diccionario_codificacion):
    """Map each label to its code and return the codes as a NumPy array.

    Labels are converted with ``str`` before the lookup; a label missing from
    ``diccionario_codificacion`` raises ``KeyError``.
    """
    codes = []
    for label in labels:
        codes.append(diccionario_codificacion[str(label)])
    return np.array(codes)

def plot_confusion_matrix(
        y_true, y_pred, labels, title,
        num_fontsize=14, label_fontsize=12, title_fontsize=14,
        cbar_fontsize=10):
    """Draw the confusion matrix of ``y_true`` vs ``y_pred`` as an annotated heatmap.

    Parameters
    ----------
    y_true, y_pred
        True and predicted labels, passed straight to ``confusion_matrix``.
    labels
        Tick labels used on both axes of the heatmap.
    title
        Figure title.
    num_fontsize, label_fontsize, title_fontsize, cbar_fontsize
        Font sizes for the cell annotations, the axis labels and ticks, the
        title, and the colour-bar ticks respectively.

    The figure is shown with ``plt.show()``; nothing is returned.
    """

    # NOTE(review): `labels` is not passed to confusion_matrix, so the tick labels
    # must be given in the same order as the rows/columns it produces, and every
    # class must appear in y_true or y_pred -- confirm with callers.
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    ax = sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=labels,
        yticklabels=labels,
        annot_kws={"size": num_fontsize}
    )

    # The heatmap's colour bar is reached through its first collection.
    cbar = ax.collections[0].colorbar
    cbar.ax.tick_params(labelsize=cbar_fontsize)

    plt.title(title, fontsize=title_fontsize)
    plt.xlabel('Predicciones', fontsize=label_fontsize)
    plt.ylabel('Valores reales', fontsize=label_fontsize)
    plt.xticks(fontsize=label_fontsize)
    plt.yticks(fontsize=label_fontsize)
    plt.show()

def plot_feature_importance(model, feature_names, top_n=20):
    """Bar-plot the largest feature importances of a fitted model.

    Parameters
    ----------
    model
        Object exposing ``feature_importances_``.
    feature_names
        Sequence indexable by feature position; supplies the x tick labels.
    top_n : int, optional
        Maximum number of features to show. When the model has fewer features,
        all of them are plotted.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    importances = np.asarray(model.feature_importances_)
    # Descending order; the slice keeps at most top_n entries.
    indices = np.argsort(importances)[::-1][:top_n]
    # Size the bars by what was actually selected: using range(top_n) raised a
    # shape mismatch whenever the model had fewer than top_n features.
    positions = range(len(indices))
    plt.figure(figsize=(10, 6))
    plt.title("Feature Importances")
    plt.bar(positions, importances[indices])
    plt.xticks(positions, [feature_names[i] for i in indices], rotation=90)
    plt.tight_layout()
    plt.show()

### Variación del tamaño de ventana: creación de modelos, predicción y cálculo de accuracy, precisión, recall y f1-score

In [None]:
def evaluate_window(i):
    """Train and evaluate all models for one window size.

    The window size is ``i + 2`` samples with a step of half a window. Windows
    and features are built from the module-level ``df_train`` and ``df_test``;
    correlated features are removed, the rest are standardised, and logistic
    regression, random forest, SVM, XGBoost and a voting ensemble are tuned on
    the training windows and scored on the test windows.

    Parameters
    ----------
    i : int
        Zero-based task index; determines the window size.

    Returns
    -------
    dict
        ``'window'`` plus, for each tag in lr, rf, svm, xgb, voting:
        ``acc_<tag>`` (float) and ``prec_<tag>``, ``rec_<tag>``, ``fsc_<tag>``
        (arrays with one entry per class, ordered by encoded label:
        0 = 'Quieta', 1 = 'Caminando', 2 = 'Comiendo').
    """
    window_size = i + 2  # offset so the smallest window has 2 samples, never 0
    step_size = window_size // 2

    def _windowed_features(frame):
        # Slice the recording into windows and turn every window into a feature row.
        *axis_windows, window_labels = create_windows(frame, window_size, step_size)
        axis_frames = [pd.DataFrame(windows) for windows in axis_windows]
        return compute_features(*axis_frames, window_size), window_labels

    X_train, train_labels = _windowed_features(df_train)
    X_test, test_labels = _windowed_features(df_test)

    # Label encoding
    diccionario_codificacion = {'Quieta': 0, 'Caminando': 1, 'Comiendo': 2}
    train_labels = encode_labels(train_labels, diccionario_codificacion)
    test_labels = encode_labels(test_labels, diccionario_codificacion)
    class_codes = sorted(diccionario_codificacion.values())

    # Feature selection: the columns to drop are chosen on the training set only.
    X_train_reduced, X_test_reduced, to_drop = remove_correlated_features(X_train, X_test, threshold=0.9)

    # Scaling: fit on train, apply the same transform to test.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_reduced)
    X_test_scaled = scaler.transform(X_test_reduced)

    # SMOTE balancing is intentionally not applied here.

    # Model training and hyperparameter tuning
    lr_best, lr_params = tune_logistic_regression(X_train_scaled, train_labels)
    rf_best, rf_params = tune_random_forest(X_train_scaled, train_labels)
    svm_best, svm_params = tune_svm(X_train_scaled, train_labels)
    xgb_best, xgb_params = tune_xgb(X_train_scaled, train_labels)
    voting_clf = tune_voting(
        X_train_scaled, train_labels,
        base_models={'lr': lr_best, 'rf': rf_best, 'svm': svm_best, 'xgb': xgb_best},
    )

    print("Best Logistic Regression params:", lr_params)
    print("Best Random Forest params:", rf_params)
    print("Best SVM params:", svm_params)
    print("Best XGB params:", xgb_params)

    # Evaluation: identical metrics for every model, stored under tag-suffixed keys.
    fitted_models = {
        'lr': lr_best,
        'rf': rf_best,
        'svm': svm_best,
        'xgb': xgb_best,
        'voting': voting_clf,
    }
    results = {'window': window_size}
    for tag, model in fitted_models.items():
        predictions = model.predict(X_test_scaled)
        # labels=class_codes pins the per-class arrays to one entry per class in
        # encoding order, even if a class is absent from both the test labels and
        # the predictions for this window size (otherwise the arrays shrink and
        # positional indexing downstream points at the wrong class or fails).
        precision, recall, fscore, _ = precision_recall_fscore_support(
            test_labels, predictions, labels=class_codes, average=None, zero_division=0
        )
        results[f'acc_{tag}'] = accuracy_score(test_labels, predictions)
        results[f'prec_{tag}'] = precision
        results[f'rec_{tag}'] = recall
        results[f'fsc_{tag}'] = fscore

    print(f"Window size: {window_size}, RF Accuracy: {results['acc_rf']:.4f}, Params: {rf_params}")
    return results

# Run every window size in parallel: task i evaluates a window of i + 2 samples.
resultados = Parallel(n_jobs=-1, verbose=10)(delayed(evaluate_window)(i) for i in range(98))


def _collect(metric_key):
    """Gather one metric from every per-window result, in task order."""
    return [entry[metric_key] for entry in resultados]


# Accuracy per window size
accuracyX_lr = _collect('acc_lr')
accuracyX_rf = _collect('acc_rf')
accuracyX_svm = _collect('acc_svm')

# Logistic Regression: per-class precision / recall / f1 arrays
precisionX_lr = _collect('prec_lr')
recallX_lr = _collect('rec_lr')
fscoreX_lr = _collect('fsc_lr')

# Random Forest
precisionX_rf = _collect('prec_rf')
recallX_rf = _collect('rec_rf')
fscoreX_rf = _collect('fsc_rf')

# Support Vector Machine
precisionX_svm = _collect('prec_svm')
recallX_svm = _collect('rec_svm')
fscoreX_svm = _collect('fsc_svm')

# XGBoost
precisionX_xgb = _collect('prec_xgb')
recallX_xgb = _collect('rec_xgb')
fscoreX_xgb = _collect('fsc_xgb')

# Voting ensemble
precisionX_voting = _collect('prec_voting')
recallX_voting = _collect('rec_voting')
fscoreX_voting = _collect('fsc_voting')

# Window sizes, aligned element by element with the metric lists above.
windows = _collect('window')

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed: 13.5min
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed: 22.2min
[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed: 30.5min
[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed: 36.9min
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed: 45.0min
[Parallel(n_jobs=-1)]: Done  66 tasks      | elapsed: 50.1min
[Parallel(n_jobs=-1)]: Done  77 out of  98 | elapsed: 53.2min remaining: 14.5min
[Parallel(n_jobs=-1)]: Done  87 out of  98 | elapsed: 55.7min remaining:  7.0min
[Parallel(n_jobs=-1)]: Done  98 out of  98 | elapsed: 64.5min finished


## Precisión

In [None]:
import matplotlib.pyplot as plt

# Per-class precision curves, one subplot per model.
#
# precision_recall_fscore_support(average=None) returns one value per class in
# ascending order of the encoded label, and evaluate_window encodes
# {'Quieta': 0, 'Caminando': 1, 'Comiendo': 2}. Index 0 is therefore 'Quieta' and
# index 1 is 'Caminando'.

# Random Forest
precision_quieta_rf = [array[0] for array in precisionX_rf]
precision_caminando_rf = [array[1] for array in precisionX_rf]
precision_come_rf = [array[2] for array in precisionX_rf]

# Logistic regression
precision_quieta_lr = [array[0] for array in precisionX_lr]
precision_caminando_lr = [array[1] for array in precisionX_lr]
precision_come_lr = [array[2] for array in precisionX_lr]

# SVM
precision_quieta_svm = [array[0] for array in precisionX_svm]
precision_caminando_svm = [array[1] for array in precisionX_svm]
precision_come_svm = [array[2] for array in precisionX_svm]

# XGBoost
precision_quieta_xgb = [array[0] for array in precisionX_xgb]
precision_caminando_xgb = [array[1] for array in precisionX_xgb]
precision_come_xgb = [array[2] for array in precisionX_xgb]

# Voting
precision_quieta_voting = [array[0] for array in precisionX_voting]
precision_caminando_voting = [array[1] for array in precisionX_voting]
precision_come_voting = [array[2] for array in precisionX_voting]

# One row per model; all rows share both axes.
fig, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(5, 1, sharex=True, sharey=True, figsize=(10, 12))

paneles = [
    (ax1, 'Random Forest', precision_caminando_rf, precision_quieta_rf, precision_come_rf),
    (ax2, 'Regresión Logística', precision_caminando_lr, precision_quieta_lr, precision_come_lr),
    (ax3, 'SVM', precision_caminando_svm, precision_quieta_svm, precision_come_svm),
    (ax4, 'XGBoost', precision_caminando_xgb, precision_quieta_xgb, precision_come_xgb),
    (ax5, 'Voting', precision_caminando_voting, precision_quieta_voting, precision_come_voting),
]
for ax, titulo, caminando, quieta, come in paneles:
    ax.plot(windows, caminando, label='Caminando', color='red')
    ax.plot(windows, quieta, label='Quieta', color='blue')
    ax.plot(windows, come, label='Comiendo', color='gray')
    ax.set_ylabel('Precisión', fontsize=22)
    ax.set_title(titulo, fontsize=22)
    ax.tick_params(axis='x', labelsize=18)
    ax.tick_params(axis='y', labelsize=18)

# Only the bottom panel carries the shared x-axis label.
ax5.set_xlabel('Tamaño de ventanas [N° de mediciones]', fontsize=22)

# A single legend on the top panel is enough: colours are the same in every panel.
ax1.legend(loc=(0.6, 0), fontsize=20)

plt.ylim(0, 1)  # y-axis is shared, so this applies to every panel
plt.tight_layout()  # keep titles and labels from overlapping
plt.show()


## Recall

In [None]:
import matplotlib.pyplot as plt

# Per-class recall curves, one subplot per model.
#
# precision_recall_fscore_support(average=None) returns one value per class in
# ascending order of the encoded label, and evaluate_window encodes
# {'Quieta': 0, 'Caminando': 1, 'Comiendo': 2}. Index 0 is therefore 'Quieta' and
# index 1 is 'Caminando'.

# Random Forest
recall_quieta_rf = [array[0] for array in recallX_rf]
recall_caminando_rf = [array[1] for array in recallX_rf]
recall_come_rf = [array[2] for array in recallX_rf]

# Logistic regression
recall_quieta_lr = [array[0] for array in recallX_lr]
recall_caminando_lr = [array[1] for array in recallX_lr]
recall_come_lr = [array[2] for array in recallX_lr]

# SVM
recall_quieta_svm = [array[0] for array in recallX_svm]
recall_caminando_svm = [array[1] for array in recallX_svm]
recall_come_svm = [array[2] for array in recallX_svm]

# XGBoost
recall_quieta_xgb = [array[0] for array in recallX_xgb]
recall_caminando_xgb = [array[1] for array in recallX_xgb]
recall_come_xgb = [array[2] for array in recallX_xgb]

# Voting
recall_quieta_voting = [array[0] for array in recallX_voting]
recall_caminando_voting = [array[1] for array in recallX_voting]
recall_come_voting = [array[2] for array in recallX_voting]

# One row per model; all rows share both axes.
fig, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(5, 1, sharex=True, sharey=True, figsize=(10, 12))

paneles = [
    (ax1, 'Random Forest', recall_caminando_rf, recall_quieta_rf, recall_come_rf),
    (ax2, 'Regresión Logística', recall_caminando_lr, recall_quieta_lr, recall_come_lr),
    (ax3, 'SVM', recall_caminando_svm, recall_quieta_svm, recall_come_svm),
    (ax4, 'XGBoost', recall_caminando_xgb, recall_quieta_xgb, recall_come_xgb),
    (ax5, 'Voting', recall_caminando_voting, recall_quieta_voting, recall_come_voting),
]
for ax, titulo, caminando, quieta, come in paneles:
    ax.plot(windows, caminando, label='Caminando', color='red')
    ax.plot(windows, quieta, label='Quieta', color='blue')
    ax.plot(windows, come, label='Comiendo', color='gray')
    ax.set_ylabel('Recall', fontsize=22)
    ax.set_title(titulo, fontsize=22)
    ax.tick_params(axis='x', labelsize=18)
    ax.tick_params(axis='y', labelsize=18)

# Only the bottom panel carries the shared x-axis label.
ax5.set_xlabel('Tamaño de ventanas [N° de mediciones]', fontsize=22)

# A single legend on the top panel is enough: colours are the same in every panel.
ax1.legend(loc=(0.6, 0), fontsize=20)

plt.ylim(0, 1)  # y-axis is shared, so this applies to every panel
plt.tight_layout()  # keep titles and labels from overlapping
plt.show()


## F1-score

In [None]:
import matplotlib.pyplot as plt

# Per-class f1-score curves, one subplot per model.
#
# precision_recall_fscore_support(average=None) returns one value per class in
# ascending order of the encoded label, and evaluate_window encodes
# {'Quieta': 0, 'Caminando': 1, 'Comiendo': 2}. Index 0 is therefore 'Quieta' and
# index 1 is 'Caminando'.

# Random Forest
fscore_quieta_rf = [array[0] for array in fscoreX_rf]
fscore_caminando_rf = [array[1] for array in fscoreX_rf]
fscore_come_rf = [array[2] for array in fscoreX_rf]

# Logistic regression
fscore_quieta_lr = [array[0] for array in fscoreX_lr]
fscore_caminando_lr = [array[1] for array in fscoreX_lr]
fscore_come_lr = [array[2] for array in fscoreX_lr]

# SVM
fscore_quieta_svm = [array[0] for array in fscoreX_svm]
fscore_caminando_svm = [array[1] for array in fscoreX_svm]
fscore_come_svm = [array[2] for array in fscoreX_svm]

# XGBoost
fscore_quieta_xgb = [array[0] for array in fscoreX_xgb]
fscore_caminando_xgb = [array[1] for array in fscoreX_xgb]
fscore_come_xgb = [array[2] for array in fscoreX_xgb]

# Voting
fscore_quieta_voting = [array[0] for array in fscoreX_voting]
fscore_caminando_voting = [array[1] for array in fscoreX_voting]
fscore_come_voting = [array[2] for array in fscoreX_voting]

# One row per model; all rows share both axes.
fig, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(5, 1, sharex=True, sharey=True, figsize=(10, 12))

paneles = [
    (ax1, 'Random Forest', fscore_caminando_rf, fscore_quieta_rf, fscore_come_rf),
    (ax2, 'Regresión Logística', fscore_caminando_lr, fscore_quieta_lr, fscore_come_lr),
    (ax3, 'SVM', fscore_caminando_svm, fscore_quieta_svm, fscore_come_svm),
    (ax4, 'XGBoost', fscore_caminando_xgb, fscore_quieta_xgb, fscore_come_xgb),
    (ax5, 'Voting', fscore_caminando_voting, fscore_quieta_voting, fscore_come_voting),
]
for ax, titulo, caminando, quieta, come in paneles:
    ax.plot(windows, caminando, label='Caminando', color='red')
    ax.plot(windows, quieta, label='Quieta', color='blue')
    ax.plot(windows, come, label='Comiendo', color='gray')
    ax.set_ylabel('f$_1$-score', fontsize=22)
    ax.set_title(titulo, fontsize=22)
    ax.tick_params(axis='x', labelsize=18)
    ax.tick_params(axis='y', labelsize=18)

# Only the bottom panel carries the shared x-axis label.
ax5.set_xlabel('Tamaño de ventanas [N° de mediciones]', fontsize=22)

# A single legend on the top panel is enough: colours are the same in every panel.
ax1.legend(loc=(0.6, 0), fontsize=20)

plt.ylim(0, 1)  # y-axis is shared, so this applies to every panel
plt.tight_layout()  # keep titles and labels from overlapping
plt.show()


## Accuracy vs tamaño ventanas

In [None]:
import matplotlib.pyplot as plt

# Accuracy per window size for each of the five models.
accuracyX_lr = [res['acc_lr'] for res in resultados]
accuracyX_rf = [res['acc_rf'] for res in resultados]
accuracyX_svm = [res['acc_svm'] for res in resultados]
accuracyX_xgb = [res['acc_xgb'] for res in resultados]
accuracyX_voting = [res['acc_voting'] for res in resultados]

# Single set of axes shared by all models.
fig, axs = plt.subplots(1, 1, figsize=(10, 6))

# (values, legend label, line colour) for each curve, in legend order.
curvas = [
    (accuracyX_lr, 'Regresión logística', 'blue'),
    (accuracyX_rf, 'Random Forest', 'green'),
    (accuracyX_svm, 'SVM', 'orange'),
    (accuracyX_xgb, 'XGBoost', 'purple'),
    (accuracyX_voting, 'Voting', 'brown'),
]
for valores, etiqueta, tono in curvas:
    axs.plot(windows, valores, label=etiqueta, color=tono)

# Title and axis labels
axs.set_title('Accuracy de los modelos vs tamaño de ventana', fontsize=20)
axs.set_xlabel('Tamaño de ventanas [N° de mediciones]', fontsize=20)
axs.set_ylabel('Accuracy', fontsize=20)

# Tick label size
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

# Legend and grid
axs.legend(fontsize=14)
plt.grid(True)

# Leave some headroom above 1.0, then render.
plt.ylim(0, 1.1)
plt.tight_layout()
plt.show()
