In [1]:
%matplotlib inline
%matplotlib qt
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.signal import find_peaks
import warnings
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix

In [2]:
# Class labels (used as tick labels when confusion matrices are drawn)
labels = ['Caminando', 'Quieta', 'Comiendo']
# NOTE(review): this silences every warning for the whole kernel, including
# scikit-learn convergence warnings raised while fitting the models.
warnings.filterwarnings('ignore')

TRAIN_CSV = 'Archivos_CSV_con_etiquetas/train_tortuga.csv'
TEST_CSV = 'Archivos_CSV_con_etiquetas/test_tortuga.csv'


def load_accelerometer_csv(path):
    """Read one labelled CSV and return ``(cleaned, time_sorted)`` frames.

    ``cleaned`` is the raw file without the gyroscope columns
    (``gx``, ``gy``, ``gz``) and without rows containing null values; it keeps
    the original row order and index.

    ``time_sorted`` is ``cleaned`` without the rows whose ``dateTime_UTC`` is 0,
    sorted by ``dateTime_UTC`` with a fresh 0..n-1 index.

    The previous filter compared ``pd.to_datetime(...)`` with the integer 0.
    A datetime never compares equal to an integer, so no row was ever dropped.
    The raw column value is checked instead.
    """
    cleaned = pd.read_csv(path).drop(['gx', 'gy', 'gz'], axis=1).dropna()

    # Works for both numeric and text timestamp columns.
    raw_stamp = cleaned['dateTime_UTC'].astype(str).str.strip()
    has_zero_stamp = raw_stamp.isin(['0', '0.0'])

    # NOTE(review): the sort uses the column as stored; if it holds text the
    # order is lexicographic - confirm the format sorts chronologically.
    time_sorted = cleaned[~has_zero_stamp].sort_values(by=['dateTime_UTC'], ignore_index=True)
    return cleaned, time_sorted


### TRAIN
data_train, df = load_accelerometer_csv(TRAIN_CSV)

### TEST
data_test, df_test = load_accelerometer_csv(TEST_CSV)

In [3]:
# Result containers: one entry is appended per window size.
#   accuracyX_*                   -> overall accuracy (float)
#   precisionX_* / recallX_* / fscoreX_*   -> per-class arrays (average=None)
#   *_W                           -> support-weighted scalars (average='weighted')
accuracyX_lr = []
accuracyX_rf = []
precisionX_rf = []
precisionX_lr = []
recallX_lr = []
recallX_rf = []
fscoreX_lr = []
fscoreX_rf = []
precisionX_rf_W = []
precisionX_lr_W = []
recallX_lr_W = []
recallX_rf_W = []
fscoreX_lr_W = []
fscoreX_rf_W = []
windows = []

N_WINDOW_SIZES = 11      # number of window sizes evaluated
STEP_INCREMENT = 48      # step sizes are 1, 49, 97, ... -> windows 2, 98, 194, ..., 962
# Set to True to draw one confusion matrix per model and window size.
SHOW_CONFUSION_MATRICES = False


def majority_label(values):
    """Return the most frequent value in ``values``.

    Ties are resolved in favour of the smallest value, because ``np.unique``
    returns sorted values and ``argmax`` picks the first maximum. Unlike
    ``stats.mode(...)[0][0]`` this does not depend on the SciPy version and it
    also accepts text labels.
    """
    uniques, counts = np.unique(np.asarray(values), return_counts=True)
    return uniques[np.argmax(counts)]


def sliding_windows(frame, window_size, step_size):
    """Cut ``frame`` into overlapping windows.

    Returns ``(x_windows, y_windows, z_windows, window_labels)``. The first
    three are lists of 1-D arrays taken from the ``ax``, ``ay`` and ``az``
    columns; ``window_labels`` holds the majority ``Actividades`` value of
    each window.
    """
    x_values = frame['ax'].values
    y_values = frame['ay'].values
    z_values = frame['az'].values
    activity = frame['Actividades'].values

    x_windows, y_windows, z_windows, window_labels = [], [], [], []
    # NOTE(review): the exclusive upper bound skips a last window that would
    # end exactly on the final row; kept as is so results stay unchanged.
    for start in range(0, frame.shape[0] - window_size, step_size):
        stop = start + window_size
        x_windows.append(x_values[start:stop])
        y_windows.append(y_values[start:stop])
        z_windows.append(z_values[start:stop])
        window_labels.append(majority_label(activity[start:stop]))
    return x_windows, y_windows, z_windows, window_labels


def window_features(x_windows, y_windows, z_windows, window_size):
    """Build the time-domain feature table, one row per window.

    Columns are emitted feature by feature and, within a feature, in x, y, z
    order, followed by ``avg_result_accl`` and ``sma``. The column order is
    part of the model input and must stay stable.
    """
    per_axis_features = [
        ('mean', lambda w: w.mean()),
        ('std', lambda w: w.std()),
        # average absolute deviation from the mean
        ('aad', lambda w: np.mean(np.absolute(w - np.mean(w)))),
        ('min', lambda w: w.min()),
        ('max', lambda w: w.max()),
        ('median', lambda w: np.median(w)),
        # median absolute deviation from the median
        ('mad', lambda w: np.median(np.absolute(w - np.median(w)))),
        ('IQR', lambda w: np.percentile(w, 75) - np.percentile(w, 25)),
        ('neg_count', lambda w: np.sum(w < 0)),
        ('pos_count', lambda w: np.sum(w > 0)),
        ('above_mean', lambda w: np.sum(w > w.mean())),
        ('peak_count', lambda w: len(find_peaks(w)[0])),
        ('energy', lambda w: np.sum(w**2) / window_size),
    ]
    axes = (('x', x_windows), ('y', y_windows), ('z', z_windows))

    columns = {}
    for feature_name, feature in per_axis_features:
        for axis_name, axis_windows in axes:
            columns[axis_name + '_' + feature_name] = [feature(w) for w in axis_windows]

    # mean magnitude of the acceleration vector over the window
    columns['avg_result_accl'] = [
        ((x**2 + y**2 + z**2)**0.5).mean()
        for x, y, z in zip(x_windows, y_windows, z_windows)
    ]
    # signal magnitude area
    columns['sma'] = [
        np.sum(abs(x) / window_size) + np.sum(abs(y) / window_size) + np.sum(abs(z) / window_size)
        for x, y, z in zip(x_windows, y_windows, z_windows)
    ]
    return pd.DataFrame(columns)


def record_scores(store, y_true, y_pred):
    """Append accuracy, per-class and weighted precision/recall/f-score to ``store``."""
    store['accuracy'].append(accuracy_score(y_true, y_pred))
    per_class = precision_recall_fscore_support(y_true, y_pred, average=None)
    weighted = precision_recall_fscore_support(y_true, y_pred, average='weighted')
    for position, metric in enumerate(('precision', 'recall', 'fscore')):
        store[metric].append(per_class[position])
        store[metric + '_W'].append(weighted[position])


def show_confusion_matrix(y_true, y_pred, title):
    """Draw the confusion matrix of one model as an annotated heat map."""
    matrix = confusion_matrix(y_true, y_pred)
    fig, axis = plt.subplots()
    image = axis.imshow(matrix, cmap='YlGnBu')

    # show every class label and rotate the x labels so they stay readable
    axis.set_xticks(np.arange(len(labels)))
    axis.set_yticks(np.arange(len(labels)))
    axis.set_xticklabels(labels)
    axis.set_yticklabels(labels)
    plt.setp(axis.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    # write the count inside each cell
    for row in range(matrix.shape[0]):
        for col in range(matrix.shape[1]):
            axis.text(col, row, matrix[row, col], ha="center", va="center", color="black")

    axis.set_title(title)
    axis.set_xlabel('Predicted label')
    axis.set_ylabel('True label')
    fig.colorbar(image, ax=axis)

    # remove the frame, the tick marks and the grid
    for side in ('top', 'right', 'bottom', 'left'):
        axis.spines[side].set_visible(False)
    axis.tick_params(axis='both', which='both', length=0)
    axis.grid(False)
    plt.show()


scores_lr = {
    'accuracy': accuracyX_lr,
    'precision': precisionX_lr, 'recall': recallX_lr, 'fscore': fscoreX_lr,
    'precision_W': precisionX_lr_W, 'recall_W': recallX_lr_W, 'fscore_W': fscoreX_lr_W,
}
scores_rf = {
    'accuracy': accuracyX_rf,
    'precision': precisionX_rf, 'recall': recallX_rf, 'fscore': fscoreX_rf,
    'precision_W': precisionX_rf_W, 'recall_W': recallX_rf_W, 'fscore_W': fscoreX_rf_W,
}

# NOTE(review): windows are built from data_train / data_test (file order), not
# from the time-sorted frames prepared when loading, and df_test is rebound
# here - confirm the CSV rows are already chronological.
df_train = data_train
df_test = data_test

for size_index in range(N_WINDOW_SIZES):
    step_size = 1 + STEP_INCREMENT * size_index
    window_size = 2 * step_size          # consecutive windows overlap by 50 %
    windows.append(window_size)

    ### Features and labels
    x_train, y_train, z_train, train_labels = sliding_windows(df_train, window_size, step_size)
    x_test, y_test, z_test, test_labels = sliding_windows(df_test, window_size, step_size)
    X_train = window_features(x_train, y_train, z_train, window_size)
    X_test = window_features(x_test, y_test, z_test, window_size)
    act = np.array(train_labels)
    act_test = np.array(test_labels)

    # Standardisation is fitted on the training windows only.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    ### Logistic regression
    lr = LogisticRegression(random_state = 21)
    lr.fit(X_train_scaled, act)
    act_pred_lr = lr.predict(X_test_scaled)
    record_scores(scores_lr, act_test, act_pred_lr)
    if SHOW_CONFUSION_MATRICES:
        show_confusion_matrix(act_test, act_pred_lr, "Confusion matrix Regresión logística")

    ### Random forest (trained on the same standardised features)
    rf = RandomForestClassifier(random_state=21)
    rf.fit(X_train_scaled, act)
    act_pred_rf = rf.predict(X_test_scaled)
    record_scores(scores_rf, act_test, act_pred_rf)
    if SHOW_CONFUSION_MATRICES:
        show_confusion_matrix(act_test, act_pred_rf, "Confusion matrix Random Forest")

## Precisión

In [4]:
%matplotlib qt
precision_caminando_rf = [array[0] for array in precisionX_rf]
precision_quieta_rf = [array[1] for array in precisionX_rf]
precision_come_rf = [array[2] for array in precisionX_rf]
precision_caminando_lr = [array[0] for array in precisionX_lr]
precision_quieta_lr = [array[1] for array in precisionX_lr]
precision_come_lr = [array[2] for array in precisionX_lr]

fig, (ax1, ax2) = plt.subplots(2,1, sharex=True, sharey=True)
#axs.plot(windows, recallX_lr, label='Regresión lineal')
ax1.plot(windows, precision_caminando_rf, label='Caminando', color='red')
ax1.plot(windows, precision_quieta_rf, label='Quieta', color='blue')
ax1.plot(windows, precision_come_rf, label='Comiendo', color='gray')


ax1.set_ylabel('Precisión', fontsize=18)
ax1.set_title('Random forest', fontsize=18)


#axs.plot(windows, recallX_lr, label='Regresión lineal')
ax2.plot(windows, precision_caminando_lr, color='red')
ax2.plot(windows, precision_quieta_lr, color='blue')
ax2.plot(windows, precision_come_lr, color='gray')


ax2.set_xlabel('Tamaño de ventanas [N° de mediciones]', fontsize=18)
ax2.set_ylabel('Precisión', fontsize=18)
ax2.set_title('Regresión lineal',fontsize=18)
ax1.tick_params(axis='x', labelsize=12)
ax1.tick_params(axis='y', labelsize=12)
ax2.tick_params(axis='x', labelsize=12)
ax2.tick_params(axis='y', labelsize=12)


#Añade lineas verticales
ylim1 = ax1.get_ylim()
ylim2 = ax2.get_ylim()
ax1.axvline(x=98, color='red', linestyle='--', linewidth=1)
ax1.text(98, ylim1[0] - (ylim1[1] - ylim1[0]) * 0.1, '98', fontsize=12, color='red', ha='center')
ax2.axvline(x=578, color='red', linestyle='--', linewidth=1)
ax2.text(578, ylim2[0] - (ylim2[1] - ylim2[0]) * 0.1, '578', fontsize=12, color='red', ha='center')

current_labels = plt.gca().get_xticks()
new_labels = [label for label in current_labels if label != 600 and label!=1000 and label!=1200 and label!=-200]

# Establecer las nuevas etiquetas del eje x
plt.gca().set_xticks(new_labels)


ax1.legend(loc=(0.6,0), fontsize=13)
plt.show()

## Recall

In [6]:
%matplotlib qt
recall_caminando_rf = [array[0] for array in recallX_rf]
recall_quieta_rf = [array[1] for array in recallX_rf]
recall_come_rf = [array[2] for array in recallX_rf]
recall_caminando_lr = [array[0] for array in recallX_lr]
recall_quieta_lr = [array[1] for array in recallX_lr]
recall_come_lr = [array[2] for array in recallX_lr]

fig, (ax1, ax2) = plt.subplots(2,1, sharex=True, sharey=True)
#axs.plot(windows, recallX_lr, label='Regresión lineal')
ax1.plot(windows, recall_caminando_rf, label='Caminando', color='red')
ax1.plot(windows, recall_quieta_rf, label='Quieta', color='blue')
ax1.plot(windows, recall_come_rf, label='Comiendo', color='gray')


ax1.set_ylabel('Recall', fontsize=18)
ax1.set_title('Random forest', fontsize=18)


#axs.plot(windows, recallX_lr, label='Regresión lineal')
ax2.plot(windows, recall_caminando_lr, color='red')
ax2.plot(windows, recall_quieta_lr, color='blue')
ax2.plot(windows, recall_come_lr, color='gray')


ax2.set_xlabel('Tamaño de ventanas [N° de mediciones]', fontsize=18)
ax2.set_ylabel('Recall', fontsize=18)
ax2.set_title('Regresión lineal',fontsize=18)
ax1.tick_params(axis='x', labelsize=12)
ax1.tick_params(axis='y', labelsize=12)
ax2.tick_params(axis='x', labelsize=12)
ax2.tick_params(axis='y', labelsize=12)


#Añade lineas verticales
ylim1 = ax1.get_ylim()
ylim2 = ax2.get_ylim()
ax1.axvline(x=98, color='red', linestyle='--', linewidth=1)
ax1.text(98, ylim1[0] - (ylim1[1] - ylim1[0]) * 0.1, '98', fontsize=12, color='red', ha='center')
ax2.axvline(x=578, color='red', linestyle='--', linewidth=1)
ax2.text(578, ylim2[0] - (ylim2[1] - ylim2[0]) * 0.1, '578', fontsize=12, color='red', ha='center')

current_labels = plt.gca().get_xticks()
new_labels = [label for label in current_labels if label != 600 and label!=1000 and label!=1200 and label!=-200]

# Establecer las nuevas etiquetas del eje x
plt.gca().set_xticks(new_labels)


ax1.legend(loc=(0.6,0), fontsize=13)
plt.show()

## f1_score

In [7]:
%matplotlib qt
fscore_caminando_rf = [array[0] for array in fscoreX_rf]
fscore_quieta_rf = [array[1] for array in fscoreX_rf]
fscore_come_rf = [array[2] for array in fscoreX_rf]
fscore_caminando_lr = [array[0] for array in fscoreX_lr]
fscore_quieta_lr = [array[1] for array in fscoreX_lr]
fscore_come_lr = [array[2] for array in fscoreX_lr]

fig, (ax1, ax2) = plt.subplots(2,1, sharex=True, sharey=True)
#axs.plot(windows, recallX_lr, label='Regresión lineal')
ax1.plot(windows, fscore_caminando_rf, label='Caminando', color='red')
ax1.plot(windows, fscore_quieta_rf, label='Quieta', color='blue')
ax1.plot(windows, fscore_come_rf, label='Comiendo', color='gray')


ax1.set_ylabel('f$_1$-score', fontsize=18)
ax1.set_title('Random forest', fontsize=18)


#axs.plot(windows, recallX_lr, label='Regresión lineal')
ax2.plot(windows, fscore_caminando_lr, color='red')
ax2.plot(windows, fscore_quieta_lr, color='blue')
ax2.plot(windows, fscore_come_lr, color='gray')


ax2.set_xlabel('Tamaño de ventanas [N° de mediciones]', fontsize=18)
ax2.set_ylabel('f$_1$-score', fontsize=18)
ax2.set_title('Regresión lineal',fontsize=18)
ax1.tick_params(axis='x', labelsize=12)
ax1.tick_params(axis='y', labelsize=12)
ax2.tick_params(axis='x', labelsize=12)
ax2.tick_params(axis='y', labelsize=12)


#Añade lineas verticales
ylim1 = ax1.get_ylim()
ylim2 = ax2.get_ylim()
ax1.axvline(x=98, color='red', linestyle='--', linewidth=1)
ax1.text(98, ylim1[0] - (ylim1[1] - ylim1[0]) * 0.1, '98', fontsize=12, color='red', ha='center')
ax2.axvline(x=578, color='red', linestyle='--', linewidth=1)
ax2.text(578, ylim2[0] - (ylim2[1] - ylim2[0]) * 0.1, '578', fontsize=12, color='red', ha='center')

current_labels = plt.gca().get_xticks()
new_labels = [label for label in current_labels if label != 600 and label!=1000 and label!=1200 and label!=-200]

# Establecer las nuevas etiquetas del eje x
plt.gca().set_xticks(new_labels)


ax1.legend(loc=(0.45,0), fontsize=13)
plt.show()

## Accuracy vs tamaño ventanas

In [8]:
# Overall accuracy of both models as a function of the window size.
fig, axs = plt.subplots(1, sharex=True, sharey=True)
# The first model is LogisticRegression, so the legend names it accordingly.
axs.plot(windows, accuracyX_lr, label='Regresión logística')
axs.plot(windows, accuracyX_rf, label='Random Forest')

# Fix the y-range BEFORE placing the annotations: their height is derived from
# the axis limits, and computing it from the autoscaled limits (as before)
# left the text at a stale position once the range was changed.
axs.set_ylim(0.75, 1)
bottom, top = axs.get_ylim()

# Dashed vertical markers, with the window size written 5 % of the axis height
# below the bottom limit.
# NOTE(review): 98 and 578 are presumably the window sizes selected for each
# model - confirm.
for marked_window in (98, 578):
    axs.axvline(x=marked_window, color='red', linestyle='--', linewidth=1)
    axs.text(marked_window, bottom - (top - bottom) * 0.05, str(marked_window),
             fontsize=12, color='red', ha='center')

# Drop the default ticks listed here.
hidden_ticks = {600, 1000, 1200, -200}
axs.set_xticks([tick for tick in axs.get_xticks() if tick not in hidden_ticks])

axs.set_title('Accuracy de los modelos vs tamaño de ventana', fontsize=20)
axs.tick_params(axis='both', labelsize=14)
axs.set_xlabel('Tamaño de ventanas [N° de mediciones]', fontsize=20)
axs.set_ylabel('Accuracy', fontsize=20)
axs.legend(fontsize=20)
plt.show()