In [19]:
import pandas as pd

# --- Step 1: load the per-subject feature files ---
path_processed = '../data/processed/'

# NOTE(review): the second file name says "sujeto_03" but it is loaded and
# reported as "Sujeto 2" — confirm the subject numbering convention.
df_sujeto1 = pd.read_parquet(f"{path_processed}final_feature_data.parquet")
df_sujeto2 = pd.read_parquet(f"{path_processed}final_feature_data_sujeto_03.parquet")

print(f"Muestras del Sujeto 1: {df_sujeto1.shape}")
print(f"Muestras del Sujeto 2: {df_sujeto2.shape}")

# --- Step 2: collect the frames that will be merged ---
lista_de_dataframes = [df_sujeto1, df_sujeto2]

# --- Step 3: stack the frames row-wise ---
# ignore_index=True drops the per-file indices and renumbers the rows 0..N-1.
df_combinado = pd.concat(objs=lista_de_dataframes, axis=0, ignore_index=True)

# --- Step 4: verify the result ---
print("\n¡DataFrames fusionados exitosamente!")
print(f"Total de muestras combinadas: {df_combinado.shape}")

# Per-class sample counts of the combined frame.
print("\nConteo de clases en el DataFrame combinado:")
print(df_combinado.loc[:, 'Clase'].value_counts())

Muestras del Sujeto 1: (104804, 17)
Muestras del Sujeto 2: (118825, 17)

¡DataFrames fusionados exitosamente!
Total de muestras combinadas: (223629, 17)

Conteo de clases en el DataFrame combinado:
Clase
basal        61439
reposo       39318
adelante     30718
atras        30718
derecha      30718
izquierda    30718
Name: count, dtype: int64


In [20]:
# The 'basal' class is heavily over-represented, so it will be cut down in the
# following cells. Display its rows first for inspection.
df_combinado.loc[df_combinado['Clase'].eq('basal')]

Unnamed: 0,RMS_EXG Channel 0,WL_EXG Channel 0,RMS_EXG Channel 1,WL_EXG Channel 1,RMS_EXG Channel 2,WL_EXG Channel 2,RMS_EXG Channel 3,WL_EXG Channel 3,RMS_EXG Channel 4,WL_EXG Channel 4,RMS_EXG Channel 5,WL_EXG Channel 5,RMS_EXG Channel 6,WL_EXG Channel 6,RMS_EXG Channel 7,WL_EXG Channel 7,Clase
0,0.872088,0.000024,1.280368,0.005878,2.132244,0.000353,1.998439,0.001438,1.576958,0.039054,2.803140,0.336786,2.207718,0.017805,1.643290,0.003151,basal
1,0.875658,0.010704,1.296320,0.056102,2.129443,0.008945,2.000426,0.008114,1.582736,0.041188,2.780527,0.568515,2.203041,0.022946,1.643290,0.004727,basal
2,0.877819,0.012219,1.308854,0.074208,2.126982,0.013190,2.002133,0.010964,1.576158,0.079162,2.687670,0.915731,2.203554,0.034384,1.644015,0.007624,basal
3,0.876508,0.025244,1.305130,0.129838,2.125780,0.014571,2.001433,0.019581,1.571421,0.083091,2.648316,1.012881,2.204254,0.036342,1.644218,0.008781,basal
4,0.873829,0.036180,1.294588,0.179433,2.124787,0.015717,1.999626,0.027641,1.576304,0.131261,2.706917,1.510910,2.201815,0.053817,1.643536,0.013694,basal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135518,1.979015,30.085650,1.762115,5.220856,1.129650,18.468762,1.233465,3.499096,1.640961,20.842917,1.933590,26.899315,0.493070,4.955789,1.718296,5.860882,basal
135519,1.975596,29.876495,1.763256,5.186828,1.131367,18.434639,1.233859,3.507753,1.640014,20.882112,1.938584,26.851671,0.496603,4.923560,1.718640,5.844395,basal
135520,1.971761,29.939241,1.764469,5.196744,1.134022,18.417066,1.234145,3.485460,1.638608,20.821910,1.944106,26.904127,0.499981,4.932387,1.718881,5.870016,basal
135521,1.968472,29.820284,1.765545,5.166294,1.136968,18.441464,1.234353,3.504771,1.637179,20.881985,1.949411,26.787185,0.503347,4.901088,1.719343,5.853046,basal


In [21]:
# Halve the over-represented 'basal' class by splitting its rows into two
# random, disjoint halves. This is a class-balancing step, not a
# train/validation/test partition.
from sklearn.model_selection import train_test_split

# random_state pins the shuffle so that Restart & Run All keeps the same rows;
# without it the subset written to disk later changes on every execution.
df_basal_1, df_basal_2 = train_test_split(
    df_combinado[df_combinado['Clase'] == 'basal'],
    test_size=0.5,
    random_state=42,
)

In [24]:
# Every row whose class is not 'basal' is kept unchanged.
df_combinado_reducido = df_combinado.loc[df_combinado['Clase'].ne('basal')]

# Put the retained 'basal' half first, followed by the other classes, and
# renumber the rows 0..N-1.
new_df_combinado = pd.concat(
    objs=[df_basal_1, df_combinado_reducido],
    ignore_index=True,
)

In [25]:
# Per-class sample counts after the 'basal' reduction.
print(new_df_combinado.loc[:, 'Clase'].value_counts())

Clase
reposo       39318
basal        30719
adelante     30718
atras        30718
derecha      30718
izquierda    30718
Name: count, dtype: int64


In [26]:
# Destination of the combined feature table with the reduced 'basal' class.
# The path is a fixed literal; no variable from earlier cells is interpolated.
path_final = '../data/processed/final_feature_datacombinado_0_3_basal_reduced.parquet'

print(f"Guardando el DataFrame de características en: {path_final}")
# Report the shape of the frame that is about to be written.
print(f"Forma del DataFrame a guardar: {new_df_combinado.shape}")

# Write the frame to disk in Parquet format.
new_df_combinado.to_parquet(path=path_final)

print("\n¡Archivo de características para el sujeto 0 y 3 guardado exitosamente! ✅")

Guardando el DataFrame de características en: ../data/processed/final_feature_datacombinado_0_3_basal_reduced.parquet
Forma del DataFrame a guardar: (192909, 17)

¡Archivo de características para el sujeto 0 y 3 guardado exitosamente! ✅
