In [11]:
import h5py
import numpy as np
import pandas as pd

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import impute


In [2]:
# Read activity_labels.txt into a {label_id: label_name} mapping.
# Each non-blank line is expected to look like "<integer id> <name ...>".

activity_map = {}
with open('activity_labels.txt', 'r') as f:
    for line in f:
        parts = line.split()
        if not parts:
            # Blank line (e.g. a trailing newline at the end of the file):
            # skip it instead of failing on parts[0].
            continue
        label_id = int(parts[0])
        # Re-join the remaining tokens so multi-word names survive the split.
        label_name = " ".join(parts[1:])
        activity_map[label_id] = label_name

print("Activity Map:", activity_map)


Activity Map: {1: 'WALKING', 2: 'WALKING_UPSTAIRS', 3: 'WALKING_DOWNSTAIRS', 4: 'SITTING', 5: 'STANDING', 6: 'LAYING'}


In [9]:
#Leer archivos HDF5 (train.h5 CON y) y concatenar canales en un solo array

import h5py
import numpy as np

def load_data_from_h5(filename, has_labels=True, channels=None):
    """Load signal datasets from an HDF5 file and join them into one 2-D array.

    Parameters
    ----------
    filename : str
        Path of the HDF5 file to read.
    has_labels : bool, default True
        If True, the dataset named 'y' is read and returned as the labels.
        If False, no labels are read and None is returned in their place.
    channels : sequence of str, optional
        Names of the datasets to read, in the order in which they are
        concatenated. Defaults to the nine body_acc / body_gyro / total_acc
        x, y, z datasets.

    Returns
    -------
    X_data : numpy.ndarray
        The channel datasets concatenated along axis 1.
    y_data : numpy.ndarray or None
        Contents of 'y' when has_labels is True, otherwise None.

    Raises
    ------
    KeyError
        If has_labels is True but the file has no 'y' dataset, or if any
        requested channel is missing from the file.
    """
    if channels is None:
        channels = (
            'body_acc_x', 'body_acc_y', 'body_acc_z',
            'body_gyro_x', 'body_gyro_y', 'body_gyro_z',
            'total_acc_x', 'total_acc_y', 'total_acc_z',
        )

    with h5py.File(filename, 'r') as f:
        print(f"Keys en {filename}:", list(f.keys()))

        if has_labels:
            if 'y' not in f:
                # Same exception type h5py would raise, but with a message
                # that tells the caller how to load an unlabeled file.
                raise KeyError(
                    f"'{filename}' has no 'y' dataset; "
                    "pass has_labels=False to load an unlabeled file"
                )
            y_data = f['y'][:]
        else:
            y_data = None

        missing = [name for name in channels if name not in f]
        if missing:
            raise KeyError(f"'{filename}' is missing datasets: {missing}")

        # Read every channel fully into memory and join them column-wise,
        # preserving the order given in `channels`.
        X_data = np.concatenate([f[name][:] for name in channels], axis=1)

    return X_data, y_data

# Labeled training split: signal matrix plus the 'y' targets.
X_train, y_train = load_data_from_h5(filename='train.h5', has_labels=True)
print("Shape de X_train:", X_train.shape)
print("Shape de y_train:", y_train.shape)

# Unlabeled test split: the second return value is None and is discarded.
X_test, _ = load_data_from_h5(filename='test.h5', has_labels=False)
print("Shape de X_test:", X_test.shape)


Keys en train.h5: ['body_acc_x', 'body_acc_y', 'body_acc_z', 'body_gyro_x', 'body_gyro_y', 'body_gyro_z', 'total_acc_x', 'total_acc_y', 'total_acc_z', 'y']
Shape de X_train: (7352, 1152)
Shape de y_train: (7352,)
Keys en test.h5: ['body_acc_x', 'body_acc_y', 'body_acc_z', 'body_gyro_x', 'body_gyro_y', 'body_gyro_z', 'total_acc_x', 'total_acc_y', 'total_acc_z']


KeyError: "Unable to synchronously open object (object 'y' doesn't exist)"

In [4]:
# Preparar datos de TRAIN para TSFresh
import pandas as pd
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import impute

def reshape_for_tsfresh(X, y=None):
    """Convert a 2-D sample matrix into the long format used by tsfresh.

    Every row of X becomes one time series: its row index is the series
    'id', its column index is the 'time' step, and the cell is the 'value'.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_steps)
        One sample per row.
    y : array-like, optional
        Labels, one per row of X.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with columns 'id', 'time', 'value' and
        n_samples * n_steps rows (zero rows if X has no samples).
        Returned alone when y is None.
    (pandas.DataFrame, pandas.Series)
        When y is given: the frame plus y wrapped as a Series named 'target'.

    Notes
    -----
    NOTE(review): all columns of a row are treated as a single series. If X
    was built by concatenating several sensor channels side by side, they end
    up as one long series here - confirm that this is intended.
    """
    X = np.asarray(X)
    n_samples, n_steps = X.shape

    # Build the three columns in one shot instead of creating and
    # concatenating one small DataFrame per sample. Row-major flattening
    # keeps each sample's values contiguous and in column order.
    df_long = pd.DataFrame({
        'id': np.repeat(np.arange(n_samples), n_steps),
        'time': np.tile(np.arange(n_steps), n_samples),
        'value': X.reshape(-1),
    })

    if y is not None:
        return df_long, pd.Series(y, name='target')
    return df_long

# Long-format training frame plus the labels wrapped as a pandas Series.
df_train, y_train_series = reshape_for_tsfresh(X_train, y=y_train)
print("df_train shape:", df_train.shape)
print("y_train_series shape:", y_train_series.shape)


df_train shape: (8469504, 3)
y_train_series shape: (7352,)


In [5]:
# Feature extraction
extracted_features_train = extract_features(
    df_train,
    column_id='id',
    column_sort='time',
    column_value='value'
)

# Replace non-finite feature values. Keep the returned frame instead of
# relying only on the in-place side effect of impute().
extracted_features_train = impute(extracted_features_train)

# Select the features that are relevant for y_train_series.
# The labels are class ids but are stored as floats, and with the default
# ml_task='auto' tsfresh infers a regression target from a float dtype.
# State the task explicitly so the classification relevance tests are used.
selected_features_train = select_features(
    extracted_features_train,
    y_train_series,
    ml_task='classification'
)

print("extracted_features_train.shape =", extracted_features_train.shape)
print("selected_features_train.shape   =", selected_features_train.shape)


Feature Extraction: 100%|██████████| 30/30 [23:57<00:00, 47.92s/it]  


extracted_features_train.shape = (7352, 783)
selected_features_train.shape   = (7352, 645)


In [6]:
# Preprocess X_test with the SAME features as the training set
from tsfresh.utilities.dataframe_functions import (
    get_range_values_per_column,
    impute_dataframe_range,
)

df_test = reshape_for_tsfresh(X_test, y=None)  # no labels
print("df_test shape:", df_test.shape)

extracted_features_test = extract_features(
    df_test,
    column_id='id',
    column_sort='time',
    column_value='value'
)

# Fill non-finite test values with the per-column max / min / median of the
# (already imputed) TRAIN features rather than statistics of the test set,
# so both splits go through the same preprocessing.
col_to_max, col_to_min, col_to_median = get_range_values_per_column(
    extracted_features_train
)
extracted_features_test = impute_dataframe_range(
    extracted_features_test, col_to_max, col_to_min, col_to_median
)

# Keep exactly the columns that were selected on the training set.
selected_features_test = extracted_features_test[selected_features_train.columns]
print("extracted_features_test.shape =", extracted_features_test.shape)
print("selected_features_test.shape   =", selected_features_test.shape)


df_test shape: (3394944, 3)


Feature Extraction: 100%|██████████| 30/30 [09:55<00:00, 19.83s/it]  


extracted_features_test.shape = (2947, 783)
selected_features_test.shape   = (2947, 645)


In [7]:

# TRAIN THE SVM MODEL ON TRAIN AND PREDICT ON TEST
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# An RBF-kernel SVM is not scale invariant, so standardize the features
# first. Putting the scaler in the pipeline means it is fitted on the
# training features only and then re-applied unchanged to the test features.
# NOTE(review): C=1000.0 was chosen for unscaled inputs - TODO re-tune.
svm_model = make_pipeline(
    StandardScaler(),
    SVC(kernel='rbf', C=1000.0, gamma='scale', random_state=42),
)
svm_model.fit(selected_features_train, y_train_series)

y_pred = svm_model.predict(selected_features_test)

print("Predicciones en test (primeras 10):", y_pred[:10])


Predicciones en test (primeras 10): [5. 5. 5. 5. 5. 5. 5. 5. 5. 5.]


In [8]:
# Generate the submission CSV
submission_path = 'submission5.csv'

predicted_labels = y_pred.astype(int)  # class ids as integers

# IDs are 1-based and follow the order of the predictions; sizing them from
# the predictions guarantees both columns have the same length.
sample_ids = range(1, len(predicted_labels) + 1)

# Build the DataFrame with the required column headers.
submission_df = pd.DataFrame({
    'ID': sample_ids,
    'Activity': predicted_labels
})

submission_df.to_csv(submission_path, index=False)

# Report the file that was actually written (the old message named a
# different file than the one passed to to_csv).
print(f"Archivo '{submission_path}' generado correctamente.")


Archivo 'submission.csv' generado correctamente.
