## Libraries

In [1]:
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

# Read Data (extract_features)

In [2]:
# The feature matrix is stored as a CSV without a header row; read it and
# keep only the underlying NumPy array (the DataFrame itself is not needed).
path = "./processed-data/pyTS/rocket_full_trainset.csv"
selected_features = pd.read_csv(path, header=None).values

In [3]:
# Read the contents of the 'y' dataset from the HDF5 training file into memory.
with h5py.File('./human-activity/train.h5', 'r') as f:
    y = f['y'][()]

# Wrap the labels in a one-column frame and keep that column as a Series
# named "target".
target = pd.DataFrame(y, columns=['target'])['target']

In [4]:
# Fixed seed so the 80/20 hold-out split is identical on every run.
seed = 42
X_train, X_test, y_train, y_test = train_test_split(
    selected_features,
    target,
    test_size=0.2,
    random_state=seed,
)

# MODEL

In [5]:
from models.dt import *

In [None]:
import time
from sklearn.decomposition import PCA

# Create the results CSV (overwriting any previous run) and write its header row.
results_path = 'accuracy_results_dt.csv'
with open(results_path, 'w') as f:
    f.write('n_components,accuracy,time\n')

# Sweep the number of PCA components from 10 to 100 in steps of 10.
# range() excludes its stop value, so the stop must be 101 for 100 to be included.
for n_components in range(10, 101, 10):
    # perf_counter() is the clock intended for measuring elapsed intervals.
    start_time = time.perf_counter()

    # Fit PCA on the training split only and reuse that projection for the
    # test split. random_state (the same seed as the train/test split) pins
    # the result when scikit-learn selects a randomized SVD solver.
    pca = PCA(n_components=n_components, random_state=seed)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    # Train the decision tree (DT comes from the project's models.dt module).
    dt = DT(X_train_pca, y_train)
    dt.create_DT()

    # Predict with the decision tree, one test sample at a time.
    predictions = np.array([dt.predict(x) for x in X_test_pca])

    # Accuracy as a percentage: fraction of positions where the prediction
    # equals the true label.
    accuracy = np.mean(predictions == y_test) * 100

    # Elapsed time covers PCA, training, prediction and the accuracy computation.
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time

    # Append after every iteration so partial results survive an interrupted run.
    with open(results_path, 'a') as f:
        f.write(f'{n_components},{accuracy:.2f},{elapsed_time:.2f}\n')

    print(f'Componentes: {n_components}, Accuracy: {accuracy:.2f}%, Tiempo: {elapsed_time:.2f} segundos')

print("Resultados guardados en 'accuracy_results_dt.csv'")


In [7]:
# Fixed number of principal components for this single run.
n_components = 20

# Fit PCA on the training split only and reuse that projection for the test
# split. random_state (the same seed as the train/test split) keeps the
# projection, and therefore the predictions below, reproducible when
# scikit-learn selects a randomized SVD solver.
pca = PCA(n_components=n_components, random_state=seed)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Train the decision tree (DT comes from the project's models.dt module).
dt = DT(X_train_pca, y_train)
dt.create_DT()

# Predict with the decision tree, one test sample at a time.
predictions = np.array([dt.predict(x) for x in X_test_pca])

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


def matriz_confusion(y_pred, Tipo, y_test):
    """Print the confusion matrix of the predictions and show it as a heatmap.

    Args:
        y_pred: Predicted class labels.
        Tipo: Text appended to the plot title after "Confusion Matrix ".
        y_test: True class labels (rows of the matrix).

    The rows and columns are labelled with six hard-coded activity names, so
    the computed matrix must be 6x6.
    NOTE(review): the names are assumed to follow the sorted order of the
    numeric class labels -- confirm against the dataset's label encoding.
    """
    etiquetas = [
        "WALKING",
        "WALKING_UPSTAIRS",
        "WALKING_DOWNSTAIRS",
        "SITTING",
        "STANDING",
        "LAYING",
    ]

    matrix = confusion_matrix(y_test, y_pred)
    print(matrix)
    tabla = pd.DataFrame(matrix, index=etiquetas, columns=etiquetas)

    plt.figure(figsize=(10, 7))
    # fmt='d' shows the counts as integers; linewidths/linecolor draw the
    # separator lines between cells.
    ax = sns.heatmap(
        tabla,
        annot=True,
        fmt='d',
        cmap="YlGnBu",
        cbar=True,
        linewidths=.5,
        linecolor='black',
    )

    ax.set_title("Confusion Matrix " + Tipo)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Real")
    plt.tight_layout()
    plt.show()

    
# Show the confusion matrix of the decision-tree predictions against the
# test-split labels; "DT" is appended to the plot title.
matriz_confusion(predictions, "DT", y_test)
