In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
import tensorflow as tf
from tensorflow.keras import layers, models
import os

In [7]:
# FILES
# Paths of the two CSV inputs, relative to the notebook's working directory.
ATTRIBUTES_FILE = './dataset/attributes.csv'
LABELS_FILE = './dataset/label.csv'

# Display name for each integer class id; the id is the position in the tuple.
CLASS_NAMES = dict(enumerate((
    'Normal',
    'Charge',
    'Discharge',
    'Friction',
    'Charge Discharge',
    'Charge Friction',
    'Discharge Friction',
    'Charge Discharge Friction',
)))


In [8]:
# Make the random draws repeatable across runs.
# set_random_seed seeds Python's `random`, NumPy and TensorFlow in a single call.
# Seeding only NumPy and TensorFlow leaves Python's `random` unseeded, and Keras
# may derive its weight-initializer seeds from it (Keras 3 does), so the initial
# weights would otherwise differ on every run.
SEED = 42
tf.keras.utils.set_random_seed(SEED)


**carrega_dados**

X.shape[0]: número de linhas do array NumPy com todos os dados de entrada. Nesse caso, é a quantidade de amostras disponíveis.

X.shape[1]: número de colunas do array NumPy com todos os dados de entrada. Nesse caso, é a quantidade de colunas do CSV de amostras, ou seja, quantos instantes de tempo (timestamps) cada amostra possui.

X: 
| Amostra | feature_1 | feature_2 | ... | feature_201  |
|:---:|:---:|:---:|:---:|:---:|
| amostra 1 | valor_pressão_f1 | valor_pressão_f2 | ... | valor_pressão_f201 |
| amostra 2 | valor_pressão_f1 | valor_pressão_f2 | ... | valor_pressão_f201 |
| amostra 3 | valor_pressão_f1 | valor_pressão_f2 | ... | valor_pressão_f201 |
|  ...      |       ...        |         ...      | ... |           ...      |

tf.keras.utils.to_categorical(y, num_classes=num_classes): converte rótulos de classe inteiros para o formato one-hot. O resultado é armazenado em um array NumPy de tamanho (número de amostras) × (número de classes).

y_categorical:
| Amostra | Neurônio 1 (Normal) | Neurônio 2 (Charge) | ... | Neurônio 8 (All Faults) |
|:---:|:---:|:---:|:---:|:---:|
| Linha 1 | 1 | 0 | ... | 0 |
| Linha 2 | 0 | 0 | ... | 1 |
| Linha 3 | 0 | 0 | ... | 0 |
|  ...    | 0 | 0 | ... | 0 |

In [None]:
def carrega_dados(attr_caminho, lbl_caminho):
    """Load the attribute and label CSV files and prepare arrays for training.

    Both files are read with ``pandas.read_csv`` using its defaults, so the
    first line of each file is consumed as a header row.

    Args:
        attr_caminho: Path of the CSV holding one row per sample and one
            column per feature.
        lbl_caminho: Path of the CSV holding the class label of each sample.

    Returns:
        A tuple ``(X, y_categorical, y, num_features, num_classes)``:
        ``X`` is the 2-D attribute array, ``y_categorical`` the one-hot
        encoded labels, ``y`` the flattened label vector, ``num_features``
        the number of columns of ``X`` and ``num_classes`` the number of
        distinct labels.

    Raises:
        ValueError: If the dataset is empty, if the two files do not describe
            the same number of samples, or if the labels are not exactly the
            integers ``0 .. num_classes - 1``.
    """
    print(f"Lendo atributos: {attr_caminho}")
    print(f"Lendo labels: {lbl_caminho}")

    X_df = pd.read_csv(attr_caminho)
    y_df = pd.read_csv(lbl_caminho)

    # Convert to plain arrays; the labels become a 1-D vector.
    X = X_df.values
    y = y_df.values.flatten()

    # A mismatch usually means one file has a header row and the other does
    # not, or that the label file has more than one column.
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            f"Número de amostras ({X.shape[0]}) difere do número de labels ({y.shape[0]})"
        )
    if X.shape[0] == 0:
        raise ValueError("Dataset vazio: nenhum dado encontrado nos arquivos CSV")

    classes = np.unique(y)
    num_classes = len(classes)
    num_features = X.shape[1]

    # to_categorical indexes columns by label value, so the labels must be
    # exactly 0..num_classes-1; anything else would fail with an opaque
    # IndexError (or silently mis-encode negative labels).
    if not np.array_equal(classes, np.arange(num_classes)):
        raise ValueError(
            f"Labels devem ser inteiros contíguos de 0 a {num_classes - 1}; encontrados: {classes}"
        )

    print("Dataset carregado com sucesso:")
    print(f" - Amostras: {X.shape[0]}")
    print(f" - Features (Timestamps): {num_features}")
    print(f" - Classes únicas encontradas: {num_classes} {classes}")

    # One-hot encoding for the neural networks.
    y_categorical = tf.keras.utils.to_categorical(y, num_classes=num_classes)

    return X, y_categorical, y, num_features, num_classes


**constroi_mlp**



In [5]:
# MODELS (Topic 3.4)

# Hidden-layer widths, in order, for every supported MLP configuration name.
_MLP_HIDDEN_UNITS = {
    '4N': (4,),
    '8N': (8,),
    '16N': (16,),
    '32N': (32,),
    '16-8N': (16, 8),
}


def constroi_mlp(config_name, shape, num_classes):
    """Build an (uncompiled) Keras Sequential MLP for a named configuration.

    The network is an input layer, one ReLU ``Dense`` layer per entry of the
    chosen configuration, and a softmax ``Dense`` output layer.

    Args:
        config_name: One of ``'4N'``, ``'8N'``, ``'16N'``, ``'32N'``, ``'16-8N'``.
        shape: Input shape passed to ``layers.InputLayer``.
        num_classes: Number of units of the softmax output layer.

    Returns:
        The assembled ``models.Sequential`` instance.

    Raises:
        ValueError: If ``config_name`` is not a supported configuration.
            Without this check a misspelled name would silently produce a
            model with no hidden layer.
    """
    try:
        hidden_units = _MLP_HIDDEN_UNITS[config_name]
    except KeyError:
        raise ValueError(
            f"Configuração de MLP desconhecida: {config_name!r}. "
            f"Opções válidas: {sorted(_MLP_HIDDEN_UNITS)}"
        ) from None

    model = models.Sequential()
    model.add(layers.InputLayer(shape=shape))
    for units in hidden_units:
        model.add(layers.Dense(units, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    return model


In [6]:
# Convolution stages of every supported CNN configuration name. Each stage is
# (filters, kernel_size, pool_size): a Conv1D followed by an AveragePooling1D.
_CNN_CONV_STAGES = {
    'M1': ((1, 8, 4),),
    'M2': ((2, 8, 4),),
    'M3': ((1, 16, 4),),
    'M4': ((1, 8, 8),),
    'M5': ((1, 8, 4), (1, 8, 2)),
}


def constroi_cnn(config_name, shape, num_classes):
    """Build an (uncompiled) Keras Sequential 1-D CNN for a named configuration.

    The network is an input layer, one or two ``Conv1D`` + ``AveragePooling1D``
    stages (``padding='same'``, no activation argument passed to the
    convolution), a ``Flatten``, a 16-unit ReLU ``Dense`` layer and a softmax
    ``Dense`` output layer.

    Args:
        config_name: One of ``'M1'``, ``'M2'``, ``'M3'``, ``'M4'``, ``'M5'``.
        shape: Input shape passed to ``layers.InputLayer``.
        num_classes: Number of units of the softmax output layer.

    Returns:
        The assembled ``models.Sequential`` instance.

    Raises:
        ValueError: If ``config_name`` is not a supported configuration.
            Without this check a misspelled name would silently produce a
            model with no convolution/flatten stage at all.
    """
    try:
        conv_stages = _CNN_CONV_STAGES[config_name]
    except KeyError:
        raise ValueError(
            f"Configuração de CNN desconhecida: {config_name!r}. "
            f"Opções válidas: {sorted(_CNN_CONV_STAGES)}"
        ) from None

    model = models.Sequential()
    model.add(layers.InputLayer(shape=shape))

    for filters, kernel_size, pool_size in conv_stages:
        model.add(layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same'))
        model.add(layers.AveragePooling1D(pool_size=pool_size))

    # Classification head shared by every configuration.
    model.add(layers.Flatten())
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    return model


In [None]:
# EXPERIMENTS

# Training / cross-validation settings shared by the cells below.
K_FOLDS = 10
EPOCHS = 50
BATCH_SIZE = 32

X, y_cat, y_integers, n_features, n_classes = carrega_dados(
    ATTRIBUTES_FILE, LABELS_FILE
)

# Hold out 10% of the samples as the test set, stratified by the integer
# labels. One-hot and integer labels are split together so they stay aligned.
(X_train, X_test,
 y_train, y_test,
 y_train_int, y_test_int) = train_test_split(
    X, y_cat, y_integers,
    test_size=0.10,
    stratify=y_integers,
    random_state=42,
)

# CNN input layout: (samples, timesteps, features=1).
X_train_cnn = X_train[:, :, np.newaxis]
X_test_cnn = X_test[:, :, np.newaxis]

# Per-sample input shapes handed to the model builders.
mlp_shape = (n_features,)
cnn_shape = (n_features, 1)

# The same seeded splitter is reused by every model family.
skf = StratifiedKFold(n_splits=K_FOLDS, shuffle=True, random_state=42)

# One dict per evaluated configuration is appended here by the cells below.
resultados = []

print(f"\nIniciando Validação Cruzada (K={K_FOLDS})...")



Lendo atributos: ./dataset/attributes.csv
Lendo labels: ./dataset/label.csv
Dataset carregado com sucesso:
 - Amostras: 8000
 - Features (Timestamps): 201
 - Classes únicas encontradas: 8 [0 1 2 3 4 5 6 7]

Iniciando Validação Cruzada (K=10)...


In [None]:
# MLP: stratified k-fold cross-validation of every MLP configuration.
configuracoes_mlp = ['4N', '8N', '16N', '32N', '16-8N']

# Keep the cell idempotent: drop MLP rows left by a previous run so that
# re-executing it does not duplicate entries in the shared `resultados` list.
resultados[:] = [linha for linha in resultados if linha['Model'] != 'MLP']

for cfg in configuracoes_mlp:
    print(f"Avaliando MLP: {cfg}")
    acuracias_fold = []
    for train_idx, val_idx in skf.split(X_train, y_train_int):
        X_fold_train, X_fold_val = X_train[train_idx], X_train[val_idx]
        y_fold_train, y_fold_val = y_train[train_idx], y_train[val_idx]

        # Many models are created in this loop; clearing the Keras session
        # releases the state kept for the previous fold's model.
        tf.keras.backend.clear_session()

        # A fresh model per fold, so no weights carry over between folds.
        model = constroi_mlp(cfg, mlp_shape, n_classes)
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        model.fit(X_fold_train, y_fold_train, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)
        _, acc = model.evaluate(X_fold_val, y_fold_val, verbose=0)
        acuracias_fold.append(acc)

    # Summarise the per-fold validation accuracies of this configuration.
    mean_acc = np.mean(acuracias_fold)
    std_acc = np.std(acuracias_fold)
    resultados.append({'Model': 'MLP', 'Config': cfg, 'Val_Acc_Mean': mean_acc, 'Val_Acc_Std': std_acc})
    print(f"  -> Acc Média: {mean_acc:.4f}")


Avaliando MLP: 4N
  -> Acc Média: 0.9921
Avaliando MLP: 8N
  -> Acc Média: 0.9958
Avaliando MLP: 16N
  -> Acc Média: 0.9950
Avaliando MLP: 32N
  -> Acc Média: 0.9942
Avaliando MLP: 16-8N
  -> Acc Média: 0.9953


In [None]:

# CNN: stratified k-fold cross-validation of every CNN configuration.
configuracoes_cnn = ['M1', 'M2', 'M3', 'M4', 'M5']

# Keep the cell idempotent: drop CNN rows left by a previous run so that
# re-executing it does not duplicate entries in the shared `resultados` list.
resultados[:] = [linha for linha in resultados if linha['Model'] != 'CNN']

for cfg in configuracoes_cnn:
    print(f"Avaliando CNN: {cfg}")
    acuracias_fold = []
    # The fold indices come from the 2-D array; they index the 3-D CNN array
    # along the same first (sample) axis.
    for train_idx, val_idx in skf.split(X_train, y_train_int):
        X_fold_train, X_fold_val = X_train_cnn[train_idx], X_train_cnn[val_idx]
        y_fold_train, y_fold_val = y_train[train_idx], y_train[val_idx]

        # Many models are created in this loop; clearing the Keras session
        # releases the state kept for the previous fold's model.
        tf.keras.backend.clear_session()

        # A fresh model per fold, so no weights carry over between folds.
        model = constroi_cnn(cfg, cnn_shape, n_classes)
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        model.fit(X_fold_train, y_fold_train, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)
        _, acc = model.evaluate(X_fold_val, y_fold_val, verbose=0)
        acuracias_fold.append(acc)

    # Summarise the per-fold validation accuracies of this configuration.
    mean_acc = np.mean(acuracias_fold)
    std_acc = np.std(acuracias_fold)
    resultados.append({'Model': 'CNN', 'Config': cfg, 'Val_Acc_Mean': mean_acc, 'Val_Acc_Std': std_acc})
    print(f"  -> Acc Média: {mean_acc:.4f}")


Avaliando CNN: M1
  -> Acc Média: 0.9958
Avaliando CNN: M2
  -> Acc Média: 0.9956
Avaliando CNN: M3
  -> Acc Média: 0.9968
Avaliando CNN: M4
  -> Acc Média: 0.9936
Avaliando CNN: M5
  -> Acc Média: 0.9933


In [None]:

# KNN: stratified k-fold cross-validation for several neighbourhood sizes.
k_values = [1, 2, 5, 10, 20]

# Keep the cell idempotent: drop KNN rows left by a previous run so that
# re-executing it does not duplicate entries in the shared `resultados` list.
resultados[:] = [linha for linha in resultados if linha['Model'] != 'KNN']

print("\nAvaliando KNN...")
for k in k_values:
    # The estimator is re-fitted on each fold's training split below.
    knn = KNeighborsClassifier(n_neighbors=k)
    acuracias_fold = []
    for train_idx, val_idx in skf.split(X_train, y_train_int):
        X_fold_train, X_fold_val = X_train[train_idx], X_train[val_idx]
        # KNN is trained on the integer labels, not the one-hot matrix.
        y_fold_train_labels = y_train_int[train_idx]
        y_fold_val_labels = y_train_int[val_idx]
        knn.fit(X_fold_train, y_fold_train_labels)
        acc = knn.score(X_fold_val, y_fold_val_labels)
        acuracias_fold.append(acc)

    # Summarise the per-fold validation accuracies of this value of k.
    mean_acc = np.mean(acuracias_fold)
    std_acc = np.std(acuracias_fold)
    resultados.append({'Model': 'KNN', 'Config': f'K={k}', 'Val_Acc_Mean': mean_acc, 'Val_Acc_Std': std_acc})
    print(f"  -> KNN K={k}: {mean_acc:.4f}")



Avaliando KNN...
  -> KNN K=1: 0.8826
  -> KNN K=2: 0.8553
  -> KNN K=5: 0.8862
  -> KNN K=10: 0.8753
  -> KNN K=20: 0.8597


In [None]:

# Final result: pick the configuration with the highest mean validation
# accuracy, retrain it on the whole training split and score it on the test set.
resultados_df = pd.DataFrame(resultados)
indice_melhor = resultados_df['Val_Acc_Mean'].idxmax()
melhor_rodada = resultados_df.loc[indice_melhor]
tipo_modelo = melhor_rodada['Model']
config_modelo = melhor_rodada['Config']

separador = "=" * 40
print("\n" + separador)
print(f"MELHOR MODELO: {tipo_modelo} ({config_modelo})")
print(separador)

final_acc = 0
if tipo_modelo in ('MLP', 'CNN'):
    # Both neural families share the compile/fit/evaluate steps; only the
    # builder and the input arrays (2-D for MLP, 3-D for CNN) differ.
    if tipo_modelo == 'MLP':
        modelo_final = constroi_mlp(config_modelo, mlp_shape, n_classes)
        X_ajuste, X_avaliacao = X_train, X_test
    else:
        modelo_final = constroi_cnn(config_modelo, cnn_shape, n_classes)
        X_ajuste, X_avaliacao = X_train_cnn, X_test_cnn
    modelo_final.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    modelo_final.fit(X_ajuste, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)
    loss, final_acc = modelo_final.evaluate(X_avaliacao, y_test, verbose=0)
elif tipo_modelo == 'KNN':
    # The config string has the form 'K=<n>'; recover the integer n.
    k = int(config_modelo.split('=')[1])
    modelo_final = KNeighborsClassifier(n_neighbors=k)
    modelo_final.fit(X_train, y_train_int)
    final_acc = modelo_final.score(X_test, y_test_int)

print(f"Acurácia Final no Teste (10%): {final_acc:.4f}")
print("\nResumo Completo:")
print(resultados_df[['Model', 'Config', 'Val_Acc_Mean', 'Val_Acc_Std']])



MELHOR MODELO: CNN (M3)
Acurácia Final no Teste (10%): 0.9937

Resumo Completo:
   Model Config  Val_Acc_Mean  Val_Acc_Std
0    MLP     4N      0.992083     0.002488
1    MLP     8N      0.995833     0.002778
2    MLP    16N      0.995000     0.003298
3    MLP    32N      0.994167     0.004383
4    MLP  16-8N      0.995278     0.003632
5    CNN     M1      0.995833     0.003287
6    CNN     M2      0.995556     0.002764
7    CNN     M3      0.996806     0.004028
8    CNN     M4      0.993611     0.002860
9    CNN     M5      0.993333     0.004640
10   KNN    K=1      0.882639     0.017070
11   KNN    K=2      0.855278     0.011782
12   KNN    K=5      0.886250     0.013909
13   KNN   K=10      0.875278     0.007212
14   KNN   K=20      0.859722     0.010282
