In [1]:
import numpy as np
import pandas as pd
import os
import random
from tensorflow.keras import models, layers, Input

2025-03-25 15:19:32.539022: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def set_seed(seed: int):
    random.seed(seed) # Python
    np.random.seed(seed)  # Numpy, é o gerador utilizado pelo sklearn
    os.environ["PYTHONHASHSEED"] = str(seed)  # sistema operativo

set_seed(25)

In [7]:
# Load datasets
def load_data(file_path):
    data = pd.read_csv(file_path)
    features = data.drop(columns=['targetLabel']).values  
    labels = data['targetLabel'].values
    return features, labels

train_data, train_targets = load_data('../../datasets/trainSmall.csv')
test_data, test_targets = load_data('../../datasets/testSmall.csv')
print("Done reading!")

Done reading!


In [8]:
print("Train targets shape:", train_targets.shape)
print("Test targets shape:", test_targets.shape)

Train targets shape: (3500,)
Test targets shape: (1001,)


In [9]:
# Build the model
def build_model():
    model = models.Sequential()
    model.add(Input(shape=(train_data.shape[1],)))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(64, activation='sigmoid'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(32, activation='sigmoid'))
    model.add(layers.Dense(1)) 
    model.compile(optimizer='rmsprop', loss='mse', metrics=['accuracy'])
    return model 

model = build_model()

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               1280128   
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dense_3 (Dense)             (None, 1)                 33        
                                                                 
Total params: 1,290,497
Trainable params: 1,290,497
Non-

2025-03-25 15:23:28.225434: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-03-25 15:23:28.231090: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [10]:
model.fit(train_data, train_targets, epochs=10, batch_size=128, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fd34afb1180>

In [11]:
# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(test_data, test_targets)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")


Test Loss: 0.03625244274735451, Test Accuracy: 0.9500499367713928


In [13]:
# Cross-validation 
# codigo adaptado do codigo do stor

k = 5
num_val_samples = len(train_data) // k
num_epochs = 10
all_scores = []

for i in range(k):
    print('Processing fold #', i)
    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]
    partial_train_data = np.concatenate([train_data[:i * num_val_samples], train_data[(i + 1) * num_val_samples:]], axis=0)
    partial_train_targets = np.concatenate([train_targets[:i * num_val_samples], train_targets[(i + 1) * num_val_samples:]], axis=0)
    
    model = build_model()
    model.fit(partial_train_data, partial_train_targets, epochs=num_epochs, batch_size=1, verbose=0)
    val_loss, val_accuracy = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_accuracy)

print(f"All Accuracy scores: {all_scores}")
print(f"Mean Accuracy: {np.mean(all_scores)}")

Processing fold # 0
Processing fold # 1
Processing fold # 2
Processing fold # 3
Processing fold # 4
All Accuracy scores: [0.9557142853736877, 0.9571428298950195, 0.9257143139839172, 0.9385714530944824, 0.9557142853736877]
Mean Accuracy: 0.946571433544159


In [15]:
# Load validation dataset
val_data, val_targets = load_data('../../datasets/validationSmall.csv')
print("Done reading validation data!")

# Get predictions
val_predictions = model.predict(val_data)
val_predictions = (val_predictions > 0.5).astype(int)  # Convert probabilities to binary predictions (0 or 1)

# Save results with header
output_data = np.column_stack((val_targets, val_predictions.flatten()))
np.savetxt('validations_predictions_manual_nn.csv', output_data, delimiter=',', header="real,predicted", comments='')

# Print validation accuracy
val_loss, val_accuracy = model.evaluate(val_data, val_targets, verbose=0)
print(f"Validation Accuracy: {val_accuracy}")

Done reading validation data!
Validation Accuracy: 0.9559999704360962


Testar varias configurações, codigo do stor adaptado


In [16]:
from tensorflow.keras import Sequential

def create_model(hidden_layers=[128, 64, 32], dropout_rate=0.3, activation='relu', optimizer='adam'):
    model = Sequential()
    model.add(layers.Dense(hidden_layers[0], activation=activation, input_shape=(train_data.shape[1],)))
    
    for units in hidden_layers[1:]:
        model.add(layers.Dropout(dropout_rate))
        model.add(layers.Dense(units, activation=activation))
    
    model.add(layers.Dense(1, activation='sigmoid'))  # Binary classification output layer
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [17]:
# Experiment with different architectures
topologies = [
    [128, 64, 32],  # Baseline
    [256, 128, 64],  # Deeper model
    [512, 256, 128, 64],  # Very deep model
    [128, 64],  # Simpler model
]

for topology in topologies:
    print(f"\nTesting topology: {topology}")
    model = create_model(hidden_layers=topology)
    model.fit(train_data, train_targets, epochs=5, batch_size=128, validation_data=(test_data, test_targets), verbose=1)
    test_loss, test_acc = model.evaluate(test_data, test_targets)
    print(f"Test Accuracy: {test_acc:.4f}")


Testing topology: [128, 64, 32]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 0.9510

Testing topology: [256, 128, 64]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 0.9550

Testing topology: [512, 256, 128, 64]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 0.9510

Testing topology: [128, 64]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 0.9520


In [19]:
optimizers = ['adam', 'sgd', 'rmsprop']
for opt in optimizers:
    print(f"\nTesting optimizer: {opt}")
    model = create_model(optimizer=opt)
    model.fit(train_data, train_targets, epochs=10, batch_size=64, validation_data=(test_data, test_targets), verbose=1)
    test_loss, test_acc = model.evaluate(test_data, test_targets)
    print(f"Test Accuracy with {opt}: {test_acc:.4f}")


Testing optimizer: adam
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy with adam: 0.9520

Testing optimizer: sgd
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy with sgd: 0.9271

Testing optimizer: rmsprop
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy with rmsprop: 0.9640


In [20]:
batch_sizes = [16, 32, 128]
epochs_list = [2, 5, 10]

for batch in batch_sizes:
    for epochs in epochs_list:
        print(f"\nTesting batch_size={batch}, epochs={epochs}")
        model = create_model()
        model.fit(train_data, train_targets, epochs=epochs, batch_size=batch, validation_data=(test_data, test_targets), verbose=1)
        test_loss, test_acc = model.evaluate(test_data, test_targets)
        print(f"Test Accuracy: {test_acc:.4f}")


Testing batch_size=16, epochs=2
Epoch 1/2
Epoch 2/2
Test Accuracy: 0.9500

Testing batch_size=16, epochs=5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 0.9530

Testing batch_size=16, epochs=10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.9560

Testing batch_size=32, epochs=2
Epoch 1/2
Epoch 2/2
Test Accuracy: 0.9491

Testing batch_size=32, epochs=5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 0.9500

Testing batch_size=32, epochs=10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.9540

Testing batch_size=128, epochs=2
Epoch 1/2
Epoch 2/2
Test Accuracy: 0.9530

Testing batch_size=128, epochs=5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 0.9530

Testing batch_size=128, epochs=10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
E