## Libraries

In [1]:
import os

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from models import neural_binary_model
from storage import save_data, load_data, save_label_encoder, load_label_encoder
from tools import prepare_dataset2
from tools import preprocess_labels

2024-01-10 22:38:15.628327: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-10 22:38:15.628353: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-10 22:38:15.629734: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-10 22:38:15.636970: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## CUDA
The cell below forces TensorFlow to use the CPU for training and testing; comment it out to allow GPU use.

In [2]:
# Hide every CUDA device from TensorFlow so training and inference run on the CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

## Parameters

In [3]:
# Directory used to cache the prepared train/test splits and the label encoder
storage_path = "./storage/DS2/"

# Dataset caching switches: write freshly prepared splits / try to read cached ones
save_to_file, load_from_files = True, False

# Model persistence switches (not read by any of the cells shown in this notebook section)
save_model_to_file, load_model_to_file = False, False

# Forwarded to prepare_dataset2 as sample_size; set None to include the whole dataset.
# Ignored if train and test sets are loaded from disk.
n_sample = 300

## Data Loading

In [4]:
# Try to reuse the cached train/test splits and label encoder from storage_path.
# If a cached file is missing, clear `load_from_files` so the data-preparation
# cell rebuilds everything from scratch.
if load_from_files:
    try:
        X_train, X_test, y_train, y_test = load_data(storage_path)
        encoder = load_label_encoder(storage_path)
    except FileNotFoundError as missing:
        # Report the fallback instead of switching silently: the rebuilt data
        # depends on n_sample and may differ from what the user expected to load.
        print(f"Cached data not found ({missing}); rebuilding the dataset instead.")
        load_from_files = False

## Data preparation

In [5]:
if not load_from_files:
    # Build the raw feature array and labels; n_sample is forwarded as sample_size
    X, y = prepare_dataset2(
        sample_size=n_sample,
        keep_all_features=True,
        separate=True,
    )

    # Encode the labels; the returned encoder is used by the evaluation cell
    # to map class indices back to the original labels
    y, encoder = preprocess_labels(y)
    print(X.shape, y.shape)

    # Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # The full arrays are no longer needed once the splits exist
    del X, y

    # Flatten every sample into a single feature vector (one row per sample)
    X_train, X_test = (part.reshape(len(part), -1) for part in (X_train, X_test))

    if save_to_file:
        # Persist the splits and the label encoder under storage_path
        save_data(X_train, X_test, y_train, y_test, storage_path)
        save_label_encoder(encoder, storage_path)

        # Re-read the splits from the files just written (for mmap_mode)
        X_train, X_test, y_train, y_test = load_data(storage_path)

(212100, 2, 300) (212100, 2)


## Model Fitting

### Neural Binary Model


In [6]:
print(X_train.shape, y_train.shape)

# Build the network from the number of flattened features per sample
# (presumably the model's input dimension — confirm in models.py)
model = neural_binary_model(X_train.shape[1])

# Train for 10 epochs, with Keras holding out 20% of the training rows for validation.
# Keep the returned History object so the per-epoch metrics stay available for
# inspection instead of being dropped as a bare cell-output repr.
history = model.fit(X_train, y_train, epochs=10, batch_size=64, validation_split=0.2)

(169680, 600) (169680, 2)


2024-01-10 22:38:29.078963: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:274] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2024-01-10 22:38:29.079035: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:129] retrieving CUDA diagnostic information for host: ahmedh-GF63-Thin-10UD
2024-01-10 22:38:29.079041: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:136] hostname: ahmedh-GF63-Thin-10UD
2024-01-10 22:38:29.079247: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:159] libcuda reported version is: 525.147.5
2024-01-10 22:38:29.079263: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:163] kernel reported version is: 525.147.5
2024-01-10 22:38:29.079267: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:241] kernel version seems to match DSO: 525.147.5


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f1a20150fd0>

## Evaluation

In [7]:
# Evaluate the model on the test set
y_pred = model.predict(X_test)
y_pred_binary = (y_pred > 0.5).astype(int)

# Convert one-hot encoded labels back to original labels
y_test_labels = encoder.inverse_transform(y_test.argmax(axis=1))
y_pred_labels = encoder.inverse_transform(y_pred_binary.argmax(axis=1))

# Evaluate the performance
accuracy = accuracy_score(y_test_labels, y_pred_labels)
print(f"Accuracy: {accuracy}")


Accuracy: 0.92998585572843
