In [1]:
import numpy as np
from sklearn import preprocessing

raw_csv_data = np.loadtxt("Audiobooks_data.csv", delimiter=",")
unscaled_input_all = raw_csv_data[:, 1:-1]
targets_all = raw_csv_data[:, -1]

## Balance the dataset

In [2]:
num_one_targets = int(sum(targets_all))
zero_targets_counter = 0
indices_to_remove = []

for i in range(targets_all.shape[0]):
    if targets_all[i] == 0:
        zero_targets_counter += 1
        if zero_targets_counter > num_one_targets:
            indices_to_remove.append(i)

unscaled_inputs_equal_priors = np.delete(unscaled_input_all, indices_to_remove, axis=0)
targets_equals_priors = np.delete(targets_all, indices_to_remove, axis=0)

## Standarize the inputs

In [3]:
scaled_inputs = preprocessing.scale(unscaled_inputs_equal_priors)

## Shuffle the data

In [4]:
shuffled_indeces = np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indeces)

shuffled_inputs = scaled_inputs[shuffled_indeces]
shuffled_targets = targets_equals_priors[shuffled_indeces]

## Split the dataset into train, validation, and test

In [5]:
samples_count = shuffled_inputs.shape[0]

train_samples_count = int(samples_count*0.8)
validation_samples_count = int(0.1*samples_count)
test_samples_count = samples_count - validation_samples_count - train_samples_count

train_inputs = shuffled_inputs[:train_samples_count]
train_targets = shuffled_targets[:train_samples_count]

validation_inputs = shuffled_inputs[train_samples_count:train_samples_count + validation_samples_count]
validation_targets = shuffled_targets[train_samples_count:train_samples_count + validation_samples_count]

test_inputs = shuffled_inputs[train_samples_count + validation_samples_count:]
test_targets = shuffled_inputs[train_samples_count + validation_samples_count:]

## Save the three datasets in *npz

In [6]:
np.savez("Audiobooks_data_train", inputs=train_inputs, targets=train_targets)
np.savez("Audiobooks_data_validation", inputs=validation_inputs, targets=validation_targets)
np.savez("Audiobooks_data_test", inputs=test_inputs, targets=test_targets)

In [7]:
import numpy as np
import tensorflow as tf

In [8]:
npz = np.load("Audiobooks_data_train.npz")

train_inputs = npz["inputs"].astype(np.float)
train_targets = npz["targets"].astype(np.int)

npz = np.load("Audiobooks_data_validation.npz")
validation_inputs = npz["inputs"].astype(np.float)
validation_targets  = npz["targets"].astype(np.int)

npz = np.load("Audiobooks_data_test.npz")
test_inputs = npz["inputs"].astype(np.float)
test_targets  = npz["targets"].astype(np.int)

## Model

In [11]:
input_size = 10
output_size = 2
hidden_layer_size = 50

model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation="relu"),
    tf.keras.layers.Dense(hidden_layer_size, activation="relu"),
    tf.keras.layers.Dense(output_size, activation="softmax"), #since we know it is a classifier
])

model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

BATCH_SIZE = 100
MAX_EPOCHS = 100
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

model.fit(train_inputs, 
          train_targets, 
          batch_size=BATCH_SIZE,
          epochs=MAX_EPOCHS,
            validation_data=(validation_inputs, validation_targets),
            verbose=2,
          callbacks = [early_stopping]
         )


Epoch 1/100
36/36 - 0s - loss: 0.5745 - accuracy: 0.7737 - val_loss: 0.4425 - val_accuracy: 0.8635
Epoch 2/100
36/36 - 0s - loss: 0.3853 - accuracy: 0.8704 - val_loss: 0.3386 - val_accuracy: 0.8747
Epoch 3/100
36/36 - 0s - loss: 0.3277 - accuracy: 0.8793 - val_loss: 0.3079 - val_accuracy: 0.8837
Epoch 4/100
36/36 - 0s - loss: 0.3053 - accuracy: 0.8868 - val_loss: 0.2893 - val_accuracy: 0.8993
Epoch 5/100
36/36 - 0s - loss: 0.2907 - accuracy: 0.8910 - val_loss: 0.2795 - val_accuracy: 0.8971
Epoch 6/100
36/36 - 0s - loss: 0.2796 - accuracy: 0.8944 - val_loss: 0.2862 - val_accuracy: 0.8993
Epoch 7/100
36/36 - 0s - loss: 0.2749 - accuracy: 0.8972 - val_loss: 0.2735 - val_accuracy: 0.8993
Epoch 8/100
36/36 - 0s - loss: 0.2681 - accuracy: 0.8994 - val_loss: 0.2650 - val_accuracy: 0.8993
Epoch 9/100
36/36 - 0s - loss: 0.2623 - accuracy: 0.8986 - val_loss: 0.2623 - val_accuracy: 0.9060
Epoch 10/100
36/36 - 0s - loss: 0.2580 - accuracy: 0.9016 - val_loss: 0.2553 - val_accuracy: 0.9038
Epoch 11/

<tensorflow.python.keras.callbacks.History at 0x1fe91d77508>

## Test the model

In [13]:
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)

ValueError: in user code:

    C:\Users\bryan\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:941 test_function  *
        outputs = self.distribute_strategy.run(
    C:\Users\bryan\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\bryan\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\bryan\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\bryan\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:912 test_step  **
        y, y_pred, sample_weight, regularization_losses=self.losses)
    C:\Users\bryan\anaconda3\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:205 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    C:\Users\bryan\anaconda3\lib\site-packages\tensorflow\python\keras\losses.py:143 __call__
        losses = self.call(y_true, y_pred)
    C:\Users\bryan\anaconda3\lib\site-packages\tensorflow\python\keras\losses.py:246 call
        return self.fn(y_true, y_pred, **self._fn_kwargs)
    C:\Users\bryan\anaconda3\lib\site-packages\tensorflow\python\keras\losses.py:1558 sparse_categorical_crossentropy
        y_true, y_pred, from_logits=from_logits, axis=axis)
    C:\Users\bryan\anaconda3\lib\site-packages\tensorflow\python\keras\backend.py:4655 sparse_categorical_crossentropy
        labels=target, logits=output)
    C:\Users\bryan\anaconda3\lib\site-packages\tensorflow\python\ops\nn_ops.py:3591 sparse_softmax_cross_entropy_with_logits_v2
        labels=labels, logits=logits, name=name)
    C:\Users\bryan\anaconda3\lib\site-packages\tensorflow\python\ops\nn_ops.py:3507 sparse_softmax_cross_entropy_with_logits
        logits.get_shape()))

    ValueError: Shape mismatch: The shape of labels (received (320,)) should equal the shape of logits except for the last dimension (received (32, 2)).
