In [19]:
import numpy as np
from sklearn import preprocessing

raw=np.loadtxt('C:\\Users\\Admin\\Desktop\\DataScience\\Datasets\\Audiobooks-data.csv',delimiter=',')
unscaled_inputs=raw[:,1:-1]
targets_all=raw[:,-1]

### balance the dataset

In [20]:
num_one_targets=int(np.sum(targets_all))
zero_targets_counter=0
indices_to_remove=[]

for i in range(targets_all.shape[0]):
    if targets_all[i] ==0:
        zero_targets_counter+=1
        if zero_targets_counter>num_one_targets:
            indices_to_remove.append(i)
            
unscaled_inputs_equal_priors=np.delete(unscaled_inputs,indices_to_remove,axis=0)
targets_equal_priors=np.delete(targets_all,indices_to_remove,axis=0)

### standardize the inputs

In [21]:
scaled_inputs=preprocessing.scale(unscaled_inputs_equal_priors)

### shuffle the data

In [22]:
shuffled_indices=np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indices)

shuffled_inputs=scaled_inputs[shuffled_indices]
shuffled_targets=targets_equal_priors[shuffled_indices]

### split the dataset into train,validation and test

In [23]:
samples_count=shuffled_inputs.shape[0]

train_samples_count=int(0.8*samples_count)
validation_samples_count=int(0.1*samples_count)
test_samples_count=samples_count-train_samples_count -validation_samples_count

train_inputs=shuffled_inputs[:train_samples_count]
train_targets=shuffled_targets[:train_samples_count]

validation_inputs=shuffled_inputs[train_samples_count:train_samples_count+validation_samples_count]
validation_targets=shuffled_targets[train_samples_count:train_samples_count+validation_samples_count]

test_inputs=shuffled_inputs[train_samples_count+validation_samples_count:]
test_targets=shuffled_targets[train_samples_count+validation_samples_count:]

print(np.sum(train_targets),train_samples_count,np.sum(train_targets)/train_samples_count)
print(np.sum(validation_targets),validation_samples_count,np.sum(validation_targets)/validation_samples_count)
print(np.sum(test_targets),test_samples_count,np.sum(test_targets)/test_samples_count)

1781.0 3579 0.49762503492595694
244.0 447 0.5458612975391499
212.0 448 0.4732142857142857


### save the three dataset in npz

In [24]:
np.savez('Audiobooks_data_train',inputs=train_inputs,targets=train_targets)
np.savez('Audiobooks_data_validation',inputs=validation_inputs,targets=validation_targets)
np.savez('Audiobooks_data_test',inputs=test_inputs,targets=test_targets)

### create the machine learning algorithm

In [25]:
import tensorflow as tf

### data

In [26]:
npz=np.load('Audiobooks_data_train.npz')
train_inputs=npz['inputs'].astype(np.float64)
train_targets=npz['targets'].astype(np.int64)

npz=np.load('Audiobooks_data_validation.npz')
validation_inputs,validation_targets=npz['inputs'].astype(np.float64),npz['targets'].astype(np.int64)


npz=np.load('Audiobooks_data_test.npz')
test_inputs,test_targets=npz['inputs'].astype(np.float64),npz['targets'].astype(np.int64)


### model

In [27]:
input_size=10
output_size=2
hidden_layer_size=100
model=tf.keras.Sequential([
                           tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
                           tf.keras.layers.Dense(output_size,activation='softmax')
    
    
])
early_stopping=tf.keras.callbacks.EarlyStopping(patience=2)
model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])
batch_size=100
max_epochs=100
model.fit(train_inputs,
          train_targets,
          batch_size=batch_size,
          epochs=max_epochs,
          callbacks=[early_stopping],
          validation_data=(validation_inputs,validation_targets),
          verbose=2)


Epoch 1/100
36/36 - 2s - loss: 0.5476 - accuracy: 0.7075 - val_loss: 0.4690 - val_accuracy: 0.7830 - 2s/epoch - 56ms/step
Epoch 2/100
36/36 - 0s - loss: 0.4147 - accuracy: 0.7837 - val_loss: 0.4028 - val_accuracy: 0.7740 - 343ms/epoch - 10ms/step
Epoch 3/100
36/36 - 0s - loss: 0.3773 - accuracy: 0.8002 - val_loss: 0.3719 - val_accuracy: 0.8031 - 356ms/epoch - 10ms/step
Epoch 4/100
36/36 - 0s - loss: 0.3607 - accuracy: 0.8092 - val_loss: 0.3589 - val_accuracy: 0.8143 - 371ms/epoch - 10ms/step
Epoch 5/100
36/36 - 0s - loss: 0.3548 - accuracy: 0.8036 - val_loss: 0.3523 - val_accuracy: 0.8300 - 347ms/epoch - 10ms/step
Epoch 6/100
36/36 - 0s - loss: 0.3500 - accuracy: 0.8108 - val_loss: 0.3427 - val_accuracy: 0.8210 - 349ms/epoch - 10ms/step
Epoch 7/100
36/36 - 0s - loss: 0.3416 - accuracy: 0.8161 - val_loss: 0.3428 - val_accuracy: 0.8076 - 370ms/epoch - 10ms/step
Epoch 8/100
36/36 - 0s - loss: 0.3429 - accuracy: 0.8072 - val_loss: 0.3367 - val_accuracy: 0.8277 - 356ms/epoch - 10ms/step
Epo

<keras.callbacks.History at 0x283e652e6e0>

### testing the model

In [28]:
test_loss,test_accuracy=model.evaluate(test_inputs,test_targets)



In [29]:
print('\nTest loss:{0:.2f}.Test accuracy:{1:2f}%'.format(test_loss,test_accuracy*100))


Test loss:0.35.Test accuracy:80.133927%
