Audiobook business case

In [None]:
import numpy as np
import tensorflow as tf
from sklearn import preprocessing

In [None]:
# Colab-only step: open the browser upload widget so the dataset CSV can be
# placed in the runtime's working directory for the loading cell.
from google.colab import files
uploaded = files.upload()


Saving Audiobooks_data.csv to Audiobooks_data.csv


In [None]:
# Parse the comma-delimited CSV into a 2-D numeric array.
raw_data = np.loadtxt("Audiobooks_data.csv", delimiter=',')
# Features are every column except the first and the last.
# NOTE(review): column 0 is presumably a customer ID — confirm against the data dictionary.
unscaled = raw_data[:,1:-1]
# The last column is used as the target for each row.
targets = raw_data[:,-1]

In [None]:
# Number of rows (samples) in the raw dataset.
targets.shape[0]

14084

Balance Dataset

In [None]:
# Undersample the zero class: keep every non-zero-target row plus the first
# `one_target` zero-target rows (in file order) and drop all later zero rows.
one_target = int(np.sum(targets))

zero_rows = np.flatnonzero(targets == 0)       # row indices of zero-target samples, ascending
zero_target_count = int(zero_rows.size)        # total number of zero-target rows
to_remove = zero_rows[one_target:].tolist()    # zero-target rows beyond the first `one_target`

final_unscaled_inputs = np.delete(unscaled, to_remove, axis=0)
new_target = np.delete(targets, to_remove, axis=0)


**Scale Data and Shuffle**

In [None]:
# Standardise every feature column with sklearn's preprocessing.scale().
# NOTE(review): the scaling statistics are computed on the full balanced set,
# i.e. before the train/validation/test split — consider fitting the scaler on
# the training rows only.
scaled_inputs = preprocessing.scale(final_unscaled_inputs)

# Shuffle with a dedicated, seeded generator so that Restart & Run All yields
# the same row order (and therefore the same splits and metrics) every time.
# Change SHUFFLE_SEED to draw a different permutation.
SHUFFLE_SEED = 42
shuffle_rng = np.random.RandomState(SHUFFLE_SEED)

shuffled_count = np.arange(scaled_inputs.shape[0])
shuffle_rng.shuffle(shuffled_count)

# Index inputs and targets with the same permutation so rows stay aligned.
shuffled_inputs = scaled_inputs[shuffled_count]
shuffled_target = new_target[shuffled_count]

**Train / Validation / Test Split**

In [None]:
# Split the shuffled rows into three disjoint, contiguous slices:
# 80% training, 10% validation, and the remainder for testing.
samples_count = shuffled_inputs.shape[0]

train_count = int(0.8*samples_count)
val_count = int(0.1*samples_count)
test_count = samples_count - train_count - val_count

train_end = train_count
val_end = train_count + val_count

train_inputs = shuffled_inputs[:train_end]
train_targets = shuffled_target[:train_end]

# Validation rows start where the training rows end. Slicing from index 0 here
# would include every training row, so the validation loss would mostly track
# the training loss and early stopping could not detect overfitting.
val_inputs = shuffled_inputs[train_end:val_end]
val_targets = shuffled_target[train_end:val_end]

test_inputs = shuffled_inputs[val_end:]
test_targets = shuffled_target[val_end:]

# Sanity checks: the three slices must partition the data exactly.
assert val_inputs.shape[0] == val_count
assert test_inputs.shape[0] == test_count
assert train_inputs.shape[0] + val_inputs.shape[0] + test_inputs.shape[0] == samples_count

**Save datasets as .npz files**

In [None]:
# Persist each split as its own archive, with the arrays stored under the
# keys 'inputs' and 'targets'.
for npz_name, split_inputs, split_targets in (
    ('Audio_train', train_inputs, train_targets),
    ('Audio_val', val_inputs, val_targets),
    ('Audio_test', test_inputs, test_targets),
):
    np.savez(npz_name, inputs=split_inputs, targets=split_targets)

# New Section - Machine Learning model

Load Data

In [None]:
def _load_split(npz_path):
    """Load one saved split and return ``(inputs, targets)``.

    Inputs are cast to float and targets to int. The builtin ``float`` and
    ``int`` are used because the ``np.float`` / ``np.int`` aliases were
    deprecated in NumPy 1.20 and removed in NumPy 1.24; they were plain
    aliases of the builtins, so the resulting dtypes are unchanged.
    """
    # The context manager closes the underlying archive once the arrays are read.
    with np.load(npz_path) as npz:
        return npz['inputs'].astype(float), npz['targets'].astype(int)


train_inputs, train_targets = _load_split('Audio_train.npz')
val_inputs, val_targets = _load_split('Audio_val.npz')
test_inputs, test_targets = _load_split('Audio_test.npz')

**Classification Model**

In [None]:
# Network dimensions.
inputs_size = 10       # not referenced by the layers below
outputs_size = 2       # units in the softmax output layer
hidden_layer = 50      # units in each hidden layer

# Stop training once the monitored quantity (the callback's default) has gone
# two epochs without improving.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

# Two ReLU hidden layers followed by a softmax output layer.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(units=hidden_layer, activation='relu'))
model.add(tf.keras.layers.Dense(units=hidden_layer, activation='relu'))
model.add(tf.keras.layers.Dense(units=outputs_size, activation='softmax'))

# sparse_categorical_crossentropy takes integer class labels rather than
# one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Training hyper-parameters consumed by the fit cell.
batch_size = 100
max_epochs = 100

In [None]:

# Train in mini-batches for at most max_epochs epochs; the early-stopping
# callback may end training sooner. verbose=2 prints one line per epoch.
model.fit(
    train_inputs,
    train_targets,
    batch_size=batch_size,
    epochs=max_epochs,
    callbacks=[early_stopping],
    validation_data=(val_inputs, val_targets),
    verbose=2,
)

Epoch 1/100
36/36 - 1s - loss: 0.6103 - accuracy: 0.6742 - val_loss: 0.5363 - val_accuracy: 0.7479
Epoch 2/100
36/36 - 0s - loss: 0.4951 - accuracy: 0.7569 - val_loss: 0.4586 - val_accuracy: 0.7705
Epoch 3/100
36/36 - 0s - loss: 0.4359 - accuracy: 0.7784 - val_loss: 0.4133 - val_accuracy: 0.7904
Epoch 4/100
36/36 - 0s - loss: 0.4025 - accuracy: 0.7960 - val_loss: 0.3887 - val_accuracy: 0.7923
Epoch 5/100
36/36 - 0s - loss: 0.3862 - accuracy: 0.7932 - val_loss: 0.3732 - val_accuracy: 0.8050
Epoch 6/100
36/36 - 0s - loss: 0.3695 - accuracy: 0.8055 - val_loss: 0.3618 - val_accuracy: 0.8154
Epoch 7/100
36/36 - 0s - loss: 0.3625 - accuracy: 0.8072 - val_loss: 0.3529 - val_accuracy: 0.8142
Epoch 8/100
36/36 - 0s - loss: 0.3534 - accuracy: 0.8103 - val_loss: 0.3475 - val_accuracy: 0.8107
Epoch 9/100
36/36 - 0s - loss: 0.3524 - accuracy: 0.8114 - val_loss: 0.3423 - val_accuracy: 0.8154
Epoch 10/100
36/36 - 0s - loss: 0.3470 - accuracy: 0.8134 - val_loss: 0.3408 - val_accuracy: 0.8154
Epoch 11/

<tensorflow.python.keras.callbacks.History at 0x7f687a6da250>

**Test Model**

In [None]:
# Evaluate on the test split. The model was compiled with metrics=['accuracy'],
# so evaluate() returns the loss followed by the accuracy.
test_loss, test_accuracy = model.evaluate(test_inputs,test_targets)



In [None]:
# Display the test loss and the test accuracy expressed as a percentage.
test_loss, test_accuracy*100

(0.3570718467235565, 79.46428656578064)