### This notebook contains a Classification model using TensorFlow to predict Audiobook customer conversion
*  **Data source**: Audiobooks_data.csv (Available in github repo) <br>
*  **Data preprocessing**: Shuffling dataset, Balancing dataset,Scaling inputs,Splitting into training,validattion & test, Saving into .npz format <br>
*  **Model building**: Loading the three .npz files, Training the model <br>
*  **Model Evaluation**: Evaluating the trained model 

In [1]:
import numpy as np
from sklearn.preprocessing import StandardScaler

In [2]:
raw_data = np.loadtxt('E:\\Udemy\\Data science\\Python\\Deep Learning\\Classification\\Business case\\Audiobooks_data.csv',delimiter=',')
raw_data.shape

(14084, 12)

#### Shuffling the dataset

In [3]:
shuffle_indices = np.arange(raw_data.shape[0])
np.random.shuffle(shuffle_indices)
shuffled_data = raw_data[shuffle_indices]

#### Balancing the dataset

In [6]:
input_data = shuffled_data[:,1:-1]
target_data = shuffled_data[:,-1]

num_ones = int(np.sum(target_data))
num_zeroes = 0
tobe_removed =[]
for i in range(target_data.shape[0]):
    if(target_data[i] == 0):
        num_zeroes +=1
        if(num_zeroes > num_ones):
            tobe_removed.append(i)

balanced_input = np.delete(input_data, tobe_removed, axis=0)
balanced_output = np.delete(target_data, tobe_removed, axis=0)
print(int(np.sum(balanced_output)) / balanced_output.shape[0])

0.5


#### Scaling the Inputs

In [7]:
scaler = StandardScaler()
scaler = scaler.fit(balanced_input)
scaled_input = scaler.transform(balanced_input)

#### Splitting dataset into training, validation and test

In [9]:
from sklearn.model_selection import train_test_split
input_train, input_test, output_train, output_test = train_test_split(scaled_input, balanced_output, test_size=0.1, random_state=42)
validation_size = int(0.1 * input_train.shape[0])
validation_input =  input_train[input_train.shape[0]-validation_size:]
validation_output = output_train[output_train.shape[0]-validation_size:]
#Confirming balance of each dataset
print(np.sum(output_train)/output_train.shape[0])
print(np.sum(output_test)/output_test.shape[0])
print(np.sum(validation_output)/validation_output.shape[0])

0.5024838549428713
0.47767857142857145
0.5422885572139303


#### Saving the three datasets in .npz format

In [10]:
np.savez('Training_audiobook.npz', features= input_train, targets= output_train)
np.savez('Validation_audiobook.npz', features= input_test, targets= output_test)
np.savez('Test_audiobook.npz', features= validation_input, targets= validation_output)

#### Loading .npz datasets

In [17]:
temp = np.load('Training_audiobook.npz')
#print(temp['targets'][0])
train_inputs, train_outputs = temp['features'].astype(float), temp['targets'].astype(int)
#print(train_outputs[0])
temp = np.load('Validation_audiobook.npz')
val_inputs, val_outputs = temp['features'].astype(float), temp['targets'].astype(int)
temp = np.load('Test_audiobook.npz')
test_inputs, test_outputs = temp['features'].astype(float), temp['targets'].astype(int)

#### Classification model using TensorFlow

In [None]:
import tensorflow as tf

In [19]:


output_size = 2
hidden_width = 50

model = tf.keras.Sequential([
                            tf.keras.layers.Dense(hidden_width, activation='relu'),
                            tf.keras.layers.Dense(hidden_width, activation='relu'),
                            #tf.keras.layers.Dense(hidden_width, activation='sigmoid'),
                            #tf.keras.layers.Dense(hidden_width, activation='sigmoid'),
                            #tf.keras.layers.Dense(hidden_width, activation='sigmoid'),
                            tf.keras.layers.Dense(output_size, activation='sigmoid')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

batch_size = 100
max_epochs = 100
early_stop = tf.keras.callbacks.EarlyStopping()
model.fit(train_inputs,
          train_outputs,
          batch_size = batch_size,
          epochs = max_epochs,
          callbacks = [early_stop],
          validation_data = (val_inputs,val_outputs),
          verbose=2
          )

Epoch 1/100
41/41 - 1s - loss: 0.5573 - accuracy: 0.7181 - val_loss: 0.4977 - val_accuracy: 0.7165 - 997ms/epoch - 24ms/step
Epoch 2/100
41/41 - 0s - loss: 0.4353 - accuracy: 0.7839 - val_loss: 0.4382 - val_accuracy: 0.7589 - 171ms/epoch - 4ms/step


<keras.callbacks.History at 0x12e5c852bc8>

#### Evaluating the model

In [20]:
test_loss, test_accuracy = model.evaluate(test_inputs,test_outputs)
print('Test Loss: {:.2f} Test Accuracy: {:.2f}%'.format(test_loss, test_accuracy*100))

Test Loss: 0.42 Test Accuracy: 75.87%
