In [46]:
import numpy as np
from sklearn import preprocessing

raw_csv_data=np.loadtxt(' Audiobooks_data.csv',delimiter=',')
unscaled_inputs_all=load_data[:,1:-1]
targets_all=load_data[:,-1]
unscaled_inputs_all

array([[1620.  , 1620.  ,   19.73, ..., 1603.8 ,    5.  ,   92.  ],
       [2160.  , 2160.  ,    5.33, ...,    0.  ,    0.  ,    0.  ],
       [2160.  , 2160.  ,    5.33, ...,    0.  ,    0.  ,  388.  ],
       ...,
       [2160.  , 2160.  ,    6.14, ...,    0.  ,    0.  ,    0.  ],
       [1620.  , 1620.  ,    5.33, ...,  615.6 ,    0.  ,   90.  ],
       [1674.  , 3348.  ,    5.33, ...,    0.  ,    0.  ,    0.  ]])

In [47]:
#balancing the dataset
num_one_targets=int(np.sum(targets_all))
zero_targets_counter=0
indices_to_remove=[]

for i in range(targets_all.shape[0]):
    if targets_all[i]==0:
        zero_targets_counter+=1
        if zero_targets_counter>num_one_targets:
            indices_to_remove.append(i)
unscaled_input_equal_priors=np.delete(unscaled_inputs_all,indices_to_remove,axis=0)
targets_equal_priors=np.delete(targets_all,indices_to_remove,axis=0)


In [39]:
#standardize the input
scaled_inputs=preprocessing.scale(unscaled_input_equal_priors)

In [40]:
#shuffle the data
shuffled_indices=np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indices)

shuffled_inputs=scaled_inputs[shuffled_indices]
shuffled_targets=target_equal_priors[shuffled_indices]


In [41]:
#split the dataset into train ,validation and test 
samples_count=shuffled_inputs.shape[0]
train_samples_count=int(0.8*samples_count)
test_samples_count=int(0.1*samples_count)
validation_samples_count=int(0.1*samples_count)

train_inputs=shuffled_inputs[:train_samples_count]
train_targets=shuffled_targets[:train_samples_count]

validation_inputs=shuffled_inputs[train_samples_count:train_samples_count+validation_samples_count]
validation_targets=shuffled_targets[:train_samples_count+validation_samples_count]


test_inputs=shuffled_inputs[train_samples_count:train_samples_count+validation_samples_count]
test_targets=shuffled_targets[:train_samples_count+validation_samples_count]

In [42]:
#save the three datasets in/npz
np.savez('Audiobooks_data_train',inputs=train_inputs,targets=train_targets)
np.savez('Audiobooks_data_validation',inputs=validation_inputs,targets=validation_targets)
np.savez('Audiobooks_data_test',inputs=test_inputs,targets=test_targets)

In [58]:
#making class for batching
import numpy as np 
class Audiobooks_Data_Reader:
    def __init__(self,dataset,batch_size=None):
        #load the train ,test,validation data
        npz=np.load('Audiobooks_data_{0}.npz'.format(dataset))
        #giving value to the inouts as float and target as interger
        self.inputs,self.targets=npz['inputs'].astype(np.float),npz['targets'].astype(np.int)
        if batch_size is None:
            self.batch_size=self.inputs.shape[0]
        else:
             self.batch_size=batch_size
        self.curr_batch=0
        self.batch_count=self.inputs.shape[0]//self.batch_size
    
    def __next__(self):
         if self.curr_batch>=self.batch_count:
                self.curr_batch=0
                raise StopIteration()
                raise StopIteration()
         batch_slice=slice(self.curr_batch*self.batch_size,(self.curr_batch+1)*self.batch_size)
         input_batch=self.inputs[batch_slice]
         targets_batch=self.targets[batch_slice]
         self.curr_batch+=1
        
         classes_num=2
         targets_one_hot=np.zeros((targets_batch.shape[0],classes_num))
         targets_one_hot[range(targets_batch.shape[0]),targets_batch]=1
        
         return input_batch,targets_one_hot
        
    def __iter__(self):
        return self

In [65]:
import tensorflow as tf
input_size=10
output_size=2
hidden_layer_size=100
tf.reset_default_graph()

inputs=tf.placeholder(tf.float32,[None,input_size])
targets=tf.placeholder(tf.int32,[None,output_size])

weights_1=tf.get_variable("weights_1",[input_size,hidden_layer_size])
biases_1=tf.get_variable("biases_1",[hidden_layer_size])

outputs_1=tf.nn.relu(tf.matmul(inputs,weights_1)+biases_1)

weights_2=tf.get_variable("weights_2",[hidden_layer_size,hidden_layer_size])
biases_2=tf.get_variable("biases_2",[hidden_layer_size])

outputs_2=tf.nn.relu(tf.matmul(outputs_1,weights_2)+biases_2)

weights_3=tf.get_variable("weights_3",[hidden_layer_size,output_size])
biases_3=tf.get_variable("biases_3",[output_size])

outputs=tf.matmul(outputs_2,weights_3)+biases_3

loss=tf.nn.softmax_cross_entropy_with_logits(logits=outputs,labels =targets)
mean_loss=tf.reduce_mean(loss)

optimize=tf.train.AdamOptimizer(learning_rate=0.001).minimize(mean_loss)

out_equals_target=tf.equal(tf.argmax(outputs,1),tf.argmax(targets,1 ))

accuracy=tf.reduce_mean(tf.cast(out_equals_target,tf.float32))

sess=tf.InteractiveSession()
initializer=tf.global_variables_initializer()
sess.run(initializer)

batch_size=100
max_epochs=50
prev_validation_loss=99999999.

train_data=Audiobooks_Data_Reader('train',batch_size)
validation_data=Audiobooks_Data_Reader('validation')

for epoch_counter in range(max_epochs):
    curr_epoch_loss=0.
    for input_batch,target_batch in train_data:
        _,batch_loss=sess.run([optimize,mean_loss],
                             feed_dict={inputs:input_batch,targets:target_batch})
        
        curr_epoch_loss +=batch_loss
        
        curr_epoch_loss /=train_data.batch_count
        
        validation_loss=0
        validation_accuracy=0
        
        for input_batch,target_batch in validation_data:
            validation_loss,validation_accuracy=sess.run([mean_loss,accuracy],
                                                        feed_dict={inputs:input_batch,targets:target_batch})
            
        print('Epoch'+str(epoch_counter+1)+
             '.Training loss:'+'{0:.3f}'.format(curr_epoch_loss)+
             '.validation loss:'+'{0:.3f}'.format(validation_loss)+
             '.validaiton accuracy :'+'{0:.2f}'.format(validation_accuracy*100.)+'%')
    
        if validation_loss > prev_validation_loss:
            break
        prev_validation_loss=validation_loss
        
print("End of training")
             



Epoch1.Training loss:0.017.validation loss:0.560.validaiton accuracy :80.31%
Epoch1.Training loss:0.016.validation loss:0.545.validaiton accuracy :80.31%
Epoch1.Training loss:0.014.validation loss:0.533.validaiton accuracy :80.31%
Epoch1.Training loss:0.015.validation loss:0.526.validaiton accuracy :80.31%
Epoch1.Training loss:0.015.validation loss:0.524.validaiton accuracy :80.31%
Epoch1.Training loss:0.016.validation loss:0.523.validaiton accuracy :80.31%
Epoch1.Training loss:0.015.validation loss:0.524.validaiton accuracy :80.31%
Epoch2.Training loss:0.018.validation loss:0.527.validaiton accuracy :80.31%
Epoch3.Training loss:0.014.validation loss:0.530.validaiton accuracy :80.31%
Epoch4.Training loss:0.015.validation loss:0.532.validaiton accuracy :80.31%
Epoch5.Training loss:0.015.validation loss:0.535.validaiton accuracy :80.31%
Epoch6.Training loss:0.015.validation loss:0.537.validaiton accuracy :80.31%
Epoch7.Training loss:0.012.validation loss:0.539.validaiton accuracy :80.31%



In [68]:
#testing the model.

test_data=Audiobooks_Data_Reader('test')

for input_batch,target_batch in test_data:
    test_accuracy=sess.run([accuracy],
                          feed_dict={inputs:input_batch,targets:target_batch})
    test_accuracy_percent=test_accuracy[0]*100.
    print('test accuracy:'+'{0:.2f}'.format(test_accuracy_percent)+'%')

test accuracy:79.42%
