#### Predict whether a customer is a Subscription Customer or a Regular Customer

In [None]:

import numpy as np
from sklearn import preprocessing

# Location of the raw CSV file (placeholder path; point it at the real dataset).
data = "path/to/csv/data.csv"

# Pass the path variable itself. The previous code passed the literal string
# 'data', which made NumPy look for a file literally named "data".
raw_csv_data = np.loadtxt(data, delimiter=',')

# Features are every column except the first and the last; the last column is
# the target. (Presumably the first column is an identifier -- confirm.)
unscaled_inputs_all = raw_csv_data[:, 1:-1]
targets_all = raw_csv_data[:, -1]

# Number of positive examples. NOTE(review): this assumes the targets are
# encoded as 0/1 so that their sum equals the count of ones -- confirm.
num_one_targets = int(np.sum(targets_all))

# Balance the classes: keep only the first `num_one_targets` rows whose target
# is 0 and mark every later zero-target row for removal. This selects exactly
# the rows the original row-by-row counter loop selected.
zero_target_indices = np.flatnonzero(targets_all == 0)
zero_targets_counter = int(zero_target_indices.size)  # total number of zero targets seen
indices_to_remove = zero_target_indices[num_one_targets:].tolist()

unscaled_inputs_equal_priors = np.delete(unscaled_inputs_all, indices_to_remove, axis=0)

targets_equal_priors = np.delete(targets_all, indices_to_remove, axis=0)


#### Standardize the inputs: scaling

In [3]:
# Standardize the balanced feature matrix with scikit-learn's `preprocessing.scale`
# so that features on different scales contribute comparably to training.
scaled_inputs = preprocessing.scale(unscaled_inputs_equal_priors)

#### Shuffle the data:

In [4]:
# Build the row indices 0 .. n_rows - 1 and shuffle them in place.
# NOTE: no random seed is set, so the ordering differs from run to run.
shuffled_indices = np.arange(len(scaled_inputs))
np.random.shuffle(shuffled_indices)

# Apply the same permutation to inputs and targets so each row stays paired
# with its own label.
shuffled_inputs, shuffled_targets = (
    scaled_inputs[shuffled_indices],
    targets_equal_priors[shuffled_indices],
)

#### Split the dataset: train, validation, and test

In [5]:

# Total number of rows available after balancing and shuffling.
samples_count = len(shuffled_inputs)

# 80% train, 10% validation; the test split takes whatever rows remain so that
# truncation in the two int() conversions never drops a sample.
train_samples_count = int(0.8 * samples_count)
validation_samples_count = int(0.1 * samples_count)
test_samples_count = samples_count - (train_samples_count + validation_samples_count)

# Index at which the validation slice ends and the test slice begins.
validation_end = train_samples_count + validation_samples_count

train_inputs = shuffled_inputs[:train_samples_count]
train_targets = shuffled_targets[:train_samples_count]

validation_inputs = shuffled_inputs[train_samples_count:validation_end]
validation_targets = shuffled_targets[train_samples_count:validation_end]

test_inputs = shuffled_inputs[validation_end:]
test_targets = shuffled_targets[validation_end:]

# Sanity check per split: sum of targets, split size, and their ratio
# (the ratio should be close to 0.5 if each split stayed balanced).
for split_targets, split_size in (
    (train_targets, train_samples_count),
    (validation_targets, validation_samples_count),
    (test_targets, test_samples_count),
):
    target_sum = np.sum(split_targets)
    print(target_sum, split_size, target_sum / split_size)

# Persist each split as an .npz archive (np.savez appends the ".npz" suffix).
np.savez('YOUR_DATA_data_train', inputs=train_inputs, targets=train_targets)
np.savez('YOUR_DATA_data_validation', inputs=validation_inputs, targets=validation_targets)
np.savez('YOUR_DATA_data_test', inputs=test_inputs, targets=test_targets)

1815.0 3579 0.5071248952221291
225.0 447 0.5033557046979866
197.0 448 0.43973214285714285


#### Create a class that handles batching

In [7]:
class get_data():
    """Iterator that serves batches from a saved ``.npz`` dataset split.

    The archive ``YOUR_DATA_data_<dataset>.npz`` is read from the current
    working directory and must contain the arrays ``inputs`` and ``targets``.

    Parameters
    ----------
    dataset : str
        Split name substituted into the file name (e.g. ``'train'``).
    batch_size : int or None, optional
        Number of rows per batch. ``None`` (the default) serves the whole
        split as a single batch.

    Notes
    -----
    * ``batch_count`` uses floor division, so rows that do not fill a complete
      final batch are never served.
    * Each iteration yields ``(inputs_batch, targets_one_hot)`` where the
      targets are one-hot encoded over two classes.
    * After the last batch the internal counter is reset before
      ``StopIteration`` is raised, so the same object can be iterated again
      on the next epoch.
    """

    def __init__(self, dataset, batch_size=None):
        # Use a context manager so the underlying archive file is closed once
        # the arrays have been materialised.
        with np.load("YOUR_DATA_data_{0}.npz".format(dataset)) as npz:
            # `np.float` / `np.int` were aliases of the builtins and were removed
            # in NumPy 1.24; the builtins give exactly the same dtypes.
            self.inputs = npz['inputs'].astype(float)
            self.targets = npz['targets'].astype(int)

        if batch_size is None:
            self.batch_size = self.inputs.shape[0]
        else:
            self.batch_size = batch_size
        self.curr_batch = 0
        self.batch_count = self.inputs.shape[0] // self.batch_size

    def __next__(self):
        """Return the next ``(inputs, one_hot_targets)`` batch.

        Raises
        ------
        StopIteration
            When all ``batch_count`` batches have been served; the batch
            counter is reset first so iteration can start over.
        """
        if self.curr_batch >= self.batch_count:
            self.curr_batch = 0
            raise StopIteration()

        batch_slice = slice(self.curr_batch * self.batch_size,
                            (self.curr_batch + 1) * self.batch_size)
        inputs_batch = self.inputs[batch_slice]
        target_batch = self.targets[batch_slice]
        self.curr_batch += 1

        # One-hot encode the integer class labels: row i gets a 1 in the
        # column given by target_batch[i].
        classes_num = 2
        targets_one_hot = np.zeros((target_batch.shape[0], classes_num))
        targets_one_hot[np.arange(target_batch.shape[0]), target_batch] = 1
        return inputs_batch, targets_one_hot

    def __iter__(self):
        """Return self; the object is its own iterator."""
        return self
    


#### Create the Machine Learning Algorithm 

In [None]:

import tensorflow as tf

# Network dimensions.
# NOTE(review): input_size must equal the number of feature columns in the
# saved `inputs` arrays -- confirm against the dataset.
input_size = 10 
output_size = 2 
hidden_layer_size = 200


# Clear any graph left over from a previous run of this cell so the variable
# names below can be created again.
# NOTE(review): reset_default_graph / placeholder / get_variable / tf.train are
# TensorFlow 1.x graph-mode APIs -- confirm the installed TensorFlow version.
tf.reset_default_graph()

# Placeholders fed at run time: a batch of feature rows and the matching
# one-hot targets (batch dimension left open with None).
inputs = tf.placeholder(tf.float32, [None, input_size])
targets = tf.placeholder(tf.int32, [None, output_size])

# First hidden layer: linear transform followed by ReLU.
weights_1 = tf.get_variable('weights_1', [input_size, hidden_layer_size])
biases_1 = tf.get_variable('biases_1', [hidden_layer_size])
outputs_1 = tf.nn.relu(tf.matmul(inputs, weights_1) + biases_1)

# Second hidden layer: linear transform followed by ReLU.
weights_2 = tf.get_variable("weights_2", [hidden_layer_size, hidden_layer_size])
biases_2 = tf.get_variable("biases_2", [hidden_layer_size])
outputs_2 = tf.nn.relu(tf.matmul(outputs_1, weights_2) + biases_2)

# Output layer parameters.
weights_3 = tf.get_variable("weights_3", [hidden_layer_size, output_size])
biases_3 = tf.get_variable("biases_3", [output_size])


# Raw logits: no activation here because the loss below applies softmax itself.
outputs = tf.matmul(outputs_2, weights_3) + biases_3

# Per-example softmax cross-entropy, averaged over the batch.
loss = tf.nn.softmax_cross_entropy_with_logits(logits = outputs, labels = targets)
mean_loss = tf.reduce_mean(loss)


# Accuracy: fraction of rows whose highest-scoring output matches the target's hot index.
out_equals_target = tf.equal(tf.argmax(outputs, 1), tf.argmax(targets, 1))
accuracy = tf.reduce_mean(tf.cast(out_equals_target, tf.float32))

# Training op: one Adam step that minimizes the mean loss.
optimize = tf.train.AdamOptimizer(learning_rate = 0.003).minimize(mean_loss)

# Session used by the training and test cells.
sess= tf.InteractiveSession()

# Initialize all variables created above.
initializer = tf.global_variables_initializer()
sess.run(initializer)

# Training hyperparameters.
batch_size=500
max_epochs =50
# Start with a very large value so the first epoch can never trigger early stopping.
prev_validation_loss = 9999999.

# Training data is served in batches of `batch_size`; validation data is
# served as one batch (get_data defaults to the whole split).
train_data = get_data('train', batch_size)
validation_data = get_data('validation')

# Optimize the model: one pass over the training batches per epoch.
for epoch_counter in range(max_epochs):
    curr_epoch_loss=0.
    for input_batch, target_batch in train_data: # iterate over the training batches
        # Run one optimization step and fetch the loss for this batch.
        _, batch_loss = sess.run([optimize,mean_loss],
                feed_dict = {inputs: input_batch, targets: target_batch})
        curr_epoch_loss +=batch_loss # accumulate the batch loss into the epoch total
    curr_epoch_loss /=train_data.batch_count # mean training loss over the epoch's batches
    validation_loss = 0.
    validation_accuracy = 0.
    # Forward-propagate the validation set (no optimize op, so weights are not updated).
    # The values are overwritten on each iteration, so the last batch's result is kept.
    for input_batch, target_batch in validation_data:
        validation_loss, validation_accuracy = sess.run([mean_loss, accuracy],
            feed_dict = {inputs: input_batch, targets: target_batch})
    print('Epoch '+str(epoch_counter+1)+
          '. Training loss: '+'{0:.3f}'.format(curr_epoch_loss)+
          '. Validation loss: '+'{0:.3f}'.format(validation_loss)+
          '. Validation accuracy: '+'{0:.2f}'.format(validation_accuracy * 100.)+'%')
    # Early stopping: stop as soon as the validation loss rises compared with
    # the previous epoch.
    if validation_loss > prev_validation_loss:
        break
    prev_validation_loss = validation_loss

print('End of training')
    
    


#### Test the Model:


In [13]:
# Serve the held-out test split as one batch (no batch_size given).
test_data = get_data('test')

# Forward-propagate the test set exactly as was done for validation, fetching
# only the accuracy; no optimization step is run, so the weights are untouched.
for input_batch, target_batch in test_data:
    test_accuracy = sess.run(
        [accuracy],
        feed_dict={inputs: input_batch, targets: target_batch},
    )

# sess.run was given a one-element fetch list, so the result is a one-element list.
test_accuracy_percent = test_accuracy[0] * 100.

print('test accuracy:' + '{0:.2f}'.format(test_accuracy_percent) + '%')



test accuracy:82.59%
