# 1. Audiobooks

In [1]:
# Mount Google Drive into the Colab runtime so files stored there are reachable.
from google.colab import drive
drive.mount('/content/drive')
# Switch the working directory to the project folder; later cells read the CSV by relative path.
%cd /content/drive/MyDrive/peerkart

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/peerkart


In [2]:
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import tensorflow as tf
from sklearn.metrics import accuracy_score

# 2. Import file
* read the csv file
* split the rows into training, test and validation sets using the `flag` column
* create dataframes for inputs and targets

In [4]:
# Read the preprocessed dataset once; every split below is a filtered view of it.
df = pd.read_csv('01_preprocessing.csv')


def split_inputs_targets(frame, flag_value, target_col='11', flag_col='flag'):
    """Return ``(inputs, targets)`` for the rows whose ``flag_col`` equals ``flag_value``.

    ``inputs`` is the subset without the flag and target columns; ``targets`` is a
    one-column DataFrame holding only ``target_col``.

    Raises:
        ValueError: if no row carries ``flag_value`` (e.g. a typo in the label),
            which would otherwise silently produce an empty split.
    """
    subset = frame[frame[flag_col] == flag_value]
    if subset.empty:
        raise ValueError('no rows with {}=={!r}'.format(flag_col, flag_value))
    return subset.drop([flag_col, target_col], axis=1), subset[[target_col]]


train_inputs, train_targets = split_inputs_targets(df, 'Training')
print(train_inputs.shape)

test_inputs, test_targets = split_inputs_targets(df, 'Test')
print(test_inputs.shape)

validation_inputs, validation_targets = split_inputs_targets(df, 'Validate')
print(validation_inputs.shape)

(16122, 10)
(2303, 10)
(4607, 10)


# 3. Model
Single hidden layer

In [5]:
# Hyperparameters for the network with one hidden layer.
input_size = 10
output_size = 2
hidden_layer_size = 30
batch_size = 400
max_epochs = 100

# Seed Python, NumPy and TensorFlow BEFORE any layer is created, so that weight
# initialisation is covered by the seed no matter when the installed Keras
# version draws its initializer seeds (at layer construction or at build time).
tf.keras.utils.set_random_seed(42)
tf.config.experimental.enable_op_determinism()

model = tf.keras.Sequential([
    tf.keras.Input(shape=(input_size,)),  # declares the expected number of features
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),  # 1st hidden layer
    tf.keras.layers.Dense(output_size, activation='softmax'),  # output layer
])

# Targets are integer class labels, hence the sparse variant of cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Stop once the monitored validation metric has not improved for 3 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=3)

history = model.fit(train_inputs, train_targets, batch_size=batch_size, epochs=max_epochs,
                    callbacks=[early_stopping],
                    validation_data=(validation_inputs, validation_targets), verbose=0)

In [6]:
# Keep column 1 of the model's two-column softmax output for every test row.
# The Series is built directly with its label; the former `pred1.columns = [1]`
# did nothing because a Series has no `columns` axis.
pred1 = pd.Series(model.predict(test_inputs)[:, 1], name=1)
pred1.shape



(2303,)

# 4. Model
Two hidden layers

In [7]:
# Hyperparameters for the network with two hidden layers.
input_size = 10
output_size = 2
hidden_layer_size = 15
batch_size = 400
max_epochs = 100

# Seed Python, NumPy and TensorFlow BEFORE any layer is created, so that weight
# initialisation is covered by the seed no matter when the installed Keras
# version draws its initializer seeds (at layer construction or at build time).
tf.keras.utils.set_random_seed(108)
tf.config.experimental.enable_op_determinism()

model = tf.keras.Sequential([
    tf.keras.Input(shape=(input_size,)),  # declares the expected number of features
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),  # 1st hidden layer
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),  # 2nd hidden layer
    tf.keras.layers.Dense(output_size, activation='softmax'),  # output layer
])

# Targets are integer class labels, hence the sparse variant of cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Stop once the monitored validation metric has not improved for 3 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=3)

history = model.fit(train_inputs, train_targets, batch_size=batch_size, epochs=max_epochs,
                    callbacks=[early_stopping],
                    validation_data=(validation_inputs, validation_targets), verbose=0)

In [8]:
# Keep column 1 of the model's two-column softmax output for every test row.
# The Series is labelled 2 (second ensemble member) at construction; the former
# `pred2.columns = [2]` did nothing because a Series has no `columns` axis.
pred2 = pd.Series(model.predict(test_inputs)[:, 1], name=2)
pred2.shape



(2303,)

# 5. Model
Three hidden layers

In [9]:
# Hyperparameters for the network with three hidden layers.
input_size = 10
output_size = 2
hidden_layer_size = 10
batch_size = 400
max_epochs = 100

# Seed Python, NumPy and TensorFlow BEFORE any layer is created, so that weight
# initialisation is covered by the seed no matter when the installed Keras
# version draws its initializer seeds (at layer construction or at build time).
tf.keras.utils.set_random_seed(108)
tf.config.experimental.enable_op_determinism()

model = tf.keras.Sequential([
    tf.keras.Input(shape=(input_size,)),  # declares the expected number of features
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),  # 1st hidden layer
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),  # 2nd hidden layer
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),  # 3rd hidden layer
    tf.keras.layers.Dense(output_size, activation='softmax'),  # output layer
])

# Targets are integer class labels, hence the sparse variant of cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Stop once the monitored validation metric has not improved for 3 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=3)

history = model.fit(train_inputs, train_targets, batch_size=batch_size, epochs=max_epochs,
                    callbacks=[early_stopping],
                    validation_data=(validation_inputs, validation_targets), verbose=0)

In [10]:
# Keep column 1 of the model's two-column softmax output for every test row.
# The Series is labelled 3 (third ensemble member) at construction; the former
# `pred3.columns = [3]` did nothing because a Series has no `columns` axis.
pred3 = pd.Series(model.predict(test_inputs)[:, 1], name=3)
pred3.shape



(2303,)

# 6. Ensemble - equal weights

In [11]:
# Place the three models' predictions side by side, one column per model (1, 2, 3).
pred = pd.concat([pred1, pred2, pred3], axis=1).set_axis([1, 2, 3], axis=1)

# Equal-weight ensemble: the row-wise mean of the three model columns.
pred['avg'] = pred.mean(axis=1)

# Hard label: 1 when the averaged score exceeds 0.5, otherwise 0.
pred['flag'] = (pred['avg'] > 0.5).astype(int)
pred.shape

(2303, 5)

In [12]:
# Share of test rows whose ensemble label matches the true target, as a percentage.
test_accuracy_pct = accuracy_score(test_targets, pred['flag']) * 100
print('\nTest accuracy: {0: .2f}%'.format(test_accuracy_pct))


Test accuracy:  83.37%
