# CS5242

## Load Data

In [1]:
from data import Data

train_data = Data(data_type='train')
test_data = Data(data_type='test')

print("Train data: x:"+ str(train_data.x.shape)+ ' y:'+str(train_data.y.shape))
print("Test data: x:"+ str(test_data.x.shape)+ ' y:'+str(test_data.y.shape))



Loading train data 18662/18662: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ [ time left: 00:00 ]
Loading test data 6051/6051: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ [ time left: 00:00 ]


Train data: x:(18662, 1000, 102) y:(18662, 1)
Test data: x:(6051, 1000, 102) y:(6051, 1)


## Model

In [5]:
import tensorflow as tf

model_name = 'template'

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(1000,102)),
    tf.keras.layers.Dense(1024, activation='relu'),
    tf.keras.layers.Dropout(rate=0.3),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(rate=0.3),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(rate=0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(rate=0.3),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss='mse',
              optimizer=tf.keras.optimizers.Adam(0.001),
              metrics=['accuracy'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 102000)            0         
_________________________________________________________________
dense_5 (Dense)              (None, 1024)              104449024 
_________________________________________________________________
dropout_4 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_5 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_6 (Dropout)          (None, 256)              

## Save Checkpoint

In [6]:
import os
# Directory where the checkpoints will be saved
checkpoint_dir = './checkpoints/' + model_name
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True)

early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', 
    min_delta=0, 
    patience=30, 
    mode='auto', 
    restore_best_weights=True)

## Training

In [10]:
history = model.fit(x=train_data.x, y=train_data.y, validation_split=0.15, epochs=300,batch_size=16,
                    callbacks=[checkpoint_callback,early_stopping_callback], verbose=2,shuffle=True)

Train on 15862 samples, validate on 2800 samples
Epoch 1/300
15862/15862 - 34s - loss: 0.1707 - accuracy: 0.8291 - val_loss: 0.2095 - val_accuracy: 0.7893
Epoch 2/300
15862/15862 - 31s - loss: 0.3823 - accuracy: 0.6176 - val_loss: 0.3600 - val_accuracy: 0.6400
Epoch 3/300
15862/15862 - 34s - loss: 0.4354 - accuracy: 0.5646 - val_loss: 0.1921 - val_accuracy: 0.8079
Epoch 4/300
15862/15862 - 32s - loss: 0.1752 - accuracy: 0.8248 - val_loss: 0.2008 - val_accuracy: 0.7989
Epoch 5/300
15862/15862 - 32s - loss: 0.1811 - accuracy: 0.8189 - val_loss: 0.2221 - val_accuracy: 0.7779
Epoch 6/300
15862/15862 - 32s - loss: 0.1683 - accuracy: 0.8317 - val_loss: 0.1957 - val_accuracy: 0.8043
Epoch 7/300
15862/15862 - 31s - loss: 0.1662 - accuracy: 0.8338 - val_loss: 0.2021 - val_accuracy: 0.7979
Epoch 8/300
15862/15862 - 32s - loss: 0.1616 - accuracy: 0.8384 - val_loss: 0.2050 - val_accuracy: 0.7950
Epoch 9/300
15862/15862 - 31s - loss: 0.2265 - accuracy: 0.7735 - val_loss: 0.5157 - val_accuracy: 0.48

KeyboardInterrupt: 

## Plot Training History

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
# Plot training & validation accuracy values
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

## Restore Weights

In [None]:
checkpoint_dir = './checkpoints/' + model_name
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

In [None]:
test_predict = model.predict(test_data.x)
print(test_predict.shape)



## Save Prediction to CSV

In [None]:
import csv

output_file = 'result_' + model_name + '.csv'
with open(output_file, 'wt', newline='', encoding='utf-8') as output_file:
    csv_writer = csv.writer(output_file)
    csv_writer.writerow(('Id', 'Predicted'))
    for id, predict in enumerate(test_predict):
        csv_writer.writerow((id, predict[0]))
print('Saved to '+'result_' + model_name + '.csv')