In [1]:
from LibraryCode.MLP import MLP,DataReader

In [2]:
import os
import numpy as np
import tensorflow as tf

In [3]:
dir_data = '../Session 3/datasets/'
dir_root = '../Session 3/savedParams/'

In [4]:
with open(os.path.join(dir_data, "words_idfs.txt")) as f:
    vocab_size = len(f.read().splitlines())

def loadDataset():
    test_data_reader = DataReader(
        data_path=os.path.join(dir_data, "data_tf_idf.txt"),
        batch_size=50,
        vocab_size=vocab_size,
        size=(0.8, 1)
    )
    train_data_reader = DataReader(
        data_path=os.path.join(dir_data, "data_tf_idf.txt"),
        batch_size=50,
        vocab_size=vocab_size
    )
    return train_data_reader, test_data_reader

In [5]:
def saveParameters(name, value, epoch):
    filename = name.replace(":", '-colon-') + '-epoch-{}.txt'.format(epoch)
    if len(value.shape) == 1:
        string_form = ",".join([str(number) for number in value])
    else:
        string_form = '\n'.join([",".join([str(number) for number in value[row]]) for row in range(value.shape[0])])
    with open(os.path.join(dir_root, filename), "w") as f:
        f.write(string_form)

In [6]:
def restoreParameter(name, epoch):
    filename = name.replace(":", '-colon-') + '-epoch-{}.txt'.format(epoch)
    with open(os.path.join(dir_root, filename)) as f:
        lines = f.read().splitlines()
    if len(lines) == 1:
        value = [float(number) for number in lines[0].split(",")]
    else:
        value = [[float(number) for number in lines[row].split(",")] for row in range(len(lines))]
    return value

In [7]:
mlp = MLP(vocab_size=vocab_size, hidden_size=50)
pred_y, loss = mlp.buildGraph()
train_op = mlp.trainer(loss=loss, learning_rate=0.01)

In [8]:
with tf.compat.v1.Session() as sess:
    train_data_reader, test_data_reader = loadDataset()
    step, MAX_STEP = 0, 10000
    
    sess.run(tf.compat.v1.global_variables_initializer())
    while step < MAX_STEP:
        train_data, train_labels = train_data_reader.nextBatch()
        plabels_eval, loss_eval, _ = sess.run(
            [pred_y, loss, train_op],
            feed_dict={
                mlp._X: train_data,
                mlp._real_Y: train_labels
            }
        )
        step += 1
        if step % 500 == 0:
            print(f"Step: {step}, loss: {loss_eval}")
        
    trainable_variables = tf.compat.v1.trainable_variables()
    for variable in trainable_variables:
        saveParameters(
            name=variable.name,
            value=variable.eval(),
            epoch=train_data_reader._num_epoch
        )

Step: 500, loss: 0.0004734218819066882
Step: 1000, loss: 0.027874385938048363
Step: 1500, loss: 2.3245736429089447e-06
Step: 2000, loss: 4.329438888817094e-05
Step: 2500, loss: 8.396965313295368e-06
Step: 3000, loss: 0.00013136008055880666
Step: 3500, loss: 0.00011709719547070563
Step: 4000, loss: 0.09098311513662338
Step: 4500, loss: 1.3925422430038452
Step: 5000, loss: 3.542870899764239e-06
Step: 5500, loss: 1.4280637515184935e-05
Step: 6000, loss: 0.0
Step: 6500, loss: 0.0
Step: 7000, loss: 0.0
Step: 7500, loss: 0.0
Step: 8000, loss: 0.0
Step: 8500, loss: 0.0
Step: 9000, loss: 0.0
Step: 9500, loss: 0.0
Step: 10000, loss: 0.0


In [9]:
# Open a session to test the model
with tf.compat.v1.Session() as sess:
    _, test_data_reader = loadDataset()
    step, MAX_STEP = 0, 3000
    
    with tf.compat.v1.Session() as sess:
        epoch = 0
        trainable_variables = tf.compat.v1.trainable_variables()
        for variable in trainable_variables:
            saved_value = restoreParameter(
                name=variable.name,
                epoch=epoch
            )
            assign_op = variable.assign(saved_value)
            sess.run(assign_op)
        
        num_true_preds = 0
        while step < MAX_STEP:
            test_data, test_labels = test_data_reader.nextBatch()
            test_plabels_eval = sess.run(
                pred_y,
                feed_dict={
                    mlp._X: test_data,
                    mlp._real_Y: test_labels
                }
            )
            matches = np.equal(test_plabels_eval, test_labels)
            num_true_preds += np.sum(matches.astype("float"))
            
            step += 1
            if step % 500 == 0:
                print(f"Test on the batch {step}")

            if test_data_reader._batch_id == 0:
                break
        
        print("========")
        print("Epoch:", epoch)
        print("Accuracy on the test data:", num_true_preds / len(test_data_reader._data))

Test on the batch 500
Test on the batch 1000
Test on the batch 1500
Test on the batch 2000
Test on the batch 2500
Test on the batch 3000
Epoch: 0
Accuracy on the test data: 0.008222811671087533
