## Word Sentiment Classification

### List of contents
1. Data Preprocessing
2. Forward Function
3. Backward Function
4. Training Loop Instances
5. Train&Validation Step Function
6. Training
7. Inference&Visualization
Reference


In [1]:
# setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import preprocessing, layers, models, losses, optimizers, metrics 

## 1. Data Preprocessing

In [2]:
# Define Dataset
train_dataset = {
    'Text':   ['good', 'bad', 'worse', 'so good', 'great', 'best', 'false', 'true', 'better', 'mad'],
    'Target': [1     , 0    , 0      , 1        , 1      , 1     , 0      , 1     , 1       , 0    ]
}
validation_dataset  = {
    'Text':   ['goodd', 'worrse', 'greattt', 'sad'],
    'Target': [1      , 0       , 1        , 0    ]
}

# Fit Tokenizer with train_dataset
sequence_length = 1+max(map(lambda word: len(word) , train_dataset['Text']))
num_unique_char = len(set(''.join(train_dataset['Text'])))

tokenizer = preprocessing.text.Tokenizer(num_words=None, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', lower=False,  split='!', char_level=False, oov_token="<OOV>", document_count=0)
tokenizer.fit_on_texts(list(''.join(train_dataset['Text'])))  #tokenizer.word_index

# Preprocessing
train_sequences = tokenizer.texts_to_sequences(list(map(lambda word: list(word), train_dataset['Text'])))
validation_sequences = tokenizer.texts_to_sequences(list(map(lambda word: list(word), validation_dataset['Text'])))
train_pad_sequences = preprocessing.sequence.pad_sequences(train_sequences, value=0, padding='post', maxlen=sequence_length) # padding: 'pre' or 'post'
validation_pad_sequences = preprocessing.sequence.pad_sequences(validation_sequences, value=0, padding='post', maxlen=sequence_length) # padding: 'pre' or 'post'
train_onehot_sequences = tf.one_hot(train_pad_sequences, depth=num_unique_char)
validation_onehot_sequences = tf.one_hot(validation_pad_sequences, depth=num_unique_char)

train_X = train_onehot_sequences
train_Y = train_dataset['Target']
train_Text = train_dataset['Text']
validation_X = validation_onehot_sequences
validation_Y = validation_dataset['Target']
validation_Text = validation_dataset['Text']

train_dataset = tf.data.Dataset.from_tensor_slices((train_X, train_Y))
train_dataset = train_dataset.interleave(lambda x, y: tf.data.Dataset.from_tensors((x, y)).repeat(100), cycle_length=4, block_length=7).batch(5)
validation_dataset = tf.data.Dataset.from_tensor_slices((validation_X, validation_Y))
validation_dataset = validation_dataset.interleave(lambda x, y: tf.data.Dataset.from_tensors((x, y)), cycle_length=4, block_length=7).batch(2)
inference_dataset = tf.data.Dataset.from_tensor_slices((validation_X, validation_Y, validation_Text)).batch(batch_size=len(validation_Text))

## 2. Forward Function

In [3]:
class Model(models.Model):
    def __init__(self, name=None):
        super(Model, self).__init__(name=name)
        self.dense_1 = layers.Dense(5, activation='relu')
        self.rnn = layers.SimpleRNN(units=32)
        self.dense_2 = layers.Dense(units=1, activation='sigmoid')
        
    def call(self, x, training=None):
        x = self.dense_1(x)
        x = self.rnn(x)
        x = self.dense_2(x)
        return tf.squeeze(x, axis=None)
    
class Criterion(losses.Loss):
    def call(self, target, hypothesis, training=None):
        return tf.reduce_mean(losses.binary_crossentropy(y_true=target, y_pred=hypothesis, from_logits=False))

## 3. Backward Function

In [4]:
class LearningRateScheduler(optimizers.schedules.LearningRateSchedule):
    def __init__(self, initial_learning_rate):
        self.initial_learning_rate = initial_learning_rate

    def __call__(self, step):
        return self.initial_learning_rate / (step + 1)

## 4. Training Loop Instances

In [5]:
model = Model(name='Model')
criterion = Criterion()
lr_schedule = LearningRateScheduler(initial_learning_rate=0.1)
optimizer = optimizers.Adam(learning_rate=lr_schedule)

train_loss = metrics.Mean(name='train_loss')
train_accuracy = metrics.BinaryAccuracy(name='train_accuracy')
validation_loss = metrics.Mean(name='validation_loss')
validation_accuracy = metrics.BinaryAccuracy(name='validation_accuracy')

## 5. Train&Validation Step Function

In [6]:
# [Train Step]
@tf.function
def train_step(features, targets):
    with tf.GradientTape() as tape:
        hypothesis = model(features, training=True)
        cost = criterion(targets, hypothesis)
    gradients = tape.gradient(cost, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss.update_state(cost)
    train_accuracy.update_state(targets, hypothesis)
    
# [Validation Step]
@tf.function
def validation_step(features, targets):
    predictions = model(features, training=False)
    cost = criterion(targets, predictions)

    validation_loss.update_state(cost)
    validation_accuracy.update_state(targets, predictions)    

## 6. Training

In [7]:
EPOCHS = 10
for epoch in range(EPOCHS):
    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    validation_loss.reset_states()
    validation_accuracy.reset_states()

    # Training Step
    for features, targets in train_dataset:
        train_step(features, targets)

    # Validation Step
    for features, targets in validation_dataset:
        validation_step(features, targets)

    print(
        f'Epoch {epoch + 1}, '
        f'Loss: {train_loss.result()}, '
        f'Accuracy: {train_accuracy.result() * 100}, '
        f'Validation Loss: {validation_loss.result()}, '
        f'Validation Accuracy: {validation_accuracy.result() * 100}'
    )

model.save_weights('model.ckpt')
model.load_weights('model.ckpt')
model.summary()

Epoch 1, Loss: 0.29046374559402466, Accuracy: 90.39999389648438, Validation Loss: 0.10069369524717331, Validation Accuracy: 100.0
Epoch 2, Loss: 0.06729231774806976, Accuracy: 100.0, Validation Loss: 0.06611797213554382, Validation Accuracy: 100.0
Epoch 3, Loss: 0.050355296581983566, Accuracy: 100.0, Validation Loss: 0.05259763449430466, Validation Accuracy: 100.0
Epoch 4, Loss: 0.0418490469455719, Accuracy: 100.0, Validation Loss: 0.044528745114803314, Validation Accuracy: 100.0
Epoch 5, Loss: 0.03638181462883949, Accuracy: 100.0, Validation Loss: 0.03906872868537903, Validation Accuracy: 100.0
Epoch 6, Loss: 0.03246432915329933, Accuracy: 100.0, Validation Loss: 0.03503796458244324, Validation Accuracy: 100.0
Epoch 7, Loss: 0.02945704199373722, Accuracy: 100.0, Validation Loss: 0.03188469260931015, Validation Accuracy: 100.0
Epoch 8, Loss: 0.027039000764489174, Accuracy: 100.0, Validation Loss: 0.029317840933799744, Validation Accuracy: 100.0
Epoch 9, Loss: 0.02502952143549919, Accur

## 7. Inference

In [8]:
#prediction = model(list(inference_dataset)[0][0]).numpy().round(0)
prediction = model.predict(inference_dataset).round(0)
inference = pd.DataFrame(np.c_[list(inference_dataset)[0][2].numpy(), prediction], columns=['Input', 'Sentiment'])
inference

Unnamed: 0,Input,Sentiment
0,b'goodd',1.0
1,b'worrse',0.0
2,b'greattt',1.0
3,b'sad',0.0


## Reference