In [1]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow_datasets as tfds

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Vreate Dataframe from text_and_sentiment.csv, obtained from https://www.kaggle.com/datasets/nelgiriyewithana/emotions/data by NIDULA ELGIRIYEWITHANA
df = pd.read_csv('text_and_sentiment.csv', index_col=0)
df.head()


df_x = df['text']

df_y = tf.keras.utils.to_categorical(df['label'], 6)

print(df_x[:5])

0        i just feel really helpless and heavy hearted
1    ive enjoyed being able to slouch about relax a...
2    i gave up my internship with the dmrg and am f...
3                           i dont know i feel so lost
4    i am a kindergarten teacher and i am thoroughl...
Name: text, dtype: object


In [3]:
# Create training data with first 80 percent of dataset, validation with the next 10 percent, and test with the last 10 percent
len_train = int(np.floor(len(df_x) * .8))
len_val = int(np.floor(len(df_x) * .1))
x_train = df_x[: len_train]
y_train = df_y[: len_train]
x_val = df_x[len_train : len_train + len_val ]
y_val = df_y[len_train : len_train + len_val ]
x_test = df_x[len_train + len_val:]
y_test = df_y[len_train + len_val:]



# Convert pandas dataframes to tensorflow datasets
ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
ds_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))

ds_train.element_spec

(TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(6,), dtype=tf.float32, name=None))

In [4]:
# Shuffle and batch training data to avoid any sorting bias in data during creation, batch validation data

BUFFER_SIZE = 10000
BATCH_SIZE = 64

ds_train = ds_train.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
ds_val = ds_val.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
ds_test = ds_test.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [5]:
# Create encoder to vectorize text with a potential vocabulary of
VOCAB_SIZE = 1000
encoder = tf.keras.layers.TextVectorization(
    max_tokens=VOCAB_SIZE
)
encoder.adapt(ds_train.map(lambda text, label:text))

In [6]:
vocab = np.array(encoder.get_vocabulary())
vocab[:20]

array(['', '[UNK]', 'i', 'feel', 'and', 'to', 'the', 'a', 'feeling',
       'that', 'of', 'my', 'in', 'it', 'like', 'so', 'for', 'im', 'have',
       'me'], dtype='<U13')

In [7]:
for example, label in ds_train.take(1):
  print('texts: ', example.numpy())
  print()
  print('labels: ', label.numpy())

texts:  [b'i dont know why i feel like a supporting character in my own life'
 b'im used to feeling helpless'
 b'i do have a feeling or maybe im just hoping that rich will use his step daughter moving in with her dogs and cats'
 b'i feel horrible for families that cant afford insurance or have inadequate insurance'
 b'i will enjoy feeling bouncy and hope that what i have done up to now will be enough'
 b'i decided to change into my jeans instead of my swim shorts because my legs were feeling hot and looking red despite the three times id put sunscreen on them'
 b'i am hopeful that normalcy is just around the corner grateful for how far i have come and feeling very blessed that although this sucks i am here to write about it'
 b'i have to sit up blow my nose and feel more miserable than ever'
 b'im feeling pissed off and abandoned'
 b'im perfectly happy with just being me and accepting my natural body shape as opposed to starving myself to obtain something that isnt natural to my body a

In [8]:
# Create RNN model 
model = tf.keras.Sequential([
    encoder,
    # Encodes input text into vector form
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        # Sets inputs and hidden states to zero to allow for variable lengt inputs
        mask_zero=True
    ),
    # Using a Bidirectional Long Short-Term Memory Layer to allow words at beginning and end to equally affect the sentiment of the text
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(6, activation='softmax')
])

In [9]:
# Test
sample_text = ("Wow! Creating a Recurrent Neural Network is fun!")

predictions = model.predict(np.array([sample_text]))

print(predictions[0])

[0.16547187 0.16663423 0.16435453 0.16774139 0.16819467 0.16760333]


In [10]:
# Test with padding
padding = "the " * 2000

predictions = model.predict(np.array([sample_text, padding]))

print(predictions[0])

[0.16547185 0.16663423 0.16435453 0.16774139 0.16819467 0.16760333]


In [11]:
# Using categorical crossentropy loss function to accomodate outputs from 0-5
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])


In [12]:
history = model.fit(ds_train, epochs=10,
                    validation_data=ds_val,
                    validation_steps=30)

Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
print(ds_val)
test_loss, test_acc = model.evaluate(ds_val)

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

<PrefetchDataset element_spec=(TensorSpec(shape=(None,), dtype=tf.string, name=None), TensorSpec(shape=(None, 6), dtype=tf.float32, name=None))>
Test Loss: 0.2568303048610687
Test Accuracy: 0.8801583647727966


In [42]:
tf.keras.models.save_model(model, filepath="./sentiment_model")



INFO:tensorflow:Assets written to: ./sentiment_model\assets


INFO:tensorflow:Assets written to: ./sentiment_model\assets


In [43]:
savedModel = tf.keras.models.load_model("./sentiment_model")

In [44]:
print(ds_val)
test_loss, test_acc = savedModel.evaluate(ds_val)

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

<PrefetchDataset element_spec=(TensorSpec(shape=(None,), dtype=tf.string, name=None), TensorSpec(shape=(None, 6), dtype=tf.float32, name=None))>
Test Loss: 0.2568303048610687
Test Accuracy: 0.8801583647727966
