# ⚛ Packages and Important Libraries ⚛
The imported libraries contain various modules that are helpful for model building in TensorFlow. For the data, the financial_phrasebank dataset is downloaded from Hugging Face. Important resources are listed below.

*   https://huggingface.co/datasets/financial_phrasebank (Financial PhraseBank Dataset)
*   https://medium.com/prosus-ai-tech-blog/finbert-financial-sentiment-analysis-with-bert-b277a3607101 (FinBERT Article)
*   https://medium.com/mlearning-ai/optimizing-deep-learning-models-with-pruning-a-practical-guide-163e990c02af (Model Pruning in TF)

The complete list of libraries and modules is:
*  tensorflow.keras.layers,  tensorflow.keras.models, tensorflow.keras.utils, sklearn.metrics, sklearn.model_selection
*  nltk, string, re, random, io, pandas, numpy



In [None]:
# model packages
!pip install datasets
!pip install imbalanced-learn
!wget https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py
from helper_functions import unzip_data, create_tensorboard_callback, plot_loss_curves, compare_historys
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.python.keras.models import Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Embedding, Attention
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from keras.utils import pad_sequences
from helper_functions import create_tensorboard_callback
from keras.preprocessing.text import Tokenizer
from tensorflow.keras import layers, models, utils
from keras.utils import to_categorical

# data preprocessing packages
import re
import string
import nltk
from nltk.corpus import stopwords
from keras.utils import to_categorical

# uploading data
import random
import re
import io
import pandas as pd
from google.colab import files
from datasets import load_dataset

# visualization
import matplotlib.pyplot as plt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.12.0-py3-none-any.whl (474 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.6/474.6 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.7,>=0.3.0 (from datasets)
  Downloading dill-0.3.6-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.5/212.5 kB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.14-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.3/134.3 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
Collec

In [None]:
# Directory name under which TensorBoard logs are saved
# (passed to create_tensorboard_callback when the USE model is trained)
SAVE_DIR = "model_logs"

# Useful Functions ⛽



*   **text_cleaning(text)** : Clean and remove unnecessary string elements from each sentence, and return it back to the original sentence data
*   **calculate_results(y_true, y_pred)** :  Calculate various classification scores, including Accuracy, F1_score, Recall, and Precision.

A note on the `average` parameter of `precision_recall_fscore_support(y_true, y_pred, average="weighted")`: since the target variables (labels) might be imbalanced, setting `average` to "weighted" weights each class's score by its number of true samples, which accounts for the imbalance.



In [None]:
def text_cleaning(text):
  """Strip tweet-style noise from one sentence and return the cleaned string.

  Removed, in this order: ``@mentions``, ``#`` signs (the tag word itself is
  kept), the retweet marker ``RT`` followed by whitespace, http/https links,
  and ``&`` followed by lowercase letters or semicolons (e.g. ``&gt;``).

  Args:
    text: The raw sentence.

  Returns:
    The sentence without the elements listed above. Whitespace is not
    normalised, so a removed token can leave a double space behind.
  """
  # One alphanumeric pass is enough: the match is greedy, so removing a
  # mention can never expose another "@" + alphanumeric sequence.
  text = re.sub(r'@[A-Za-z0-9]+', '', text)     # removing @mentions
  text = re.sub(r'@[-)]+', '', text)            # removing "@-" / "@)" leftovers
  text = re.sub(r'#', '', text)                 # removing '#' sign
  # \b keeps words that merely end in "RT" (e.g. "SMART") intact
  text = re.sub(r'\bRT[\s]+', '', text)         # removing RT
  # the colon is required, otherwise "http://..." links are never matched
  text = re.sub(r'https?:\/\/\S+', '', text)    # removing the hyper link
  text = re.sub(r'&[a-z;]+', '', text)          # removing '&gt;'

  return text

def calculate_results(y_true, y_pred):
  """Summarise classification quality as a dict of scores rounded to 3 decimals.

  Accuracy is reported as a percentage; precision, recall and F1 are the
  values returned by ``precision_recall_fscore_support`` with
  ``average="weighted"``.
  """
  accuracy_pct = 100 * accuracy_score(y_true, y_pred)
  precision, recall, f1, _support = precision_recall_fscore_support(
      y_true, y_pred, average="weighted")

  scores = (("accuracy", accuracy_pct),
            ("precision", precision),
            ("recall", recall),
            ("f1", f1))
  return {name: round(value, 3) for name, value in scores}

def pre_process(data):
  """Normalise a pandas Series of sentences before tokenisation.

  Every sentence is lower-cased, stripped of digits and punctuation
  (apostrophes are kept, ``$`` is removed) and filtered against NLTK's
  English stop-word list.

  Args:
    data: pandas Series of strings, one sentence per row.

  Returns:
    A Series of the same length holding the cleaned sentences.
  """
  # Convert all text to lowercase
  data = data.str.lower()

  # Remove numbers
  data = data.apply(lambda x: re.sub(r'\d+', '', x))

  # Remove punctuation, "$" included. The apostrophe is kept on purpose:
  # NLTK stop words such as "don't" only match while it is still attached.
  # A former second pass deleted every symbol except "$", which stripped the
  # apostrophes again and removed nothing else that was still present.
  punctuation_table = str.maketrans('', '', string.punctuation.replace("'", ""))
  data = data.apply(lambda x: x.translate(punctuation_table))

  # Remove stop words
  nltk.download('stopwords')
  stop_words = set(stopwords.words('english'))
  data = data.apply(lambda x: " ".join(word for word in x.split() if word not in stop_words))

  return data

# Upload Financial_Phrasebank Data 🤗

In [None]:
# load the "sentences_allagree" configuration of financial_phrasebank (train split only)
dataset = load_dataset("financial_phrasebank","sentences_allagree",split="train")
# Convert the loaded split to a Pandas dataframe
text_df = pd.DataFrame(dataset)
print("Total Number of Sentences is: {}".format(len(text_df["sentence"])))

Downloading builder script:   0%|          | 0.00/6.04k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/13.7k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/8.86k [00:00<?, ?B/s]

Downloading and preparing dataset financial_phrasebank/sentences_allagree to /root/.cache/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141...


Downloading data:   0%|          | 0.00/682k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2264 [00:00<?, ? examples/s]

Dataset financial_phrasebank downloaded and prepared to /root/.cache/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141. Subsequent calls will reuse this data.
Total Number of Sentences is: 2264


In [None]:
# Show the relative frequency of each label (class balance)
text_df["label"].value_counts(normalize=True)

1    0.614399
2    0.251767
0    0.133834
Name: label, dtype: float64

In [None]:
# clean the huggingface sentences, then hold out 10% of them as a test split
# (random_state is fixed so the split is the same on every run)
sentences = pre_process(text_df["sentence"])
X_train_pre, X_test_pre, y_train_pre, y_test_pre = train_test_split(sentences,
                                                                    text_df["label"],
                                                                    test_size=0.1,
                                                                    random_state=42)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


# Upload Economic Sentiment text from Kaggle ⚓

In [None]:
# Ask for a file upload in Colab; the file is looked up under the key
# 'economic_sentiment_text.csv', so it has to be uploaded under that name.
uploaded = files.upload()
# NOTE: this rebinds text_df, replacing the dataframe built from financial_phrasebank.
text_df = pd.read_csv(io.BytesIO(uploaded['economic_sentiment_text.csv']))

In [None]:
# encode the economic sentiment labels with the same ids that
# financial_phrasebank uses (0 = negative, 1 = neutral, 2 = positive), so the
# class order of the model outputs does not depend on which dataset was loaded
text_df["Sentiment"] = text_df["Sentiment"].replace({"negative":0, "neutral":1, "positive":2})
# show the relative frequency of each class
text_df["Sentiment"].value_counts(normalize=True)


In [None]:
# clean the economic sentiment sentences and hold out 10% as a test split;
# this overwrites the split variables created from the huggingface dataset
sentences = pre_process(text_df["Sentence"])
X_train_pre, X_test_pre, y_train_pre, y_test_pre = train_test_split(sentences,text_df["Sentiment"],test_size=0.1,random_state=42)

# Data Pre-processing with Tensorflow Tokenizer 📑

In [None]:
# Initialize the tokenizer. The vocabulary is capped at 10000, the input_dim
# of every Embedding layer below, so no token id can fall outside the
# embedding table when the corpus contains more distinct words than that.
tokenizer = Tokenizer(num_words=10000)

# Fit the tokenizer on the training data
tokenizer.fit_on_texts(X_train_pre)

# Convert the text data to sequences
X_train_seq = tokenizer.texts_to_sequences(X_train_pre)
X_test_seq = tokenizer.texts_to_sequences(X_test_pre)

# Find max length (longest training sentence, in tokens)
max_length = max(len(seq) for seq in X_train_seq)
print("The max length is: ", max_length)

# Pad the sequences to have the same length
X_train = pad_sequences(X_train_seq, maxlen=max_length)
X_test = pad_sequences(X_test_seq, maxlen=max_length)

# converting target variables into categorical values
y_train = to_categorical(y_train_pre, num_classes=3)
y_test = to_categorical(y_test_pre, num_classes=3)

# Balance the labels (training split only; the test split is left as is)
# NOTE(review): SMOTE interpolates between padded token-id vectors, so the
# synthetic rows are not real sentences -- consider random oversampling or
# class weights instead.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

The max length is:  36


# Baseline Model (Dense) ⚫

In [None]:
# construct the model architecture:
# token ids -> embedding -> average over the sequence -> two dense layers -> softmax
inputs = layers.Input(shape=(max_length,))  # one padded sequence of token ids per example
x = layers.Embedding(input_dim=10000, output_dim=256, input_length=max_length)(inputs)
x = layers.Dropout(0.5)(x)
x = layers.GlobalAveragePooling1D()(x)  # collapse the sequence axis into a single vector
x = layers.Dropout(0.5)(x)
x = layers.Dense(256, activation="relu")(x)
x = layers.Dense(64, activation="relu")(x)
outputs = layers.Dense(3, activation="softmax")(x)  # one probability per sentiment class
model = models.Model(inputs, outputs)

# compile the model (categorical cross-entropy expects one-hot labels)
model.compile(loss="categorical_crossentropy",
              optimizer=tf.keras.optimizers.Adam(),
              metrics=["accuracy"])

model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 36)]              0         
                                                                 
 embedding_4 (Embedding)     (None, 36, 256)           2560000   
                                                                 
 dropout_8 (Dropout)         (None, 36, 256)           0         
                                                                 
 global_average_pooling1d_4   (None, 256)              0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dropout_9 (Dropout)         (None, 256)               0         
                                                                 
 dense_12 (Dense)            (None, 256)               65792     
                                                           

In [None]:
# train the model on the SMOTE-resampled training data
# NOTE(review): validation_data is the test split, which is also used for the
# final evaluation, so checkpoint selection sees the evaluation data.
model_history_hf = model.fit(X_resampled, y_resampled,
          validation_data=(X_test, y_test),
          epochs=100,
          batch_size=128,
          # checkpointing: write a file only when val_loss improves; the file
          # name carries the epoch number and the *training* accuracy
          callbacks=[tf.keras.callbacks.ModelCheckpoint(
                        filepath="dense_model_checkpoints/model-{epoch:02d}-{accuracy:.3f}.hdf5",
                        monitor="val_loss",
                        mode="min",
                        save_best_only=True,
                        verbose=1
                    ),
                    # learning rate drop: multiply by 0.1 after 5 epochs
                    # without val_loss improvement, never below 1e-6
                    tf.keras.callbacks.ReduceLROnPlateau(
                        monitor="val_loss",
                        mode="min",
                        factor=0.1,
                        patience=5,
                        verbose=1,
                        min_lr=0.000001
                    ),
                    # early stopping: stop after 10 epochs in which val_loss
                    # did not improve by at least 0.001
                    tf.keras.callbacks.EarlyStopping(
                        monitor="val_loss",
                        mode="min",
                        min_delta=0.001,
                        patience=10,
                        verbose=1,
                    ),
                    # CSV logger: per-epoch metrics, file is overwritten on each run
                    tf.keras.callbacks.CSVLogger(
                        filename="dense_model_training_log.csv",
                        separator=",",
                        append=False
                    )])

Epoch 1/100
Epoch 1: val_loss improved from inf to 1.04037, saving model to dense_model_checkpoints/model-01-0.384.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 1.04037 to 0.81521, saving model to dense_model_checkpoints/model-02-0.531.hdf5
Epoch 3/100
Epoch 3: val_loss did not improve from 0.81521
Epoch 4/100
Epoch 4: val_loss improved from 0.81521 to 0.65927, saving model to dense_model_checkpoints/model-04-0.614.hdf5
Epoch 5/100
Epoch 5: val_loss did not improve from 0.65927
Epoch 6/100
Epoch 6: val_loss improved from 0.65927 to 0.61143, saving model to dense_model_checkpoints/model-06-0.686.hdf5
Epoch 7/100
Epoch 7: val_loss did not improve from 0.61143
Epoch 8/100
Epoch 8: val_loss improved from 0.61143 to 0.57769, saving model to dense_model_checkpoints/model-08-0.752.hdf5
Epoch 9/100
Epoch 9: val_loss did not improve from 0.57769
Epoch 10/100
Epoch 10: val_loss improved from 0.57769 to 0.56213, saving model to dense_model_checkpoints/model-10-0.816.hdf5
Epoch 11/100
Epoch 11:

In [None]:
# NOTE(review): the checkpoint file name is copied by hand from the training
# log and changes from run to run.
best_dense_model = tf.keras.models.load_model("/content/dense_model_checkpoints/model-10-0.816.hdf5")

# predict
model_1_preds_probs = best_dense_model.predict(X_test)
# convert the softmax probabilities to class ids; rounding them instead can
# leave a row with no predicted class when no probability exceeds 0.5
model_1_preds = tf.argmax(model_1_preds_probs, axis=1)
# calculate model_1 results against the class ids of the one-hot test labels
model_1 = calculate_results(tf.argmax(y_test, axis=1), model_1_preds)



# Model 2 (LSTM) ⬛

In [None]:
# construct the model architecture:
# token ids -> embedding -> LSTM (full sequence) -> average over time -> dense -> softmax
inputs = layers.Input(shape=(max_length,))  # one padded sequence of token ids per example
x = layers.Embedding(input_dim=10000, output_dim=256, input_length=max_length)(inputs)
x = layers.Dropout(0.5)(x)
# return_sequences=True keeps one output per time step for the pooling layer below
# NOTE(review): activation="relu" overrides the LSTM default -- presumably this
# rules out the fused cuDNN kernel; confirm if training speed matters.
x = layers.LSTM(256, activation="relu", return_sequences=True)(x)
x = layers.Dropout(0.5)(x)
x = layers.GlobalAveragePooling1D()(x)  # collapse the time axis into a single vector
x = layers.Dense(128, activation="relu")(x)
outputs = layers.Dense(3, activation="softmax")(x)  # one probability per sentiment class

model_lstm = models.Model(inputs, outputs)

# compile the model (categorical cross-entropy expects one-hot labels)
model_lstm.compile(loss="categorical_crossentropy",
              optimizer=tf.keras.optimizers.Adam(),
              metrics=["accuracy"])

model_lstm.summary()



Model: "model_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_13 (InputLayer)       [(None, 36)]              0         
                                                                 
 embedding_12 (Embedding)    (None, 36, 256)           2560000   
                                                                 
 dropout_24 (Dropout)        (None, 36, 256)           0         
                                                                 
 lstm_7 (LSTM)               (None, 36, 256)           525312    
                                                                 
 dropout_25 (Dropout)        (None, 36, 256)           0         
                                                                 
 global_average_pooling1d_12  (None, 256)              0         
  (GlobalAveragePooling1D)                                       
                                                           

In [None]:
# train the LSTM model on the SMOTE-resampled training data
# NOTE(review): validation_data is the test split, which is also used for the
# final evaluation, so checkpoint selection sees the evaluation data.
model_history_lstm = model_lstm.fit(X_resampled, y_resampled,
          validation_data=(X_test, y_test),
          epochs=100,
          batch_size=128,
          # checkpointing: write a file only when val_loss improves; the file
          # name carries the epoch number and the *training* accuracy
          callbacks=[tf.keras.callbacks.ModelCheckpoint(
                        filepath="lstm_model_checkpoints/model-{epoch:02d}-{accuracy:.3f}.hdf5",
                        monitor="val_loss",
                        mode="min",
                        save_best_only=True,
                        verbose=1
                    ),
                    # learning rate drop: multiply by 0.1 after 5 epochs
                    # without val_loss improvement, never below 1e-6
                    tf.keras.callbacks.ReduceLROnPlateau(
                        monitor="val_loss",
                        mode="min",
                        factor=0.1,
                        patience=5,
                        verbose=1,
                        min_lr=0.000001
                    ),
                    # early stopping: stop after 10 epochs in which val_loss
                    # did not improve by at least 0.001
                    tf.keras.callbacks.EarlyStopping(
                        monitor="val_loss",
                        mode="min",
                        min_delta=0.001,
                        patience=10,
                        verbose=1,
                    ),
                    # CSV logger: per-epoch metrics, file is overwritten on each run
                    tf.keras.callbacks.CSVLogger(
                        filename="lstm_model_training_log.csv",
                        separator=",",
                        append=False
                    )])

Epoch 1/100
Epoch 1: val_loss improved from inf to 0.91801, saving model to lstm_model_checkpoints/model-01-0.405.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 0.91801 to 0.84704, saving model to lstm_model_checkpoints/model-02-0.561.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 0.84704 to 0.67463, saving model to lstm_model_checkpoints/model-03-0.606.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 0.67463 to 0.63436, saving model to lstm_model_checkpoints/model-04-0.635.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 0.63436 to 0.63214, saving model to lstm_model_checkpoints/model-05-0.648.hdf5
Epoch 6/100
Epoch 6: val_loss did not improve from 0.63214
Epoch 7/100
Epoch 7: val_loss did not improve from 0.63214
Epoch 8/100
Epoch 8: val_loss did not improve from 0.63214
Epoch 9/100
Epoch 9: val_loss improved from 0.63214 to 0.61571, saving model to lstm_model_checkpoints/model-09-0.716.hdf5
Epoch 10/100
Epoch 10: val_loss did not improve from 0.61571
Epoch 11/100
Epoch 11: val_l

In [None]:
# NOTE(review): the checkpoint file name is copied by hand from the training
# log and changes from run to run.
best_lstm_model = tf.keras.models.load_model("/content/lstm_model_checkpoints/model-09-0.716.hdf5")

# predict
model_2_preds_probs = best_lstm_model.predict(X_test)
# convert the softmax probabilities to class ids; rounding them instead can
# leave a row with no predicted class when no probability exceeds 0.5
model_2_preds = tf.argmax(model_2_preds_probs, axis=1)
# calculate model_2 results against the class ids of the one-hot test labels
model_2 = calculate_results(tf.argmax(y_test, axis=1), model_2_preds)





# Model 3 (GRU) ⬛

In [None]:
# construct the model architecture:
# token ids -> embedding -> GRU (full sequence) -> average over time -> softmax
inputs = layers.Input(shape=(max_length, ))  # one padded sequence of token ids per example
x = layers.Embedding(input_dim=10000, output_dim=128, input_length=max_length)(inputs)
x = layers.Dropout(0.5)(x)
# return_sequences=True keeps one output per time step for the pooling layer below
x = layers.GRU(128,return_sequences=True)(x)
x = layers.Dropout(0.5)(x)
x = layers.GlobalAveragePooling1D()(x)  # collapse the time axis into a single vector
outputs = layers.Dense(3, activation="softmax")(x)  # one probability per sentiment class
model_gru = models.Model(inputs, outputs)

# compile the model (categorical cross-entropy expects one-hot labels)
model_gru.compile(loss="categorical_crossentropy",
              optimizer=tf.keras.optimizers.Adam(),
              metrics=["accuracy"])
model_gru.summary()

Model: "model_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_15 (InputLayer)       [(None, 36)]              0         
                                                                 
 embedding_14 (Embedding)    (None, 36, 128)           1280000   
                                                                 
 dropout_28 (Dropout)        (None, 36, 128)           0         
                                                                 
 gru_1 (GRU)                 (None, 36, 128)           99072     
                                                                 
 dropout_29 (Dropout)        (None, 36, 128)           0         
                                                                 
 global_average_pooling1d_14  (None, 128)              0         
  (GlobalAveragePooling1D)                                       
                                                          

In [None]:
# train the GRU model on the SMOTE-resampled training data
# NOTE(review): validation_data is the test split, which is also used for the
# final evaluation, so checkpoint selection sees the evaluation data.
model_history_gru = model_gru.fit(X_resampled, y_resampled,
          validation_data=(X_test, y_test),
          epochs=50,
          batch_size=64,
          # checkpointing: write a file only when val_loss improves; the file
          # name carries the epoch number and the *training* accuracy
          callbacks=[tf.keras.callbacks.ModelCheckpoint(
                        filepath="gru_model_checkpoints/model-{epoch:02d}-{accuracy:.3f}.hdf5",
                        monitor="val_loss",
                        mode="min",
                        save_best_only=True,
                        verbose=1
                    ),
                    # learning rate drop: multiply by 0.1 after 5 epochs
                    # without val_loss improvement, never below 1e-6
                    tf.keras.callbacks.ReduceLROnPlateau(
                        monitor="val_loss",
                        mode="min",
                        factor=0.1,
                        patience=5,
                        verbose=1,
                        min_lr=0.000001
                    ),
                    # early stopping: stop after 10 epochs in which val_loss
                    # did not improve by at least 0.001
                    tf.keras.callbacks.EarlyStopping(
                        monitor="val_loss",
                        mode="min",
                        min_delta=0.001,
                        patience=10,
                        verbose=1,
                    ),
                    # CSV logger: per-epoch metrics, file is overwritten on each run
                    tf.keras.callbacks.CSVLogger(
                        filename="gru_model_training_log.csv",
                        separator=",",
                        append=False
                    )])

Epoch 1/50
Epoch 1: val_loss improved from inf to 0.96003, saving model to gru_model_checkpoints/model-01-0.420.hdf5
Epoch 2/50
Epoch 2: val_loss improved from 0.96003 to 0.69295, saving model to gru_model_checkpoints/model-02-0.579.hdf5
Epoch 3/50
Epoch 3: val_loss improved from 0.69295 to 0.66834, saving model to gru_model_checkpoints/model-03-0.624.hdf5
Epoch 4/50
Epoch 4: val_loss did not improve from 0.66834
Epoch 5/50
Epoch 5: val_loss did not improve from 0.66834
Epoch 6/50
Epoch 6: val_loss did not improve from 0.66834
Epoch 7/50
Epoch 7: val_loss did not improve from 0.66834
Epoch 8/50
Epoch 8: val_loss did not improve from 0.66834

Epoch 8: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 9/50
Epoch 9: val_loss did not improve from 0.66834
Epoch 10/50
Epoch 10: val_loss did not improve from 0.66834
Epoch 11/50
Epoch 11: val_loss did not improve from 0.66834
Epoch 12/50
Epoch 12: val_loss did not improve from 0.66834
Epoch 13/50
Epoch 13: val_loss did 

In [None]:
# NOTE(review): the checkpoint file name is copied by hand from a training log
# and changes from run to run -- confirm it matches the latest run.
best_gru_model = tf.keras.models.load_model("/content/gru_model_checkpoints/model-07-0.661.hdf5")
# predict
model_3_preds_probs = best_gru_model.predict(X_test)
# convert the softmax probabilities to class ids; rounding them instead can
# leave a row with no predicted class when no probability exceeds 0.5
model_3_preds = tf.argmax(model_3_preds_probs, axis=1)
# calculate model_3 results against the class ids of the one-hot test labels
model_3 = calculate_results(tf.argmax(y_test, axis=1), model_3_preds)



# Model 4 (Bidirectional LSTM) ⬛

In [None]:
# construct the model architecture:
# token ids -> embedding -> bidirectional LSTM (full sequence) -> average over time -> dense -> softmax
inputs = layers.Input(shape=(max_length,))  # one padded sequence of token ids per example
x = layers.Embedding(input_dim=10000, output_dim=256, input_length=max_length)(inputs)
x = layers.Dropout(0.5)(x)
# the wrapped LSTM has 128 units; return_sequences=True keeps one output per time step
x = layers.Bidirectional(layers.LSTM(128,return_sequences=True))(x)
x = layers.GlobalAveragePooling1D()(x)  # collapse the time axis into a single vector
x = layers.Dense(64, "relu")(x)  # "relu" is passed positionally as the activation
outputs = layers.Dense(3, activation="softmax")(x)  # one probability per sentiment class
model_bd = models.Model(inputs, outputs)

# compile the model (categorical cross-entropy expects one-hot labels)
model_bd.compile(loss="categorical_crossentropy",
              optimizer=tf.keras.optimizers.Adam(),
              metrics=["accuracy"])

model_bd.summary()

Model: "model_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_18 (InputLayer)       [(None, 36)]              0         
                                                                 
 embedding_17 (Embedding)    (None, 36, 256)           2560000   
                                                                 
 dropout_35 (Dropout)        (None, 36, 256)           0         
                                                                 
 bidirectional_2 (Bidirectio  (None, 36, 256)          394240    
 nal)                                                            
                                                                 
 global_average_pooling1d_17  (None, 256)              0         
  (GlobalAveragePooling1D)                                       
                                                                 
 dense_34 (Dense)            (None, 64)                164

In [None]:
# train the bidirectional LSTM on the SMOTE-resampled training data;
# batch_size=1 means one weight update per training example
# NOTE(review): validation_data is the test split, which is also used for the
# final evaluation, so checkpoint selection sees the evaluation data.
model_history_bd = model_bd.fit(X_resampled, y_resampled,
          validation_data=(X_test, y_test),
          epochs=8,
          batch_size=1,
          # checkpointing: write a file only when val_loss improves; the file
          # name carries the epoch number and the *training* accuracy
          callbacks=[tf.keras.callbacks.ModelCheckpoint(
                        filepath="bd_model_checkpoints/model-{epoch:02d}-{accuracy:.3f}.hdf5",
                        monitor="val_loss",
                        mode="min",
                        save_best_only=True,
                        verbose=1
                    ),
                    # learning rate drop: multiply by 0.1 after 2 epochs
                    # without val_loss improvement, never below 1e-6
                    tf.keras.callbacks.ReduceLROnPlateau(
                        monitor="val_loss",
                        mode="min",
                        factor=0.1,
                        patience=2,
                        verbose=1,
                        min_lr=0.000001
                    ),
                    # early stopping: stop after 5 epochs in which val_loss
                    # did not improve by at least 0.001
                    tf.keras.callbacks.EarlyStopping(
                        monitor="val_loss",
                        mode="min",
                        min_delta=0.001,
                        patience=5,
                        verbose=1,
                    ),
                    # CSV logger: per-epoch metrics, file is overwritten on each run
                    tf.keras.callbacks.CSVLogger(
                        filename="bd_model_training_log.csv",
                        separator=",",
                        append=False
                    )])

Epoch 1/8
Epoch 1: val_loss improved from inf to 0.59282, saving model to bd_model_checkpoints/model-01-0.556.hdf5
Epoch 2/8
Epoch 2: val_loss improved from 0.59282 to 0.54493, saving model to bd_model_checkpoints/model-02-0.646.hdf5
Epoch 3/8
Epoch 3: val_loss improved from 0.54493 to 0.53941, saving model to bd_model_checkpoints/model-03-0.741.hdf5
Epoch 4/8
Epoch 4: val_loss did not improve from 0.53941
Epoch 5/8
Epoch 5: val_loss did not improve from 0.53941

Epoch 5: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 6/8
Epoch 6: val_loss did not improve from 0.53941
Epoch 7/8
Epoch 7: val_loss did not improve from 0.53941

Epoch 7: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 8/8
Epoch 8: val_loss did not improve from 0.53941
Epoch 8: early stopping


In [None]:
# NOTE(review): the checkpoint file name is copied by hand from the training
# log and changes from run to run.
best_bd_model = tf.keras.models.load_model("/content/bd_model_checkpoints/model-03-0.741.hdf5")

# predict
model_4_preds_probs = best_bd_model.predict(X_test)
# convert the softmax probabilities to class ids; rounding them instead can
# leave a row with no predicted class when no probability exceeds 0.5
model_4_preds = tf.argmax(model_4_preds_probs, axis=1)
# calculate model_4 results against the class ids of the one-hot test labels
model_4 = calculate_results(tf.argmax(y_test, axis=1), model_4_preds)



# Feature Extraction #1 ⬛

**A pre-trained sentence encoder (embedding) -> Universal Sentence Encoder (USE)**

**Resources**
* If you want to deploy the model on mobile, a lite version of a pre-trained model will do the job. Otherwise, stick with the large ones.
* https://tfhub.dev/google/universal-sentence-encoder/4

**Important Notes:**
* Import tensorflow_hub in the same cell as the hub.load("") call. If it is imported elsewhere, the package throws an error for some unknown reason.
* Leave input_shape as an empty list, because the layer has a pre-defined input shape that we do not know (it is probably 512).

In [None]:
# tensorflow_hub is imported here, in the same cell as hub.load
import tensorflow_hub as hub
# Load the Universal Sentence Encoder v4 module from TF Hub
embedding_fn = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

In [None]:
# Create a Keras Layer using the USE pre-trained layer from tensorflow-hub.
# It takes raw strings (dtype=tf.string) and its weights stay frozen (trainable=False).
sentence_encoder_layer = hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder/4",
                                        input_shape=[], # check the important notes
                                        dtype=tf.string,
                                        trainable=False,
                                        name="USE_encoder")

In [None]:
# One-hot encode the labels for the USE model. The ndim guard makes this cell
# safe to re-run: encoding labels that are already one-hot would turn them
# into an array of shape (n_samples, 3, 3).
if y_train_pre.ndim == 1:
    y_train_pre = to_categorical(y_train_pre,num_classes=3)
if y_test_pre.ndim == 1:
    y_test_pre = to_categorical(y_test_pre,num_classes=3)

In [None]:
# Sanity check: label shapes (train, test), then sentence shapes (train, test)
print(y_train_pre.shape, y_test_pre.shape)
print(X_train_pre.shape, X_test_pre.shape)

(2037, 3) (227, 3)
(2037,) (227,)


In [None]:
# create model using the Sequential API:
# frozen USE encoder -> dropout -> two dense layers -> softmax over 3 classes
model_use = tf.keras.Sequential([
    sentence_encoder_layer,
    layers.Dropout(0.5),
    layers.Dense(512, activation="relu"),
    layers.Dropout(0.25),
    layers.Dense(512, activation="relu"),
    layers.Dense(3, activation="softmax")
])

# Compile (Adam with an explicit learning rate of 0.0068; labels must be one-hot)
model_use.compile(loss= "categorical_crossentropy",
                  optimizer=tf.keras.optimizers.Adam(0.0068),
                  metrics=["accuracy"])
model_use.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 USE_encoder (KerasLayer)    (None, 512)               256797824 
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 512)               262656    
                                                                 
 dropout_1 (Dropout)         (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 512)               262656    
                                                                 
 dense_2 (Dense)             (None, 3)                 1539      
                                                                 
Total params: 257,324,675
Trainable params: 526,851
Non-

In [None]:
# train the model with USE embedding on the cleaned sentences (not resampled);
# the only callback is TensorBoard logging, so no checkpoint is written here
model_history_use = model_use.fit(X_train_pre, y_train_pre,
                                  validation_data=(X_test_pre, y_test_pre),
                                  epochs=12,
                                  callbacks=[create_tensorboard_callback(SAVE_DIR,"model_use")])

Saving TensorBoard log files to: model_logs/model_use/20230604-205718
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [None]:
# Custom Keras layer wrapping the frozen USE encoder
class USEEncoderLayer(tf.keras.layers.Layer):
    """Keras layer that forwards its inputs to the Universal Sentence Encoder v4."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        encoder_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
        self.use_layer = hub.KerasLayer(
            encoder_url,
            input_shape=[],  # check the important notes
            dtype=tf.string,
            trainable=False,
            name="USE_encoder",
        )

    def call(self, inputs, **kwargs):
        """Return the encoder output for ``inputs``; extra kwargs are not forwarded."""
        encoded = self.use_layer(inputs)
        return encoded

# Register the custom layer
custom_objects = {"USEEncoderLayer": USEEncoderLayer, "KerasLayer": hub.KerasLayer}

# Load the model with custom layer
# NOTE(review): no visible cell writes to use_model_checkpoints/ -- confirm
# that this checkpoint file exists before running.
with tf.keras.utils.custom_object_scope(custom_objects):
    best_use_model = tf.keras.models.load_model("/content/use_model_checkpoints/model-01-0.743.hdf5")

# predict
model_5_preds_probs = best_use_model.predict(X_test_pre)
# convert the softmax probabilities to class ids; rounding them instead can
# leave a row with no predicted class when no probability exceeds 0.5
model_5_preds = tf.argmax(model_5_preds_probs, axis=1)
# calculate model_5 results against the class ids of the one-hot test labels
model_5 = calculate_results(tf.argmax(y_test_pre, axis=1), model_5_preds)



# Model Performance Comparison ☑

In [None]:
# One row per model, one column per metric (the dict of dicts is transposed)
all_model_results = pd.DataFrame({"Dense":model_1,
                                  "LSTM":model_2,
                                  "GRU":model_3,
                                  "Bidirectional LSTM":model_4,
                                  "Pre-trained":model_5}).T
# calculate_results reports accuracy as a percentage; bring it back to the
# 0-1 scale used by the other metrics
all_model_results["accuracy"] = round(all_model_results["accuracy"] / 100,3)
# best model first
all_model_results.sort_values(by="accuracy",ascending=False)

Unnamed: 0,accuracy,f1,precision,recall
Pre-trained,0.784,0.823,0.895,0.784


# Model Testing 🧪

In [None]:
# Define the pre-processing function
def pre_process2(text):
    """Clean a single sentence exactly as the training sentences are cleaned.

    Delegates to ``pre_process`` so the cleaning applied at prediction time
    cannot drift from the cleaning applied before the tokenizer was fitted.

    Args:
        text: The raw sentence as a string.

    Returns:
        The cleaned sentence as a string.
    """
    # pre_process works on a pandas Series, so wrap the sentence in a
    # one-element Series and unwrap the single result
    return pre_process(pd.Series([text])).iloc[0]

# Function to make prediction on new text
def predict_sentiment(text):
    """Return the class probabilities that the global ``model`` assigns to ``text``.

    The sentence is cleaned with ``pre_process2``, converted to token ids by
    the fitted ``tokenizer`` and padded to ``max_length`` before prediction.
    """
    cleaned = pre_process2(text)

    # the tokenizer and pad_sequences work on batches, hence the one-element list
    padded = pad_sequences(tokenizer.texts_to_sequences([cleaned]), maxlen=max_length)

    # keep the first (and only) row of the batch prediction
    return model.predict(padded)[0]

# Get user input (blocks until a sentence is typed)
text_input = input("Enter the text: ")

# Make prediction with the tokenizer-based pipeline and print the class probabilities
prediction = predict_sentiment(text_input)
print("Sentiment Prediction:", prediction)

Enter the text: "However, the Euro 7 proposal is simply not the right way to do this, as it would have an extremely low environmental impact at an extremely high cost."
Sentiment Prediction: [0.32501534 0.06486119 0.61012346]


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Function to make prediction on new text
def predict_sentiment2(text):
    """Return ``model_use``'s prediction for one raw sentence, squeezed of size-1 axes."""
    # the raw sentence is passed as a one-element batch; no cleaning is applied
    batch_probs = model_use.predict([text])
    return tf.squeeze(batch_probs)

# Get user input (blocks until a sentence is typed)
text_input = input("Enter the text: ")

# Make prediction with the USE-based model and print the class probabilities
prediction = predict_sentiment2(text_input)
print("Sentiment Prediction:", prediction)

# Save Model

In [None]:
# Save Model & Pipeline
import joblib
# the context manager closes the file even if serialisation raises
# NOTE(review): this pickles a Keras model with joblib -- confirm the file can
# be loaded back; model.save() is the format Keras itself supports.
with open("sentiment_classifier_model.pkl","wb") as model_file:
    joblib.dump(model, model_file)

In [None]:
import joblib
import tensorflow as tf
import tensorflow_hub as hub

# Custom Keras layer wrapping the frozen USE encoder
class USEEncoderLayer(tf.keras.layers.Layer):
    """Keras layer that forwards its inputs to the Universal Sentence Encoder v4."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        encoder_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
        self.use_layer = hub.KerasLayer(
            encoder_url,
            input_shape=[],  # check the important notes
            dtype=tf.string,
            trainable=False,
            name="USE_encoder",
        )

    def call(self, inputs, **kwargs):
        """Return the encoder output for ``inputs``; extra kwargs are not forwarded."""
        encoded = self.use_layer(inputs)
        return encoded

# Register the custom layer
custom_objects = {"USEEncoderLayer": USEEncoderLayer, "KerasLayer": hub.KerasLayer}

# Save the USE model with the custom layer registered.
# save_model returns None, so its result is not bound to best_use_model.
with tf.keras.utils.custom_object_scope(custom_objects):
    tf.keras.models.save_model(model_use,"/content/model_use.hdf5")

In [None]:
# Load the saved USE model back from disk; custom_objects lets Keras resolve
# the hub layer classes by name while deserialising
with tf.keras.utils.custom_object_scope(custom_objects):
    best_use_model = tf.keras.models.load_model("/content/model_use.hdf5")

Enter the text: Enter the text: mic outlook, especially in China, achieving this means shouldering the burden of cuts. The rest of the 23-nation group offered no additional action to buttress the current market, but did pledge to maintain their existing cuts until the end of 2024.
Sentiment Prediction: tf.Tensor([0.00611251 0.8095137  0.18437389], shape=(3,), dtype=float32)
