# General Imports

In [None]:
#General Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle 
from os.path import join
import multiprocessing
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tqdm import tqdm

In [None]:
# !pip uninstall numpy -y
# !pip install numpy

# Data Downsampling and Preprocessing



In [None]:
#Preprocessing related imports 
import nltk
nltk.download('wordnet')
nltk.download('omw-1.4')
from nltk.stem import WordNetLemmatizer
import gensim.parsing.preprocessing as gpp
import gensim.utils as gu

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [None]:
#Load full dataset 
data_dir = "/content/drive/MyDrive/Colab Notebooks/Sentiment Analysis/amazon_review_full_csv"
data = pd.read_csv(join(data_dir, "train.csv"), header=None, names=['Rating', 'Title', 'Review'])
display(data)

In [None]:
# Check distribution of rating values as this is likely our target variable
data["Rating"].value_counts()


## Preprocessing



We first preprocess our entire dataset by applying the following transformations to the textual data:

    Stripping HTML Tags (gpp.strip_tags)
    Removing all Punctuation (gpp.strip_punctuation)
    Removing all extra whitespaces (gpp.strip_multiple_whitespaces)
    Removing all numerics (gpp.strip_numeric)
    Removing stopwords(gpp.remove_stopwords)
    Removing words shorter than 3 letters (gpp.strip_short)

Following this initial pre-processing, we also then lemmatize all the words in the reviews to produce lemmatized strings.


In [None]:


def preprocess_text(text):
    """Lower-case and trim ``text``, then pass it through the gensim cleaning filters.

    The filters run in a fixed order: strip HTML tags, strip punctuation,
    collapse repeated whitespace, strip numerics, remove stopwords and finally
    drop short words. The cleaned string is returned.
    """
    cleaned = gu.to_unicode(text.lower().strip())
    # Each filter takes a string and returns a string, so they can be chained in order
    for apply_filter in (
        gpp.strip_tags,
        gpp.strip_punctuation,
        gpp.strip_multiple_whitespaces,
        gpp.strip_numeric,
        gpp.remove_stopwords,
        gpp.strip_short,
    ):
        cleaned = apply_filter(cleaned)
    return cleaned

def lemmatize(text):
    """Lemmatizes every whitespace-separated word of a given string text input.

    ``WordNetLemmatizer.lemmatize`` looks up a single word. Passing it a whole
    review (as this function used to) leaves the text unchanged, so the text
    is split into words, each word is lemmatized on its own, and the results
    are joined back together with single spaces.

    Args:
        text: the string to lemmatize.

    Returns:
        The lemmatized words joined by single spaces ("" for empty input).
    """
    wnl = WordNetLemmatizer()
    return " ".join(wnl.lemmatize(word) for word in text.split())



In [None]:
# Single entry point for text preparation: clean first, then lemmatize
def preprocess(text):
    """Coerce ``text`` to ``str``, run ``preprocess_text`` on it and lemmatize the result."""
    return lemmatize(preprocess_text(str(text)))



Before we apply the preprocessing, we notice that the dataset has two columns with textual data: the title of the review and the review itself. As the title also indicates the feelings of the user towards the product and is essentially a summary of the review, it is also informative for predicting the user rating. Therefore, we create a new feature "ReviewFull", which is a concatenation of the review title and the review itself, and use this as our primary data for EDA and model training.


In [None]:
# Create the ReviewFull data column.
# A missing title or review is NaN, and NaN + " " + text is NaN: the remaining
# text would be lost and later turned into the literal string "nan". Replace
# missing values with empty strings before concatenating.
data["ReviewFull"] = (
    data["Title"].fillna("").astype(str) + " " + data["Review"].fillna("").astype(str)
)
data = data.drop(["Title", "Review"], axis=1)

# Apply the preprocessing to the textual data
data["ReviewFull"] = data["ReviewFull"].apply(preprocess)
data.head()


In [None]:
# Save the data
data.to_csv(join(data_dir, "preprocessed_train.csv"))


## Downsampling




We now create a smaller dataset which my computer can process when doing EDA and modeling. We downsample to a dataset size of 50000 data points, ensuring that there is an even distribution of ratings by grouping by the "Rating" column when sampling

In [None]:


# Draw the same number of reviews from every rating so the classes stay balanced.
# A fixed random_state makes the subset (and every file derived from it) reproducible.
downsampled = data.groupby("Rating").sample(n=10000, random_state=0)
display(downsampled)



In [None]:
# Ensure equal distribution of targets
downsampled["Rating"].value_counts()

In [None]:
# Save data
downsampled.to_csv(join(data_dir, "downsampled_preprocessed_train_50000.csv"))


# Exploratory Data Analysis

Exploratory data analysis to investigate the nature of the textual features and discover potential relationships between the features and the target variable that could aid in the prediction of ratings.


In [None]:
# EDA related imports
from collections import Counter
from textblob import TextBlob
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF

In [None]:
# Load the downsampled preprocessed data
data = pd.read_csv(join(data_dir, "downsampled_preprocessed_train_50000.csv"))[["Rating", "ReviewFull"]]
data["ReviewFull"] = data["ReviewFull"].apply(str)
display(data)


## Sentiment Polarity - Rating Analysis

First, we attempt to find whether any relationship exists between the sentiment polarity of a review and its rating. We do so by observing how the average sentiment of reviews changes with rating.


In [None]:
# Define function to extract a string's sentiment
def find_sentiment(text):
    """Return the TextBlob sentiment polarity of ``text``."""
    return TextBlob(text).sentiment.polarity

# Create new column in dataframe holding the polarity of every review
data["sentiment"] = data["ReviewFull"].apply(find_sentiment)

In [None]:
# Plot mean sentiment against rating 
for rating in range(1,6):
    print(f"Mean Sentiment for Rating {rating}: {data[data['Rating'] == rating]['sentiment'].mean()}")
data.groupby("Rating")["sentiment"].mean().plot(kind="bar")

In [None]:
plt.scatter(data["sentiment"], data["Rating"])
plt.xlabel("Sentiment")
plt.ylabel("Rating")
plt.show()


As we can see from the above plots, although the mean sentiment of the reviews is positively correlated with the rating, at the level of individual reviews there is a lot of variance and overlap between the sentiment values of reviews with different ratings. We can investigate further by looking at the most common words that contribute to this sentiment.

In [None]:


# Function for finding the n most common words in a series
num_most_common = 25


def find_n_most_common(text, n):
    """Return the ``n`` most frequent whitespace-separated words over all strings in ``text``."""
    word_counts = Counter(' '.join(text).split())
    return word_counts.most_common(n)


most_common = data.groupby("Rating")["ReviewFull"].apply(
    lambda reviews: find_n_most_common(reviews, num_most_common)
)

for idx, rating in enumerate(range(1, 6)):
    print(f"Most Common Words for Rating {rating} sorted by sentiment: ")
    # Order this rating's (word, count) pairs by the polarity of the word itself
    words_by_polarity = sorted(
        most_common.iloc[idx],
        key=lambda pair: TextBlob(pair[0]).sentiment.polarity,
    )
    print(words_by_polarity)
    print("\n")

    





From the above cell, we see that the lower-rated reviews have many occurrences of negative words such as "bad", "waste", and "terrible", whereas higher-rated reviews have many occurrences of positive words such as "best" and "excellent". However, we also see that some words of the same sentiment, like "better" and "good", occur numerous times in reviews of all ratings.

When finding the sentiment of a sentence using TextBlob as done above, the sentiment is calculated by a simple averaging of the sentiments of the individual words of the sentence. However, this approach does not accurately represent the differences in sentiment between two sentences that use similar words in different contexts.

For instance, the sentence "the movie was better than most other" and the sentence "the movie could have been much better" both use similar words such as "better" but have completely different sentiments.

This discovery shows us that when conducting sentiment analysis and attempting to predict ratings, we must use an encoding of the review which captures its contextual meaning.



## Topic Modeling

We now will attempt to use an unsupervised topic modeling approach using non-negative matrix factorization to try and categorize reviews into 5 groups based on their content and try to see whether or not the general topics of the reviews are in any way correlated with their corresponding ratings.


In [None]:
def display_topics(model, words, num_top_words):
    """Print the highest-weighted vocabulary words of every topic in a fitted NMF model.

    model: object exposing ``components_`` (one row of word weights per topic).
    words: vocabulary, indexable by the column positions of ``components_``.
    num_top_words: number of words printed per topic.
    """
    for topic_number, weights in enumerate(model.components_):
        print(f"Topic {topic_number}:")
        # argsort is ascending, so reverse it and keep the leading entries
        strongest = weights.argsort()[::-1][:num_top_words]
        print(" ".join(words[position] for position in strongest))





The train dataset we use for the NMF model will be the TF-IDF vectors of each individual review in the dataset. We restrict the max features (vocabulary size) computed by the TF-IDF vectorizer in order to reduce redundancy and for performance reasons.


In [None]:
tfidf_vectorizer = TfidfVectorizer(max_df=0.90, min_df=25, max_features=5000, use_idf=True)
tfidf = tfidf_vectorizer.fit_transform(data["ReviewFull"])

# Create document term matrix showing the TF-IDF score for each word in each review.
# get_feature_names() was deprecated in scikit-learn 1.0 and later removed; use its
# replacement when available and fall back to the old name on older installations.
if hasattr(tfidf_vectorizer, "get_feature_names_out"):
    tfidf_words = list(tfidf_vectorizer.get_feature_names_out())
else:
    tfidf_words = tfidf_vectorizer.get_feature_names()
doc_term_matrix = pd.DataFrame(tfidf.toarray(), columns=list(tfidf_words))
doc_term_matrix

In [None]:
# Fit the NMF model and generate top 5 topics
# NOTE(review): the `alpha` keyword of NMF was deprecated in scikit-learn 1.0 in
# favour of `alpha_W` / `alpha_H` — confirm the installed version still accepts it.
num_top_words = 10
num_topics = 5
nmf = NMF(n_components=num_topics, random_state=0, alpha=.1, init='nndsvd').fit(tfidf)
display_topics(nmf, tfidf_words, num_top_words)


In [None]:
# Repeat the factorization with 10 topics instead of 5 and print the top words of each
# NOTE(review): the `alpha` keyword of NMF was deprecated in scikit-learn 1.0 in
# favour of `alpha_W` / `alpha_H` — confirm the installed version still accepts it.
num_top_words = 10
num_topics = 10
nmf = NMF(n_components=num_topics, random_state=0, alpha=.1, init='nndsvd').fit(tfidf)
display_topics(nmf, tfidf_words, num_top_words)




# Generating BERT Embeddings

As we found from our exploratory data analysis, the textual content and raw sentiment of the reviews is indicative of its corresponding rating. However, we found out that we cannot simply use raw sentiment as training data as it does not capture any contextual information regarding the review. Therefore, we must find a way to encode our data in a way which captures both sentiment and context.

We can create this encoding by leveraging the power of transfer learning and using a pre-trained SOTA deep neural network model: BERT.


In [None]:
# !pip install tensorflow==2.4.1
# !pip install tensorflow-text==2.4.1


In [None]:
# BERT Specific Imports
import tensorflow as tf
import tensorflow_hub as hub 
import tensorflow_text as text 

In [None]:
# Preprocessing layer to generate the tokenized sentences and input mask
bert_preprocess = hub.KerasLayer('https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3')
# Encoder layer which generates word-level and sentence-level 768-dimensional text embeddings
bert = hub.KerasLayer('https://tfhub.dev/google/experts/bert/wiki_books/sst2/2')

In [None]:
data_dir='/content/drive/MyDrive/Colab Notebooks/Sentiment Analysis/amazon_review_full_csv'

In [None]:
# The CSV was written with its index, so it holds three columns: the saved index,
# "Rating" and "ReviewFull". Forcing names=['Rating', 'Title', 'Review'] put the saved
# index into "Rating", which made the labels row ids instead of ratings. Read the real
# header instead and keep only the two columns we need; "ReviewFull" is renamed to
# "Review" because the following cells refer to it by that name.
data = (
    pd.read_csv(join(data_dir, "downsampled_preprocessed_train_50000.csv"))[["Rating", "ReviewFull"]]
    .rename(columns={"ReviewFull": "Review"})
)
data["Rating"] = data["Rating"].astype(int)
# Reviews that became empty after preprocessing are read back as NaN; BERT needs strings
data["Review"] = data["Review"].astype(str)
data


Unnamed: 0,Rating,Title,Review
0,2831949,1,awkward use purchased mouse days works mouse u...
1,20701,1,unfortunatley grades william johnstone book bu...
2,282232,1,dissapointed ordered product went listen casse...
3,1467821,1,color blue clear scratched looked new solid bl...
4,426502,1,zgun digest smith wesson reccomend book incomp...
...,...,...,...
49995,1163757,5,thich provides easy access buddhist practice b...
49996,1551315,5,garmin strret pilot gps totally amazed great w...
49997,657152,5,far favorite album waiting patiently album rel...
49998,609342,5,read book blew away heard dss knew state depar...


In [None]:
# Create train, val, and test sets.
# The embeddings computed from these splits are saved to disk, so the shuffle and
# both splits are seeded: re-running the cell reproduces exactly the same partition.
train_data = shuffle(data, random_state=0)[:10000]
X = train_data["Review"].to_numpy()
y = train_data["Rating"].to_numpy() - 1  # ratings 1..5 -> class ids 0..4
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)
print(f"X_train: {X_train.shape} | X_val: {X_val.shape} | X_test: {X_test.shape} | \n" +
    f"y_train: {y_train.shape} | y_val: {y_val.shape} | y_test: {y_test.shape} | ")



X_train: (7200,) | X_val: (1800,) | X_test: (1000,) | 
y_train: (7200,) | y_val: (1800,) | y_test: (1000,) | 


In [None]:
# Generate train embeddings
def generate_bert_embeddings(data):
    """Return the BERT ``pooled_output`` embeddings for a Series/list of sentences."""
    encoder_inputs = bert_preprocess(data)
    encoder_outputs = bert(encoder_inputs)
    return encoder_outputs['pooled_output']

def generate_embeddings_list(data, num_chunks=100):
    """Generate embeddings of parts of ``data`` individually and return them as a list.

    Created to overcome performance issues: the input is embedded slice by
    slice instead of in a single call.

    Args:
        data: array of sentences; must expose ``shape[0]`` and support slicing.
        num_chunks: number of equally sized slices to embed (default 100).

    Returns:
        A list with ``num_chunks`` entries, each the result of
        ``generate_bert_embeddings`` on one consecutive slice of ``data``.

    Note:
        Each slice holds ``data.shape[0] // num_chunks`` rows, so trailing rows
        are not embedded when the length is not a multiple of ``num_chunks``.
    """
    chunk_size = data.shape[0] // num_chunks
    embeddings_list = []
    for i in tqdm(range(num_chunks)):
        # Slice the argument itself. The previous version sliced the global `X`, so
        # every split was embedded from the first rows of `X`, not from its own rows.
        chunk = data[chunk_size * i: chunk_size * (i + 1)]
        embeddings_list.append(generate_bert_embeddings(chunk))
    return embeddings_list
    
el = generate_embeddings_list(X_train)
embeddings = tf.stack(el)
embeddings


100%|██████████| 100/100 [54:17<00:00, 32.58s/it]


<tf.Tensor: shape=(100, 72, 768), dtype=float32, numpy=
array([[[-0.1948052 , -0.95507926, -0.24179976, ...,  0.05045499,
          0.2423901 , -0.03863095],
        [ 0.35723734, -0.0274303 ,  0.4144501 , ...,  0.2758953 ,
          0.62082785, -0.86638427],
        [ 0.14349917, -0.22837189, -0.23407343, ...,  0.611253  ,
         -0.14926855, -0.7794769 ],
        ...,
        [ 0.44140217, -0.67050815,  0.47997528, ...,  0.5887903 ,
         -0.13771066, -0.6668596 ],
        [ 0.04778063, -0.17479178, -0.5822535 , ..., -0.78787553,
         -0.97851884,  0.371748  ],
        [-0.10331483, -0.31313476, -0.6607549 , ...,  0.58495533,
         -0.56560117,  0.2668065 ]],

       [[ 0.13625519, -0.75547063, -0.21902573, ..., -0.06331038,
         -0.6633688 , -0.03453353],
        [ 0.48480302,  0.11011658, -0.31587446, ..., -0.39897922,
          0.8270069 , -0.1672962 ],
        [ 0.5322556 , -0.03090397,  0.54893243, ..., -0.10614056,
          0.63226426, -0.7130269 ],
        ...

In [None]:
# Merge the (chunks, rows_per_chunk, dim) stack into (rows, dim). The row count is
# inferred (-1) so the cell keeps working if the size of the training split changes.
embeddings = tf.reshape(embeddings, (-1, embeddings.shape[-1]))

In [None]:
# Save train embeddings
import pickle

# Context managers ensure each file is closed (and fully written) even if pickling fails
with open(join(data_dir, "downsampled_shuffled_train_embeddings.pkl"), "wb") as f:
    pickle.dump(embeddings, f)
with open(join(data_dir, "downsampled_shuffled_train_labels.pkl"), "wb") as f:
    pickle.dump(y_train, f)

In [None]:


# Generate and save validation and test embeddings
# Row counts are inferred (-1) rather than hard-coded, so changing the split sizes
# does not require editing this cell; 768 is the width of the BERT pooled output.
print("Generating val data...")
val_embeddings = tf.reshape(tf.stack(generate_embeddings_list(X_val)), (-1, 768))
print("Generating test data...")
test_embeddings = tf.reshape(tf.stack(generate_embeddings_list(X_test)), (-1, 768))

for file_name, payload in [
    ("downsampled_shuffled_val_embeddings.pkl", val_embeddings),
    ("downsampled_shuffled_val_labels.pkl", y_val),
    ("downsampled_shuffled_test_embeddings.pkl", test_embeddings),
    ("downsampled_shuffled_test_labels.pkl", y_test),
]:
    # `with` closes the file even if pickling raises
    with open(join(data_dir, file_name), "wb") as f:
        pickle.dump(payload, f)



Generating val data...


100%|██████████| 100/100 [14:16<00:00,  8.56s/it]


Generating test data...


100%|██████████| 100/100 [08:24<00:00,  5.05s/it]



# Classical ML Models Benchmark

This notebook contains attempts to solve the problem of predicting ratings with classical ML models which support multinomial classification. The scores achieved by these models will serve as a benchmark for the deep neural network based approach.


In [None]:
# Classifier Imports
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, KFold


SEED = 0
CPU_COUNT = multiprocessing.cpu_count()

In [None]:
CPU_COUNT

2

In [None]:
# Wrapper class for a general SKLearn classifier
class Classifier():
    """Thin wrapper around a scikit-learn style classifier.

    Bundles an estimator with a display name, a random seed and the parameter
    grid used to tune it, and records the best parameters and score found.
    """

    def __init__(self, classifier_name, classifier, init_params, param_grid, seed):
        """Instantiate the wrapped estimator.

        Args:
            classifier_name: label used in log messages.
            classifier: estimator class (not an instance); it is called with the
                keyword arguments from ``init_params`` plus ``random_state``.
            init_params: dict of constructor keyword arguments; ``None`` or an
                empty dict means "no extra arguments".
            param_grid: grid handed to ``GridSearchCV`` by ``tune_hyperparameters``.
            seed: value used for ``random_state`` and for the CV shuffling.
        """
        self.classifier_name = classifier_name
        self.seed = seed
        self.param_grid = param_grid

        #Init classifier
        # Work on a copy: writing "random_state" into the caller's dict would leak it
        # into every other object sharing that dict. `or {}` also accepts None, which
        # previously failed on the item assignment below.
        self.init_params = dict(init_params or {})
        self.init_params["random_state"] = seed
        self.classifier = classifier(**self.init_params)

        #Dict to explicitly store best stats
        self.best_stats = {"best_params": None, "best_score": None}

    def fit(self, X, y):
        """Fit the wrapped estimator on ``X`` / ``y``."""
        print(f"Fitting {self.classifier_name} model...")
        self.classifier.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        return self.classifier.predict(X)

    def evaluate(self, X_test, y_test):
        """Return the wrapped estimator's ``score`` on the given test data."""
        return self.classifier.score(X_test, y_test)

    def tune_hyperparameters(self, X, y):
        """Grid-search ``param_grid`` with shuffled 5-fold CV, scored by accuracy.

        Replaces the wrapped estimator with the best one found and stores the
        best parameters and best score in ``best_stats``.
        """
        print(f"Tuning hyperparameters for {self.classifier_name} model...")
        cv = KFold(n_splits=5, random_state=self.seed, shuffle=True)
        gscv = GridSearchCV(self.classifier, self.param_grid, scoring="accuracy", cv=cv, n_jobs=-1)
        gscv.fit(X, y)
        self.classifier = gscv.best_estimator_
        self.best_stats["best_params"], self.best_stats["best_score"] = gscv.best_params_, gscv.best_score_

In [None]:
data_dir = "/content/drive/MyDrive/Colab Notebooks/Sentiment Analysis/amazon_review_full_csv"


def _load_pickle(file_name):
    """Load one pickled object from ``data_dir``, closing the file afterwards."""
    # pickle.load can execute arbitrary code: only use it on files written by this notebook
    with open(join(data_dir, file_name), "rb") as f:
        return pickle.load(f)


# Loading train, val, and test data (BERT text embeddings and corresponding labels)

X_train = _load_pickle("downsampled_shuffled_train_embeddings.pkl").numpy()
y_train = _load_pickle("downsampled_shuffled_train_labels.pkl")
X_val = _load_pickle("downsampled_shuffled_val_embeddings.pkl").numpy()
y_val = _load_pickle("downsampled_shuffled_val_labels.pkl")
X_test = _load_pickle("downsampled_shuffled_test_embeddings.pkl").numpy()
y_test = _load_pickle("downsampled_shuffled_test_labels.pkl")

# Combine train and validation set into one as we use K-Fold cross validation
X_train = np.concatenate([X_train, X_val])
y_train = np.concatenate([y_train, y_val])

print(f"X_train: {X_train.shape} | X_test: {X_test.shape} | \n" +
    f"y_train: {y_train.shape} | y_test: {y_test.shape} | ")



X_train: (9000, 768) | X_test: (1000, 768) | 
y_train: (9000,) | y_test: (1000,) | 


In [None]:
# Create list of classifiers
SEED = 0
param_grids = []

# Create parameter grids for hyperparameter tuning
rf_param_grid = {"max_features": ["sqrt", "log2"],
                    "max_depth" : [3, 6, 8],
                    "criterion" :["gini", "entropy"]     ,
                    "n_jobs": [-1]}

lsvc_param_grid = {"penalty": ["l2"],
                   "C": [0.0001, 0.01, 1.0, 10, 100]}

# The lbfgs solver configured below only supports the l2 penalty; every 'l1' candidate
# would fail to fit and be discarded by the grid search. Switch the solver to 'saga'
# if l1 regularisation should really be searched.
lreg_param_grid = {'penalty' : ['l2'],
                     'C' : np.logspace(-4, 4, 20)}

clf_names = ["RandomForest", "LinearSVC", "LogisticRegression"]
clfs = [RandomForestClassifier, LinearSVC, LogisticRegression]
init_params = [{'n_jobs': CPU_COUNT}, {'multi_class': 'crammer_singer'}, {'multi_class': 'multinomial', 'solver': 'lbfgs'}]
param_grids.extend([rf_param_grid, lsvc_param_grid, lreg_param_grid])


# Pass each model its own constructor arguments; previously an empty dict was passed
# for every model, so the settings in `init_params` were never applied.
classifiers = [
    Classifier(name, model, params, param_grid, SEED)
    for name, model, params, param_grid in zip(clf_names, clfs, init_params, param_grids)
]



In [None]:
[clf.fit(X_train, y_train) for clf in classifiers]

Fitting RandomForest model...


In [None]:
# Score classifier
[clf.evaluate(X_test, y_test) for clf in classifiers]

In [None]:
# Tune hyperparameters
[clf.tune_hyperparameters(X_train, y_train) for clf in classifiers]

In [None]:
# Score tuned clasifiers
[clf.evaluate(X_test, y_test) for clf in classifiers]


In [None]:
# Save models
import os

models_dir = "models/"
# The relative directory does not exist on a fresh runtime; create it before writing
os.makedirs(models_dir, exist_ok=True)
for clf in classifiers:
    # `with` closes the file even if pickling raises
    with open(join(models_dir, f"{clf.classifier_name}.pkl"), "wb") as f:
        pickle.dump(clf, f)

# BERT MODEL

In [None]:
pip install tensorflow-text

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-text
  Downloading tensorflow_text-2.9.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)
[K     |████████████████████████████████| 4.6 MB 21.2 MB/s 
Collecting tensorflow<2.10,>=2.9.0
  Downloading tensorflow-2.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (511.7 MB)
[K     |████████████████████████████████| 511.7 MB 6.5 kB/s 
Collecting keras<2.10.0,>=2.9.0rc0
  Downloading keras-2.9.0-py2.py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 60.1 MB/s 
Collecting tensorflow-estimator<2.10.0,>=2.9.0rc0
  Downloading tensorflow_estimator-2.9.0-py2.py3-none-any.whl (438 kB)
[K     |████████████████████████████████| 438 kB 74.4 MB/s 
[?25hCollecting gast<=0.4.0,>=0.2.1
  Downloading gast-0.4.0-py3-none-any.whl (9.8 kB)
Collecting flatbuffers<2,>=1.12
  Downloading flatbuffers-1.12-py2.py3-none-any.whl (15 kB)

In [None]:
# General Imports
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from os.path import join

# NN-related imports
import tensorflow as tf
import tensorflow_hub as hub 
import tensorflow_text as text 

print(tf.test.is_built_with_cuda())
print(tf.config.list_physical_devices('GPU'))


True
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [None]:
bert_preprocess = hub.KerasLayer('https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3')
bert = hub.KerasLayer('https://tfhub.dev/google/experts/bert/wiki_books/sst2/2')

In [None]:
data_dir = "/content/drive/MyDrive/Colab Notebooks/Sentiment Analysis/amazon_review_full_csv"
data = pd.read_csv(join(data_dir, "downsampled_preprocessed_train_50000.csv"))[["Rating", "ReviewFull"]]
data["ReviewFull"] = data["ReviewFull"].apply(str)
display(data)

Unnamed: 0,Rating,ReviewFull
0,1,awkward use purchased mouse days works mouse u...
1,1,unfortunatley grades william johnstone book bu...
2,1,dissapointed ordered product went listen casse...
3,1,color blue clear scratched looked new solid bl...
4,1,zgun digest smith wesson reccomend book incomp...
...,...,...
49995,5,thich provides easy access buddhist practice b...
49996,5,garmin strret pilot gps totally amazed great w...
49997,5,far favorite album waiting patiently album rel...
49998,5,read book blew away heard dss knew state depar...


In [None]:


def build_model():
    """Build model with custom classifier stacked on top of BERT encoder.

    Raw strings are tokenized by ``bert_preprocess`` and encoded by ``bert``.
    The encoder's ``pooled_output`` then goes through dropout, a 384-unit
    L2-regularised sigmoid layer, dropout again and a 5-unit output layer.

    Returns:
        A ``tf.keras.Model`` mapping a batch of strings to 5 unnormalised
        class scores (logits), one per rating class.
    """
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='txt_input')
    bert_input = bert_preprocess(text_input)
    bert_output = bert(bert_input)
    clf_input = bert_output['pooled_output']
    clf = tf.keras.layers.Dropout(0.1)(clf_input)
    clf = tf.keras.layers.Dense(384, activation='sigmoid', kernel_regularizer='l2')(clf)
    clf = tf.keras.layers.Dropout(0.1)(clf)
    # No activation here: the model is compiled with
    # SparseCategoricalCrossentropy(from_logits=True), which applies the softmax itself.
    # The former sigmoid squashed the scores into (0, 1) before that softmax, which
    # compresses the predicted distribution and weakens the training signal.
    clf = tf.keras.layers.Dense(5, name='clf')(clf)
    return tf.keras.Model(text_input, clf)

model = build_model() 



In [None]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 txt_input (InputLayer)         [(None,)]            0           []                               
                                                                                                  
 keras_layer (KerasLayer)       {'input_mask': (Non  0           ['txt_input[0][0]']              
                                e, 128),                                                          
                                 'input_type_ids':                                                
                                (None, 128),                                                      
                                 'input_word_ids':                                                
                                (None, 128)}                                                  

In [None]:
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = [tf.metrics.SparseCategoricalAccuracy()]
optimizer = tf.keras.optimizers.Adam()

model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [None]:
# Seed the shuffle and both splits so the train/val/test partition, and therefore the
# reported metrics, can be reproduced across runs.
train_data = shuffle(data, random_state=0)[:10000]
X = train_data["ReviewFull"].to_numpy()
y = train_data["Rating"].to_numpy() - 1  # ratings 1..5 -> class ids 0..4
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)
print(f"X_train: {X_train.shape} | X_val: {X_val.shape} | X_test: {X_test.shape} | \n" +
    f"y_train: {y_train.shape} | y_val: {y_val.shape} | y_test: {y_test.shape} | ")

X_train: (7200,) | X_val: (1800,) | X_test: (1000,) | 
y_train: (7200,) | y_val: (1800,) | y_test: (1000,) | 


In [None]:
EPOCHS = 25
BATCH_SIZE = 64
history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=(X_val, y_val))


In [None]:
tf.saved_model.save(model, "models/bert2")



In [None]:
model = tf.keras.models.load_model("models/bert2")

In [None]:
model.evaluate(X_test, y_test)