# Sentiment Analysis with BERT
Setting up the environment

In [None]:
# Install/upgrade KerasNLP, TensorFlow and protobuf with pip.
# The leading `!` runs each line as a shell command from the notebook (e.g. on Google Colab).
!pip install --upgrade keras_nlp tensorflow
!pip install --upgrade protobuf

In [None]:
import pandas as pd
import numpy as np
import keras_nlp
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical

## 1. Import Data
* Credits: IMDB Training data taken from https://github.com/Ankit152/IMDB-sentiment-analysis

In [None]:
# Read the IMDB reviews CSV straight from GitHub into a DataFrame.
data_train = pd.read_csv(
    'https://raw.githubusercontent.com/Ankit152/IMDB-sentiment-analysis/master/IMDB-Dataset.csv'
)

# Keep only the first 1000 rows for training: the texts come from the
# `review` column and the labels from the `sentiment` column.
X_train = data_train.iloc[:1000]['review'].tolist()
y_train = data_train.iloc[:1000]['sentiment'].tolist()

# Print how often each distinct review text occurs within the first 10000 rows.
print(data_train.iloc[:10000]['review'].value_counts())

In [None]:
# Lookup table from sentiment string to integer class id.
# NOTE(review): the SST-2 benchmark conventionally uses 1 = positive and
# 0 = negative, i.e. the opposite of this mapping -- confirm this is intended
# given that an `*_sst2` preset is loaded further down.
encoding = dict(positive=0, negative=1)

# Swap every string label in y_train for its class id; a label that is not a
# key of `encoding` raises KeyError.
y_train = list(map(encoding.__getitem__, y_train))


## 2. Data preprocessing and Training

* BERT can handle a maximum input sequence length of 512 tokens

In [None]:
# Build the preprocessor and a two-class classifier from the same preset;
# the classifier is handed the preprocessor so it can be fed raw strings.
preprocessor = keras_nlp.models.BertPreprocessor.from_preset(
    "bert_tiny_en_uncased_sst2"
)
classifier = keras_nlp.models.BertClassifier.from_preset(
    "bert_tiny_en_uncased_sst2",
    num_classes=2,
    preprocessor=preprocessor,
)

# Peek at the first three training texts and their encoded labels.
print(X_train[0:3])
print(y_train[0:3])

# Baseline: score one sentence before any training is run in this notebook.
message = ['I went for a 5Km walk.']
prediction = classifier.predict(message)
print(prediction)

# First training pass over the training lists.
classifier.fit(x=X_train, y=y_train, batch_size=10, epochs=2)

# Score a handful of probe sentences with the trained classifier. Each probe
# is a one-element list, exactly as it is passed to `predict`.
for message in (
    ['I just lost my job.'],
    ['What an amazing weather it is today.'],
    ['I was kicked out of my home at night'],
    ['I went for a 5Km walk.'],
):
    prediction = classifier.predict(message)
    print(prediction)


In [None]:
# Train the same classifier further: five more epochs on the same data.
classifier.fit(
    x=X_train,
    y=y_train,
    batch_size=10,
    epochs=5,
)

#### Testing with other inputs

In [None]:
# Run the classifier on a few hand-written sentences and print the raw
# prediction array for each one. Each probe is a one-element list.
for message in (
    ['I just lost my job.'],
    ['What an amazing weather it is today.'],
    ['I was kicked out of my home at night'],
    ['I went for a 5Km walk.'],
):
    prediction = classifier.predict(message)
    print(prediction)

[[-1.3162708  1.394291 ]]
[[ 0.05885917 -0.22809342]]
[[-0.950709   1.0482234]]
[[ 0.7050017  -0.69958615]]


## 4. Saving the model


In [None]:
# Mount Google Drive at /content/gdrive/ so that files written under that
# path land in Drive. `google.colab` is only importable inside Colab.
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [None]:
# List the contents of the mounted MyDrive folder (notebook shell command).
!ls /content/gdrive/MyDrive

In [None]:
# Destination of the saved classifier on the mounted Google Drive.
# The `.keras` extension is required: Keras 3 (pulled in by the
# `pip install --upgrade` cell) raises ValueError from `Model.save` when the
# path ends in neither `.keras` nor `.h5`. The string is a plain literal
# because it has no placeholders to format.
path = "/content/gdrive/MyDrive/bert_model_sentiment_kerasNLP.keras"

# Write the whole model to `path` in the native Keras format.
classifier.save(path)

In [None]:
# Reload the classifier that was saved to `path` in the previous cell.
# NOTE(review): this imports from the standalone `keras` package, whereas the
# top of the notebook uses `tensorflow.keras` -- confirm both resolve to the
# same Keras installation in the target environment.
from keras.models import load_model
# `model` is the deserialized copy; `classifier` is left untouched.
model = load_model(path)


In [None]:
# Repeat the probe predictions with the reloaded `model`, so its output can
# be compared with what the in-memory classifier printed earlier.
for message in (
    ['I just lost my job.'],
    ['What an amazing weather it is today.'],
    ['I was kicked out of my home at night'],
    ['I went for a 5Km walk.'],
):
    prediction = model.predict(message)
    print(prediction)