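# Sentiment Analysis (using BERT)

This notebook fine-tunes a BERT text classifier with ktrain on the IMDB review dataset (positive vs. negative), then saves the trained model's architecture and weights to disk.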

In [1]:
import os.path
import numpy as np
import tensorflow as tf
import ktrain
from ktrain import text

In [2]:
from tensorflow.compat.v1 import ConfigProto, InteractiveSession

# Enable on-demand GPU memory allocation (allow_growth) and create an
# interactive session that carries this configuration.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

## Data Loading

In [3]:
# Fetch the IMDB archive via Keras' get_file helper and ask it to extract it.
dataset = tf.keras.utils.get_file(
    fname="aclImdb_v1.tar.gz",
    origin="https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
    extract=True,
)

# The data is expected in an "aclImdb" folder next to the path get_file returned.
IMDB_DATADIR = os.path.join(os.path.dirname(dataset), "aclImdb")

# Fail fast with a clear message if the expected folder is missing, instead of
# letting a later cell fail with a less obvious error.
# NOTE(review): what get_file returns when extract=True may differ between
# Keras versions - confirm against the installed version if this triggers.
if not os.path.isdir(IMDB_DATADIR):
    raise FileNotFoundError(
        "Expected extracted IMDB data at {!r}; get_file returned {!r}.".format(
            IMDB_DATADIR, dataset
        )
    )

## Train and Test Set

In [4]:
# Class labels to load (presumably the sub-folder names inside each split -
# confirm against the dataset layout).
classes = ['pos', 'neg']
# Names of the training and test splits, in that order.
train_test_names = ['train', 'test']
# Length limit forwarded to the preprocessor.
maxlen = 500

# Load both splits from disk and preprocess them in 'bert' mode; `preproc`
# is the preprocessor object that later cells pass to the model builder.
(X_train, y_train), (X_test, y_test), preproc = text.texts_from_folder(
    datadir=IMDB_DATADIR,
    classes=classes,
    maxlen=maxlen,
    train_test_names=train_test_names,
    preprocess_mode='bert',
)

detected encoding: utf-8
preprocessing train...
language: en


Is Multi-Label? False
preprocessing test...
language: en


## BERT Model

In [5]:
# Build the 'bert' text classifier from the training data and the
# preprocessor returned by texts_from_folder.
model = text.text_classifier(
    name='bert',
    train_data=(X_train, y_train),
    preproc=preproc,
)

Is Multi-Label? False
maxlen is 500
done.


In [6]:
batch_size = 3

# Wrap the model and data in a ktrain learner; the test split is passed as
# the validation data.
learner = ktrain.get_learner(
    model=model,
    train_data=(X_train, y_train),
    val_data=(X_test, y_test),
    batch_size=batch_size,
)

In [7]:
epochs = 1

# Train with the one-cycle policy at a peak learning rate of 2e-5.
# Bind the returned History object to a name so it stays accessible to later
# cells without relying on IPython's `_` / `Out[...]` history.
history = learner.fit_onecycle(lr=2e-5, epochs=epochs)

# Keep the History object as the cell's displayed result.
history



begin training using onecycle policy with max lr of 2e-05...
Train on 25000 samples, validate on 25000 samples


<tensorflow.python.keras.callbacks.History at 0x7ff34c770990>

In [8]:
# Write the model architecture to "model.json".
# NOTE(review): only the architecture and weights are saved in this cell;
# `preproc` is not persisted here - confirm that is intended.
model_json = learner.model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)

# Write the trained weights to "model.h5".
learner.model.save_weights("model.h5")

print("Model Saved!")

Model Saved!
