<a href="https://colab.research.google.com/github/ichrafmoula/IMDB-Movies-Sentiment-Analysis-using-BERT/blob/master/Sentiment_Classification_Using_BERT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# install ktrain on Google Colab
!pip3 install ktrain

In [None]:
import pandas as pd
import numpy as np

import ktrain
from ktrain import text

**Import data**

In [None]:
# Load the pre-split train / test CSV files.
data_train = pd.read_csv('/content/drive/MyDrive/PFE_2021/rapport/data_train.csv', encoding='utf-8')
data_test = pd.read_csv('/content/drive/MyDrive/PFE_2021/rapport/data_test.csv', encoding='utf-8')

# Input texts come from the `Text` column.
X_train = data_train.Text.tolist()
X_test = data_test.Text.tolist()

# Labels come from the `Emotion` column.
y_train = data_train.Emotion.tolist()
y_test = data_test.Emotion.tolist()

# Combined frame, used here for the class distribution and the preview.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
data = pd.concat([data_train, data_test], ignore_index=True)

class_names = ['joy', 'sadness', 'fear', 'anger', 'neutral']

print('size of training set: %s' % (len(data_train['Text'])))
print('size of validation set: %s' % (len(data_test['Text'])))
print(data.Emotion.value_counts())

data.head(10)

In [None]:
# Map each emotion label to an integer class id.
# The ids follow the same order as the entries of `class_names`.
encoding = {
    'joy': 0,
    'sadness': 1,
    'fear': 2,
    'anger': 3,
    'neutral': 4
}

# Integer values for each class.
# Encode straight from the source frames rather than from y_train / y_test, so that
# re-running this cell does not raise a KeyError on labels that were already encoded.
y_train = [encoding[label] for label in data_train.Emotion.tolist()]
y_test = [encoding[label] for label in data_test.Emotion.tolist()]

**Data preprocessing**

In [None]:
# Run ktrain's BERT preprocessing on the raw texts and integer labels.
# The call returns the preprocessed train pair, the preprocessed test pair and
# the preprocessor object, which later cells pass on to the model and predictor.
(x_train, y_train), (x_test, y_test), preproc = text.texts_from_array(
    x_train=X_train,
    y_train=y_train,
    x_test=X_test,
    y_test=y_test,
    class_names=class_names,
    preprocess_mode='bert',
    maxlen=350,
    max_features=35000,
)

**Training and validation**

In [None]:
from keras import metrics

In [None]:
# Build the BERT text classifier from the preprocessed training data.
# The metrics get explicit names so the training-history keys stay 'recall' and
# 'precision'. Without them Keras auto-suffixes the names (e.g. 'recall_1') whenever
# new metric objects are created, for instance when this cell is re-run, and
# learner.plot('recall') / learner.plot('precision') can no longer find them.
model = text.text_classifier(
    'bert',
    train_data=(x_train, y_train),
    metrics=['accuracy', metrics.Recall(name='recall'), metrics.Precision(name='precision')],
    preproc=preproc,
)

In [None]:
# Print the layer-by-layer summary of the classifier built above.
model.summary()

In [None]:
# Render the model architecture to a PNG file.
# plot_model is imported from the public keras.utils namespace; the private
# keras.utils.vis_utils module path is not available in newer Keras releases.
from keras.utils import plot_model
plot_model(model, to_file='model_plot_bert.png', show_shapes=True, show_layer_names=True)


In [None]:
# Wrap the model and both data splits in a ktrain learner.
# The test split serves as the validation data during training.
learner = ktrain.get_learner(
    model,
    train_data=(x_train, y_train),
    val_data=(x_test, y_test),
    batch_size=6,
)

In [None]:
# Optional: uncomment to run the learning-rate finder before training.
# learner.lr_find(show_plot=True, max_epochs=2)

In [None]:
# Train with the one-cycle policy: learning rate 2e-5, 4 epochs.
learner.fit_onecycle(lr=2e-5, epochs=4)

In [None]:
# Evaluate the trained model on the test split (no class names passed here).
# NOTE(review): a later cell repeats this call with class_names - confirm both are needed.
learner.validate(val_data=(x_test, y_test))

In [None]:
# Evaluate with the learner's defaults.
# NOTE(review): presumably this uses the val_data given to get_learner - confirm against ktrain docs.
learner.evaluate()

In [None]:
# Plot the training history with the default plot type.
# NOTE(review): presumably the loss curves - confirm against ktrain docs.
learner.plot()

In [None]:
# Evaluate on the training split, labelled with class_names.
learner.validate(val_data=(x_train, y_train), class_names=class_names)

In [None]:
# Evaluate on the test split, labelled with class_names.
learner.validate(val_data=(x_test, y_test), class_names=class_names)

In [None]:

# Plot the 'accuracy' metric from the training history.
learner.plot('accuracy')

In [None]:
# Plot the 'recall' metric from the training history.
# NOTE(review): this relies on the Recall metric being recorded under the name 'recall' - verify.
learner.plot('recall')

In [None]:
# Plot the 'precision' metric from the training history.
# NOTE(review): this relies on the Precision metric being recorded under the name 'precision' - verify.
learner.plot('precision')

In [None]:
# Bundle the trained model with its preprocessor into a predictor that accepts raw text.
predictor = ktrain.get_predictor(learner.model, preproc)
# Display the classes known to the predictor.
predictor.get_classes()

In [None]:
# Try the predictor on a single raw example.
message="I hate the new #iphone upgrade. Won't let me download apps. #ugh #apple sucks"
prediction = predictor.predict(message)
# Last expression of the cell: displays the prediction.
prediction

In [None]:
# Save the predictor to Google Drive for later reuse.
predictor.save("/content/drive/MyDrive/PFE_2021/rapport/bert")