### Imports

In [1]:
import ktrain
from ktrain import text
import numpy as np
import pandas as pd
import tensorflow as tf
import time 

### Enable GPU memory growth for TensorFlow

In [2]:
# Turn on memory growth for every GPU TensorFlow can see.
# Iterating over the list (instead of indexing [0]) keeps this cell from
# raising IndexError on a machine without a GPU, and applies the same
# setting to all GPUs rather than only the first one.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

### Paths

In [3]:
# Directories used by the notebook, given relative to the working directory:
# 'dataset' holds the input CSV files, 'models' holds saved predictors.
assets_root = '../../assets/text_sentiment_data'
paths = {
    'dataset': f'{assets_root}/datasets',
    'models': f'{assets_root}/models',
}

### Load the dataset

In [24]:
# Read the train and test splits from the dataset directory.
dataset_dir = paths["dataset"]
data_train = pd.read_csv(f'{dataset_dir}/mydata_train.csv', encoding='utf-8')
data_test = pd.read_csv(f'{dataset_dir}/mydata_test.csv', encoding='utf-8')

### Normalizing the emotion labels and exploring the data

In [25]:
# Rename three of the emotion labels, in place, in both splits.
label_renames = {'joy': 'happy', 'sadness': 'sad', 'anger': 'angry'}

for frame in (data_train, data_test):
    for old_label, new_label in label_renames.items():
        frame.loc[frame['Emotion'] == old_label, 'Emotion'] = new_label

In [26]:
# Show the training split after relabelling (columns: Emotion, Text).
data_train

Unnamed: 0,Emotion,Text
0,sad,When my girlfriend's nephew passed away.
1,angry,When my sister lost my favourite jumper at a p...
2,happy,"I'd be delighted to , John . Saturday did you..."
3,angry,"` No , "" she replied briefly , feeling more an..."
4,happy,Sure.Where would you suggest ?
...,...,...
6362,sad,"It 's been our experience that families , and ..."
6363,angry,I felt angry when my friend told me that I had...
6364,angry,I had made an error in planning a programme an...
6365,happy,"Nevertheless , your father ( and mother , too ..."


In [27]:
# Show the test split after relabelling (columns: Emotion, Text).
data_test

Unnamed: 0,Emotion,Text
0,sad,That ’ s a lot of money .
1,neutral,I can't open the door .
2,sad,Well in the meantime you are a biohazard ! I ...
3,angry,Thus if a particularly irate husband pronounce...
4,angry,And she was furious that Mr Clarke did not bre...
...,...,...
2710,happy,After a long time (of no communication with hi...
2711,angry,Then stay away . Nobody's keeping you from do...
2712,happy,Yes . I have . I liked it very much . Like yo...
2713,happy,When I was pressing figures on the curtain clo...


In [31]:
# Raw texts and string labels for each split.
X_train = data_train.Text.tolist()
X_test = data_test.Text.tolist()

y_train = data_train.Emotion.tolist()
y_test = data_test.Emotion.tolist()

# Stack both splits into one frame to inspect the overall class balance.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4
# and removed in pandas 2.0.
data = pd.concat([data_train, data_test], ignore_index=True)

# Class names, listed in the same order as the integer codes in `encoding`.
class_names = ['angry', 'happy', 'neutral', 'sad']

print('size of training set: %s' % (len(data_train['Text'])))
print('size of validation set: %s' % (len(data_test['Text'])))
print(data.Emotion.value_counts())

data.head(10)

size of training set: 6367
size of validation set: 2715
happy      2326
angry      2259
neutral    2254
sad        2243
Name: Emotion, dtype: int64


Unnamed: 0,Emotion,Text
0,sad,When my girlfriend's nephew passed away.
1,angry,When my sister lost my favourite jumper at a p...
2,happy,"I'd be delighted to , John . Saturday did you..."
3,angry,"` No , "" she replied briefly , feeling more an..."
4,happy,Sure.Where would you suggest ?
5,angry,When my room-mates were watching video-tapes a...
6,happy,I can't believe they would play so good .
7,happy,Last night
8,angry,"When I came back to my hostel, my bed was full..."
9,neutral,I did that .


### Encoding the emotion classes

In [32]:
# Integer code assigned to each emotion class.
encoding = {
    'angry': 0,
    'happy': 1,
    'neutral': 2,
    'sad': 3
}

# Encode straight from the DataFrame columns instead of from y_train / y_test.
# Those two names are rebound by this very cell, so reading them back would
# make a second run of the cell fail (KeyError on the already-encoded integers).
y_train = [encoding[label] for label in data_train.Emotion]
y_test = [encoding[label] for label in data_test.Emotion]

### Preparing the train and test sets

In [33]:
# Preprocess the raw texts and integer labels for BERT.
# NOTE: y_train / y_test are rebound below to the values returned by
# texts_from_array, so re-running this cell on its own would pass those
# returned values back in; re-run the encoding cell first.
train_split, test_split, preproc = text.texts_from_array(
    x_train=X_train,
    y_train=y_train,
    x_test=X_test,
    y_test=y_test,
    class_names=class_names,
    preprocess_mode='bert',
    maxlen=350,
    max_features=35000,
)
x_train, y_train = train_split
x_test, y_test = test_split

preprocessing train...
language: en


Is Multi-Label? False
preprocessing test...
language: en


task: text classification


### Creating the model and the learner

In [34]:
# Build the 'bert' text classifier from the preprocessed training data.
model = text.text_classifier(
    'bert',
    train_data=(x_train, y_train),
    preproc=preproc,
)

Is Multi-Label? False
maxlen is 350
done.


In [35]:
# Wrap the model together with its training and validation data in a learner.
learner = ktrain.get_learner(
    model,
    train_data=(x_train, y_train),
    val_data=(x_test, y_test),
    batch_size=4,
)

### Training

In [36]:
# Train with the one-cycle policy: maximum learning rate 2e-5 for 3 epochs.
max_lr = 2e-5
n_epochs = 3
learner.fit_onecycle(max_lr, n_epochs)



begin training using onecycle policy with max lr of 2e-05...
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x14fbdf316a0>

### Validation

In [37]:
# Per-class precision/recall report and confusion matrix on the test split.
learner.validate(
    val_data=(x_test, y_test),
    class_names=class_names,
)

              precision    recall  f1-score   support

       angry       0.84      0.82      0.83       693
       happy       0.88      0.85      0.86       707
     neutral       0.79      0.86      0.82       638
         sad       0.83      0.82      0.83       677

    accuracy                           0.84      2715
   macro avg       0.84      0.84      0.84      2715
weighted avg       0.84      0.84      0.84      2715



array([[570,  18,  38,  67],
       [ 19, 599,  72,  17],
       [ 30,  36, 546,  26],
       [ 63,  27,  32, 555]], dtype=int64)

In [38]:
# Pair the trained model with its preprocessor to get a predictor,
# then read back the class names it reports.
trained_model = learner.model
predictor = ktrain.get_predictor(trained_model, preproc)
classes = predictor.get_classes()

### Saving the model

In [39]:
# Persist the predictor under the models directory.
saved_model_path = f"{paths['models']}/bert_model_2"
predictor.save(saved_model_path)

### Loading the model

In [40]:
# Reload the saved predictor from the models directory and show its classes.
saved_model_path = f"{paths['models']}/bert_model_2"
predictor = ktrain.load_predictor(saved_model_path)
classes = predictor.get_classes()
classes

['angry', 'happy', 'neutral', 'sad']

### Testing the model

In [41]:
# Sample passage, tokenised on single spaces; `message` ends up as the word list.
message = "For this curious child was very fond of pretending to be two people. But it's no use now thought poor Alice to pretend to be two people why there's hardly enough of me left to make one respectable person. Soon her eye fell on the little glass box that was lying under the table she opened it and found in it a very small cake on which the words eat me were beautifully marked in currants. Well I'll eat it said Alice and if.".split(' ')

# Number of words per chunk when the passage is cut into three parts.
word_count = len(message)
splitlen = word_count // 3
splitlen

28

In [42]:
# Cut the word list into three chunks: two of `splitlen` words each,
# and a third holding everything that remains.
split = 2 * splitlen
splitmsg = [
    message[:splitlen],
    message[splitlen:split],
    message[split:],
]

In [43]:
# Join each chunk of words back into a single space-separated string.
splitsent = [" ".join(chunk_words) for chunk_words in splitmsg]
splitsent

["For this curious child was very fond of pretending to be two people. But it's no use now thought poor Alice to pretend to be two people why",
 "there's hardly enough of me left to make one respectable person. Soon her eye fell on the little glass box that was lying under the table she opened",
 "it and found in it a very small cake on which the words eat me were beautifully marked in currants. Well I'll eat it said Alice and if."]

In [44]:
# Classify each chunk separately, collecting the per-class probabilities and
# printing how long each prediction took.
finpredval = []
for chunk in splitsent:
    # perf_counter is the clock intended for measuring short intervals;
    # the timer is restarted for every chunk, so no outer start is needed.
    start_time = time.perf_counter()
    prediction = predictor.predict(chunk, return_proba=True)
    elapsed = time.perf_counter() - start_time
    finpredval.append(prediction)

    print('predicted: {} ({:.2f})'.format(prediction, elapsed))
    print(f"Emotion = {classes[prediction.argmax()]}")

predicted: [0.19706903 0.04027015 0.26506022 0.49760056] (6.10)
Emotion = sad
predicted: [0.01453719 0.6345863  0.13332759 0.21754897] (0.11)
Emotion = happy
predicted: [0.02694552 0.9272913  0.01635365 0.02940957] (0.11)
Emotion = happy


In [45]:
# Show the collected probability arrays, one per chunk.
finpredval

[array([0.19706903, 0.04027015, 0.26506022, 0.49760056], dtype=float32),
 array([0.01453719, 0.6345863 , 0.13332759, 0.21754897], dtype=float32),
 array([0.02694552, 0.9272913 , 0.01635365, 0.02940957], dtype=float32)]

In [46]:
# Average the per-chunk probabilities class by class.
# Dividing by len(finpredval) instead of a literal 3 keeps the average
# correct if the number of chunks ever changes.
finpredval1 = np.sum(finpredval, axis=0) / len(finpredval)
finpredval1

array([0.07951725, 0.5340492 , 0.13824715, 0.24818636], dtype=float32)

In [47]:
# The overall prediction is the class with the highest averaged probability.
best_index = finpredval1.argmax()
finpred = classes[best_index]
finpred

'happy'