In [40]:
!pip install tensorflow




[notice] A new release of pip is available: 23.0 -> 23.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [41]:
import tensorflow as tf
import logging
from tensorflow.keras.layers import (
    Dense,
    Flatten,
    Conv1D,
    Dropout,
    Input,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model
from tensorflow.keras import regularizers
from transformers import BertTokenizer, TFBertModel
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
tqdm.pandas()
import re
import random

In [73]:
# Hyperparameters shared by the tokenization, dataset and model cells.
max_length = 20  # token length every text is padded/truncated to (see bert_encode)
batch_size = 16  # examples per batch in the tf.data pipelines
dev_size = 0.1  # NOTE(review): not referenced in the visible notebook — confirm it is still needed
num_class = 6  # number of emotion labels: anger, fear, joy, love, sadness, surprise

In [74]:
# Hugging Face checkpoint name; it is used for the tokenizer here and again for the
# encoder loaded inside bert_tweets_model().
model_name = "bert-base-multilingual-cased"
tokenizer = BertTokenizer.from_pretrained(model_name)

In [75]:
# Load the training split: one "<text>;<label>" record per line, no header row.
# Forward slashes keep the path portable; a backslash is a path separator only on
# Windows, and the validation file later in this notebook already uses "/".
df_train = pd.read_csv(
    'dataset/train.txt',
    header=None,
    sep=';',
    names=['Input', 'Sentiment'],
    encoding='utf-8',
)
df_train.head()

Unnamed: 0,Input,Sentiment
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [76]:
# Load the test split: one "<text>;<label>" record per line, no header row.
# Forward slashes keep the path portable; a backslash is a path separator only on
# Windows, and the validation file later in this notebook already uses "/".
df_test = pd.read_csv(
    'dataset/test.txt',
    header=None,
    sep=';',
    names=['Input', 'Sentiment'],
    encoding='utf-8',
)
df_test.head()

Unnamed: 0,Input,Sentiment
0,im feeling rather rotten so im not very ambiti...,sadness
1,im updating my blog because i feel shitty,sadness
2,i never make her separate from me because i do...,sadness
3,i left with my bouquet of red and yellow tulip...,joy
4,i was feeling a little vain when i did this one,sadness


In [77]:
# Encode the emotion names as integer class ids (alphabetical order).
# The result is assigned back to the column: `df_train['Sentiment'].replace(...,
# inplace=True)` is chained assignment, which pandas warns about and which leaves
# the DataFrame unchanged when copy-on-write is active.
# The final cast guarantees a real integer column and fails fast (ValueError) if a
# label outside the six known emotions is present.
df_train['Sentiment'] = (
    df_train['Sentiment']
    .replace({"anger": 0, "fear": 1, "joy": 2, "love": 3, "sadness": 4, "surprise": 5})
    .astype("int64")
)

In [78]:
# Encode the emotion names as integer class ids (alphabetical order).
# The result is assigned back to the column: `df_test['Sentiment'].replace(...,
# inplace=True)` is chained assignment, which pandas warns about and which leaves
# the DataFrame unchanged when copy-on-write is active.
# The final cast guarantees a real integer column and fails fast (ValueError) if a
# label outside the six known emotions is present.
df_test['Sentiment'] = (
    df_test['Sentiment']
    .replace({"anger": 0, "fear": 1, "joy": 2, "love": 3, "sadness": 4, "surprise": 5})
    .astype("int64")
)

In [79]:
# Missing values per column for both splits. A cell renders only its last
# expression, so the two counts are combined into one table instead of silently
# discarding the training-split result.
pd.DataFrame(
    {
        "train": df_train.isnull().sum(),
        "test": df_test.isnull().sum(),
    }
)

Input        0
Sentiment    0
dtype: int64

In [80]:
# Remove exact duplicate rows from each split; the surviving rows keep their
# original index labels.
df_train = df_train.drop_duplicates()
df_test = df_test.drop_duplicates()

In [81]:
# (rows, columns) of the training split after de-duplication.
df_train.shape

(15999, 2)

In [82]:
# (rows, columns) of the test split after de-duplication.
df_test.shape

(2000, 2)

In [83]:
# Raw text and integer-encoded emotion labels of the training split.
x_train = df_train["Input"]
y_train = df_train["Sentiment"]

In [84]:
# Raw text and integer-encoded emotion labels of the held-out test split.
# These must come from df_test: building them from df_train would make the
# validation data passed to model.fit() identical to the training data, so
# val_loss / val_accuracy would say nothing about generalization.
x_test = df_test["Input"]
y_test = df_test["Sentiment"]

In [85]:
import pickle
# Persist the tokenizer object to disk; the following cell reads the same file back.
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [86]:
import pickle
# Reload the tokenizer from 'tokenizer.pickle' and rebind the global `tokenizer`.
# NOTE(review): pickle.load can execute arbitrary code from the file — only load
# pickles that this notebook produced itself.
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

In [87]:
def bert_encode(data):
    """Tokenize a collection of texts into fixed-length BERT input ids.

    Args:
        data: Iterable of strings, e.g. a list or a pandas Series of sentences.

    Returns:
        The tokenizer's ``input_ids`` wrapped in ``tf.constant``; every row is
        padded or truncated to ``max_length`` tokens.

    Note:
        Only ``input_ids`` are returned; any attention mask produced by the
        tokenizer is discarded.
        NOTE(review): without a mask the encoder presumably attends to padding
        positions as well — confirm this is intended.
    """
    # Materialize the input as a plain list: the batch tokenizer API is documented
    # for lists of strings, and a list also supports one-shot iterables such as
    # generators.
    texts = list(data)
    tokens = tokenizer.batch_encode_plus(
        texts, max_length=max_length, padding="max_length", truncation=True
    )
    return tf.constant(tokens["input_ids"])

In [88]:

# Tokenize both splits and one-hot encode the integer labels.
train_encoded = bert_encode(x_train)
dev_encoded = bert_encode(x_test)

train_labels = tf.keras.utils.to_categorical(y_train.values, num_classes=num_class)
dev_labels = tf.keras.utils.to_categorical(y_test.values, num_classes=num_class)

# Order matters: cache the raw examples first, then shuffle, then batch.
# Caching *after* shuffle/batch stores the first epoch's order and batch
# composition and replays it unchanged in every later epoch. The shuffle buffer
# spans the whole split so that the shuffle is uniform.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_encoded, train_labels))
    .cache()
    .shuffle(len(train_labels))
    .batch(batch_size)
)

# Evaluation metrics do not depend on example order, so the dev split is only
# batched and cached.
dev_dataset = (
    tf.data.Dataset.from_tensor_slices((dev_encoded, dev_labels))
    .batch(batch_size)
    .cache()
)

In [90]:
def bert_tweets_model(hidden_units=(40, 30, 20), dropout_rate=0.3):
    """Build the BERT-based emotion classifier.

    The pretrained encoder's first output (its last hidden states) is flattened
    and passed through a stack of Dense(relu) + Dropout blocks, followed by a
    softmax layer with ``num_class`` outputs.

    Args:
        hidden_units: Sizes of the hidden Dense layers of the classification
            head, in order. Defaults to the original ``(40, 30, 20)`` stack.
        dropout_rate: Dropout rate applied after every hidden Dense layer.

    Returns:
        An uncompiled ``Model`` mapping ``input_ids`` of shape ``(max_length,)``
        to class probabilities.
    """
    # NOTE(review): output_attentions=True asks the encoder to also return its
    # attention maps, but only output [0] is consumed below — confirm it is needed.
    bert_encoder = TFBertModel.from_pretrained(model_name, output_attentions=True)
    input_word_ids = Input(
        shape=(max_length,), dtype=tf.int32, name="input_ids"
    )
    last_hidden_states = bert_encoder(input_word_ids)[0]

    # Classification head: flatten all token states, then Dense + Dropout blocks.
    net = Flatten()(last_hidden_states)
    for units in hidden_units:
        net = Dense(units, activation="relu")(net)
        net = Dropout(dropout_rate)(net)

    output = Dense(num_class, activation="softmax")(net)
    return Model(inputs=input_word_ids, outputs=output)

In [91]:
# Instantiate the classifier and configure training: Adam with a small learning
# rate, categorical cross-entropy on the one-hot labels, accuracy as the metric.
model = bert_tweets_model()
adam_optimizer = Adam(learning_rate=1e-5)
model.compile(
    optimizer=adam_optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

Downloading (…)"tf_model.h5";:   0%|          | 0.00/1.08G [00:00<?, ?B/s]

Some layers from the model checkpoint at bert-base-multilingual-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-multilingual-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_ids (InputLayer)      [(None, 20)]              0         
                                                                 
 tf_bert_model (TFBertModel)  TFBaseModelOutputWithPoo  177853440
                             lingAndCrossAttentions(l            
                             ast_hidden_state=(None,             
                             20, 768),                           
                              pooler_output=(None, 76            
                             8),                                 
                              past_key_values=None, h            
                             idden_states=None, atten            
                             tions=((None, 12, None,             
                             20),                                
                              (None, 12, None, 20),          

In [92]:
# Stop training once validation loss stops improving, then roll the model back to
# the weights of its best epoch.
# `patience` has to be smaller than the number of epochs passed to fit(): with the
# previous value of 20 and epochs=20 the callback could never trigger, and with
# restore_best_weights=False the last (not the best) weights were kept.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0.0001,
    patience=3,
    verbose=1,
    mode="auto",
    baseline=None,
    restore_best_weights=True,
)

In [None]:
# Train the classifier. `train_dataset` and `dev_dataset` are already batched
# tf.data pipelines, so `batch_size` is not passed to fit() (Keras documents that
# it must not be specified for dataset inputs). `callbacks` takes a list of
# callback instances.
history = model.fit(
    train_dataset,
    epochs=20,
    validation_data=dev_dataset,
    verbose=1,
    callbacks=[early_stopping],
)

In [None]:
# Write the trained model to 'emotion_model.h5', replacing any existing file.
model.save('emotion_model.h5' , overwrite=True)

In [None]:
# Reload the saved model; custom_objects maps the name "TFBertModel" to its class
# so the loader can resolve the encoder layer.
new_model = tf.keras.models.load_model('emotion_model.h5',custom_objects={"TFBertModel": TFBertModel})

In [None]:
import matplotlib.pyplot as plt
def plot_graphs(history, string):
    """Plot a training metric and its validation counterpart per epoch.

    Args:
        history: Object exposing a ``history`` mapping from metric name to
            per-epoch values (such as the value returned by ``model.fit``).
        string: Name of the metric to plot, e.g. ``"accuracy"``; the validation
            curve is read from ``"val_" + string``.
    """
    series_names = [string, "val_" + string]
    # Draw the training curve first, then the validation curve, on the same axes.
    for series_name in series_names:
        plt.plot(history.history[series_name])
    plt.title('Emotional model')
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend(series_names)
    plt.show()
# Training vs. validation curves for both quantities tracked during fit().
plot_graphs(history, "accuracy")
plot_graphs(history, "loss")

In [None]:
# Load the split used for the final evaluation of the reloaded model:
# one "<text>;<label>" record per line, no header row.
val = pd.read_csv(
    'dataset/val.txt',
    header=None,
    sep=';',
    names=['Input', 'Sentiment'],
    encoding='utf-8',
)
# Preview only the first rows, as the other load cells do, instead of rendering
# the whole frame.
val.head()

In [None]:
# Encode the emotion names as integer class ids (alphabetical order).
# The result is assigned back to the column: `val['Sentiment'].replace(...,
# inplace=True)` is chained assignment, which pandas warns about and which leaves
# the DataFrame unchanged when copy-on-write is active.
# The final cast guarantees a real integer column and fails fast (ValueError) if a
# label outside the six known emotions is present.
val['Sentiment'] = (
    val['Sentiment']
    .replace({"anger": 0, "fear": 1, "joy": 2, "love": 3, "sadness": 4, "surprise": 5})
    .astype("int64")
)

In [None]:
# Raw text and emotion labels of the final evaluation split.
x_val = val["Input"]
y_val = val["Sentiment"]

In [None]:
# Tokenize the evaluation texts and batch them; no labels are attached because
# this dataset is only used for prediction.
test_encoded = bert_encode(x_val)
test_dataset = tf.data.Dataset.from_tensor_slices(test_encoded).batch(batch_size)

In [None]:
# Class probabilities for every evaluation example. `test_dataset` is already
# batched, so `batch_size` is not passed to predict() (Keras documents that it
# must not be specified for dataset inputs).
predicted = new_model.predict(test_dataset)
# Index of the most probable class per example (multi-class ids 0..num_class-1,
# despite the variable's name).
predicted_binary = np.argmax(predicted, axis=-1)

In [None]:
from sklearn import metrics

# Fraction of evaluation examples whose predicted class equals the true label.
accuracy = metrics.accuracy_score(y_val, predicted_binary)
print(f'Accuracy: {accuracy:.3f}')

In [None]:
# Per-class report computed from the true labels and predicted class ids.
print('Classification report')
print(metrics.classification_report(y_val, predicted_binary))