<h2 align="center">BERT 12 Layer</h2>

In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
import numpy as np
from numpy import argmax
# !pip install tensorflow_text
import tensorflow_text as text
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import sklearn.metrics as skm
#from google.colab import drive
#drive.mount('/content/gdrive')

In [2]:
# Load the raw reviews and build a class-balanced frame: ratings 1, 3 and 5 are
# each down-sampled to the number of rows that have rating 2.
df = pd.read_csv("review.csv")
df_2 = df[df['rating'] == 2]
rows_per_class = df_2.shape[0]

# A fixed random_state makes the sample reproducible across kernel restarts.
df_down = [
    df[df['rating'] == rating].sample(rows_per_class, random_state=42)
    for rating in (1, 3, 5)
]

# NOTE(review): rows with missing values are dropped *after* sampling (as before),
# so class sizes can end up slightly unequal at this point.
df_balanced = pd.concat(df_down).dropna()

In [3]:
# Reduce dataset size for initial testing: draw the same number of rows from
# every rating class (stratified sample). GroupBy.sample replaces the
# apply+lambda form, and random_state makes the draw reproducible.
# Raises ValueError if any class has fewer than 20000 rows.
df_balanced = df_balanced.groupby('rating', group_keys=False).sample(n=20000, random_state=42)

<h4>Split the data into training and test sets</h4>

In [4]:
# Sanity check: number of rows per rating class after down-sampling.
rating_counts = df_balanced['rating'].value_counts()
rating_counts

1    20000
3    20000
5    20000
Name: rating, dtype: int64

In [5]:
#X_train, X_test, y_train, y_test = train_test_split(df_balanced['review'],df_balanced['rating'], stratify=df_balanced['rating'],test_size=0.2)

In [95]:
#save the data
# X_train.to_csv('X_train.csv',index=False)
# X_test.to_csv('X_test.csv',index=False)
# y_train.to_csv('y_train.csv',index=False)
# y_test.to_csv('y_test.csv',index=False)


In [11]:
# Reload the previously saved train/test split; each CSV holds a single column,
# which squeeze() turns into a Series.
split_files = ('X_train.csv', 'X_test.csv', 'y_train.csv', 'y_test.csv')
X_train, X_test, y_train, y_test = (
    pd.read_csv(csv_path).squeeze() for csv_path in split_files
)


<h4>Import BERT model to get embedding vectors</h4>

In [12]:
# TF-Hub handles for the uncased English BERT preprocessing model and the
# matching 12-layer / 768-hidden / 12-head encoder.
BERT_PREPROCESS_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
BERT_ENCODER_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

bert_preprocess = hub.KerasLayer(BERT_PREPROCESS_URL)
bert_encoder = hub.KerasLayer(BERT_ENCODER_URL)

In [13]:
# One-hot encode the training ratings: one column per class, as required by the
# softmax output layer trained with categorical cross-entropy.
# The dtype is pinned to float32 so the label dtype does not depend on the
# installed pandas version's get_dummies default.
# NOTE(review): this cell overwrites y_train, so it is not safe to run twice
# without reloading the labels first.
dum_df = pd.get_dummies(y_train, dtype=np.float32)
y_train = np.asarray(dum_df)

<h4>Build Model</h4>

In [14]:
# BERT layers: raw review strings -> preprocessed inputs -> encoder outputs.
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
preprocessed_text = bert_preprocess(text_input)
outputs = bert_encoder(preprocessed_text)

# Classification head on the encoder's pooled output: dropout followed by a
# 3-unit softmax (one unit per rating class).
pooled = tf.keras.layers.Dropout(0.1, name="dropout")(outputs['pooled_output'])
class_probs = tf.keras.layers.Dense(3, activation='softmax', name="output")(pooled)

# Assemble the end-to-end model from string input to class probabilities.
# (argmax is already imported in the notebook's import cell, so no re-import here.)
model = tf.keras.Model(inputs=[text_input], outputs=[class_probs])

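Troubleshooting note: for the "Failed to import pydot. You must install pydot and graphviz" ImportError, see https://stackoverflow.com/questions/47605558/importerror-failed-to-import-pydot-you-must-install-pydot-and-graphviz-for-py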

In [15]:
# model.summary()

In [16]:
# Metrics for a 3-class softmax head trained on one-hot labels.
# CategoricalAccuracy compares the arg-max of the prediction with the arg-max of
# the label for each sample. The previous BinaryAccuracy thresholds every output
# unit and scores the three entries independently, which overstates accuracy for
# one-hot targets.
# NOTE(review): Precision/Recall are left as-is; with default settings they pool
# over all output units, so per-class figures should be read from the
# classification report instead.
METRICS = [
      tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall')
]

model.compile(optimizer='adam',
              loss='categorical_crossentropy',  # multi-class, one-hot targets
              metrics=METRICS)

<h4>Train the model</h4>

In [17]:

# Train on the review texts against the one-hot labels for 10 epochs and show
# the returned History object as the cell output.
train_labels = np.asarray(y_train)
history = model.fit(X_train, train_labels, epochs=10)
history

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1bdee1fe580>

In [18]:
# One-hot encode the held-out ratings so they can be arg-maxed alongside the
# model's predicted probabilities.
test_onehot = pd.get_dummies(y_test)
y_test = np.asarray(test_onehot)


In [19]:
# model.evaluate(X_test, y_test)
# Predict class probabilities for the test reviews, reduce both labels and
# predictions to class indices, and print per-class precision/recall/F1.
y_predicted = model.predict(X_test)
true_classes = argmax(y_test, axis=1)
predicted_classes = argmax(y_predicted, axis=1)
print(skm.classification_report(true_classes, predicted_classes))
# y_train_p = model.predict(X_train) #train prediction

              precision    recall  f1-score   support

           0       0.73      0.64      0.68      4000
           1       0.52      0.77      0.62      4000
           2       0.86      0.55      0.67      4000

    accuracy                           0.65     12000
   macro avg       0.70      0.65      0.66     12000
weighted avg       0.70      0.65      0.66     12000



In [20]:

# c = pd.DataFrame(X_test)
# for i in range(len(y_predicted)):
#     print(str(c.iloc[i]))
#     print(str(argmax(y_test[i], axis=0)),str(argmax(y_predicted[i], axis=0)))

# d = pd.DataFrame(X_train)
# for i in range(len(y_train_p)):
#     print(str(d.iloc[i]))
#     print(str(argmax(y_train[i], axis=0)),str(argmax(y_train_p[i], axis=0)))


In [21]:
# cm = confusion_matrix(argmax(y_test, axis = 1) , argmax(y_predicted, axis = 1))
# cm_train = confusion_matrix(argmax(y_train, axis = 1) , argmax(y_train_p, axis = 1))

# print(cm_train)

In [22]:

# cm = skm.multilabel_confusion_matrix(argmax(y_test, axis=1), argmax(y_predicted, axis=1))
# print(cm)


In [23]:
# Spot-check a single hand-written review and print the index of the
# highest-probability class.
sample_review = [" do not buy. this phone is useless. Broken screen."]
man_pred = model.predict(sample_review)
print(argmax(man_pred))

0


In [24]:
# Approach 1: persist only the trained weights to an HDF5 file.
WEIGHTS_PATH = "new_model_20000.h5"
model.save_weights(WEIGHTS_PATH)

# See dep_class.py for how the saved weights are loaded for deployment.

In [25]:
# Approach 2: save the whole trained model to a directory for deployment.
SAVED_MODEL_DIR = 'new_sentiment_model_20000'
model.save(SAVED_MODEL_DIR)
# To use the saved model in another program, load that directory:
#   model = tf.keras.models.load_model('new_sentiment_model_20000')



INFO:tensorflow:Assets written to: new_sentiment_model_20000\assets


INFO:tensorflow:Assets written to: new_sentiment_model_20000\assets


<h4>Inference</h4>

In [26]:
# A few hand-written reviews to sanity-check the trained classifier.
review = [
    "Its amazing,great,awesome",
    "Screen looked pixelated and it doesnt seem as show in the pictures. Moreover the color grading is extremely poor,but I would recommend it",
    "Screen is purple",
]
review_probs = model.predict(review)
print(argmax(review_probs, axis=1))

[2 1 2]


In [31]:
# NOTE(review): this rebinds `model` to a different saved model. The directory
# 'sentiment_model_100000' is not written by any cell visible in this notebook,
# and the recorded prediction output below has five columns rather than three.
PRETRAINED_MODEL_DIR = 'sentiment_model_100000'
model = tf.keras.models.load_model(PRETRAINED_MODEL_DIR)

In [32]:
# Class probabilities from the loaded model for the same sample reviews.
loaded_model_probs = model.predict(review)
loaded_model_probs

array([[0.0178345 , 0.01080635, 0.02355301, 0.10879672, 0.83900946],
       [0.13253807, 0.2947677 , 0.3742915 , 0.15653375, 0.04186901],
       [0.12786381, 0.1228462 , 0.20406692, 0.23231836, 0.31290472]],
      dtype=float32)

In [36]:
# from google.colab import files
# export_dir = 'content/saved_model'
# sentiment_model = 'sentiment_model'
# tf.saved_model.save(model, export_dir=export_dir)

In [37]:
# files.download(export_dir + '/saved_model.pb')

In [38]:
# !zip -r '/content/sentiment_model.zip' '/content/sentiment_model'
# from google.colab import files
# files.download("/content/sentiment_model.zip")

In [39]:
# # serialize model to JSON
# model_json = model1.to_json()
# with open("model.json", "w") as json_file:
# #     json_file.write(model_json)
# # serialize weights to HDF5
# model.save_weights("model.h5")
# print("Saved model to disk")