## Load Libraries

In [1]:
# !pip install tensorflow_text

In [2]:
import os
import numpy as np
import pandas as pd
import re

import tensorflow as tf
from tensorflow.keras import layers
import tensorflow_hub as hub
import tensorflow_text as text

from sklearn import metrics

ROOT = '/content/drive/MyDrive/Project Datasets/Data/DisasterTweets/'

# Read the cleaned train and validation sets

In [3]:
train = pd.read_csv(ROOT+'custom/custom_train.csv')
train.head()

Unnamed: 0,text,target
0,national briefing west california spring oil s...,1
1,max blew tf shots fired catfishmtv,0
2,hellfire song hunchback notre dame reminds lot...,0
3,space_wolverine harm foul somebody needed say,0
4,joel 228 book acts 217 http tco rgpem2tqej,0


In [4]:
test = pd.read_csv(ROOT+'custom/custom_test.csv')
test.head()

Unnamed: 0,text,target
0,bad say im kinda afraid storms storm help,1
1,nbanews soudelor typhoon soudelor taking dead ...,1
2,perspectives terrorism understanding jihadi pr...,1
3,justinbieber arianagrande hear screaming,0
4,whvholst leashless structural problem rather f...,0


# Model Selection and Training

## Model Building

In [5]:
def build_model():
  encoder_url = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"
  preprocess_url = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"

  bert_encoder = hub.KerasLayer(encoder_url)
  bert_preprocess = hub.KerasLayer(preprocess_url)

  text_input = layers.Input(shape=(), dtype=tf.string, name='text')
  preprocessed_text = bert_preprocess(text_input)
  outputs = bert_encoder(preprocessed_text)

  l = layers.Dense(64, activation='relu', name='fully-connected-1')(outputs["pooled_output"])
  l = layers.Dense(1, activation='sigmoid', name='output-layer')(l)

  model = tf.keras.Model(inputs=[text_input], outputs=[l])

  model.compile(
      loss="binary_crossentropy",
      optimizer=tf.keras.optimizers.Adam(),
      metrics=["accuracy"]
  )

  return model

In [6]:
model = build_model()
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               [(None,)]            0                                            
__________________________________________________________________________________________________
keras_layer_1 (KerasLayer)      {'input_word_ids': ( 0           text[0][0]                       
__________________________________________________________________________________________________
keras_layer (KerasLayer)        {'sequence_output':  109482241   keras_layer_1[0][0]              
                                                                 keras_layer_1[0][1]              
                                                                 keras_layer_1[0][2]              
______________________________________________________________________________________________

## Model Training

In [7]:
history = model.fit(
    train.text.to_numpy(),
    train.target.to_numpy(),
    epochs=10,
    validation_data=(test.text.to_numpy(), test.target.to_numpy()),
    callbacks=[
              tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 3)
    ]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


## Model Classification Metrics

In [8]:
preds = model.predict(test.text.to_numpy().reshape(-1,1))
preds = tf.squeeze(tf.round(preds))

In [9]:
print("Confusion Matrix:")
print(metrics.confusion_matrix(preds, test.target.to_numpy()))
print("\nClassification Report:")
print(metrics.classification_report(preds, test.target.to_numpy()))

Confusion Matrix:
[[771 138]
 [315 680]]

Classification Report:
              precision    recall  f1-score   support

         0.0       0.71      0.85      0.77       909
         1.0       0.83      0.68      0.75       995

    accuracy                           0.76      1904
   macro avg       0.77      0.77      0.76      1904
weighted avg       0.77      0.76      0.76      1904



In [10]:
model.save(ROOT+'models/BERT')



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Project Datasets/Data/DisasterTweets/models/BERT/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Project Datasets/Data/DisasterTweets/models/BERT/assets
