In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

model_name = 'bert-base-uncased'
# run data_processing.ipynb to generate the train, val, test sets here
# load and split train, val, test, (X, y)
TRAIN_DATA = '../twitter-sentiment-classification/data/training.1440000.csv'
VAL_DATA = '../twitter-sentiment-classification/data/validation.80000.csv'
TEST_DATA = '../twitter-sentiment-classification/data/test.80000.csv'

In [3]:
df_train = pd.read_csv(TRAIN_DATA)
df_val = pd.read_csv(VAL_DATA)
df_test = pd.read_csv(TEST_DATA)

X_train, y_train = df_train['text'], df_train['target']
X_val, y_val = df_val['text'], df_val['target']
X_test, y_test = df_test['text'], df_test['target']

In [8]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from transformers import BertTokenizer, TFBertModel
max_length = 100
def bert_tweets_model():
    bert_encoder = TFBertModel.from_pretrained(model_name)
    input_word_ids = tf.keras.Input(shape=(max_length,), dtype=tf.int32, name="input_ids")
    last_hidden_states = bert_encoder(input_word_ids)[0]    
    x = tf.keras.layers.LSTM(100, dropout=0.3, recurrent_dropout=0.3)(last_hidden_states)
    output = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.Model(inputs=input_word_ids, outputs=output)
    
    return model


In [4]:
encoder = TFBertModel.from_pretrained(model_name)

tokenizer = BertTokenizer.from_pretrained(model_name)


Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [5]:

# hyperparameters
max_length = 100
batch_size = 50
def bert_encode(data):
    tokens = tokenizer.batch_encode_plus(data, max_length=max_length, padding='max_length', truncation=True)
    
    return tf.constant(tokens['input_ids'])
train_encoded = bert_encode(X_train[:1000])
train_target = y_train[:1000]
val_encoded = bert_encode(X_val[:1000])
val_target = y_val[:1000]

In [6]:
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_encoded, train_target))
    .shuffle(100)
    .batch(batch_size)
)

val_dataset = (
    tf.data.Dataset
    .from_tensor_slices((val_encoded, val_target))
    .shuffle(100)
    .batch(batch_size)
)


In [9]:
model = bert_tweets_model()

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [10]:
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(loss='binary_crossentropy',optimizer=adam_optimizer,metrics=['accuracy'])
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_ids (InputLayer)      [(None, 100)]             0         
                                                                 
 tf_bert_model_1 (TFBertMode  TFBaseModelOutputWithPoo  109482240
 l)                          lingAndCrossAttentions(l            
                             ast_hidden_state=(None,             
                             100, 768),                          
                              pooler_output=(None, 76            
                             8),                                 
                              past_key_values=None, h            
                             idden_states=None, atten            
                             tions=None, cross_attent            
                             ions=None)                          
                                                             

In [None]:
history = model.fit(
    train_dataset,
    batch_size=batch_size,
    epochs=3,
    validation_data=val_dataset,
    verbose=2)

Epoch 1/3
