Data: https://www.kaggle.com/mustfkeskin/turkish-movie-sentiment-analysis-dataset/code

In [1]:
# Check the GPU colab gave to us.
!nvidia-smi -L

GPU 0: Tesla K80 (UUID: GPU-aaffa2a1-f876-6201-5001-4ba083f68319)


### Preprocess data

In [2]:
# Get data
import pandas as pd

df = pd.read_csv("magaza_yorumlari_duygu_analizi.csv", encoding="utf-16")
df.head()

Unnamed: 0,Görüş,Durum
0,"ses kalitesi ve ergonomisi rezalet, sony olduğ...",Olumsuz
1,hizli teslimat tesekkürler,Tarafsız
2,ses olayı süper....gece çalıştır sıkıntı yok.....,Olumlu
3,geldi bigün kullandık hemen bozoldu hiçtavsiye...,Olumsuz
4,Kulaklığın sesi kaliteli falan değil. Aleti öv...,Olumsuz


In [3]:
# Check the DataFrame to see the number of lines and non-null objects
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11429 entries, 0 to 11428
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Görüş   11426 non-null  object
 1   Durum   11429 non-null  object
dtypes: object(2)
memory usage: 178.7+ KB


In [4]:
# Check value counts to see whether the data is balanced or not
df.Durum.value_counts()

Olumlu      4253
Olumsuz     4238
Tarafsız    2938
Name: Durum, dtype: int64

In [5]:
# Since some nulls might be seen as a float, drop na to not face any problems.
df.dropna(inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11426 entries, 0 to 11428
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Görüş   11426 non-null  object
 1   Durum   11426 non-null  object
dtypes: object(2)
memory usage: 267.8+ KB


In [6]:
# Check the first line in df.Görüş
df.Görüş[0]

"ses kalitesi ve ergonomisi rezalet, sony olduğu için aldım ama 4'de 1 fiyatına çin replika ürün alsaydım çok çok daha iyiydi, kesinlikle tavsiye etmiyorum."

In [7]:
# Remove punctuation for our model to learn better
df['Görüş'] = df['Görüş'].str.replace(r'[^\w\s]+', '')
df.Görüş[0]

'ses kalitesi ve ergonomisi rezalet sony olduğu için aldım ama 4de 1 fiyatına çin replika ürün alsaydım çok çok daha iyiydi kesinlikle tavsiye etmiyorum'

In [8]:
# Lower the inputs for our model to learn better
df["Görüş"] = df["Görüş"].str.lower()
df.head()

Unnamed: 0,Görüş,Durum
0,ses kalitesi ve ergonomisi rezalet sony olduğu...,Olumsuz
1,hizli teslimat tesekkürler,Tarafsız
2,ses olayı süpergece çalıştır sıkıntı yokkablo ...,Olumlu
3,geldi bigün kullandık hemen bozoldu hiçtavsiye...,Olumsuz
4,kulaklığın sesi kaliteli falan değil aleti öve...,Olumsuz


In [9]:
# Import the nltk library and download stopwords
#import nltk

#nltk.download("stopwords")

In [10]:
# Get the stopwords
#from nltk.corpus import stopwords
#
#stop_words = stopwords.words("turkish")
#stop_words[:10]

In [11]:
# Remove stopwords from each line and check the lines
#stop_words = set(stop_words)
#df['Görüş'] = df['Görüş'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop_words)]))
#
#df.Görüş[0]

In [12]:
# Import the library and get the stemmer for Turkish Language
#from TurkishStemmer import TurkishStemmer
#
#stemmer = TurkishStemmer()

In [13]:
# To use stemmer on each word, turn each line into a list
#df['Görüş'] = df['Görüş'].str.split()
#df.head()

In [14]:
# Apply stemmer
#df['Görüş'] = df['Görüş'].apply(lambda x: [stemmer.stem(y) for y in x])
#
#df.head()

In [15]:
#df.Görüş[0]

In [16]:
# Turn each line back to a string (from list)
#df['Görüş'] = df['Görüş'].apply(lambda x: ' '.join(word for word in x))
#
#df.head()

In [17]:
# Get train sentences from df.Görüş
train_sentences = df["Görüş"].tolist()

train_sentences[0]

'ses kalitesi ve ergonomisi rezalet sony olduğu için aldım ama 4de 1 fiyatına çin replika ürün alsaydım çok çok daha iyiydi kesinlikle tavsiye etmiyorum'

In [18]:
# Shuffle the data so our model can learn in a proper way

from sklearn.utils import shuffle
df = shuffle(df)
df.head()

Unnamed: 0,Görüş,Durum
818,ben aldigima pismanim fan yeri cok isiniyor ve...,Olumsuz
3473,beğendim güzel,Olumlu
8322,kendi evime almıştım git gel annem makineyi be...,Olumlu
3962,leş arkadaşlar leş ssd aldık biraz hızlansın d...,Olumsuz
10751,fiyat kalite iyi,Tarafsız


In [19]:
# Check the DataFrame for one last time
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11426 entries, 818 to 10899
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Görüş   11426 non-null  object
 1   Durum   11426 non-null  object
dtypes: object(2)
memory usage: 267.8+ KB


### Input pipeline

In [20]:
# Get the average and the max length of the inputs
import numpy as np

sent_lens = [len(sentence.split()) for sentence in train_sentences]
avg_sent_len = np.mean(sent_lens)
max_sent_len = np.max(sent_lens)
avg_sent_len, max_sent_len

(21.72378785226676, 422)

In [21]:
# How long of a sentence lenght covers 95% of examples?
output_seq_len_95 = int(np.percentile(sent_lens, 95))

output_seq_len_95

64

In [22]:
# How long of a sentence lenght covers 99% of examples?
output_seq_len_99 = int(np.percentile(sent_lens, 99))

output_seq_len_99

120

In [23]:
# Since it is not a large number, 120 is chosen. The GPU that we're using can handle this
output_seq_len = 120

In [24]:
# Get transformers
!pip install transformers

Collecting transformers
  Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 5.3 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 38.0 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)
[K     |████████████████████████████████| 61 kB 457 kB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 41.3 MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 40.8 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
 

In [25]:
# Import AutoTokenizer
from transformers import AutoTokenizer
# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-128k-uncased")

Downloading:   0%|          | 0.00/59.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/386 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.18M [00:00<?, ?B/s]

In [26]:
# Encode inputs
import tensorflow as tf

input_ids = []
attention_mask = []

for txt in df.Görüş.values:
    encoded = tokenizer.encode_plus(
        text=txt, # the sentence to be encoded 
        add_special_tokens=True, # Add [CLS] and [SEP]
        max_length=output_seq_len, # max length of a sentence
        truncation=True, # truncate if sentence length is bigger than max_length
        pad_to_max_length=True, # Add [PAD]s
        return_attention_mask=True, # Generate attention mask
        return_tensors="tf" # return TensorFlow tensors
    )

    # Append input_ids and attention_masks to their own lists
    input_ids.append(encoded["input_ids"])
    attention_mask.append(encoded["attention_mask"])

# Concatenate
input_ids = tf.concat(input_ids, 0)
attention_mask = tf.concat(attention_mask, 0)

print("Original: ", df.Görüş.values[0])
print("Token IDs: ", input_ids[0])



Original:  ben aldigima pismanim fan yeri cok isiniyor ve cok ses cikartiyor
alali 4 ay oluyor 10 kere kullanmamisimdiryinede ekran kapaniyor ve simsiyah oluyor
Token IDs:  tf.Tensor(
[     2   2105  95487   1007 108756   1950   7085   4673   6110  97496
   2012   1946   6110   3072 100210   2019   3110   1987     24   2054
   3419   2562   6143  47020  23208   2025  34088   2302   4587   6331
   2019   1946  57157   3419      3      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0      0

In [27]:
# Convert tokens to ids to check if the encoding operation is done correctly
tokenizer.convert_ids_to_tokens(input_ids[0])

['[CLS]',
 'ben',
 'aldigim',
 '##a',
 'pisman',
 '##im',
 'fan',
 'yeri',
 'cok',
 'isini',
 '##yor',
 've',
 'cok',
 'ses',
 'cikart',
 '##iyor',
 'ala',
 '##li',
 '4',
 'ay',
 'oluyor',
 '10',
 'kere',
 'kullanmam',
 '##isim',
 '##dir',
 '##yin',
 '##ede',
 'ekran',
 'kapan',
 '##iyor',
 've',
 'simsiyah',
 'oluyor',
 '[SEP]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]'

In [28]:
# Check input_ids and shape of input_ids
input_ids, input_ids.shape

(<tf.Tensor: shape=(11426, 120), dtype=int32, numpy=
 array([[    2,  2105, 95487, ...,     0,     0,     0],
        [    2, 86096, 14368, ...,     0,     0,     0],
        [    2,  2676, 21705, ...,     0,     0,     0],
        ...,
        [    2,  6920, 30422, ...,     0,     0,     0],
        [    2,  5303, 13286, ...,     0,     0,     0],
        [    2, 36664,  2327, ...,     0,     0,     0]], dtype=int32)>,
 TensorShape([11426, 120]))

In [29]:
# Check attention_mask and shape of attention_mask
attention_mask, attention_mask.shape

(<tf.Tensor: shape=(11426, 120), dtype=int32, numpy=
 array([[1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        ...,
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0]], dtype=int32)>, TensorShape([11426, 120]))

In [30]:
# Seperate test df from original df
test_df = df.iloc[:1100]
df = df.iloc[1100:]
len(df), len(test_df)

(10326, 1100)

In [31]:
# Seperate test input ids and test attention mask from the original ones
test_input_ids = input_ids[:1100]
test_attention_mask = attention_mask[:1100]

input_ids = input_ids[1100:]
attention_mask = attention_mask[1100:]

input_ids.shape, test_input_ids.shape, attention_mask.shape, test_attention_mask.shape

(TensorShape([10326, 120]),
 TensorShape([1100, 120]),
 TensorShape([10326, 120]),
 TensorShape([1100, 120]))

In [32]:
# One hot encode our labels to use in our models
from sklearn.preprocessing import OneHotEncoder
one_hot_encoder = OneHotEncoder(sparse=False)
labels_one_hot = one_hot_encoder.fit_transform(df["Durum"].to_numpy().reshape(-1,1))
labels_one_hot.shape

(10326, 3)

In [33]:
# One hot encode test labels
one_hot_encoder = OneHotEncoder(sparse=False)
test_labels_one_hot = one_hot_encoder.fit_transform(test_df["Durum"].to_numpy().reshape(-1,1))
test_labels_one_hot.shape

(1100, 3)

In [34]:
# Label encode test labels
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
test_labels_encoded = label_encoder.fit_transform(test_df["Durum"].to_numpy())

test_labels_encoded

array([1, 0, 0, ..., 0, 1, 2])

In [35]:
# Create a TensorFlow Dataset
dataset = tf.data.Dataset.from_tensor_slices((input_ids, attention_mask, labels_one_hot))
test_dataset = tf.data.Dataset.from_tensor_slices((test_input_ids, test_attention_mask, test_labels_one_hot))
dataset, test_dataset

(<TensorSliceDataset shapes: ((120,), (120,), (3,)), types: (tf.int32, tf.int32, tf.float64)>,
 <TensorSliceDataset shapes: ((120,), (120,), (3,)), types: (tf.int32, tf.int32, tf.float64)>)

In [36]:
# Create a function to map our dataset
def map_func(input_ids, masks, labels):
    # We convert our three-item tuple into a two-item tuple where the input item is a dictionary
    return {"input_ids": input_ids,
            "attention_mask": masks}, labels

In [37]:
# Map the dataset using the function we created and check the dataset
dataset = dataset.map(map_func)
test_dataset = test_dataset.map(map_func)
dataset, test_dataset

(<MapDataset shapes: ({input_ids: (120,), attention_mask: (120,)}, (3,)), types: ({input_ids: tf.int32, attention_mask: tf.int32}, tf.float64)>,
 <MapDataset shapes: ({input_ids: (120,), attention_mask: (120,)}, (3,)), types: ({input_ids: tf.int32, attention_mask: tf.int32}, tf.float64)>)

In [38]:
# Get the length of our dataset
len_dataset_unbatched = len(dataset)

In [39]:
# Batch our dataset and drop remainders
batch_size=32
dataset = dataset.batch(batch_size) #.shuffle(10000) , drop_remainder=True
test_dataset = test_dataset.batch(batch_size)

dataset.take, test_dataset

(<bound method DatasetV2.take of <BatchDataset shapes: ({input_ids: (None, 120), attention_mask: (None, 120)}, (None, 3)), types: ({input_ids: tf.int32, attention_mask: tf.int32}, tf.float64)>>,
 <BatchDataset shapes: ({input_ids: (None, 120), attention_mask: (None, 120)}, (None, 3)), types: ({input_ids: tf.int32, attention_mask: tf.int32}, tf.float64)>)

In [40]:
# Split our dataset into train, validation and test datasets
split = 0.85
size = int((input_ids.shape[0] / batch_size) * split)

train_ds = dataset.take(size) # 85% of the dataset
val_ds = dataset.skip(size) # 15% of the dataset

len(dataset), len(train_ds), len(val_ds)

(323, 274, 49)

## Build and train

In [65]:
# Import the model
from transformers import TFAutoModel

bert128k = TFAutoModel.from_pretrained("dbmdz/bert-base-turkish-128k-uncased")

Downloading:   0%|          | 0.00/1.06G [00:00<?, ?B/s]

Some layers from the model checkpoint at dbmdz/bert-base-turkish-128k-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at dbmdz/bert-base-turkish-128k-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [66]:
# Create the model
import tensorflow as tf

# two input layers, we ensure layer name variables match to dictionary keys in TF dataset
input_ids = tf.keras.layers.Input(shape=(120,), name='input_ids', dtype='int32')
mask = tf.keras.layers.Input(shape=(120,), name='attention_mask', dtype='int32')

# we access the transformer model within our bert object using the bert attribute (eg bert.bert instead of bert)
embeddings = bert128k.bert(input_ids, attention_mask=mask)[1]  # access final activations (already max-pooled) [1]
# convert bert embeddings into 3 output classes
x = tf.keras.layers.Dense(1024, activation='relu')(embeddings)
outputs = tf.keras.layers.Dense(3, activation='softmax', name='outputs')(x)

# model
model_1_128k_uncased = tf.keras.Model(inputs=[input_ids, mask], outputs=outputs)

In [67]:
#Get the summary of model_1_128k_uncased
model_1_128k_uncased.summary()

Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, 120)]        0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, 120)]        0           []                               
                                                                                                  
 bert (TFBertMainLayer)         TFBaseModelOutputWi  184345344   ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, 120,                                         

In [68]:
# Get learning rate using PolynomialDecay
from tensorflow.keras.optimizers.schedules import PolynomialDecay

x = len_dataset_unbatched*0.85 # length of train_ds (unbatched)

num_epochs = 3
num_train_steps = x * num_epochs
lr_scheduler = PolynomialDecay(
    initial_learning_rate=5e-5,
    end_learning_rate=0.,
    decay_steps=num_train_steps
)

In [69]:
# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_scheduler)
loss = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')

model_1_128k_uncased.compile(optimizer=optimizer, 
              loss=loss, 
              metrics=[acc])

In [71]:
# Fit the model
history = model_1_128k_uncased.fit(
    train_ds,
    validation_data=val_ds,
    epochs=4,
    verbose=1
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [72]:
# Evaluate the model on test_ds
model_1_128k_uncased.evaluate(test_dataset)



[0.8040470480918884, 0.7290909290313721]

In [73]:
# Download helper functions
!wget https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py

--2021-12-07 01:03:00--  https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10246 (10K) [text/plain]
Saving to: ‘helper_functions.py’


2021-12-07 01:03:00 (52.2 MB/s) - ‘helper_functions.py’ saved [10246/10246]



In [74]:
from helper_functions import calculate_results

In [75]:
# Make predictions
model_1_128k_uncased_pred_probs = model_1_128k_uncased.predict(test_dataset)
model_1_128k_uncased_pred_probs[0], model_1_128k_uncased_pred_probs.shape

(array([6.2831619e-04, 9.7018200e-01, 2.9189603e-02], dtype=float32),
 (1100, 3))

In [76]:
# Convert pred_probs to classes
model_1_128k_uncased_preds = tf.argmax(model_1_128k_uncased_pred_probs, axis=1)
model_1_128k_uncased_preds

<tf.Tensor: shape=(1100,), dtype=int64, numpy=array([1, 0, 0, ..., 0, 1, 1])>

In [77]:
# Calculate model_1 results
model_1_results = calculate_results(y_true=test_labels_encoded,
                                    y_pred=model_1_128k_uncased_preds)
model_1_results

{'accuracy': 72.9090909090909,
 'f1': 0.7316083809301425,
 'precision': 0.7357624165776253,
 'recall': 0.7290909090909091}

In [84]:
import tensorflow as tf

# two input layers, we ensure layer name variables match to dictionary keys in TF dataset
input_ids = tf.keras.layers.Input(shape=(120,), name='input_ids', dtype='int32')
mask = tf.keras.layers.Input(shape=(120,), name='attention_mask', dtype='int32')

# we access the transformer model within our bert object using the bert attribute (eg bert.bert instead of bert)
embeddings = bert128k.bert(input_ids, attention_mask=mask)[0]  # access final activations with [0]

x = tf.keras.layers.LSTM(8, dropout=.4, recurrent_dropout=.4, return_sequences=False) (embeddings) 
# normalize
x = tf.keras.layers.BatchNormalization()(x)
# output
outputs = tf.keras.layers.Dense(3, activation='softmax', name='outputs')(x)

model_2_128k_uncased_LSTM = tf.keras.Model(inputs=[input_ids, mask], outputs=outputs)



In [85]:
model_2_128k_uncased_LSTM.layers[2].trainable = False

# print out model summary
model_2_128k_uncased_LSTM.summary()

Model: "model_8"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, 120)]        0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, 120)]        0           []                               
                                                                                                  
 bert (TFBertMainLayer)         TFBaseModelOutputWi  184345344   ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, 120,                                         

In [86]:
# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) #lr_scheduler
loss = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')

model_2_128k_uncased_LSTM.compile(optimizer=optimizer, 
              loss=loss, 
              metrics=[acc])

In [87]:
# Fit the model
history_model_2 = model_2_128k_uncased_LSTM.fit(
    train_ds,
    validation_data=val_ds,
    epochs=3,
    verbose=1
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [88]:
# Make predictions
model_2_128k_uncased_LSTM_pred_probs = model_2_128k_uncased_LSTM.predict(test_dataset)
model_2_128k_uncased_LSTM_pred_probs[0], model_2_128k_uncased_LSTM_pred_probs.shape

(array([0.9814563 , 0.00938745, 0.00915632], dtype=float32), (1100, 3))

In [89]:
# Convert pred_probs to classes
model_2_128k_uncased_LSTM_preds = tf.argmax(model_2_128k_uncased_LSTM_pred_probs, axis=1)
model_2_128k_uncased_LSTM_preds

<tf.Tensor: shape=(1100,), dtype=int64, numpy=array([0, 2, 0, ..., 0, 0, 0])>

In [90]:
# Calculate model_2 results
model_2_results = calculate_results(y_true=test_labels_encoded,
                                    y_pred=model_2_128k_uncased_LSTM_preds)
model_2_results

{'accuracy': 75.72727272727273,
 'f1': 0.7575059480348981,
 'precision': 0.7581581375171819,
 'recall': 0.7572727272727273}

In [92]:
# two input layers, we ensure layer name variables match to dictionary keys in TF dataset
input_ids = tf.keras.layers.Input(shape=(120,), name='input_ids', dtype='int32')
mask = tf.keras.layers.Input(shape=(120,), name='attention_mask', dtype='int32')

# we access the transformer model within our bert object using the bert attribute (eg bert.bert instead of bert)
embeddings = bert128k.bert(input_ids, attention_mask=mask)[0]  # access final activations with [0]

x = tf.keras.layers.GRU(16, return_sequences=False)(embeddings) 
# normalize
x = tf.keras.layers.BatchNormalization()(x)
# output
outputs = tf.keras.layers.Dense(3, activation='softmax', name='outputs')(x)

model_3_128k_uncased_GRU = tf.keras.Model(inputs=[input_ids, mask], outputs=outputs)

In [93]:
model_3_128k_uncased_GRU.layers[2].trainable = False

# print out model summary
model_3_128k_uncased_GRU.summary()

Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, 120)]        0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, 120)]        0           []                               
                                                                                                  
 bert (TFBertMainLayer)         TFBaseModelOutputWi  184345344   ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, 120,                                         

In [94]:
# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) #lr_scheduler
loss = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')

model_3_128k_uncased_GRU.compile(optimizer=optimizer, 
              loss=loss, 
              metrics=[acc])

In [95]:
# Fit the model
history_model_3 = model_3_128k_uncased_GRU.fit(
    train_ds,
    validation_data=val_ds,
    epochs=3,
    verbose=1
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [96]:
# Make predictions
model_3_128k_uncased_GRU_pred_probs = model_3_128k_uncased_GRU.predict(test_dataset)
model_3_128k_uncased_GRU_pred_probs[0], model_3_128k_uncased_GRU_pred_probs.shape

(array([0.9847973 , 0.00456741, 0.01063525], dtype=float32), (1100, 3))

In [97]:
# Convert pred_probs to classes
model_3_128k_uncased_GRU_preds = tf.argmax(model_3_128k_uncased_GRU_pred_probs, axis=1)
model_3_128k_uncased_GRU_preds

<tf.Tensor: shape=(1100,), dtype=int64, numpy=array([0, 2, 0, ..., 0, 0, 0])>

In [98]:
# Calculate model_3 results
model_3_results = calculate_results(y_true=test_labels_encoded,
                                    y_pred=model_3_128k_uncased_GRU_preds)
model_3_results

{'accuracy': 76.0,
 'f1': 0.7597449525691167,
 'precision': 0.7599282353908192,
 'recall': 0.76}

In [107]:
# two input layers, we ensure layer name variables match to dictionary keys in TF dataset
input_ids = tf.keras.layers.Input(shape=(120,), name='input_ids', dtype='int32')
mask = tf.keras.layers.Input(shape=(120,), name='attention_mask', dtype='int32')

# we access the transformer model within our bert object using the bert attribute (eg bert.bert instead of bert)
embeddings = bert128k.bert(input_ids, attention_mask=mask)[1]  # access final activations with [0]

x = tf.keras.layers.Dense(16, activation="relu")(embeddings)
# output
outputs = tf.keras.layers.Dense(3, activation='softmax', name='outputs')(x)

model_4_dense = tf.keras.Model(inputs=[input_ids, mask], outputs=outputs)

In [108]:
model_4_dense.layers[2].trainable = False

# print out model summary
model_4_dense.summary()

Model: "model_12"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, 120)]        0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, 120)]        0           []                               
                                                                                                  
 bert (TFBertMainLayer)         TFBaseModelOutputWi  184345344   ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, 120,                                        

In [109]:
# Compile the model
optimizer = tf.keras.optimizers.Adam(lr_scheduler)
loss = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')

model_4_dense.compile(optimizer=optimizer, 
              loss=loss, 
              metrics=[acc])

In [110]:
# Fit the model
history_model_4 = model_4_dense.fit(
    train_ds,
    validation_data=val_ds,
    epochs=3,
    verbose=1
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [112]:
# Make predictions
model_4_dense_pred_probs = model_4_dense.predict(test_dataset)
model_4_dense_pred_probs[0], model_4_dense_pred_probs.shape

(array([0.9831699 , 0.00322217, 0.01360795], dtype=float32), (1100, 3))

In [113]:
# Convert pred_probs to classes
model_4_dense_preds = tf.argmax(model_4_dense_pred_probs, axis=1)
model_4_dense_preds

<tf.Tensor: shape=(1100,), dtype=int64, numpy=array([0, 2, 0, ..., 0, 0, 0])>

In [115]:
# Calculate model_3 results
model_4_results = calculate_results(y_true=test_labels_encoded,
                                    y_pred=model_4_dense_preds)
model_4_results

{'accuracy': 75.54545454545455,
 'f1': 0.758077003516226,
 'precision': 0.7617458890607534,
 'recall': 0.7554545454545455}