<a href="https://colab.research.google.com/github/abdulhirshad/Multiclass-classifier-Bert/blob/main/Multi_class_classification_using_Bert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import re
  
# reading given tsv file
with open("train.tsv", 'r') as myfile:  
  with open("train.csv", 'w') as csv_file:
    for line in myfile:
        
      # Replace every tab with comma
      fileContent = re.sub("\t", ",", line)
        
      # Writing into csv file
      csv_file.write(fileContent)

In [5]:
import pandas as pd
import numpy as np


In [6]:
df = pd.read_csv('train.tsv', sep='\t')
df.head()

Unnamed: 0,PhraseId,SentenceId,Phrase,Sentiment
0,1,1,A series of escapades demonstrating the adage ...,1
1,2,1,A series of escapades demonstrating the adage ...,2
2,3,1,A series,2
3,4,1,A,2
4,5,1,series,2


In [7]:
seq_len = 512
num_samples = len(df)

Xids = np.zeros((num_samples,seq_len))
Xmask = np.zeros((num_samples,seq_len))

Xids.shape

(156060, 512)

In [8]:
!pip install transformers



In [9]:
from transformers import BertTokenizer

In [10]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

for i, phrase in enumerate(df['Phrase']):
  tokens = tokenizer.encode_plus(phrase,max_length = seq_len, truncation = True,
                                 padding = 'max_length', add_special_tokens = True,
                                 return_tensors = 'tf')
  
  Xids[i,:]=tokens['input_ids']
  Xmask[i,:]=tokens['attention_mask']

In [11]:
Xids

array([[  101.,   138.,  1326., ...,     0.,     0.,     0.],
       [  101.,   138.,  1326., ...,     0.,     0.,     0.],
       [  101.,   138.,  1326., ...,     0.,     0.,     0.],
       ...,
       [  101.,   170., 25247., ...,     0.,     0.,     0.],
       [  101.,   170., 25247., ...,     0.,     0.,     0.],
       [  101., 22572., 12148., ...,     0.,     0.,     0.]])

In [12]:
Xmask

array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       ...,
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.]])

In [13]:
array = df['Sentiment'].values
array

array([1, 2, 2, ..., 3, 2, 2])

In [14]:
labels = np.zeros((num_samples,array.max()+1))
labels.shape


(156060, 5)

In [15]:
labels[np.arange(num_samples), array] = 1

In [16]:
labels

array([[0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0.],
       ...,
       [0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0.]])

In [17]:
import tensorflow as tf

In [18]:
dataset = tf.data.Dataset.from_tensor_slices((Xids,Xmask,labels))
dataset.take(1)

<TakeDataset shapes: ((512,), (512,), (5,)), types: (tf.float64, tf.float64, tf.float64)>

Map Xids and Xmask into a dictionary

In [19]:
def map_fun(input_ids,masks,labels):
  return{'input_ids': input_ids,'attention_mask': masks}, labels

In [20]:
dataset = dataset.map(map_fun)

In [21]:
dataset.take(1)

<TakeDataset shapes: ({input_ids: (512,), attention_mask: (512,)}, (5,)), types: ({input_ids: tf.float64, attention_mask: tf.float64}, tf.float64)>

In [22]:
batch_size = 16
dataset = dataset.shuffle(10000).batch(batch_size, drop_remainder = True)
dataset.take(1)

<TakeDataset shapes: ({input_ids: (16, 512), attention_mask: (16, 512)}, (16, 5)), types: ({input_ids: tf.float64, attention_mask: tf.float64}, tf.float64)>

In [23]:
split = 0.8

size = int((num_samples/batch_size) * split)

In [24]:
train_ds = dataset.take(size)
test_ds = dataset.skip(size)

del dataset

In [25]:
from transformers import TFAutoModel

bert = TFAutoModel.from_pretrained('bert-base-cased')

bert.summary()

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Model: "tf_bert_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  108310272 
                                                                 
Total params: 108,310,272
Trainable params: 108,310,272
Non-trainable params: 0
_________________________________________________________________


In [26]:
input_ids = tf.keras.layers.Input(shape = (seq_len),name = 'input_ids',dtype='int32')
mask = tf.keras.layers.Input(shape = (seq_len),name = 'attention_mask',dtype='int32')

embeddings = bert.bert(input_ids,attention_mask=mask)[1]

x = tf.keras.layers.Dense(1024,activation='relu')(embeddings)
y = tf.keras.layers.Dense(array.max()+1, activation = 'softmax', name = 'outputs')(x)

In [27]:
model = tf.keras.Model(inputs = [input_ids,mask], outputs = y)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, 512)]        0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, 512)]        0           []                               
                                                                                                  
 bert (TFBertMainLayer)         TFBaseModelOutputWi  108310272   ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, 512,                                           

In [28]:
optimizer = tf.keras.optimizers.Adam(lr = 1e-5,decay = 1e-6)
loss = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')

  super(Adam, self).__init__(name, **kwargs)


In [29]:
model.compile(optimizer = optimizer, loss = loss, metrics = [acc])

In [1]:
a = model.fit(train_ds,validation_data=test_ds,epochs = 1)

NameError: ignored