In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so files stored
# on Drive can be read through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%pip install -q transformers

In [None]:
import pandas as pd
import numpy as np
import re
from transformers import BertTokenizer, BertModel
from keras.preprocessing.sequence import pad_sequences

In [None]:
# Load the intent dataset from the mounted Drive and preview the first rows.
# (For a local run, point DATA_PATH at 'final_intent_df.csv' instead.)
DATA_PATH = '/content/drive/MyDrive/final_intent_df.csv'
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Class balance: number of rows for each distinct value of the 'pred' column.
df['pred'].value_counts()

In [None]:
from transformers import BertTokenizer

# Load the pretrained "bert-base-cased" tokenizer; it is used below to encode
# the queries.
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")

In [None]:
# Encode every query into its list of token ids (special tokens included).
input_ids = [
    tokenizer.encode(query_text, add_special_tokens=True)
    for query_text in df['Query']
]

In [None]:
vocab_size = tokenizer.vocab_size
# Length, in tokens, of the longest encoded query.
max_len = max(map(len, input_ids))

In [None]:
# Assign each distinct value of 'pred' an integer id, in order of first
# appearance, and display the resulting mapping.
intent_names = df['pred'].unique()
intent_map = {label: idx for idx, label in enumerate(intent_names)}
intent_map

In [None]:
# Translate each row's 'pred' label into its integer id from intent_map.
intent_val = df['pred'].map(intent_map).values

In [None]:
# Display the tokenizer's vocabulary size.
tokenizer.vocab_size

In [None]:
%pip install -q tensorflow

In [None]:
from transformers  import TFBertModel, BertConfig, BertTokenizerFast
from tensorflow.keras.layers import Input, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import InverseTimeDecay
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical


In [None]:
# Checkpoint name shared by the config, tokenizer and model.
model_name = 'bert-base-cased'
max_length = max_len

# Load the checkpoint's configuration with hidden-state outputs switched off.
config = BertConfig.from_pretrained(model_name, output_hidden_states=False)

# From here on `tokenizer` refers to the fast tokenizer for the same checkpoint.
tokenizer = BertTokenizerFast.from_pretrained(model_name, config=config)



In [None]:
# Load the pretrained TensorFlow BERT model for `model_name` using `config`.
transformer_model = TFBertModel.from_pretrained(model_name, config = config)

In [None]:
# Take the first layer of the loaded model so it can be wired into a custom
# Keras functional model.
bert = transformer_model.layers[0]

# Symbolic inputs of fixed width max_len. Note that this rebinds `input_ids`,
# which earlier held the list of encoded queries.
input_ids = Input(shape=(max_len,), name='input_ids', dtype='int32')
# The attention mask must be a model input too: the tokenizer is asked to
# return it and `fit` passes it under this name. Without a matching Input the
# mask never reaches BERT and padding tokens are attended to like real ones.
attention_mask = Input(shape=(max_len,), name='attention_mask', dtype='int32')

inputs = {'input_ids': input_ids, 'attention_mask': attention_mask}

In [None]:
# Element 1 of the BERT layer's output is used as the sentence representation
# (presumably the pooled output -- confirm for the transformers version in use).
bert_model = bert(inputs)[1]
dropout = Dropout(config.hidden_dropout_prob, name='pooled_output')
# Do not pin `training=False`: that turns the layer into a permanent no-op.
# Leaving the flag unset lets Keras enable dropout during fit() and disable it
# during evaluate()/predict().
pooled_output = dropout(bert_model)

In [None]:
# Number of distinct intent labels (used as the width of the output layer).
len(intent_map)

In [None]:
# Display the dict of symbolic model inputs.
inputs

In [None]:
# Classification head: one output unit per intent label. No activation is set
# here; the loss is configured with from_logits=True.
intent_head = Dense(
    units=len(intent_map),
    kernel_initializer=TruncatedNormal(stddev=config.initializer_range),
    name='intent',
)
intent = intent_head(pooled_output)

outputs = {'intent': intent}

In [None]:
# Assemble the functional model from the input dict to the intent output and
# print its layer summary.
model = Model(inputs = inputs, outputs=outputs, name='Bert_MultiClass')
model.summary()

In [None]:
# The legacy `decay=` argument applied lr / (1 + decay * step) on every update.
# Current Keras optimizers reject that argument, so the same schedule is
# expressed explicitly: InverseTimeDecay with decay_steps=1 is the same formula.
lr_schedule = InverseTimeDecay(
    initial_learning_rate=5e-05,
    decay_steps=1,
    decay_rate=0.01,
)
optimizer = Adam(learning_rate=lr_schedule, epsilon=1e-08, clipnorm=1.0)

# The 'intent' output carries raw logits (its Dense layer has no activation),
# hence from_logits=True. The metric is named 'accuracy' so the history keys
# are 'accuracy' / 'val_accuracy'.
loss = {'intent':CategoricalCrossentropy(from_logits=True)}
metric = {'intent':CategoricalAccuracy('accuracy')}

In [None]:
# Attach the optimizer, the per-output loss and the per-output metric.
model.compile(optimizer=optimizer, loss = loss, metrics= metric)

In [None]:
# One-hot encode the integer intent ids, as expected by CategoricalCrossentropy.
y_intent = to_categorical(intent_val)

In [None]:
# Tokenize all queries into TensorFlow tensors.
# padding='max_length' pads every row to exactly max_len. padding=True would
# only pad to the longest row this tokenizer produces, which is not guaranteed
# to equal max_len (that was measured with a different tokenizer instance), and
# the model's inputs are fixed at width max_len.
x = tokenizer(
    text=df['Query'].to_list(),
    add_special_tokens=True,
    max_length=max_len,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=True,
    verbose=True,
)

In [None]:
# Features and targets are passed as dicts keyed by name.
train_features = {
    'input_ids': x['input_ids'],
    'attention_mask': x['attention_mask'],
}
train_targets = {'intent': y_intent}

# Train for 3 epochs with batches of 32, holding out 20% of the samples for
# validation.
history = model.fit(
    x=train_features,
    y=train_targets,
    validation_split=0.2,
    batch_size=32,
    epochs=3,
)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(history.history['accuracy'])
ax.plot(history.history['val_accuracy'])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'val'], loc='right')
# Save BEFORE showing: once plt.show() has rendered the figure, a following
# plt.savefig() writes an empty canvas instead of this plot.
fig.savefig('accuracy.png')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch. This cell used to be an exact copy
# of the accuracy plot and overwrote accuracy.png.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'val'], loc='right')
# Save BEFORE showing: once plt.show() has rendered the figure, a following
# plt.savefig() writes an empty canvas instead of this plot.
fig.savefig('loss.png')
plt.show()