In [None]:
import torch
from torch import nn
!pip install torchtext 
!pip install transformers
!pip install seaborn
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from transformers import BertTokenizer, BertForSequenceClassification

In [5]:
import pandas as pd
from torchtext.data import Field

In [4]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [7]:
a=tokenizer.encode('Hello this is me. My name is hello')
print(a)

[101, 7592, 2023, 2003, 2033, 1012, 2026, 2171, 2003, 7592, 102]


In [6]:
MAX_SEQ_LEN = 128
PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)
df_train=pd.read_csv('switchboard_train.csv')
# print(df_train.head())
text_field = Field(tokenize=tokenizer.encode, lower=False, include_lengths=False, batch_first=True, use_vocab=False, fix_length=MAX_SEQ_LEN, pad_token=PAD_INDEX, unk_token=UNK_INDEX)
label_field = Field(sequential=False, batch_first=True, use_vocab=True)
preprocessed_text = df_train['clean_text'].apply(lambda x: text_field.preprocess(str(x)))
# text_field.build_vocab(
#     preprocessed_text, 
#     vectors='glove.6B.50d')



In [8]:
from torchtext.data import TabularDataset
train_datafields=[('swda_filename',None),('ptb_basename',None),('conversation_no',None),('transcript_index',None),('act_tag',None),('act_label_1',label_field),('act_label_2',None),('act_label_relation',None),('caller',None),('utterance_index',None),('subutterance_index',None),('clean_text',text_field),('topic_description',None),('prompt',None)]
train,validation,test=TabularDataset.splits(path='',train='switchboard_train.csv',validation='switchboard_valid.csv',test='switchboard_test.csv',format='csv',skip_header=True,fields=train_datafields)



In [9]:
label_field.build_vocab(train)

In [10]:
print(len(label_field.vocab))

55


In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

batch_size=8


from torchtext.data import Iterator, BucketIterator

train_iter, val_iter = BucketIterator.splits((train, validation),batch_sizes=(batch_size,batch_size),device=device,sort_key=lambda x: len(x.clean_text),sort_within_batch=False,repeat=False)
test_iter = Iterator(test, batch_size=batch_size, device=device, sort=False, sort_within_batch=False, repeat=False)



In [12]:
class BERT(nn.Module):
    def __init__(self):
        super(BERT, self).__init__()

        options_name = 'bert-base-uncased'
        self.encoder = BertForSequenceClassification.from_pretrained(options_name)

    def forward(self, text, label):
        loss, text_fea = self.encoder(text, labels=label)[:2]

        return loss, text_fea

In [13]:
# Save and Load Functions

def save_checkpoint(save_path, model, valid_loss):
    if save_path == None:
        return
    state_dict = {'model_state_dict': model.state_dict(),
                  'valid_loss': valid_loss}
    torch.save(state_dict, save_path)
    print(f'Model saved to ==> {save_path}')

def load_checkpoint(load_path, model):
    if load_path==None:
        return
    state_dict = torch.load(load_path, map_location=device)
    print(f'Model loaded from <== {load_path}')
    model.load_state_dict(state_dict['model_state_dict'])
    return state_dict['valid_loss']

def save_metrics(save_path, train_loss_list, valid_loss_list, global_steps_list):
    if save_path == None:
        return
    state_dict = {'train_loss_list': train_loss_list,
                  'valid_loss_list': valid_loss_list,
                  'global_steps_list': global_steps_list}
    torch.save(state_dict, save_path)
    print(f'Model saved to ==> {save_path}')


def load_metrics(load_path):
    if load_path==None:
        return
    state_dict = torch.load(load_path, map_location=device)
    print(f'Model loaded from <== {load_path}')
    return state_dict['train_loss_list'], state_dict['valid_loss_list'], state_dict['global_steps_list']

In [14]:
# Training Function
destination_folder='BERT'
def train(model,
          optimizer,
          criterion = nn.BCELoss(),
          train_loader = train_iter,
          valid_loader = val_iter,
          num_epochs = 5,
          eval_every = len(train_iter) // 2,
          file_path = destination_folder,
          best_valid_loss = float("Inf")):
    
    # initialize running values
    running_loss = 0.0
    valid_running_loss = 0.0
    global_step = 0
    train_loss_list = []
    valid_loss_list = []
    global_steps_list = []

    # training loop
    model.train()
    for epoch in range(num_epochs):
        for batch in train_loader:
            labels = batch.act_label_1.type(torch.LongTensor)            
            labels = labels.to(device)
            titletext = batch.clean_text.type(torch.LongTensor)  
            titletext = titletext.to(device)
            print(labels,titletext)
            output = model(titletext, labels)
            loss, _ = output

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # update running values
            running_loss += loss.item()
            global_step += 1

            # evaluation step
            if global_step % eval_every == 0:
                model.eval()
                with torch.no_grad():                    

                    # validation loop
                    for val_batch in valid_loader:
                        labels = val_batch.act_label_1.type(torch.LongTensor)           
                        labels = labels.to(device)
                        titletext = val_batch.clean_text.type(torch.LongTensor)  
                        titletext = titletext.to(device)
                        output = model(titletext, labels)
                        loss, _ = output
                        
                        valid_running_loss += loss.item()

                # evaluation
                average_train_loss = running_loss / eval_every
                average_valid_loss = valid_running_loss / len(valid_loader)
                train_loss_list.append(average_train_loss)
                valid_loss_list.append(average_valid_loss)
                global_steps_list.append(global_step)

                # resetting running values
                running_loss = 0.0                
                valid_running_loss = 0.0
                model.train()

                # print progress
                print('Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}'
                      .format(epoch+1, num_epochs, global_step, num_epochs*len(train_loader),
                              average_train_loss, average_valid_loss))
                
                # checkpoint
                if best_valid_loss > average_valid_loss:
                    best_valid_loss = average_valid_loss
                    save_checkpoint(file_path + '/' + 'model.pt', model, best_valid_loss)
                    save_metrics(file_path + '/' + 'metrics.pt', train_loss_list, valid_loss_list, global_steps_list)
    
    save_metrics(file_path + '/' + 'metrics.pt', train_loss_list, valid_loss_list, global_steps_list)
    print('Finished Training!')

In [None]:
model = BERT().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

train(model=model, optimizer=optimizer)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

tensor([ 9,  1,  3,  3,  3,  5,  2,  5,  1,  1,  1,  3,  1, 17,  3,  1,  6,  1,
         5,  1,  5,  1,  4,  1,  2, 29,  8,  1,  5, 10,  9,  5]) tensor([[ 101, 1045, 2123,  ...,    0,    0,    0],
        [ 101, 1045, 3246,  ...,    0,    0,    0],
        [ 101, 2009, 2515,  ...,    0,    0,    0],
        ...,
        [ 101, 7910, 1011,  ...,    0,    0,    0],
        [ 101, 1045, 2228,  ...,    0,    0,    0],
        [ 101, 2009,  102,  ...,    0,    0,    0]])


In [3]:
!nvidia-smi

In [5]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu
