In [9]:
!pip install transformers
!pip install torch
!pip install scikit-learn

[0m

In [62]:
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import ElectraTokenizer, ElectraForSequenceClassification, AdamW

In [63]:
# Load the SNIPS NLU dataset
df = pd.read_csv('/kaggle/input/vadataset/ChatbotCorpus.csv')

# Split the dataset into train and test sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [64]:
# Load the ELECTRA tokenizer
tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')

# Tokenize the input data
train_encodings = tokenizer(train_df['text'].tolist(), truncation=True, padding=True)
test_encodings = tokenizer(test_df['text'].tolist(), truncation=True, padding=True)

In [65]:
# Create PyTorch datasets
class SNIPSDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = SNIPSDataset(train_encodings, train_df['intent'].tolist())
test_dataset = SNIPSDataset(test_encodings, test_df['intent'].tolist())


In [66]:
train_df['intent'].unique()

array(['DepartureTime', 'FindConnection'], dtype=object)

In [67]:
from sklearn.preprocessing import LabelEncoder

# Encode the intent labels as integers
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_df['intent'])
test_labels = label_encoder.transform(test_df['intent'])

train_dataset = SNIPSDataset(train_encodings, train_labels)
test_dataset = SNIPSDataset(test_encodings, test_labels)

# Load the ELECTRA model
model = ElectraForSequenceClassification.from_pretrained('google/electra-small-discriminator', num_labels=2)

Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier

In [68]:
# Fine-tune the model
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True)

optim = AdamW(model.parameters(), lr=5e-5)
num_correct = 0
num_total = 0

for epoch in range(5):
    epoch_loss = 0
    for batch in train_loader:
        optim.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        epoch_loss += loss.item()
        loss.backward()
        optim.step()
        _, predicted = torch.max(outputs.logits, 1)
        num_correct += (predicted == labels).sum().item()
        num_total += labels.size(0)
    print(f'Epoch {epoch + 1} loss: {epoch_loss}')
    print(f'Accuracy {epoch + 1}: {num_correct / num_total}')

Epoch 1 loss: 7.05332887172699
Accuracy 1: 0.75
Epoch 2 loss: 5.260806977748871
Accuracy 2: 0.8536585365853658
Epoch 3 loss: 3.566635251045227
Accuracy 3: 0.9004065040650406
Epoch 4 loss: 2.36725090444088
Accuracy 4: 0.9237804878048781
Epoch 5 loss: 1.5401325896382332
Accuracy 5: 0.9390243902439024


In [69]:
# Evaluate the model on the test set
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16, shuffle=False)

model.eval()

num_correct = 0
num_total = 0

with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        _, predicted = torch.max(outputs.logits, 1)
        num_correct += (predicted == labels).sum().item()
        num_total += labels.size(0)

print(f'Test accuracy: {num_correct / num_total}')

Test accuracy: 0.9761904761904762
