In [9]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset
from sklearn.metrics import f1_score
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
from transformers import EarlyStoppingCallback


In [None]:
# Loading the dataset
df = pd.read_csv("intent_dataset.csv")

# Encode Lables
labels = {label : idx for idx, label in enumerate(df["intent"].unique())}
df["label"] = df["intent"].map(labels)



# Split data
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df["question"].tolist(), df["label"].tolist(), test_size = 0.2, random_state = 42
)
print(f"Training examples: {len(train_texts)}")
print(f"Validation examples: {len(val_texts)}")
#print(df.count(df['intent'] == "get_teams_in_year"))

intent_rep = df['intent'].value_counts()
print(intent_rep)


Training examples: 640
Validation examples: 160
intent
get_teams_in_year                   80
get_driver_wins                     80
get_championship_winner             80
get_driver_with_most_wins           80
get_constructors_championship       80
get_race_winners_in_year            80
get_championship_runner_up          80
get_drivers_with_multiple_titles    80
get_driver_with_most_poles          80
get_car_in_season                   80
Name: count, dtype: int64


In [11]:
# Tokenizer 
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

# Tokenization 
def tokenize_function(texts) : 
    return tokenizer (texts, padding = True, truncation = True, max_length = 128, return_tensor="pt")

train_encodings = tokenize_function(train_texts)
val_encondings = tokenize_function(val_texts)

Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword arguments {'return_tensor': 'pt'} not recognized.
Keyword argume

In [12]:
# Custom dataset class
class IntentDataset (Dataset) :
    def __init__ (self, encodings, labels):
        self.encodings = encodings 
        self.labels = labels
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx) :
        item = {key: val[idx] for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

train_dataset = IntentDataset(train_encodings, train_labels)
val_dataset = IntentDataset(val_encondings, val_labels)

In [13]:
# Model
model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels = len(labels))


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
# training arguments
training_args = TrainingArguments(
    output_dir ="./results",
    eval_strategy = "epoch",
    save_strategy= "epoch", 
    per_device_train_batch_size = 4,
    per_device_eval_batch_size= 4,
    num_train_epochs= 20,
    learning_rate= 5e-5,
    weight_decay= 0.02,
    warmup_steps=100,
    load_best_model_at_end=True,
)


# Trainer
trainer = Trainer(
    model =model, 
    args = training_args ,
    train_dataset= train_dataset,
    eval_dataset= val_dataset,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
)

# Train Model
trainer.train()

# Save Model
model.save_pretrained("intent_classifier_model")
tokenizer.save_pretrained("intent_classifier_model")

print("Model training complete! The model has been saved.")

# Evaluate the model
results = trainer.evaluate()

# Print the evaluation results
print(f"Validation Results: {results}")

Epoch,Training Loss,Validation Loss
1,No log,0.25441
2,No log,0.150425
3,No log,0.125867
4,0.537800,0.074619
5,0.537800,0.084674
6,0.537800,0.084148
7,0.013300,0.072179
8,0.013300,0.097211
9,0.013300,0.110097
10,0.017300,0.100266


Model training complete! The model has been saved.


Validation Results: {'eval_loss': 0.07217920571565628, 'eval_runtime': 1.1655, 'eval_samples_per_second': 137.283, 'eval_steps_per_second': 34.321, 'epoch': 12.0}


In [None]:
# Function to predict intent for a new question
def predict_intent(question):
    encoding = tokenizer(question, return_tensors="pt", padding=True, truncation=True, max_length=128)
    outputs = model(**encoding)
    logits = outputs.logits
    print(logits)
    predicted_label = torch.argmax(logits, dim=-1).item()
    intent = list(labels.keys())[predicted_label]
    print(labels.keys())
    return intent

# Test prediction
new_question = "Who is the driver with most wins ? "
predicted_intent = predict_intent(new_question)
print(f"Predicted Intent: {predicted_intent}")


tensor([[-2.4362, -2.0982, -2.1671,  7.9313, -3.7504, -1.5755, -3.0004, -1.5476,
         -1.4873, -3.3896]], grad_fn=<AddmmBackward0>)
dict_keys(['get_teams_in_year', 'get_driver_wins', 'get_championship_winner', 'get_driver_with_most_wins', 'get_constructors_championship', 'get_race_winners_in_year', 'get_championship_runner_up', 'get_drivers_with_multiple_titles', 'get_driver_with_most_poles', 'get_car_in_season'])
Predicted Intent: get_driver_with_most_wins
{'get_teams_in_year': 0, 'get_driver_wins': 1, 'get_championship_winner': 2, 'get_driver_with_most_wins': 3, 'get_constructors_championship': 4, 'get_race_winners_in_year': 5, 'get_championship_runner_up': 6, 'get_drivers_with_multiple_titles': 7, 'get_driver_with_most_poles': 8, 'get_car_in_season': 9}


In [16]:
# Generate predictions for the validation set
predictions = trainer.predict(val_dataset)

# Get the predicted labels
predicted_labels = np.argmax(predictions.predictions, axis=-1)

# Get the true labels
true_labels = predictions.label_ids

# Map numerical labels to intent names
y_true = [list(labels.keys())[label] for label in true_labels] # True intents
y_pred = [list(labels.keys())[label] for label in predicted_labels] # Predicted intents

In [17]:
# Metrics
# Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy: {accuracy:.4f}')

# Precision, Recall, F1-Score (Per-Class Metrics)
precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')  # Use 'micro' or 'macro' for other types of averaging
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-Score: {f1:.4f}')

# Confusion Matrix
conf_matrix = confusion_matrix(y_true, y_pred)
print('Confusion Matrix:')
print(conf_matrix)

# Classification Report (includes precision, recall, F1-score per class)
print('Classification Report:')
print(classification_report(y_true, y_pred))


Accuracy: 0.9875
Precision: 0.9882
Recall: 0.9875
F1-Score: 0.9875
Confusion Matrix:
[[19  0  0  1  0  0  0  0  0  0]
 [ 0 18  0  0  0  0  0  0  0  0]
 [ 0  0 14  0  0  0  0  0  0  0]
 [ 0  0  0 19  0  0  0  0  0  0]
 [ 0  0  0  0 13  0  0  0  0  0]
 [ 0  0  0  0  0 16  1  0  0  0]
 [ 0  0  0  0  0  0 15  0  0  0]
 [ 0  0  0  0  0  0  0 16  0  0]
 [ 0  0  0  0  0  0  0  0  9  0]
 [ 0  0  0  0  0  0  0  0  0 19]]
Classification Report:
                                  precision    recall  f1-score   support

               get_car_in_season       1.00      0.95      0.97        20
      get_championship_runner_up       1.00      1.00      1.00        18
         get_championship_winner       1.00      1.00      1.00        14
   get_constructors_championship       0.95      1.00      0.97        19
                 get_driver_wins       1.00      1.00      1.00        13
      get_driver_with_most_poles       1.00      0.94      0.97        17
       get_driver_with_most_wins       0.9