In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
import torch
from sklearn.preprocessing import LabelEncoder
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
import sys 

In [2]:
# Load dataset
data = pd.read_csv(r"C:\Users\aadya\Desktop\ChatBot\Flight.csv") 

In [3]:
data.head(10)

Unnamed: 0,Airline,Date,Flight_Duration,Stops,Time_total,class_type,Flight_price
0,Spirit Airlines,2023-12-01,12h 44mBOS-ORD,1 stop,9:44 pm – 9:28 am+1,Economy,$82
1,American Airlines,2023-12-01,2h 48mBOS-ORD,nonstop,6:52 am – 8:40 am,Basic Economy,$89
2,Spirit Airlines,2023-12-01,12h 44mBOS-ORD,1 stop,9:44 pm – 9:28 am+1,Economy,$82
3,JetBlue,2023-12-01,3h 01mBOS-ORD,nonstop,4:54 pm – 6:55 pm,Blue Basic,$109
4,United Airlines,2023-12-01,2h 57mBOS-ORD,nonstop,6:00 am – 7:57 am,Basic Economy,$106
5,JetBlue,2023-12-01,3h 00mBOS-ORD,nonstop,7:05 am – 9:05 am,Blue Basic,$119
6,Delta,2023-12-01,3h 05mBOS-ORD,nonstop,5:45 pm – 7:50 pm,Basic Economy,$139
7,Delta,2023-12-01,2h 59mBOS-ORD,nonstop,6:05 am – 8:04 am,Basic Economy,$139
8,American Airlines,2023-12-01,2h 58mBOS-ORD,nonstop,5:40 pm – 7:38 pm,Basic Economy,$144
9,United Airlines,2023-12-01,2h 57mBOS-ORD,nonstop,6:28 pm – 8:25 pm,Basic Economy,$142


In [4]:
# Step 1: Data Acquisition
X = data['Airline']
y = data['class_type']

In [5]:
# Step 2: Intent Recognition
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Initialize NLP pipeline for entity extraction
nlp = spacy.load("en_core_web_sm")  

In [7]:
# Define intent classifier pipeline
intent_classifier = Pipeline([
    ("tfidf", TfidfVectorizer()),
    ("clf", RandomForestClassifier()) 
])

In [8]:
# Train intent classifier
intent_classifier.fit(X_train, y_train)

In [9]:

# Evaluate intent classifier
intent_predictions = intent_classifier.predict(X_test)
accuracy = accuracy_score(y_test, intent_predictions)
precision = precision_score(y_test, intent_predictions, average='weighted')
recall = recall_score(y_test, intent_predictions, average='weighted')
f1 = f1_score(y_test, intent_predictions, average='weighted')

  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
print("Intent Classification Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Intent Classification Metrics:
Accuracy: 0.8293216630196937
Precision: 0.7376523913723039
Recall: 0.8293216630196937
F1 Score: 0.7727208035955434


In [11]:
# Step 3: Entity Extraction
# Define named entity recognition (NER) function
def extract_entities(text):
    doc = nlp(text)
    entities = []
    for ent in doc.ents:
        entities.append((ent.text, ent.label_))
    return entities

In [12]:
# Example usage of NER function
text = "Book a flight from New York to London tomorrow"
entities = extract_entities(text)
print("Entities:", entities)

Entities: [('New York', 'GPE'), ('London', 'GPE'), ('tomorrow', 'DATE')]


In [13]:
# Step 4: Slot Filling and Context Handling
# Define functions for slot filling based on extracted entities and maintaining conversation context

# Define a function to fill slots based on extracted entities
def fill_slots(entities):
    slots = {}
    for entity, label in entities:
        if label == 'DATE':
            slots['date'] = entity
        elif label == 'LOC':
            slots['location'] = entity
        # Add more conditions for other entity types as needed
    return slots

In [14]:
# Example usage of slot filling function
entities = [('New York', 'LOC'), ('December 25th', 'DATE')]
slots = fill_slots(entities)
print("Slots:", slots)

Slots: {'location': 'New York', 'date': 'December 25th'}


In [15]:
# Define a function to maintain conversation context
def manage_context(previous_context, current_intent, current_slots):
    # Implement logic to maintain context based on previous context, current intent, and slots
    # Example: Update previous context with current intent and slots
    updated_context = {
        'intent': current_intent,
        'slots': current_slots
    }
    return updated_context

In [16]:
# Example usage of context management function
previous_context = {'intent': 'BookFlight', 'slots': {'from': 'New York'}}
current_intent = 'GetWeather'
current_slots = {'location': 'London'}
updated_context = manage_context(previous_context, current_intent, current_slots)
print("Updated Context:", updated_context)

Updated Context: {'intent': 'GetWeather', 'slots': {'location': 'London'}}


In [17]:
# Step 5: Dialog Management
# Define rules or a simple model to generate responses based on intents and slots
# We'll create a simple rule-based dialog management system

def generate_response(intent, slots):
    if intent == 'BookFlight':
        if 'from' in slots and 'to' in slots and 'date' in slots:
            return f"Booking flight from {slots['from']} to {slots['to']} on {slots['date']}"
        else:
            return "Please provide missing information to book a flight."
    elif intent == 'GetWeather':
        if 'location' in slots and 'date' in slots:
            return f"Fetching weather forecast for {slots['location']} on {slots['date']}"
        else:
            return "Please provide location and date for weather forecast."
    else:
        return "I'm sorry, I didn't understand that."


In [18]:
# Example usage of response generation function
intent = 'BookFlight'
slots = {'from': 'New York', 'to': 'London', 'date': 'December 25th'}
response = generate_response(intent, slots)
print("Response:", response)

Response: Booking flight from New York to London on December 25th


In [19]:
# Model Evaluation
# Evaluate Intent Recognition Model
intent_accuracy = accuracy_score(y_test, intent_predictions)
intent_precision = precision_score(y_test, intent_predictions, average='weighted')
intent_recall = recall_score(y_test, intent_predictions, average='weighted')
intent_f1 = f1_score(y_test, intent_predictions, average='weighted')

  _warn_prf(average, modifier, msg_start, len(result))


In [20]:
print("Intent Recognition Metrics:")
print("Accuracy:", intent_accuracy)
print("Precision:", intent_precision)
print("Recall:", intent_recall)
print("F1 Score:", intent_f1)

Intent Recognition Metrics:
Accuracy: 0.8293216630196937
Precision: 0.7376523913723039
Recall: 0.8293216630196937
F1 Score: 0.7727208035955434


In [21]:
# BERT Fine-tuning for Intent Recognition
# Load BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
num_labels = len(set(y))
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [22]:
# Preprocess labels using LabelEncoder
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [23]:
# Tokenize and encode input data
X_train_encoded = tokenizer(list(X_train), padding=True, truncation=True, return_tensors="pt")
X_test_encoded = tokenizer(list(X_test), padding=True, truncation=True, return_tensors="pt")

In [24]:
# Create DataLoader for train and test sets
batch_size = 32
train_dataset = TensorDataset(X_train_encoded['input_ids'], X_train_encoded['attention_mask'], torch.tensor(y_train_encoded))
test_dataset = TensorDataset(X_test_encoded['input_ids'], X_test_encoded['attention_mask'], torch.tensor(y_test_encoded))
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

In [25]:
# Fine-tuning parameters
learning_rate = 2e-5
num_epochs = 3

In [26]:
# Define optimizer using torch.optim.AdamW
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

In [27]:
# Fine-tuning loop
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.train()  # Set model to training mode
for epoch in range(num_epochs):
    for batch in train_dataloader:
        input_ids, attention_mask, labels = batch
        input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels.long())  # Cast labels to torch.long
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        
        # Check for user input to stop execution
        user_input = input("Enter 'q' to stop execution: ")
        if user_input == 'q':
            print("Execution stopped by user.")
            sys.exit()

Enter 'q' to stop execution: Book a flight from New York to London tomorrow
Enter 'q' to stop execution: q
Execution stopped by user.


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [28]:
# Evaluation
model.eval()  # Set model to evaluation mode
all_preds = []
all_labels = []

with torch.no_grad():
    for batch in test_dataloader:
        input_ids, attention_mask, labels = batch
        input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
        
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        preds = logits.argmax(dim=-1).cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(labels.cpu().numpy())

In [29]:
# Calculate evaluation metrics
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, average='weighted')
recall = recall_score(all_labels, all_preds, average='weighted')
f1 = f1_score(all_labels, all_preds, average='weighted')

  _warn_prf(average, modifier, msg_start, len(result))


In [30]:
# Print evaluation metrics
print("BERT Intent Classification Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

BERT Intent Classification Metrics:
Accuracy: 0.5645514223194749
Precision: 0.3194172521018081
Recall: 0.5645514223194749
F1 Score: 0.4079951455418054
