In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report

# Load the datasets
train_data = pd.read_csv('atis_intents_train.csv')
test_data = pd.read_csv('atis_intents_test.csv')

# Rename columns for consistency
train_data.columns = ['intent', 'text']
test_data.columns = ['intent', 'text']

# Combine datasets for preprocessing
texts = pd.concat([train_data['text'], test_data['text']])
labels = pd.concat([train_data['intent'], test_data['intent']])

# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split back into training and testing sets
train_texts, test_texts, train_labels, test_labels = train_test_split(
    texts, encoded_labels, test_size=0.2, random_state=42
)

# Tokenize and vectorize text
tokenizer = Tokenizer(num_words=10000)  # Use the top 10,000 words
tokenizer.fit_on_texts(train_texts)

X_train = tokenizer.texts_to_sequences(train_texts)
X_test = tokenizer.texts_to_sequences(test_texts)

# Pad sequences to the same length
max_len = 50
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)

# Determine the number of classes
num_classes = len(label_encoder.classes_)

# Define the neural network model
model = Sequential([
    Embedding(input_dim=10000, output_dim=128, input_length=max_len),
    Bidirectional(LSTM(128, return_sequences=True)),
    Dropout(0.5),
    Bidirectional(LSTM(64)),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Early stopping to avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model
history = model.fit(
    X_train, train_labels,
    validation_split=0.2,
    epochs=20,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1
)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, test_labels, verbose=0)
print(f"\nTest Accuracy: {accuracy * 100:.2f}%")

# Generate predictions and classification report
y_pred = np.argmax(model.predict(X_test), axis=1)
print("\nClassification Report:")
print(classification_report(test_labels, y_pred, target_names=label_encoder.classes_))

# Save the model
model.save('intent_classification_model.h5')




Epoch 1/20
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 180ms/step - accuracy: 0.7252 - loss: 1.0968 - val_accuracy: 0.8602 - val_loss: 0.4800
Epoch 2/20
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 159ms/step - accuracy: 0.8858 - loss: 0.3944 - val_accuracy: 0.9179 - val_loss: 0.2913
Epoch 3/20
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 156ms/step - accuracy: 0.9295 - loss: 0.2374 - val_accuracy: 0.9456 - val_loss: 0.2264
Epoch 4/20
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 154ms/step - accuracy: 0.9544 - loss: 0.1595 - val_accuracy: 0.9589 - val_loss: 0.1437
Epoch 5/20
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 161ms/step - accuracy: 0.9806 - loss: 0.0776 - val_accuracy: 0.9689 - val_loss: 0.1355
Epoch 6/20
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 163ms/step - accuracy: 0.9867 - loss: 0.0665 - val_accuracy: 0.9745 - val_loss: 0.1279
Epoch 7/20




Classification Report:
                     precision    recall  f1-score   support

  atis_abbreviation       0.97      0.89      0.93        36
      atis_aircraft       0.92      0.73      0.81        15
       atis_airfare       0.97      0.98      0.97        98
       atis_airline       0.91      0.94      0.93        33
        atis_flight       0.99      1.00      0.99       865
   atis_flight_time       0.73      0.89      0.80         9
atis_ground_service       1.00      0.97      0.98        58
      atis_quantity       1.00      0.92      0.96        13

           accuracy                           0.98      1127
          macro avg       0.94      0.91      0.92      1127
       weighted avg       0.98      0.98      0.98      1127

