In [None]:
import json
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import os


# Load the intent definitions from disk. The encoding is spelled out because
# JSON text is UTF-8 by specification, whereas open() without an encoding
# falls back to the platform default, which is not UTF-8 everywhere and
# would then mis-decode any non-ASCII characters in the patterns.
with open("new.json", 'r', encoding="utf-8") as json_data:
    intents = json.load(json_data)

# Flatten the intents into two parallel lists: every example pattern goes
# into X, and the tag of the intent it belongs to goes into y at the same
# position.
X = []
y = []

for intent in intents['intents']:
    for pattern in intent['patterns']:
        X.append(pattern)
        y.append(intent['tag'])


# Fit a word-level vocabulary on the patterns and turn each pattern into a
# sequence of integer word ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
X_sequences = tokenizer.texts_to_sequences(X)

# Pad every sequence to the length of the longest one so the inputs form a
# single rectangular array.
max_len = max(map(len, X_sequences))
X_padded = pad_sequences(sequences=X_sequences, maxlen=max_len)


# Map each distinct tag to an integer class id. The tags are sorted so the
# tag -> id assignment is identical on every run: iterating a bare set of
# strings yields an order that can change between interpreter sessions, which
# would make the class ids (and any model or report built on them)
# irreproducible.
labels = sorted(set(y))
label_to_idx = {label: idx for idx, label in enumerate(labels)}

# Integer class id for every pattern, aligned with X / X_padded.
y_numeric = np.array([label_to_idx[label] for label in y])


# Hold out 20% of the samples for validation; the fixed seed keeps the split
# the same from run to run.
X_train, X_val, y_train, y_val = train_test_split(
    X_padded,
    y_numeric,
    test_size=0.2,
    random_state=42,
)

# Embedding hyperparameters. The vocabulary size is one larger than the number
# of known words because Tokenizer word ids start at 1, leaving id 0 free for
# the padding value.
embedding_dim = 100
vocab_size = 1 + len(tokenizer.word_index)


# Intent classifier: word embeddings -> bidirectional LSTM -> dense hidden
# layer with dropout -> softmax over the intent tags.
# NOTE(review): `input_length` may be deprecated in newer Keras releases -
# confirm against the installed version before removing it.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len),
    # L2-regularised recurrent encoder with input and recurrent dropout.
    Bidirectional(LSTM(96, dropout=0.5, recurrent_dropout=0.2, kernel_regularizer='l2')),
    Dense(96, activation='relu', kernel_regularizer='l2'),
    Dropout(0.5),
    # One output unit per distinct tag.
    Dense(len(labels), activation='softmax'),
])


# The targets are integer class ids (not one-hot vectors), hence the sparse
# variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop once the validation loss has not improved for 5 consecutive epochs and
# roll the model back to the weights of its best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=40,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

# Predicted class probabilities for the validation samples, one row each.
y_val_probs = model.predict(X_val)

# Convert predicted and true class ids back into tag names so the report is
# readable; the prediction is the highest-probability class of each row.
y_val_pred_labels = [labels[class_id] for class_id in np.argmax(y_val_probs, axis=1)]
y_val_true_labels = [labels[class_id] for class_id in y_val]

# Per-tag precision / recall / F1 on the validation split.
report = classification_report(y_val_true_labels, y_val_pred_labels)
print("Classification Report:")
print(report)


Epoch 1/40




[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 31ms/step - accuracy: 0.0753 - loss: 6.4294 - val_accuracy: 0.0721 - val_loss: 4.7773
Epoch 2/40
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.1264 - loss: 4.4180 - val_accuracy: 0.0721 - val_loss: 3.5984
Epoch 3/40
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.1390 - loss: 3.4468 - val_accuracy: 0.0721 - val_loss: 3.0575
Epoch 4/40
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.1472 - loss: 2.9824 - val_accuracy: 0.2523 - val_loss: 2.7825
Epoch 5/40
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.1902 - loss: 2.7438 - val_accuracy: 0.2432 - val_loss: 2.6333
Epoch 6/40
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.2068 - loss: 2.5423 - val_accuracy: 0.2523 - val_loss: 2.2303
Epoch 7/40
[1m28/28[0m [32m━━━━━━━━━━━━━━━