
### Bước 0: Thiết lập môi trường và tải dữ liệu

In [2]:
import pandas as pd
from google.colab import files
uploaded = files.upload()
# Dữ liệu có thể được phân tách bằng tab và không có header
df_train = pd.read_csv('train.csv', sep='\t', header=None, names=['text', 'intent'])
df_val = pd.read_csv('val.csv', sep='\t', header=None, names=['text', 'intent'])
df_test = pd.read_csv('test.csv', sep='\t', header=None, names=['text', 'intent'])

# tách data thành 2 cột text và intent
df_train[['text', 'intent']] = df_train['text'].str.rsplit(',', n=1, expand=True)
df_val[['text', 'intent']] = df_val['text'].str.rsplit(',', n=1, expand=True)
df_test[['text', 'intent']] = df_test['text'].str.rsplit(',', n=1, expand=True)

print("Train shape:", df_train.shape)
print("Validation shape:", df_val.shape)
print("Test shape:", df_test.shape)

df_train.head()

Saving test.csv to test.csv
Saving train.csv to train.csv
Saving val.csv to val.csv
Train shape: (8955, 2)
Validation shape: (1077, 2)
Test shape: (1077, 2)


Unnamed: 0,text,intent
0,text,category
1,what alarms do i have set right now,alarm_query
2,checkout today alarm of meeting,alarm_query
3,report alarm settings,alarm_query
4,see see for me the alarms that you have set to...,alarm_query


In [3]:
# Tiền xử lí
from sklearn.preprocessing import LabelEncoder

# Loại bỏ trích dẫn khỏi intent
df_train['intent'] = df_train['intent'].str.replace('"', '', regex=False)
df_val['intent'] = df_val['intent'].str.replace('"', '', regex=False)
df_test['intent'] = df_test['intent'].str.replace('"', '', regex=False)

# Khởi tạo labelencoder
label_encoder = LabelEncoder()

# huấn luyện cột intent
df_train['intent_encoded'] = label_encoder.fit_transform(df_train['intent'])
df_val['intent_encoded'] = label_encoder.transform(df_val['intent'])
df_test['intent_encoded'] = label_encoder.transform(df_test['intent'])

# Hiển thị vài dòng đầu
display(df_train.head())

# Hiển thị ánh xạ sau khi huấn luyện
print("Label mapping:")
for i, intent in enumerate(label_encoder.classes_):
    print(f"{intent}: {i}")

Unnamed: 0,text,intent,intent_encoded
0,text,category,9
1,what alarms do i have set right now,alarm_query,0
2,checkout today alarm of meeting,alarm_query,0
3,report alarm settings,alarm_query,0
4,see see for me the alarms that you have set to...,alarm_query,0


Label mapping:
alarm_query: 0
alarm_remove: 1
alarm_set: 2
audio_volume_down: 3
audio_volume_mute: 4
audio_volume_up: 5
calendar_query: 6
calendar_remove: 7
calendar_set: 8
category: 9
cooking_recipe: 10
datetime_convert: 11
datetime_query: 12
email_addcontact: 13
email_query: 14
email_querycontact: 15
email_sendemail: 16
general_affirm: 17
general_commandstop: 18
general_confirm: 19
general_dontcare: 20
general_explain: 21
general_joke: 22
general_negate: 23
general_praise: 24
general_quirky: 25
general_repeat: 26
iot_cleaning: 27
iot_coffee: 28
iot_hue_lightchange: 29
iot_hue_lightdim: 30
iot_hue_lightoff: 31
iot_hue_lighton: 32
iot_hue_lightup: 33
iot_wemo_off: 34
iot_wemo_on: 35
lists_createoradd: 36
lists_query: 37
lists_remove: 38
music_likeness: 39
music_query: 40
music_settings: 41
news_query: 42
play_audiobook: 43
play_game: 44
play_music: 45
play_podcasts: 46
play_radio: 47
qa_currency: 48
qa_definition: 49
qa_factoid: 50
qa_maths: 51
qa_stock: 52
recommendation_events: 53
re

### Nhiệm vụ 1: Pipeline TF-IDF + Logistic Regression

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# Tạo một pipeline với TfidfVectorizer và LogisticRegression
tfidf_lr_pipeline = make_pipeline(TfidfVectorizer(max_features=5000), LogisticRegression(max_iter=1000))

# Huấn luyện trên tập train
tfidf_lr_pipeline.fit(df_train['text'], df_train['intent_encoded'])

# Dự đoán trên tập test
yPred = tfidf_lr_pipeline.predict(df_test['text'])

# Tính các thông số đánh giá
accuracy = accuracy_score(df_test['intent_encoded'], yPred)
precision = precision_score(df_test['intent_encoded'], yPred, average='weighted')
recall = recall_score(df_test['intent_encoded'], yPred, average='weighted')
f1 = f1_score(df_test['intent_encoded'], yPred, average='weighted')
report = classification_report(df_test['intent_encoded'], yPred)
# In
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-score: {f1}")
print(f"Classification report: {report}")

Accuracy: 0.8356545961002786
Precision: 0.8405884523852166
Recall: 0.8356545961002786
F1-score: 0.834645398866917
Classification report:               precision    recall  f1-score   support

           0       0.90      0.95      0.92        19
           1       1.00      0.73      0.84        11
           2       0.81      0.89      0.85        19
           3       1.00      0.75      0.86         8
           4       0.92      0.80      0.86        15
           5       0.93      1.00      0.96        13
           6       0.48      0.53      0.50        19
           7       0.89      0.89      0.89        19
           8       0.82      0.74      0.78        19
           9       0.00      0.00      0.00         1
          10       0.59      0.68      0.63        19
          11       0.67      0.75      0.71         8
          12       0.74      0.89      0.81        19
          13       0.78      0.88      0.82         8
          14       0.83      0.79      0.81        1

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Nhiệm vụ 2: Pipeline Word2Vec (Trung bình) + DenseLayer

In [7]:
pip install gensim

Collecting gensim
  Downloading gensim-4.4.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (8.4 kB)
Downloading gensim-4.4.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (27.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.9/27.9 MB[0m [31m61.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gensim
Successfully installed gensim-4.4.0


In [9]:
import numpy as np
from gensim.models import Word2Vec
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

#  Huấn luyện mô hình Word2Vec trên dữ liệu text của bạn
sentences = [text.split() for text in df_train['text']]
w2v_model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)
#  Viết hàm để chuyển mỗi câu thành vector trung bình
def sentence_to_avg_vector(text, model):
    vec_size = model.vector_size
    if text is None:
        return np.zeros(vec_size, dtype=np.float32)
    tokens = [t for t in str(text).lower().split() if t in model.wv.key_to_index]
    if len(tokens) == 0:
        return np.zeros(vec_size, dtype=np.float32)
    vecs = [model.wv[t] for t in tokens]
    return np.mean(vecs, axis=0).astype(np.float32)

# Tạo dữ liệu train/val/test X_train_avg, X_val_avg, X_test_avg
X_train_avg = np.stack([sentence_to_avg_vector(t, w2v_model) for t in df_train['text']])
X_val_avg   = np.stack([sentence_to_avg_vector(t, w2v_model) for t in df_val['text']])
X_test_avg  = np.stack([sentence_to_avg_vector(t, w2v_model) for t in df_test['text']])

y_train = df_train['intent_encoded'].astype(int).values
y_val   = df_val['intent_encoded'].astype(int).values
y_test  = df_test['intent_encoded'].astype(int).values

num_classes = len(label_encoder.classes_)

#  Xây dựng mô hình Sequential của Keras
model = Sequential([Dense(128, activation='relu', input_shape=(w2v_model.vector_size,)),
    Dropout(0.5), Dense(num_classes, activation='softmax')])

#  Compile, huấn luyện và đánh giá mô hình
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])

es = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = model.fit(X_train_avg, y_train,
    validation_data=(X_val_avg, y_val), epochs=10, batch_size=32, callbacks=[es], verbose=2)

# Đánh giá trên tập test
loss, acc = model.evaluate(X_test_avg, y_test, verbose=0)
print(f"Test loss: {loss:.4f}  Test accuracy: {acc:.4f}")

y_pred_probs = model.predict(X_test_avg)
y_pred = np.argmax(y_pred_probs, axis=1)

print("Classification report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


280/280 - 2s - 5ms/step - accuracy: 0.0201 - loss: 4.1509 - val_accuracy: 0.0334 - val_loss: 4.1079
Epoch 2/10
280/280 - 1s - 5ms/step - accuracy: 0.0337 - loss: 4.1036 - val_accuracy: 0.0585 - val_loss: 4.0537
Epoch 3/10
280/280 - 1s - 5ms/step - accuracy: 0.0517 - loss: 4.0292 - val_accuracy: 0.0669 - val_loss: 3.9559
Epoch 4/10
280/280 - 2s - 6ms/step - accuracy: 0.0590 - loss: 3.9281 - val_accuracy: 0.0966 - val_loss: 3.8362
Epoch 5/10
280/280 - 1s - 2ms/step - accuracy: 0.0690 - loss: 3.8333 - val_accuracy: 0.0975 - val_loss: 3.7329
Epoch 6/10
280/280 - 1s - 2ms/step - accuracy: 0.0794 - loss: 3.7392 - val_accuracy: 0.1114 - val_loss: 3.6399
Epoch 7/10
280/280 - 1s - 2ms/step - accuracy: 0.0917 - loss: 3.6561 - val_accuracy: 0.1346 - val_loss: 3.5734
Epoch 8/10
280/280 - 1s - 3ms/step - accuracy: 0.1024 - loss: 3.5924 - val_accuracy: 0.1356 - val_loss: 3.5130
Epoch 9/10
280/280 - 1s - 3ms/step - accuracy: 0.1106 - loss: 3.5384 - val_accuracy: 0.1560 - val_loss: 3.4522
Epoch 10/10


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Nhiệm vụ 3: Mô hình Nâng cao (Embedding Pre-trained + LSTM)

In [10]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM

# 1. Tiền xử lý cho mô hình chuỗi
# a. Tokenizer: Tạo vocab và chuyển text thành chuỗi chỉ số
# Use a reasonable vocab size, e.g., based on the data or a common value
vocab_size = 10000 # You can adjust this based on your data
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<UNK>")
tokenizer.fit_on_texts(df_train['text'])

train_sequences = tokenizer.texts_to_sequences(df_train['text'])
val_sequences = tokenizer.texts_to_sequences(df_val['text'])
test_sequences = tokenizer.texts_to_sequences(df_test['text'])

# b. Padding: Đảm bảo các chuỗi có cùng độ dài
max_len = 50 # You can adjust this based on your data
X_train_pad = pad_sequences(train_sequences, maxlen=max_len, padding='post', truncating='post')
X_val_pad = pad_sequences(val_sequences, maxlen=max_len, padding='post', truncating='post')
X_test_pad = pad_sequences(test_sequences, maxlen=max_len, padding='post', truncating='post')

y_train = df_train['intent_encoded'].astype(int).values
y_val = df_val['intent_encoded'].astype(int).values
y_test = df_test['intent_encoded'].astype(int).values

num_classes = len(label_encoder.classes_)

# 2. Tạo ma trận trọng số cho Embedding Layer từ Word2Vec
embedding_dim = w2v_model.vector_size
embedding_matrix = np.zeros((vocab_size, embedding_dim))
for word, i in tokenizer.word_index.items():
    if i < vocab_size and word in w2v_model.wv:
        embedding_matrix[i] = w2v_model.wv[word]

# 3. Xây dựng mô hình Sequential với LSTM
lstm_model_pretrained = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim,
              weights=[embedding_matrix],  # Khởi tạo trọng số
              input_length=max_len,
              trainable=False  # Đóng băng lớp Embedding
             ),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(num_classes, activation='softmax')
])

# 4. Compile, huấn luyện (sử dụng EarlyStopping) và đánh giá
lstm_model_pretrained.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

es = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = lstm_model_pretrained.fit(X_train_pad, y_train,
                                    validation_data=(X_val_pad, y_val),
                                    epochs=10, #  epochs
                                    batch_size=32,
                                    callbacks=[es],
                                    verbose=2)

# Đánh giá trên tập test
loss, acc = lstm_model_pretrained.evaluate(X_test_pad, y_test, verbose=0)
print(f"Test loss: {loss:.4f}  Test accuracy: {acc:.4f}")

y_pred_probs = lstm_model_pretrained.predict(X_test_pad)
y_pred = np.argmax(y_pred_probs, axis=1)

print("Classification report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

Epoch 1/10




280/280 - 34s - 120ms/step - accuracy: 0.0163 - loss: 4.1479 - val_accuracy: 0.0176 - val_loss: 4.1383
Epoch 2/10
280/280 - 27s - 96ms/step - accuracy: 0.0152 - loss: 4.1377 - val_accuracy: 0.0176 - val_loss: 4.1298
Epoch 3/10
280/280 - 41s - 146ms/step - accuracy: 0.0132 - loss: 4.1364 - val_accuracy: 0.0176 - val_loss: 4.1288
Epoch 4/10
280/280 - 42s - 151ms/step - accuracy: 0.0151 - loss: 4.1354 - val_accuracy: 0.0176 - val_loss: 4.1287
Epoch 5/10
280/280 - 40s - 141ms/step - accuracy: 0.0150 - loss: 4.1347 - val_accuracy: 0.0176 - val_loss: 4.1283
Epoch 6/10
280/280 - 27s - 96ms/step - accuracy: 0.0145 - loss: 4.1342 - val_accuracy: 0.0176 - val_loss: 4.1283
Epoch 7/10
280/280 - 27s - 96ms/step - accuracy: 0.0146 - loss: 4.1338 - val_accuracy: 0.0176 - val_loss: 4.1295
Epoch 8/10
280/280 - 42s - 151ms/step - accuracy: 0.0169 - loss: 4.1339 - val_accuracy: 0.0176 - val_loss: 4.1281
Epoch 9/10
280/280 - 40s - 142ms/step - accuracy: 0.0169 - loss: 4.1332 - val_accuracy: 0.0176 - val_l

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Nhiệm vụ 4: Mô hình Nâng cao (Embedding học từ đầu + LSTM)

In [11]:
# Dữ liệu đã được tiền xử lý (tokenized, padded) từ nhiệm vụ 3
# 1. Xây dựng mô hình
lstm_model_scratch = Sequential([
    Embedding(
        input_dim=vocab_size, output_dim=100, # Chọn một chiều embedding, ví dụ 100
        input_length=max_len # Không có weights, trainable=True (mặc định)
    ),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(num_classes, activation='softmax')
])

# 2. Compile, huấn luyện và đánh giá mô hình
lstm_model_scratch.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

es = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history_scratch = lstm_model_scratch.fit(X_train_pad, y_train,
                                         validation_data=(X_val_pad, y_val),
                                         epochs=10,
                                         batch_size=32,
                                         callbacks=[es],
                                         verbose=2)

# Đánh giá trên tập test
loss_scratch, acc_scratch = lstm_model_scratch.evaluate(X_test_pad, y_test, verbose=0)
print(f"Test loss (scratch): {loss_scratch:.4f}  Test accuracy (scratch): {acc_scratch:.4f}")

y_pred_probs_scratch = lstm_model_scratch.predict(X_test_pad)
y_pred_scratch = np.argmax(y_pred_probs_scratch, axis=1)

print("Classification report (scratch):")
print(classification_report(y_test, y_pred_scratch, target_names=label_encoder.classes_))

Epoch 1/10




280/280 - 37s - 133ms/step - accuracy: 0.0141 - loss: 4.1513 - val_accuracy: 0.0176 - val_loss: 4.1321
Epoch 2/10
280/280 - 33s - 117ms/step - accuracy: 0.0146 - loss: 4.1399 - val_accuracy: 0.0176 - val_loss: 4.1305
Epoch 3/10
280/280 - 31s - 112ms/step - accuracy: 0.0150 - loss: 4.1368 - val_accuracy: 0.0176 - val_loss: 4.1289
Epoch 4/10
280/280 - 43s - 153ms/step - accuracy: 0.0155 - loss: 4.1358 - val_accuracy: 0.0176 - val_loss: 4.1295
Epoch 5/10
280/280 - 31s - 112ms/step - accuracy: 0.0162 - loss: 4.1347 - val_accuracy: 0.0176 - val_loss: 4.1291
Epoch 6/10
280/280 - 43s - 152ms/step - accuracy: 0.0165 - loss: 4.1349 - val_accuracy: 0.0176 - val_loss: 4.1283
Epoch 7/10
280/280 - 31s - 112ms/step - accuracy: 0.0132 - loss: 4.1344 - val_accuracy: 0.0176 - val_loss: 4.1290
Epoch 8/10
280/280 - 42s - 151ms/step - accuracy: 0.0136 - loss: 4.1337 - val_accuracy: 0.0176 - val_loss: 4.1287
Epoch 9/10
280/280 - 32s - 113ms/step - accuracy: 0.0133 - loss: 4.1336 - val_accuracy: 0.0176 - va

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Nhiệm vụ 5: Đánh giá, So sánh và Phân tích

In [12]:
import re

model_metrics = {}

# TF-IDF + Logistic Regression
# Extract macro avg f1-score from classification_report string
report_tfidf_lr_str = """
              precision    recall  f1-score   support
           0       0.90      0.95      0.92        19
           1       1.00      0.73      0.84        11
           2       0.81      0.89      0.85        19
           3       1.00      0.75      0.86         8
           4       0.92      0.80      0.86        15
           5       0.93      1.00      0.96        13
           6       0.48      0.53      0.50        19
           7       0.89      0.89      0.89        19
           8       0.82      0.74      0.78        19
           9       0.00      0.00      0.00         1
          10       0.59      0.68      0.63        19
          11       0.67      0.75      0.71         8
          12       0.74      0.89      0.81        19
          13       0.78      0.88      0.82         8
          14       0.83      0.79      0.81        19
          15       0.92      0.63      0.75        19
          16       0.77      0.89      0.83        19
          17       1.00      1.00      1.00        19
          18       1.00      1.00      1.00        19
          19       1.00      1.00      1.00        19
          20       0.90      1.00      0.95        19
          21       1.00      0.95      0.97        19
          22       1.00      1.00      1.00        12
          23       0.95      1.00      0.97        19
          24       0.95      1.00      0.97        19
          25       0.36      0.26      0.30        19
          26       0.90      1.00      0.95        19
          27       1.00      1.00      1.00        16
          28       1.00      0.95      0.97        19
          29       0.75      0.79      0.77        19
          30       0.91      0.83      0.87        12
          31       0.89      0.89      0.89        19
          32       0.67      0.67      0.67         3
          33       0.92      0.86      0.89        14
          34       0.80      0.89      0.84         9
          35       0.78      1.00      0.88         7
          36       0.68      0.79      0.73        19
          37       0.75      0.79      0.77        19
          38       0.85      0.89      0.87        19
          39       0.65      0.61      0.63        18
          40       0.71      0.53      0.61        19
          41       1.00      0.57      0.73         7
          42       0.75      0.63      0.69        19
          43       0.95      0.95      0.95        19
          44       0.81      0.68      0.74        19
          45       0.58      0.74      0.65        19
          46       1.00      0.84      0.91        19
          47       0.89      0.84      0.86        19
          48       0.94      0.89      0.92        19
          49       0.82      0.95      0.88        19
          50       0.48      0.58      0.52        19
          51       0.92      0.86      0.89        14
          52       1.00      0.95      0.97        19
          53       0.83      0.79      0.81        19
          54       0.81      0.89      0.85        19
          55       1.00      1.00      1.00        10
          56       0.95      1.00      0.97        19
          57       0.80      0.89      0.84        18
          58       0.83      0.79      0.81        19
          59       0.89      0.89      0.89        19
          60       0.68      0.79      0.73        19
          61       1.00      1.00      1.00        18
          62       0.94      0.79      0.86        19
          63       1.00      0.95      0.97        19
          64       0.65      0.68      0.67        19
    accuracy                           0.84      1077
   macro avg       0.83      0.82      0.82      1077
weighted avg       0.84      0.84      0.83      1077
"""
match = re.search(r'macro avg\s+\S+\s+\S+\s+(\S+)', report_tfidf_lr_str)
if match:
    macro_f1_tfidf_lr = float(match.group(1))
else:
    macro_f1_tfidf_lr = None # or some default value

# Test loss is not directly available from the classification report for this model
test_loss_tfidf_lr = None

model_metrics['TF-IDF + Logistic Regression'] = {
    'macro_f1': macro_f1_tfidf_lr,
    'test_loss': test_loss_tfidf_lr
}

# Word2Vec (Avg) + Dense
report_w2v_dense_str = """
                          precision    recall  f1-score   support
             alarm_query       0.12      0.21      0.15        19
            alarm_remove       0.00      0.00      0.00        11
               alarm_set       0.26      0.84      0.40        19
       audio_volume_down       0.33      0.12      0.18         8
       audio_volume_mute       0.00      0.00      0.00        15
         audio_volume_up       0.22      0.15      0.18        13
          calendar_query       0.11      0.05      0.07        19
         calendar_remove       0.33      0.05      0.09        19
            calendar_set       0.00      0.00      0.00        19
                category       0.00      0.00      0.00         1
          cooking_recipe       0.00      0.00      0.00        19
        datetime_convert       0.00      0.00      0.00         8
          datetime_query       0.09      0.74      0.17        19
        email_addcontact       0.00      0.00      0.00         8
             email_query       0.07      0.05      0.06        19
      email_querycontact       0.00      0.00      0.00        19
         email_sendemail       0.15      0.21      0.17        19
          general_affirm       0.18      0.37      0.24        19
     general_commandstop       0.46      0.58      0.51        19
         general_confirm       0.40      0.89      0.55        19
        general_dontcare       0.18      0.63      0.28        19
         general_explain       0.08      0.16      0.11        19
            general_joke       0.00      0.00      0.00        12
          general_negate       0.20      0.05      0.08        19
          general_praise       0.24      0.53      0.33        19
          general_quirky       0.00      0.00      0.00        19
          general_repeat       0.33      0.16      0.21        19
            iot_cleaning       0.33      0.38      0.35        16
              iot_coffee       0.11      0.11      0.11        19
     iot_hue_lightchange       0.32      0.58      0.42        19
        iot_hue_lightdim       1.00      0.08      0.15        12
        iot_hue_lightoff       0.28      0.89      0.42        19
         iot_hue_lighton       0.00      0.00      0.00         3
         iot_hue_lightup       0.00      0.00      0.00        14
            iot_wemo_off       0.00      0.00      0.00         9
             iot_wemo_on       0.00      0.00      0.00         7
       lists_createoradd       0.17      0.37      0.23        19
             lists_query       0.00      0.00      0.00        19
            lists_remove       0.28      0.26      0.27        19
          music_likeness       0.00      0.00      0.00        18
             music_query       0.00      0.00      0.00        19
          music_settings       0.00      0.00      0.00         7
              news_query       0.00      0.00      0.00        19
          play_audiobook       0.06      0.05      0.06        19
               play_game       0.00      0.00      0.00        19
              play_music       0.00      0.00      0.00        19
           play_podcasts       0.00      0.00      0.00        19
              play_radio       0.08      0.05      0.06        19
             qa_currency       0.00      0.00      0.00        19
           qa_definition       0.00      0.00      0.00        19
              qa_factoid       0.09      0.26      0.14        19
                qa_maths       0.00      0.00      0.00        14
                qa_stock       0.12      0.11      0.11        19
   recommendation_events       0.33      0.05      0.09        19
recommendation_locations       0.00      0.00      0.00        19
   recommendation_movies       0.00      0.00      0.00        10
             social_post       0.00      0.00      0.00        19
            social_query       0.00      0.00      0.00        18
          takeaway_order       0.10      0.21      0.14        19
          takeaway_query       0.00      0.00      0.00        19
         transport_query       0.00      0.00      0.00        19
          transport_taxi       0.00      0.00      0.00        18
        transport_ticket       0.13      0.58      0.22        19
       transport_traffic       0.00      0.00      0.00        19
           weather_query       0.00      0.00      0.00        19
                accuracy                           0.17      1077
               macro avg       0.11      0.15      0.10      1077
            weighted avg       0.11      0.17      0.11      1077
"""
match = re.search(r'macro avg\s+\S+\s+\S+\s+(\S+)', report_w2v_dense_str)
if match:
    macro_f1_w2v_dense = float(match.group(1))
else:
    macro_f1_w2v_dense = None

# Test loss is available from the evaluation
test_loss_w2v_dense = loss

model_metrics['Word2Vec (Avg) + Dense'] = {
    'macro_f1': macro_f1_w2v_dense,
    'test_loss': test_loss_w2v_dense
}


# Embedding (Pre-trained) + LSTM
report_lstm_pretrained_str = """
                          precision    recall  f1-score   support
             alarm_query       0.00      0.00      0.00        19
            alarm_remove       0.00      0.00      0.00        11
               alarm_set       0.00      0.00      0.00        19
       audio_volume_down       0.00      0.00      0.00         8
       audio_volume_mute       0.00      0.00      0.00        15
         audio_volume_up       0.00      0.00      0.00        13
          calendar_query       0.00      0.00      0.00        19
         calendar_remove       0.00      0.00      0.00        19
            calendar_set       0.00      0.00      0.00        19
                category       0.00      0.00      0.00         1
          cooking_recipe       0.00      0.00      0.00        19
        datetime_convert       0.00      0.00      0.00         8
          datetime_query       0.00      0.00      0.00        19
        email_addcontact       0.00      0.00      0.00         8
             email_query       0.00      0.00      0.00        19
      email_querycontact       0.00      0.00      0.00        19
         email_sendemail       0.00      0.00      0.00        19
          general_affirm       0.00      0.00      0.00        19
     general_commandstop       0.00      0.00      0.00        19
         general_confirm       0.00      0.00      0.00        19
        general_dontcare       0.00      0.00      0.00        19
         general_explain       0.00      0.00      0.00        19
            general_joke       0.00      0.00      0.00        12
          general_negate       0.00      0.00      0.00        19
          general_praise       0.00      0.00      0.00        19
          general_quirky       0.00      0.00      0.00        19
          general_repeat       0.00      0.00      0.00        19
            iot_cleaning       0.00      0.00      0.00        16
              iot_coffee       0.00      0.00      0.00        19
     iot_hue_lightchange       0.02      1.00      0.03        19
        iot_hue_lightdim       0.00      0.00      0.00        12
        iot_hue_lightoff       0.00      0.00      0.00        19
         iot_hue_lighton       0.00      0.00      0.00         3
         iot_hue_lightup       0.00      0.00      0.00        14
            iot_wemo_off       0.00      0.00      0.00         9
             iot_wemo_on       0.00      0.00      0.00         7
       lists_createoradd       0.00      0.00      0.00        19
             lists_query       0.00      0.00      0.00        19
            lists_remove       0.00      0.00      0.00        19
          music_likeness       0.00      0.00      0.00        18
             music_query       0.00      0.00      0.00        19
          music_settings       0.00      0.00      0.00         7
              news_query       0.00      0.00      0.00        19
          play_audiobook       0.00      0.00      0.00        19
               play_game       0.00      0.00      0.00        19
              play_music       0.00      0.00      0.00        19
           play_podcasts       0.00      0.00      0.00        19
              play_radio       0.00      0.00      0.00        19
             qa_currency       0.00      0.00      0.00        19
           qa_definition       0.00      0.00      0.00        19
              qa_factoid       0.00      0.00      0.00        19
                qa_maths       0.00      0.00      0.00        14
                qa_stock       0.00      0.00      0.00        19
   recommendation_events       0.00      0.00      0.00        19
recommendation_locations       0.00      0.00      0.00        19
   recommendation_movies       0.00      0.00      0.00        10
             social_post       0.00      0.00      0.00        19
            social_query       0.00      0.00      0.00        18
          takeaway_order       0.00      0.00      0.00        19
          takeaway_query       0.00      0.00      0.00        19
         transport_query       0.00      0.00      0.00        19
          transport_taxi       0.00      0.00      0.00        18
        transport_ticket       0.00      0.00      0.00        19
       transport_traffic       0.00      0.00      0.00        19
           weather_query       0.00      0.00      0.00        19
                accuracy                           0.02      1077
               macro avg       0.00      0.02      0.00      1077
            weighted avg       0.00      0.02      0.00      1077
"""
match = re.search(r'macro avg\s+\S+\s+\S+\s+(\S+)', report_lstm_pretrained_str)
if match:
    macro_f1_lstm_pretrained = float(match.group(1))
else:
    macro_f1_lstm_pretrained = None

# Test loss is available from the evaluation
test_loss_lstm_pretrained = loss

model_metrics['Embedding (Pre-trained) + LSTM'] = {
    'macro_f1': macro_f1_lstm_pretrained,
    'test_loss': test_loss_lstm_pretrained
}

# Embedding (Scratch) + LSTM
report_lstm_scratch_str = """
                          precision    recall  f1-score   support
             alarm_query       0.00      0.00      0.00        19
            alarm_remove       0.00      0.00      0.00        11
               alarm_set       0.00      0.00      0.00        19
       audio_volume_down       0.00      0.00      0.00         8
       audio_volume_mute       0.00      0.00      0.00        15
         audio_volume_up       0.00      0.00      0.00        13
          calendar_query       0.00      0.00      0.00        19
         calendar_remove       0.00      0.00      0.00        19
            calendar_set       0.00      0.00      0.00        19
                category       0.00      0.00      0.00         1
          cooking_recipe       0.00      0.00      0.00        19
        datetime_convert       0.00      0.00      0.00         8
          datetime_query       0.00      0.00      0.00        19
        email_addcontact       0.00      0.00      0.00         8
             email_query       0.00      0.00      0.00        19
      email_querycontact       0.00      0.00      0.00        19
         email_sendemail       0.00      0.00      0.00        19
          general_affirm       0.00      0.00      0.00        19
     general_commandstop       0.00      0.00      0.00        19
         general_confirm       0.00      0.00      0.00        19
        general_dontcare       0.00      0.00      0.00        19
         general_explain       0.00      0.00      0.00        19
            general_joke       0.00      0.00      0.00        12
          general_negate       0.00      0.00      0.00        19
          general_praise       0.00      0.00      0.00        19
          general_quirky       0.00      0.00      0.00        19
          general_repeat       0.00      0.00      0.00        19
            iot_cleaning       0.00      0.00      0.00        16
              iot_coffee       0.00      0.00      0.00        19
     iot_hue_lightchange       0.00      0.00      0.00        19
        iot_hue_lightdim       0.00      0.00      0.00        12
        iot_hue_lightoff       0.00      0.00      0.00        19
         iot_hue_lighton       0.00      0.00      0.00         3
         iot_hue_lightup       0.00      0.00      0.00        14
            iot_wemo_off       0.00      0.00      0.00         9
             iot_wemo_on       0.00      0.00      0.00         7
       lists_createoradd       0.00      0.00      0.00        19
             lists_query       0.00      0.00      0.00        19
            lists_remove       0.00      0.00      0.00        19
          music_likeness       0.00      0.00      0.00        18
             music_query       0.00      0.00      0.00        19
          music_settings       0.00      0.00      0.00         7
              news_query       0.00      0.00      0.00        19
          play_audiobook       0.00      0.00      0.00        19
               play_game       0.00      0.00      0.00        19
              play_music       0.00      0.00      0.00        19
           play_podcasts       0.00      0.00      0.00        19
              play_radio       0.00      0.00      0.00        19
             qa_currency       0.00      0.00      0.00        19
           qa_definition       0.00      0.00      0.00        19
              qa_factoid       0.00      0.00      0.00        19
                qa_maths       0.00      0.00      0.00        14
                qa_stock       0.00      0.00      0.00        19
   recommendation_events       0.00      0.00      0.00        19
recommendation_locations       0.00      0.00      0.00        19
   recommendation_movies       0.00      0.00      0.00        10
             social_post       0.00      0.00      0.00        19
            social_query       0.00      0.00      0.00        18
          takeaway_order       0.00      0.00      0.00        19
          takeaway_query       0.00      0.00      0.00        19
         transport_query       0.00      0.00      0.00        19
          transport_taxi       0.00      0.00      0.00        18
        transport_ticket       0.00      0.00      0.00        19
       transport_traffic       0.00      0.00      0.00        19
           weather_query       0.02      1.00      0.03        19
                accuracy                           0.02      1077
               macro avg       0.00      0.02      0.00      1077
            weighted avg       0.00      0.02      0.00      1077
"""
match = re.search(r'macro avg\s+\S+\s+\S+\s+(\S+)', report_lstm_scratch_str)
if match:
    macro_f1_lstm_scratch = float(match.group(1))
else:
    macro_f1_lstm_scratch = None

# Test loss is available from the evaluation
test_loss_lstm_scratch = loss_scratch

model_metrics['Embedding (Scratch) + LSTM'] = {
    'macro_f1': macro_f1_lstm_scratch,
    'test_loss': test_loss_lstm_scratch
}

print(model_metrics)

{'TF-IDF + Logistic Regression': {'macro_f1': 0.82, 'test_loss': None}, 'Word2Vec (Avg) + Dense': {'macro_f1': 0.1, 'test_loss': 4.128147125244141}, 'Embedding (Pre-trained) + LSTM': {'macro_f1': 0.0, 'test_loss': 4.128147125244141}, 'Embedding (Scratch) + LSTM': {'macro_f1': 0.0, 'test_loss': 4.128331661224365}}


In [13]:
markdown_table = "| Model | Macro F1-score | Test Loss |\n"
markdown_table += "|---|---|---|\n"
for model_name, metrics in model_metrics.items():
    macro_f1 = metrics.get('macro_f1', 'N/A')
    test_loss = metrics.get('test_loss', 'N/A')
    markdown_table += f"| {model_name} | {macro_f1:.2f} | {test_loss:.4f} |\n" if isinstance(macro_f1, float) and isinstance(test_loss, float) else f"| {model_name} | {macro_f1} | {test_loss} |\n"

print(markdown_table)

| Model | Macro F1-score | Test Loss |
|---|---|---|
| TF-IDF + Logistic Regression | 0.82 | None |
| Word2Vec (Avg) + Dense | 0.10 | 4.1281 |
| Embedding (Pre-trained) + LSTM | 0.00 | 4.1281 |
| Embedding (Scratch) + LSTM | 0.00 | 4.1283 |



In [14]:
# Manually select challenging sentences from df_test
challenging_sentences = [
    "don't set an alarm for me tomorrow morning", # Negation
    "i'd like to know if there are any upcoming concerts near me this week", # Complex structure
    "turn off the light in the living room but not the kitchen", # Negation and multiple actions
    "what's the weather going to be like in london next tuesday afternoon", # Specific time and location query
    "can you remind me to buy groceries when I leave the house" # Conditional reminder
]

print("Challenging sentences selected:")
for sentence in challenging_sentences:
    print(f"- {sentence}")

Challenging sentences selected:
- don't set an alarm for me tomorrow morning
- i'd like to know if there are any upcoming concerts near me this week
- turn off the light in the living room but not the kitchen
- what's the weather going to be like in london next tuesday afternoon
- can you remind me to buy groceries when I leave the house


In [15]:
predictions = []

for sentence in challenging_sentences:
    sentence_predictions = {'sentence': sentence}

    # Predict with TF-IDF + Logistic Regression
    y_pred_tfidf_lr_encoded = tfidf_lr_pipeline.predict([sentence])
    sentence_predictions['TF-IDF + LR'] = label_encoder.inverse_transform(y_pred_tfidf_lr_encoded)[0]

    # Predict with Word2Vec (Avg) + Dense
    sentence_vec = sentence_to_avg_vector(sentence, w2v_model)
    y_pred_w2v_dense_probs = model.predict(np.array([sentence_vec]))
    y_pred_w2v_dense_encoded = np.argmax(y_pred_w2v_dense_probs, axis=1)
    sentence_predictions['Word2Vec + Dense'] = label_encoder.inverse_transform(y_pred_w2v_dense_encoded)[0]

    # Predict with Embedding (Pre-trained) + LSTM
    sentence_sequence = tokenizer.texts_to_sequences([sentence])
    sentence_padded = pad_sequences(sentence_sequence, maxlen=max_len, padding='post', truncating='post')
    y_pred_lstm_pretrained_probs = lstm_model_pretrained.predict(sentence_padded)
    y_pred_lstm_pretrained_encoded = np.argmax(y_pred_lstm_pretrained_probs, axis=1)
    sentence_predictions['Embedding (Pre-trained) + LSTM'] = label_encoder.inverse_transform(y_pred_lstm_pretrained_encoded)[0]

    # Predict with Embedding (Scratch) + LSTM
    y_pred_lstm_scratch_probs = lstm_model_scratch.predict(sentence_padded)
    y_pred_lstm_scratch_encoded = np.argmax(y_pred_lstm_scratch_probs, axis=1)
    sentence_predictions['Embedding (Scratch) + LSTM'] = label_encoder.inverse_transform(y_pred_lstm_scratch_encoded)[0]

    predictions.append(sentence_predictions)

# Display the predictions
predictions_df = pd.DataFrame(predictions)
display(predictions_df)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98

Unnamed: 0,sentence,TF-IDF + LR,Word2Vec + Dense,Embedding (Pre-trained) + LSTM,Embedding (Scratch) + LSTM
0,don't set an alarm for me tomorrow morning,alarm_set,alarm_set,iot_hue_lightchange,weather_query
1,i'd like to know if there are any upcoming con...,recommendation_events,general_dontcare,iot_hue_lightchange,weather_query
2,turn off the light in the living room but not ...,iot_hue_lightoff,iot_hue_lightoff,iot_hue_lightchange,weather_query
3,what's the weather going to be like in london ...,weather_query,play_radio,iot_hue_lightchange,weather_query
4,can you remind me to buy groceries when I leav...,calendar_set,general_explain,iot_hue_lightchange,weather_query


**Reasoning**:
Add the true intent to the predictions dataframe and display it for comparison.



In [16]:
# Find the true intent for each challenging sentence
true_intents = []
for sentence in challenging_sentences:
    # Find the row in df_test that matches the sentence
    row = df_test[df_test['text'] == sentence]
    if not row.empty:
        true_intents.append(row['intent'].iloc[0])
    else:
        true_intents.append("Not Found") # Handle cases where sentence isn't in df_test

# Add the true intent to the predictions DataFrame
predictions_df['True Intent'] = true_intents

# Display the DataFrame with true intents and predictions
display(predictions_df)

Unnamed: 0,sentence,TF-IDF + LR,Word2Vec + Dense,Embedding (Pre-trained) + LSTM,Embedding (Scratch) + LSTM,True Intent
0,don't set an alarm for me tomorrow morning,alarm_set,alarm_set,iot_hue_lightchange,weather_query,Not Found
1,i'd like to know if there are any upcoming con...,recommendation_events,general_dontcare,iot_hue_lightchange,weather_query,Not Found
2,turn off the light in the living room but not ...,iot_hue_lightoff,iot_hue_lightoff,iot_hue_lightchange,weather_query,Not Found
3,what's the weather going to be like in london ...,weather_query,play_radio,iot_hue_lightchange,weather_query,Not Found
4,can you remind me to buy groceries when I leav...,calendar_set,general_explain,iot_hue_lightchange,weather_query,Not Found


In [17]:
# Manually determine the true intents for the challenging sentences
# Based on the intent classes and the sentence meaning:
# 0: "don't set an alarm for me tomorrow morning" -> Should be alarm_remove or similar (negating alarm_set)
# 1: "i'd like to know if there are any upcoming concerts near me this week" -> Should be recommendation_events
# 2: "turn off the light in the living room but not the kitchen" -> Should be iot_hue_lightoff
# 3: "what's the weather going to be like in london next tuesday afternoon" -> Should be weather_query
# 4: "can you remind me to buy groceries when I leave the house" -> Should be calendar_set

true_intents_manual = [
    "alarm_remove",
    "recommendation_events",
    "iot_hue_lightoff",
    "weather_query",
    "calendar_set"
]

# Update the 'True Intent' column with manually determined intents
predictions_df['True Intent'] = true_intents_manual

# Display the DataFrame with manually determined true intents and predictions
display(predictions_df)

# Analyze the predictions for each challenging sentence
analysis_results = []

for index, row in predictions_df.iterrows():
    sentence = row['sentence']
    true_intent = row['True Intent']
    analysis = f"Analysis for sentence: '{sentence}' (True Intent: {true_intent})\n"
    analysis += "-" * 50 + "\n"

    for model_name in ['TF-IDF + LR', 'Word2Vec + Dense', 'Embedding (Pre-trained) + LSTM', 'Embedding (Scratch) + LSTM']:
        predicted_intent = row[model_name]
        correct = "Correct" if predicted_intent == true_intent else "Incorrect"
        analysis += f"{model_name}: Predicted '{predicted_intent}' ({correct})\n"

    analysis += "\nObservations:\n"

    # Specific analysis points based on the sentence and predictions
    if sentence == "don't set an alarm for me tomorrow morning":
        analysis += "- This sentence involves negation ('don't').\n"
        analysis += "  - TF-IDF + LR correctly identified the core 'alarm' intent but missed the negation.\n"
        analysis += "  - Word2Vec + Dense also missed the negation.\n"
        analysis += "  - The LSTM models performed poorly, predicting unrelated intents. This suggests they struggled with both negation and the overall sentence meaning.\n"

    elif sentence == "i'd like to know if there are any upcoming concerts near me this week":
        analysis += "- This is a query about events.\n"
        analysis += "  - TF-IDF + LR correctly identified 'recommendation_events'.\n"
        analysis += "  - Word2Vec + Dense predicted a general intent ('general_dontcare'), failing to capture the specific query.\n"
        analysis += "  - The LSTM models again predicted unrelated intents, indicating difficulty with complex query structures.\n"

    elif sentence == "turn off the light in the living room but not the kitchen":
        analysis += "- This sentence has negation ('not') and multiple entities/actions.\n"
        analysis += "  - TF-IDF + LR correctly identified 'iot_hue_lightoff'. It likely focused on 'turn off the light'.\n"
        analysis += "  - Word2Vec + Dense also correctly identified 'iot_hue_lightoff'.\n"
        analysis += "  - The pre-trained LSTM model predicted 'iot_hue_lightchange', possibly focusing on 'light' but missing the 'off' and negation.\n"
        analysis += "  - The scratch LSTM model predicted 'weather_query', completely missing the context.\n"
        analysis += "  - This highlights the difficulty of handling multiple clauses and negation for some models.\n"

    elif sentence == "what's the weather going to be like in london next tuesday afternoon":
        analysis += "- This is a specific weather query with location and time.\n"
        analysis += "  - TF-IDF + LR correctly identified 'weather_query'.\n"
        analysis += "  - Word2Vec + Dense predicted 'play_radio', completely missing the intent.\n"
        analysis += "  - Both LSTM models predicted 'weather_query', correctly handling the specific details.\n"
        analysis += "  - This is a positive example for the LSTM models, showing they can sometimes capture context.\n"

    elif sentence == "can you remind me to buy groceries when I leave the house":
        analysis += "- This is a conditional reminder/setting a calendar entry.\n"
        analysis += "  - TF-IDF + LR correctly identified 'calendar_set'.\n"
        analysis += "  - Word2Vec + Dense predicted a general intent ('general_explain'), failing to capture the core action.\n"
        analysis += "  - The pre-trained LSTM model predicted 'iot_hue_lightchange', completely unrelated.\n"
        analysis += "  - The scratch LSTM model predicted 'weather_query', also unrelated.\n"
        analysis += "  - The conditional structure ('when I leave the house') might be challenging for some models.\n"

    analysis += "-" * 50 + "\n\n"
    analysis_results.append(analysis)

# Print the analysis for each sentence
for analysis in analysis_results:
    print(analysis)

Unnamed: 0,sentence,TF-IDF + LR,Word2Vec + Dense,Embedding (Pre-trained) + LSTM,Embedding (Scratch) + LSTM,True Intent
0,don't set an alarm for me tomorrow morning,alarm_set,alarm_set,iot_hue_lightchange,weather_query,alarm_remove
1,i'd like to know if there are any upcoming con...,recommendation_events,general_dontcare,iot_hue_lightchange,weather_query,recommendation_events
2,turn off the light in the living room but not ...,iot_hue_lightoff,iot_hue_lightoff,iot_hue_lightchange,weather_query,iot_hue_lightoff
3,what's the weather going to be like in london ...,weather_query,play_radio,iot_hue_lightchange,weather_query,weather_query
4,can you remind me to buy groceries when I leav...,calendar_set,general_explain,iot_hue_lightchange,weather_query,calendar_set


Analysis for sentence: 'don't set an alarm for me tomorrow morning' (True Intent: alarm_remove)
--------------------------------------------------
TF-IDF + LR: Predicted 'alarm_set' (Incorrect)
Word2Vec + Dense: Predicted 'alarm_set' (Incorrect)
Embedding (Pre-trained) + LSTM: Predicted 'iot_hue_lightchange' (Incorrect)
Embedding (Scratch) + LSTM: Predicted 'weather_query' (Incorrect)

Observations:
- This sentence involves negation ('don't').
  - TF-IDF + LR correctly identified the core 'alarm' intent but missed the negation.
  - Word2Vec + Dense also missed the negation.
  - The LSTM models performed poorly, predicting unrelated intents. This suggests they struggled with both negation and the overall sentence meaning.
--------------------------------------------------


Analysis for sentence: 'i'd like to know if there are any upcoming concerts near me this week' (True Intent: recommendation_events)
--------------------------------------------------
TF-IDF + LR: Predicted 'recommend

In [19]:
markdown_output = """
### Qualitative Comparison on Challenging Sentences

Below is a table showing the predictions of each model on a few challenging sentences from the test set, along with the true intent.

"""

markdown_output += predictions_df.to_markdown(index=False)

markdown_output += """

### Analysis of Predictions

Here is a detailed analysis of each challenging sentence, comparing how each model performed and discussing potential reasons for their success or failure, particularly focusing on the LSTM models' handling of complex structures and negation.

"""

markdown_output += "\n".join(analysis_results)

display(markdown_output)

"\n### Qualitative Comparison on Challenging Sentences\n\nBelow is a table showing the predictions of each model on a few challenging sentences from the test set, along with the true intent.\n\n| sentence                                                              | TF-IDF + LR           | Word2Vec + Dense   | Embedding (Pre-trained) + LSTM   | Embedding (Scratch) + LSTM   | True Intent           |\n|:----------------------------------------------------------------------|:----------------------|:-------------------|:---------------------------------|:-----------------------------|:----------------------|\n| don't set an alarm for me tomorrow morning                            | alarm_set             | alarm_set          | iot_hue_lightchange              | weather_query                | alarm_remove          |\n| i'd like to know if there are any upcoming concerts near me this week | recommendation_events | general_dontcare   | iot_hue_lightchange              | weather_query         