<a href="https://colab.research.google.com/github/elephanti/NLPProject2024/blob/main/Train_and_Test_with_augmented_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://github.com/elephanti/NLPProject2024.git

Cloning into 'NLPProject2024'...
remote: Enumerating objects: 1073, done.[K
remote: Counting objects: 100% (1073/1073), done.[K
remote: Compressing objects: 100% (882/882), done.[K
remote: Total 1073 (delta 312), reused 873 (delta 184), pack-reused 0 (from 0)[K
Receiving objects: 100% (1073/1073), 32.84 MiB | 16.00 MiB/s, done.
Resolving deltas: 100% (312/312), done.


In [2]:
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip glove*.zip -d NLPProject2024/classifiers/embeddings

--2024-08-14 17:26:53--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2024-08-14 17:26:54--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2024-08-14 17:26:54--  https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


202

In [1]:
!pip install tensorflow==2.15.1
!pip install ktrain




In [2]:
!pip install transformers==4.37.2



In [3]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder


def load_data(full_train_path, generated_train_path=None, test_path=None, val_path=None):
    print(f"\tFull train path: {full_train_path}")
    print(f"\tGenerated train path: {generated_train_path}")
    print(f"\tTest path: {test_path}")
    train_df = pd.read_csv(full_train_path)

    if generated_train_path:
        train_df = pd.concat([train_df, pd.read_csv(generated_train_path)], ignore_index=True)

    train_text = train_df['text'].tolist()
    train_label = train_df['label']

    le = LabelEncoder()
    y_train = le.fit_transform(train_label)

    test_df = pd.read_csv(test_path)
    test_text = test_df['text'].tolist()
    test_label = test_df['label']
    y_test = le.transform(test_label)

    if val_path:
      val_df = pd.read_csv(val_path)
      val_text = val_df['text'].tolist()
      val_label = val_df['label']
      y_val = le.transform(val_label)
      return train_text, y_train, test_text, y_test, le, val_text, y_val

    return train_text, y_train, test_text, y_test, le

In [7]:
import pandas as pd
import glob
import os
import joblib
from NLPProject2024.classifiers.bert_ktrain import DistilBERT
from NLPProject2024.classifiers.lstm_glove import LSTMGlove
from NLPProject2024.classifiers.svm_glove import SVMGlove
from NLPProject2024.classifiers.svm_tfidf import SVMTFIDF
import time

class ModelTrainer:
    def __init__(self, classifier_name, glove_file, num_labels):
        self.classifier = None
        self.classifier_name = classifier_name
        self.glove_file = glove_file
        self.num_labels = num_labels

    def train_classifier(self, X_train, y_train, model_path, X_test=None, y_test=None, X_val=None, y_val=None):
        print(f"Training {self.classifier_name}...")
        start_time = time.time()
        if self.classifier_name == 'lstm_glove':
            self.classifier = LSTMGlove
            clf_instance = self.classifier(self.glove_file, self.num_labels)
        elif self.classifier_name == 'svm_glove':
            self.classifier = SVMGlove
            clf_instance = self.classifier(self.glove_file)
        elif self.classifier_name == 'bert_ktrain':
            self.classifier = DistilBERT
            clf_instance = self.classifier(maxlen=50, batch_size=10, learning_rate=5e-5, epochs=20,
                                           early_stopping=4, reduce_on_plateau=2)
        else:
            self.classifier = SVMTFIDF
            clf_instance = self.classifier()

        clf_instance.train(X_train, y_train, X_val, y_val)

        accuracy, precision, recall, f1 = clf_instance.evaluate(X_test, y_test)
        if self.classifier_name in ['lstm_glove', 'bert_ktrain']:
            results = {"classifier": self.classifier_name,
                       "test_accuracy": round(accuracy.item(), 4),
                       "precision": round(precision.item(), 4),
                       "recall": round(recall.item(), 4),
                       "f1_score": round(f1.item(), 4)}
        else:
            results = {"classifier": self.classifier_name,
                       "test_accuracy": round(accuracy, 4),
                       "precision": round(precision, 4),
                       "recall": round(recall, 4),
                       "f1_score": round(f1, 4)}

        clf_instance.save(f'{model_path}/{self.classifier_name}')
        end_time = time.time()
        results["training_time"] = round(end_time - start_time, 2)

        return results


In [5]:
df_result = pd.DataFrame(columns=['dataset', 'model', 'test_accuracy', 'precision', 'recall', 'f1_score'])
glove_file = 'NLPProject2024/classifiers/embeddings/glove.6B.100d.txt'

classifiers = {
    'svm_tfidf': SVMTFIDF,
    'svm_glove': SVMGlove,
    'lstm_glove': LSTMGlove,
    'bert_ktrain': DistilBERT
}

lambadas = ['Lambada', 'Lambada+', 'Lambada+Instruct']
llms = ['Llama3', 'Mistral', 'GPT2']
dataset_names = ['ATIS', 'TREC']

In [14]:
full_df_result = pd.DataFrame(columns=['dataset', 'model', 'test_accuracy', 'precision', 'recall', 'f1_score'])

# Train on full training data
for name in dataset_names:
    X_train, y_train, X_test, y_test, le, X_val, y_val = load_data(f'NLPProject2024/datasets/{name}/{name.lower()}.train.csv',
                                                                   test_path=f'NLPProject2024/datasets/{name}/{name.lower()}.test.csv',
                                                                   val_path=f'NLPProject2024/datasets/{name}/{name.lower()}.valid.csv')
    model_path = f'final_results/models/{name.lower()}_train'
    os.makedirs(model_path, exist_ok=True)

    joblib.dump(le, f'{model_path}/label_encoder.pkl')

    for classifier in classifiers:
        print(f"Training {name} with {classifier}...")
        trainer = ModelTrainer(classifier, glove_file, len(le.classes_))
        train_results = trainer.train_classifier(X_train, y_train, model_path, X_test=X_test, y_test=y_test, X_val=X_val, y_val=y_val)

        train_results["dataset"] = f'{name.lower()}_full'
        full_df_result = pd.concat([full_df_result, pd.DataFrame([train_results])], ignore_index=True)
        print(f"Finished training {name} with {classifier}")
        print(train_results)
        print("-" * 50)

	Full train path: NLPProject2024/datasets/ATIS/atis.train.csv
	Generated train path: None
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training ATIS with svm_tfidf...
Training svm_tfidf...

Validation Results:
Accuracy: 0.9743
Precision: 0.9704
Recall: 0.9743
F1 Score: 0.9711


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9588
Precision: 0.9516
Recall: 0.9588
F1 Score: 0.9532
Model saved.
Finished training ATIS with svm_tfidf
{'classifier': 'svm_tfidf', 'test_accuracy': 0.9588, 'precision': 0.9516, 'recall': 0.9588, 'f1_score': 0.9532, 'training_time': 6.87, 'dataset': 'atis_full'}
--------------------------------------------------
Training ATIS with svm_glove...
Training svm_glove...

Validation Results:
Accuracy: 0.9383
Precision: 0.9332
Recall: 0.9383
F1 Score: 0.9349


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9280
Precision: 0.9121
Recall: 0.9280
F1 Score: 0.9170
Model saved.
Finished training ATIS with svm_glove
{'classifier': 'svm_glove', 'test_accuracy': 0.928, 'precision': 0.9121, 'recall': 0.928, 'f1_score': 0.917, 'training_time': 11.51, 'dataset': 'atis_full'}
--------------------------------------------------
Training ATIS with lstm_glove...
Training lstm_glove...
Epoch 1/20, Loss: 0.9069, Accuracy: 0.7774
Accuracy: 0.9280
Precision: 0.9068
Recall: 0.9280
F1 Score: 0.9128
Validation - Accuracy: 0.9280, Precision: 0.9068, Recall: 0.9280, F1 Score: 0.9128


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/20, Loss: 0.3726, Accuracy: 0.9087
Accuracy: 0.9537
Precision: 0.9356
Recall: 0.9537
F1 Score: 0.9437
Validation - Accuracy: 0.9537, Precision: 0.9356, Recall: 0.9537, F1 Score: 0.9437


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 3/20, Loss: 0.2476, Accuracy: 0.9378
Accuracy: 0.9640
Precision: 0.9582
Recall: 0.9640
F1 Score: 0.9592
Validation - Accuracy: 0.9640, Precision: 0.9582, Recall: 0.9640, F1 Score: 0.9592


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4/20, Loss: 0.1724, Accuracy: 0.9575
Accuracy: 0.9743
Precision: 0.9724
Recall: 0.9743
F1 Score: 0.9717
Validation - Accuracy: 0.9743, Precision: 0.9724, Recall: 0.9743, F1 Score: 0.9717


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 5/20, Loss: 0.1421, Accuracy: 0.9625
Accuracy: 0.9777
Precision: 0.9751
Recall: 0.9777
F1 Score: 0.9759
Validation - Accuracy: 0.9777, Precision: 0.9751, Recall: 0.9777, F1 Score: 0.9759


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 6/20, Loss: 0.1146, Accuracy: 0.9693
Accuracy: 0.9811
Precision: 0.9786
Recall: 0.9811
F1 Score: 0.9792
Validation - Accuracy: 0.9811, Precision: 0.9786, Recall: 0.9811, F1 Score: 0.9792


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 7/20, Loss: 0.0996, Accuracy: 0.9760
Accuracy: 0.9828
Precision: 0.9806
Recall: 0.9828
F1 Score: 0.9812
Validation - Accuracy: 0.9828, Precision: 0.9806, Recall: 0.9828, F1 Score: 0.9812


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 8/20, Loss: 0.0859, Accuracy: 0.9779
Accuracy: 0.9880
Precision: 0.9868
Recall: 0.9880
F1 Score: 0.9871
Validation - Accuracy: 0.9880, Precision: 0.9868, Recall: 0.9880, F1 Score: 0.9871


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 9/20, Loss: 0.0629, Accuracy: 0.9850
Accuracy: 0.9794
Precision: 0.9785
Recall: 0.9794
F1 Score: 0.9784
Validation - Accuracy: 0.9794, Precision: 0.9785, Recall: 0.9794, F1 Score: 0.9784


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 10/20, Loss: 0.0564, Accuracy: 0.9861
Accuracy: 0.9863
Precision: 0.9835
Recall: 0.9863
F1 Score: 0.9845
Validation - Accuracy: 0.9863, Precision: 0.9835, Recall: 0.9863, F1 Score: 0.9845


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 11/20, Loss: 0.0450, Accuracy: 0.9893
Accuracy: 0.9863
Precision: 0.9848
Recall: 0.9863
F1 Score: 0.9852
Validation - Accuracy: 0.9863, Precision: 0.9848, Recall: 0.9863, F1 Score: 0.9852


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 12/20, Loss: 0.0434, Accuracy: 0.9899
Accuracy: 0.9811
Precision: 0.9802
Recall: 0.9811
F1 Score: 0.9801
Validation - Accuracy: 0.9811, Precision: 0.9802, Recall: 0.9811, F1 Score: 0.9801


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 13/20, Loss: 0.0404, Accuracy: 0.9889
Accuracy: 0.9880
Precision: 0.9885
Recall: 0.9880
F1 Score: 0.9876
Validation - Accuracy: 0.9880, Precision: 0.9885, Recall: 0.9880, F1 Score: 0.9876
Epoch 14/20, Loss: 0.0304, Accuracy: 0.9912
Accuracy: 0.9931
Precision: 0.9933
Recall: 0.9931
F1 Score: 0.9930
Validation - Accuracy: 0.9931, Precision: 0.9933, Recall: 0.9931, F1 Score: 0.9930
Epoch 15/20, Loss: 0.0266, Accuracy: 0.9938
Accuracy: 0.9914
Precision: 0.9916
Recall: 0.9914
F1 Score: 0.9913
Validation - Accuracy: 0.9914, Precision: 0.9916, Recall: 0.9914, F1 Score: 0.9913
Epoch 16/20, Loss: 0.0336, Accuracy: 0.9921
Accuracy: 0.9897
Precision: 0.9897
Recall: 0.9897
F1 Score: 0.9895
Validation - Accuracy: 0.9897, Precision: 0.9897, Recall: 0.9897, F1 Score: 0.9895
Epoch 17/20, Loss: 0.0324, Accuracy: 0.9921
Accuracy: 0.9846
Precision: 0.9840
Recall: 0.9846
F1 Score: 0.9836
Validation - Accuracy: 0.9846, Precision: 0.9840, Recall: 0.9846, F1 Score: 0.9836


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 18/20, Loss: 0.0259, Accuracy: 0.9938
Accuracy: 0.9811
Precision: 0.9812
Recall: 0.9811
F1 Score: 0.9807
Validation - Accuracy: 0.9811, Precision: 0.9812, Recall: 0.9811, F1 Score: 0.9807
Epoch 19/20, Loss: 0.0251, Accuracy: 0.9940
Accuracy: 0.9846
Precision: 0.9831
Recall: 0.9846
F1 Score: 0.9834
Validation - Accuracy: 0.9846, Precision: 0.9831, Recall: 0.9846, F1 Score: 0.9834


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 20/20, Loss: 0.0234, Accuracy: 0.9931
Accuracy: 0.9931
Precision: 0.9933
Recall: 0.9931
F1 Score: 0.9930
Validation - Accuracy: 0.9931, Precision: 0.9933, Recall: 0.9931, F1 Score: 0.9930


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9811
Precision: 0.9811
Recall: 0.9811
F1 Score: 0.9805
Model and embeddings saved.
Finished training ATIS with lstm_glove
{'classifier': 'lstm_glove', 'test_accuracy': 0.9811, 'precision': 0.9811, 'recall': 0.9811, 'f1_score': 0.9805, 'training_time': 60.87, 'dataset': 'atis_full'}
--------------------------------------------------
Training ATIS with bert_ktrain...
Training bert_ktrain...
preprocessing train...
language: en
train sequence lengths:
	mean : 11
	95percentile : 19
	99percentile : 25


Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 11
	95percentile : 19
	99percentile : 24




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 00006: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 00009: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Epoch 10/20
Epoch 11/20
Epoch 00011: Reducing Max LR on Plateau: new max lr will be 6.25e-06 (if not early_stopping).
Restoring model weights from the end of the best epoch: 7.
Epoch 11: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.99%
Precision: 0.99
Recall: 0.99
F1 Score: 0.99
Model saved to final_results/models/atis_train/bert_ktrain
Finished training ATIS with bert_ktrain
{'classifier': 'bert_ktrain', 'test_accuracy': 0.9897, 'precision': 0.9888, 'recall': 0.9897, 'f1_score': 0.9891, 'training_time': 262.77, 'dataset': 'atis_full'}
--------------------------------------------------
	Full train path: NLPProject2024/datasets/TREC/trec.train.csv
	Generated train path: None
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training TREC with svm_tfidf...
Training svm_tfidf...

Validation Results:
Accuracy: 0.9542
Precision: 0.9553
Recall: 0.9542
F1 Score: 0.9533


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9599
Precision: 0.9611
Recall: 0.9599
F1 Score: 0.9598
Model saved.
Finished training TREC with svm_tfidf
{'classifier': 'svm_tfidf', 'test_accuracy': 0.9599, 'precision': 0.9611, 'recall': 0.9599, 'f1_score': 0.9598, 'training_time': 81.7, 'dataset': 'trec_full'}
--------------------------------------------------
Training TREC with svm_glove...
Training svm_glove...

Validation Results:
Accuracy: 0.7774
Precision: 0.7825
Recall: 0.7774
F1 Score: 0.7731


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.8177
Precision: 0.8224
Recall: 0.8177
F1 Score: 0.8168
Model saved.
Finished training TREC with svm_glove
{'classifier': 'svm_glove', 'test_accuracy': 0.8177, 'precision': 0.8224, 'recall': 0.8177, 'f1_score': 0.8168, 'training_time': 40.77, 'dataset': 'trec_full'}
--------------------------------------------------
Training TREC with lstm_glove...
Training lstm_glove...
Epoch 1/20, Loss: 2.0675, Accuracy: 0.4915


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.6257
Precision: 0.5945
Recall: 0.6257
F1 Score: 0.5714
Validation - Accuracy: 0.6257, Precision: 0.5945, Recall: 0.6257, F1 Score: 0.5714
Epoch 2/20, Loss: 1.1791, Accuracy: 0.6929


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.7567
Precision: 0.7513
Recall: 0.7567
F1 Score: 0.7319
Validation - Accuracy: 0.7567, Precision: 0.7513, Recall: 0.7567, F1 Score: 0.7319
Epoch 3/20, Loss: 0.8924, Accuracy: 0.7590


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.8063
Precision: 0.8093
Recall: 0.8063
F1 Score: 0.7940
Validation - Accuracy: 0.8063, Precision: 0.8093, Recall: 0.8063, F1 Score: 0.7940
Epoch 4/20, Loss: 0.7134, Accuracy: 0.8022


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.8282
Precision: 0.8305
Recall: 0.8282
F1 Score: 0.8182
Validation - Accuracy: 0.8282, Precision: 0.8305, Recall: 0.8282, F1 Score: 0.8182
Epoch 5/20, Loss: 0.5840, Accuracy: 0.8356


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.8740
Precision: 0.8740
Recall: 0.8740
F1 Score: 0.8670
Validation - Accuracy: 0.8740, Precision: 0.8740, Recall: 0.8740, F1 Score: 0.8670
Epoch 6/20, Loss: 0.4840, Accuracy: 0.8626


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.8909
Precision: 0.8940
Recall: 0.8909
F1 Score: 0.8873
Validation - Accuracy: 0.8909, Precision: 0.8940, Recall: 0.8909, F1 Score: 0.8873
Epoch 7/20, Loss: 0.4154, Accuracy: 0.8799


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.8922
Precision: 0.8958
Recall: 0.8922
F1 Score: 0.8889
Validation - Accuracy: 0.8922, Precision: 0.8958, Recall: 0.8922, F1 Score: 0.8889
Epoch 8/20, Loss: 0.3655, Accuracy: 0.8938


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9129
Precision: 0.9175
Recall: 0.9129
F1 Score: 0.9111
Validation - Accuracy: 0.9129, Precision: 0.9175, Recall: 0.9129, F1 Score: 0.9111
Epoch 9/20, Loss: 0.2957, Accuracy: 0.9145


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9223
Precision: 0.9228
Recall: 0.9223
F1 Score: 0.9189
Validation - Accuracy: 0.9223, Precision: 0.9228, Recall: 0.9223, F1 Score: 0.9189
Epoch 10/20, Loss: 0.2642, Accuracy: 0.9242


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9329
Precision: 0.9372
Recall: 0.9329
F1 Score: 0.9322
Validation - Accuracy: 0.9329, Precision: 0.9372, Recall: 0.9329, F1 Score: 0.9322
Epoch 11/20, Loss: 0.2363, Accuracy: 0.9297


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9342
Precision: 0.9376
Recall: 0.9342
F1 Score: 0.9316
Validation - Accuracy: 0.9342, Precision: 0.9376, Recall: 0.9342, F1 Score: 0.9316
Epoch 12/20, Loss: 0.2137, Accuracy: 0.9397


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9367
Precision: 0.9374
Recall: 0.9367
F1 Score: 0.9352
Validation - Accuracy: 0.9367, Precision: 0.9374, Recall: 0.9367, F1 Score: 0.9352
Epoch 13/20, Loss: 0.1861, Accuracy: 0.9460


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9461
Precision: 0.9482
Recall: 0.9461
F1 Score: 0.9439
Validation - Accuracy: 0.9461, Precision: 0.9482, Recall: 0.9461, F1 Score: 0.9439
Epoch 14/20, Loss: 0.1653, Accuracy: 0.9517


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9561
Precision: 0.9581
Recall: 0.9561
F1 Score: 0.9544
Validation - Accuracy: 0.9561, Precision: 0.9581, Recall: 0.9561, F1 Score: 0.9544
Epoch 15/20, Loss: 0.1436, Accuracy: 0.9567


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9511
Precision: 0.9533
Recall: 0.9511
F1 Score: 0.9500
Validation - Accuracy: 0.9511, Precision: 0.9533, Recall: 0.9511, F1 Score: 0.9500
Epoch 16/20, Loss: 0.1225, Accuracy: 0.9636


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9618
Precision: 0.9620
Recall: 0.9618
F1 Score: 0.9610
Validation - Accuracy: 0.9618, Precision: 0.9620, Recall: 0.9618, F1 Score: 0.9610
Epoch 17/20, Loss: 0.1273, Accuracy: 0.9625


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9492
Precision: 0.9530
Recall: 0.9492
F1 Score: 0.9485
Validation - Accuracy: 0.9492, Precision: 0.9530, Recall: 0.9492, F1 Score: 0.9485
Epoch 18/20, Loss: 0.1053, Accuracy: 0.9687


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9555
Precision: 0.9565
Recall: 0.9555
F1 Score: 0.9546
Validation - Accuracy: 0.9555, Precision: 0.9565, Recall: 0.9555, F1 Score: 0.9546
Epoch 19/20, Loss: 0.1015, Accuracy: 0.9715


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9517
Precision: 0.9541
Recall: 0.9517
F1 Score: 0.9499
Validation - Accuracy: 0.9517, Precision: 0.9541, Recall: 0.9517, F1 Score: 0.9499
Epoch 20/20, Loss: 0.0944, Accuracy: 0.9703


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.9536
Precision: 0.9546
Recall: 0.9536
F1 Score: 0.9527
Validation - Accuracy: 0.9536, Precision: 0.9546, Recall: 0.9536, F1 Score: 0.9527
Accuracy: 0.9630
Precision: 0.9649
Recall: 0.9630
F1 Score: 0.9631
Model and embeddings saved.
Finished training TREC with lstm_glove
{'classifier': 'lstm_glove', 'test_accuracy': 0.963, 'precision': 0.9649, 'recall': 0.963, 'f1_score': 0.9631, 'training_time': 109.84, 'dataset': 'trec_full'}
--------------------------------------------------
Training TREC with bert_ktrain...
Training bert_ktrain...
preprocessing train...
language: en
train sequence lengths:
	mean : 10
	95percentile : 17
	99percentile : 22


Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 10
	95percentile : 17
	99percentile : 21




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 00007: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 00012: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Epoch 13/20
Epoch 14/20
Epoch 00014: Reducing Max LR on Plateau: new max lr will be 6.25e-06 (if not early_stopping).
Restoring model weights from the end of the best epoch: 10.
Epoch 14: early stopping
Weights from best epoch have been loaded into model.
Accuracy: 0.98%
Precision: 0.98
Recall: 0.98
F1 Score: 0.98
Model saved to final_results/models/trec_train/bert_ktrain
Finished training TREC with bert_ktrain
{'classifier': 'bert_ktrain', 'test_accuracy': 0.9831, 'precision': 0.9838, 'recall': 0.9831, 'f1_score': 0.9831, 'training_time': 875.9, 'dataset': 'trec_full'}
----------------------

In [15]:
full_df_result.head(10)

Unnamed: 0,dataset,model,test_accuracy,precision,recall,f1_score,classifier,training_time
0,atis_full,,0.9588,0.9516,0.9588,0.9532,svm_tfidf,6.87
1,atis_full,,0.928,0.9121,0.928,0.917,svm_glove,11.51
2,atis_full,,0.9811,0.9811,0.9811,0.9805,lstm_glove,60.87
3,atis_full,,0.9897,0.9888,0.9897,0.9891,bert_ktrain,262.77
4,trec_full,,0.9599,0.9611,0.9599,0.9598,svm_tfidf,81.7
5,trec_full,,0.8177,0.8224,0.8177,0.8168,svm_glove,40.77
6,trec_full,,0.963,0.9649,0.963,0.9631,lstm_glove,109.84
7,trec_full,,0.9831,0.9838,0.9831,0.9831,bert_ktrain,875.9


In [12]:
subsets_df_result = pd.DataFrame(columns=['dataset', 'classifier', 'model', 'subset_size', 'method', 'test_accuracy',
                                          'precision', 'recall', 'f1_score'])

# Train on the subsets
for name in dataset_names:
    for subset_size in [5]:
        subset_path = f'NLPProject2024/datasets/{name}/sampled_subsets/ver1/{name.lower()}_{subset_size}_subset.csv'

        # subset only training
        X_train, y_train, X_test, y_test, le, X_val, y_val = load_data(subset_path,
                                                                       test_path=f'NLPProject2024/datasets/{name}/{name.lower()}.test.csv',
                                                                       val_path=f'NLPProject2024/datasets/{name}/{name.lower()}.valid.csv')
        model_path = f'final_results/models/{name.lower()}_{subset_size}_subset_train'
        os.makedirs(model_path, exist_ok=True)

        joblib.dump(le, f'{model_path}/label_encoder.pkl')

        for classifier in classifiers:
            print(f"Training {name} with {classifier} and subset size {subset_size}...")
            trainer = ModelTrainer(classifier, glove_file, len(le.classes_))
            train_results = trainer.train_classifier(X_train, y_train, model_path, X_test=X_test, y_test=y_test, X_val=X_val, y_val=y_val)

            train_results["dataset"] = f'{name.lower()}_{subset_size}_subset_train'
            train_results["method"] = None
            train_results["model"] = None
            train_results["subset_size"] = subset_size
            subsets_df_result = pd.concat([subsets_df_result, pd.DataFrame([train_results])], ignore_index=True)
            print(f"Finished training {name} with {classifier} and subset size {subset_size}")
            print(train_results)
            print("-" * 50)

	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: None
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training ATIS with svm_tfidf and subset size 5...
Training svm_tfidf...

Validation Results:
Accuracy: 0.5798
Precision: 0.8624
Recall: 0.5798
F1 Score: 0.6576
Accuracy: 0.6055
Precision: 0.8235
Recall: 0.6055
F1 Score: 0.6602
Model saved.
Finished training ATIS with svm_tfidf and subset size 5
{'classifier': 'svm_tfidf', 'test_accuracy': 0.6055, 'precision': 0.8235, 'recall': 0.6055, 'f1_score': 0.6602, 'training_time': 0.06, 'dataset': 'atis_5_subset_train', 'method': None, 'model': None, 'subset_size': 5}
--------------------------------------------------
Training ATIS with svm_glove and subset size 5...
Training svm_glove...


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Validation Results:
Accuracy: 0.5832
Precision: 0.8216
Recall: 0.5832
F1 Score: 0.6616
Accuracy: 0.5815
Precision: 0.7998
Recall: 0.5815
F1 Score: 0.6436
Model saved.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Finished training ATIS with svm_glove and subset size 5
{'classifier': 'svm_glove', 'test_accuracy': 0.5815, 'precision': 0.7998, 'recall': 0.5815, 'f1_score': 0.6436, 'training_time': 8.44, 'dataset': 'atis_5_subset_train', 'method': None, 'model': None, 'subset_size': 5}
--------------------------------------------------
Training ATIS with lstm_glove and subset size 5...
Training lstm_glove...
Epoch 1/20, Loss: 3.2355, Accuracy: 0.0244
Accuracy: 0.1149
Precision: 0.6463
Recall: 0.1149
F1 Score: 0.1660
Validation - Accuracy: 0.1149, Precision: 0.6463, Recall: 0.1149, F1 Score: 0.1660
Epoch 2/20, Loss: 2.8502, Accuracy: 0.0976


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0635
Precision: 0.6769
Recall: 0.0635
F1 Score: 0.0713
Validation - Accuracy: 0.0635, Precision: 0.6769, Recall: 0.0635, F1 Score: 0.0713
Epoch 3/20, Loss: 2.8233, Accuracy: 0.0854
Accuracy: 0.0429
Precision: 0.7705
Recall: 0.0429
F1 Score: 0.0596
Validation - Accuracy: 0.0429, Precision: 0.7705, Recall: 0.0429, F1 Score: 0.0596
Epoch 4/20, Loss: 2.7666, Accuracy: 0.1463


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0600
Precision: 0.7959
Recall: 0.0600
F1 Score: 0.0690
Validation - Accuracy: 0.0600, Precision: 0.7959, Recall: 0.0600, F1 Score: 0.0690
Epoch 5/20, Loss: 2.6045, Accuracy: 0.2073
Accuracy: 0.0703
Precision: 0.7917
Recall: 0.0703
F1 Score: 0.0785
Validation - Accuracy: 0.0703, Precision: 0.7917, Recall: 0.0703, F1 Score: 0.0785
Epoch 6/20, Loss: 2.6993, Accuracy: 0.0976


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1115
Precision: 0.8530
Recall: 0.1115
F1 Score: 0.1438
Validation - Accuracy: 0.1115, Precision: 0.8530, Recall: 0.1115, F1 Score: 0.1438
Epoch 7/20, Loss: 2.5625, Accuracy: 0.2073
Accuracy: 0.1750
Precision: 0.8078
Recall: 0.1750
F1 Score: 0.2354
Validation - Accuracy: 0.1750, Precision: 0.8078, Recall: 0.1750, F1 Score: 0.2354
Epoch 8/20, Loss: 2.3230, Accuracy: 0.3049
Accuracy: 0.3225
Precision: 0.8242
Recall: 0.3225
F1 Score: 0.4370
Validation - Accuracy: 0.3225, Precision: 0.8242, Recall: 0.3225, F1 Score: 0.4370
Epoch 9/20, Loss: 2.3603, Accuracy: 0.3171
Accuracy: 0.4082
Precision: 0.8252
Recall: 0.4082
F1 Score: 0.5241
Validation - Accuracy: 0.4082, Precision: 0.8252, Recall: 0.4082, F1 Score: 0.5241
Epoch 10/20, Loss: 2.2242, Accuracy: 0.4268


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4580
Precision: 0.8243
Recall: 0.4580
F1 Score: 0.5740
Validation - Accuracy: 0.4580, Precision: 0.8243, Recall: 0.4580, F1 Score: 0.5740
Epoch 11/20, Loss: 2.1628, Accuracy: 0.4512
Accuracy: 0.4048
Precision: 0.8038
Recall: 0.4048
F1 Score: 0.5202
Validation - Accuracy: 0.4048, Precision: 0.8038, Recall: 0.4048, F1 Score: 0.5202
Epoch 12/20, Loss: 1.9413, Accuracy: 0.4390


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3877
Precision: 0.7996
Recall: 0.3877
F1 Score: 0.4971
Validation - Accuracy: 0.3877, Precision: 0.7996, Recall: 0.3877, F1 Score: 0.4971
Epoch 13/20, Loss: 1.9314, Accuracy: 0.4634
Accuracy: 0.4031
Precision: 0.8047
Recall: 0.4031
F1 Score: 0.5083
Validation - Accuracy: 0.4031, Precision: 0.8047, Recall: 0.4031, F1 Score: 0.5083
Epoch 14/20, Loss: 1.8607, Accuracy: 0.5244


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4374
Precision: 0.8136
Recall: 0.4374
F1 Score: 0.5427
Validation - Accuracy: 0.4374, Precision: 0.8136, Recall: 0.4374, F1 Score: 0.5427
Epoch 15/20, Loss: 1.6062, Accuracy: 0.5610
Accuracy: 0.4408
Precision: 0.8153
Recall: 0.4408
F1 Score: 0.5509
Validation - Accuracy: 0.4408, Precision: 0.8153, Recall: 0.4408, F1 Score: 0.5509
Epoch 16/20, Loss: 1.6253, Accuracy: 0.6463


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4460
Precision: 0.8247
Recall: 0.4460
F1 Score: 0.5571
Validation - Accuracy: 0.4460, Precision: 0.8247, Recall: 0.4460, F1 Score: 0.5571
Epoch 17/20, Loss: 1.4224, Accuracy: 0.6829
Accuracy: 0.4391
Precision: 0.8353
Recall: 0.4391
F1 Score: 0.5545
Validation - Accuracy: 0.4391, Precision: 0.8353, Recall: 0.4391, F1 Score: 0.5545
Epoch 18/20, Loss: 1.4084, Accuracy: 0.6098


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4545
Precision: 0.8393
Recall: 0.4545
F1 Score: 0.5691
Validation - Accuracy: 0.4545, Precision: 0.8393, Recall: 0.4545, F1 Score: 0.5691
Epoch 19/20, Loss: 1.3592, Accuracy: 0.6707
Accuracy: 0.4322
Precision: 0.8471
Recall: 0.4322
F1 Score: 0.5511
Validation - Accuracy: 0.4322, Precision: 0.8471, Recall: 0.4322, F1 Score: 0.5511
Epoch 20/20, Loss: 1.1758, Accuracy: 0.7073


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3825
Precision: 0.8386
Recall: 0.3825
F1 Score: 0.4911
Validation - Accuracy: 0.3825, Precision: 0.8386, Recall: 0.3825, F1 Score: 0.4911
Accuracy: 0.3894
Precision: 0.8163
Recall: 0.3894
F1 Score: 0.4870


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model and embeddings saved.
Finished training ATIS with lstm_glove and subset size 5
{'classifier': 'lstm_glove', 'test_accuracy': 0.3894, 'precision': 0.8163, 'recall': 0.3894, 'f1_score': 0.487, 'training_time': 37.51, 'dataset': 'atis_5_subset_train', 'method': None, 'model': None, 'subset_size': 5}
--------------------------------------------------
Training ATIS with bert_ktrain and subset size 5...
Training bert_ktrain...
preprocessing train...
language: en
train sequence lengths:
	mean : 10
	95percentile : 18
	99percentile : 23


Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 11
	95percentile : 19
	99percentile : 24




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 00009: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 00014: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Epoch 15/20
Epoch 16/20
Epoch 00016: Reducing Max LR on Plateau: new max lr will be 6.25e-06 (if not early_stopping).
Restoring model weights from the end of the best epoch: 12.
Epoch 16: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.84%
Precision: 0.92
Recall: 0.84
F1 Score: 0.86
Model saved to final_results/models/atis_5_subset_train/bert_ktrain
Finished training ATIS with bert_ktrain and subset size 5
{'classifier': 'bert_ktrain', 'test_accuracy': 0.8388, 'precision': 0.9214, 'recall': 0.8388, 'f1_score': 0.8625, 'training_time': 31.98, 'dataset': 'atis_5_subset_train', 'method': None, 'model': None, 'subset_size': 5}
--------------------------------------------------
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: None
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training TREC with svm_tfidf and subset size 5...
Training svm_tfidf...

Validation Results:
Accuracy: 0.4169
Precision: 0.5822
Recall: 0.4169
F1 Score: 0.4316
Accuracy: 0.4599
Precision: 0.6290
Recall: 0.4599
F1 Score: 0.4769
Model saved.
Finished training TREC with svm_tfidf and subset size 5
{'classifier': 'svm_tfidf', 'test_accuracy': 0.4599, 'precision': 0.629, 'rec

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Validation Results:
Accuracy: 0.3022
Precision: 0.4393
Recall: 0.3022
F1 Score: 0.3139


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3177
Precision: 0.4731
Recall: 0.3177
F1 Score: 0.3372
Model saved.
Finished training TREC with svm_glove and subset size 5
{'classifier': 'svm_glove', 'test_accuracy': 0.3177, 'precision': 0.4731, 'recall': 0.3177, 'f1_score': 0.3372, 'training_time': 8.95, 'dataset': 'trec_5_subset_train', 'method': None, 'model': None, 'subset_size': 5}
--------------------------------------------------
Training TREC with lstm_glove and subset size 5...
Training lstm_glove...


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/20, Loss: 4.1986, Accuracy: 0.0320


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0257
Precision: 0.0283
Recall: 0.0257
F1 Score: 0.0208
Validation - Accuracy: 0.0257, Precision: 0.0283, Recall: 0.0257, F1 Score: 0.0208
Epoch 2/20, Loss: 4.0355, Accuracy: 0.0360


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0414
Precision: 0.0593
Recall: 0.0414
F1 Score: 0.0297
Validation - Accuracy: 0.0414, Precision: 0.0593, Recall: 0.0414, F1 Score: 0.0297
Epoch 3/20, Loss: 3.9343, Accuracy: 0.0360


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0828
Precision: 0.1108
Recall: 0.0828
F1 Score: 0.0753
Validation - Accuracy: 0.0828, Precision: 0.1108, Recall: 0.0828, F1 Score: 0.0753
Epoch 4/20, Loss: 3.8328, Accuracy: 0.0320


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1292
Precision: 0.2135
Recall: 0.1292
F1 Score: 0.1064
Validation - Accuracy: 0.1292, Precision: 0.2135, Recall: 0.1292, F1 Score: 0.1064
Epoch 5/20, Loss: 3.6714, Accuracy: 0.0960


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1310
Precision: 0.2320
Recall: 0.1310
F1 Score: 0.0989
Validation - Accuracy: 0.1310, Precision: 0.2320, Recall: 0.1310, F1 Score: 0.0989
Epoch 6/20, Loss: 3.6289, Accuracy: 0.1160


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1379
Precision: 0.2232
Recall: 0.1379
F1 Score: 0.1116
Validation - Accuracy: 0.1379, Precision: 0.2232, Recall: 0.1379, F1 Score: 0.1116
Epoch 7/20, Loss: 3.3363, Accuracy: 0.1840


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1906
Precision: 0.2673
Recall: 0.1906
F1 Score: 0.1773
Validation - Accuracy: 0.1906, Precision: 0.2673, Recall: 0.1906, F1 Score: 0.1773
Epoch 8/20, Loss: 3.1410, Accuracy: 0.2240


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1975
Precision: 0.3056
Recall: 0.1975
F1 Score: 0.1827
Validation - Accuracy: 0.1975, Precision: 0.3056, Recall: 0.1975, F1 Score: 0.1827
Epoch 9/20, Loss: 2.8671, Accuracy: 0.3000


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2226
Precision: 0.3176
Recall: 0.2226
F1 Score: 0.2106
Validation - Accuracy: 0.2226, Precision: 0.3176, Recall: 0.2226, F1 Score: 0.2106
Epoch 10/20, Loss: 2.6977, Accuracy: 0.3680


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2107
Precision: 0.3132
Recall: 0.2107
F1 Score: 0.1976
Validation - Accuracy: 0.2107, Precision: 0.3132, Recall: 0.2107, F1 Score: 0.1976
Epoch 11/20, Loss: 2.5547, Accuracy: 0.4360


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2502
Precision: 0.3522
Recall: 0.2502
F1 Score: 0.2358
Validation - Accuracy: 0.2502, Precision: 0.3522, Recall: 0.2502, F1 Score: 0.2358
Epoch 12/20, Loss: 2.3282, Accuracy: 0.5080


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2665
Precision: 0.4150
Recall: 0.2665
F1 Score: 0.2553
Validation - Accuracy: 0.2665, Precision: 0.4150, Recall: 0.2665, F1 Score: 0.2553
Epoch 13/20, Loss: 2.2211, Accuracy: 0.5040


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2815
Precision: 0.4074
Recall: 0.2815
F1 Score: 0.2738
Validation - Accuracy: 0.2815, Precision: 0.4074, Recall: 0.2815, F1 Score: 0.2738
Epoch 14/20, Loss: 1.9887, Accuracy: 0.5920


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2871
Precision: 0.4515
Recall: 0.2871
F1 Score: 0.2868
Validation - Accuracy: 0.2871, Precision: 0.4515, Recall: 0.2871, F1 Score: 0.2868
Epoch 15/20, Loss: 1.8470, Accuracy: 0.5960


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3028
Precision: 0.4865
Recall: 0.3028
F1 Score: 0.3080
Validation - Accuracy: 0.3028, Precision: 0.4865, Recall: 0.3028, F1 Score: 0.3080
Epoch 16/20, Loss: 1.7181, Accuracy: 0.6480


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2959
Precision: 0.4620
Recall: 0.2959
F1 Score: 0.2968
Validation - Accuracy: 0.2959, Precision: 0.4620, Recall: 0.2959, F1 Score: 0.2968
Epoch 17/20, Loss: 1.5677, Accuracy: 0.6960


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3367
Precision: 0.4934
Recall: 0.3367
F1 Score: 0.3488
Validation - Accuracy: 0.3367, Precision: 0.4934, Recall: 0.3367, F1 Score: 0.3488
Epoch 18/20, Loss: 1.5245, Accuracy: 0.7080


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3498
Precision: 0.5169
Recall: 0.3498
F1 Score: 0.3615
Validation - Accuracy: 0.3498, Precision: 0.5169, Recall: 0.3498, F1 Score: 0.3615
Epoch 19/20, Loss: 1.4133, Accuracy: 0.7640


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3592
Precision: 0.5089
Recall: 0.3592
F1 Score: 0.3706
Validation - Accuracy: 0.3592, Precision: 0.5089, Recall: 0.3592, F1 Score: 0.3706
Epoch 20/20, Loss: 1.2778, Accuracy: 0.7800


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3480
Precision: 0.5127
Recall: 0.3480
F1 Score: 0.3572
Validation - Accuracy: 0.3480, Precision: 0.5127, Recall: 0.3480, F1 Score: 0.3572


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3803
Precision: 0.5676
Recall: 0.3803
F1 Score: 0.3964
Model and embeddings saved.
Finished training TREC with lstm_glove and subset size 5
{'classifier': 'lstm_glove', 'test_accuracy': 0.3803, 'precision': 0.5676, 'recall': 0.3803, 'f1_score': 0.3964, 'training_time': 42.89, 'dataset': 'trec_5_subset_train', 'method': None, 'model': None, 'subset_size': 5}
--------------------------------------------------
Training TREC with bert_ktrain and subset size 5...
Training bert_ktrain...
preprocessing train...
language: en
train sequence lengths:
	mean : 10
	95percentile : 16
	99percentile : 21


Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 10
	95percentile : 17
	99percentile : 21




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 00012: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 13/20
Epoch 14/20
Epoch 00014: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Restoring model weights from the end of the best epoch: 10.
Epoch 14: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.54%
Precision: 0.59
Recall: 0.54
F1 Score: 0.53
Model saved to final_results/models/trec_5_subset_train/bert_ktrain
Finished training TREC with bert_ktrain and subset size 5
{'classifier': 'bert_ktrain', 'test_accuracy': 0.5407, 'precision': 0.5854, 'recall': 0.5407, 'f1_score': 0.5334, 'training_time': 51.72, 'dataset': 'trec_5_subset_train', 'method': None, 'model': None, 'subset_size': 5}
--------------------------------------------------


In [13]:
subsets_df_result.head(20)

Unnamed: 0,dataset,classifier,model,subset_size,method,test_accuracy,precision,recall,f1_score,training_time
0,atis_5_subset_train,svm_tfidf,,5,,0.6055,0.8235,0.6055,0.6602,0.06
1,atis_5_subset_train,svm_glove,,5,,0.5815,0.7998,0.5815,0.6436,8.44
2,atis_5_subset_train,lstm_glove,,5,,0.3894,0.8163,0.3894,0.487,37.51
3,atis_5_subset_train,bert_ktrain,,5,,0.8388,0.9214,0.8388,0.8625,31.98
4,trec_5_subset_train,svm_tfidf,,5,,0.4599,0.629,0.4599,0.4769,0.28
5,trec_5_subset_train,svm_glove,,5,,0.3177,0.4731,0.3177,0.3372,8.95
6,trec_5_subset_train,lstm_glove,,5,,0.3803,0.5676,0.3803,0.3964,42.89
7,trec_5_subset_train,bert_ktrain,,5,,0.5407,0.5854,0.5407,0.5334,51.72


In [8]:
augmented_atis_df_result = pd.DataFrame(columns=['dataset', 'classifier', 'test_accuracy', 'model', 'subset_size', 'method'])

# Train on subset + matching augmented data
for name in ['ATIS']:
    for subset_size in [5]:
        subset_path = f'NLPProject2024/datasets/{name}/sampled_subsets/ver1/{name.lower()}_{subset_size}_subset.csv'

        for lambada in lambadas:
            for llm in llms:
                if lambada == 'Lambada+Instruct' and llm == 'Mistral':
                    continue
                if lambada == 'Lambada+' and llm == 'GPT2':
                    continue
                if lambada == 'Lambada+Instruct' and llm == 'GPT2':
                    continue

                for classifier in classifiers:
                    directory_path = f'NLPProject2024/filtered_datasets/{lambada}/{llm}/{name}/{classifier}'

                    data_files = glob.glob(os.path.join(directory_path, '*.csv'))

                    for file in data_files:
                      # Hack to train with only matching augmented data
                      if f"{name.lower()}_{subset_size}_" not in file.lower():
                        continue
                      try:
                        dataset_file_name = file.replace(f'{directory_path}', "").replace('.csv', "").replace('/', "")
                        model_path = f'final_results/models/{dataset_file_name}'
                        os.makedirs(model_path, exist_ok=True)
                        print(f"Training {name} with {classifier} on {dataset_file_name}...")
                        X_train, y_train, X_test, y_test, le, X_val, y_val = load_data(subset_path,
                                                                                generated_train_path=file,
                                                                                test_path=f'NLPProject2024/datasets/{name}/{name.lower()}.test.csv',
                                                                                val_path=f'NLPProject2024/datasets/{name}/{name.lower()}.valid.csv')
                        joblib.dump(le, f'{model_path}/label_encoder.pkl')

                        trainer = ModelTrainer(classifier, glove_file, len(le.classes_))
                        train_results = trainer.train_classifier(X_train, y_train, model_path, X_test=X_test, y_test=y_test, X_val=X_val, y_val=y_val)

                        train_results["dataset"] = dataset_file_name
                        train_results["method"] = lambada
                        train_results["model"] = llm
                        train_results["subset_size"] = subset_size
                        augmented_atis_df_result = pd.concat([augmented_atis_df_result, pd.DataFrame([train_results])], ignore_index=True)
                        print(f"Finished training {name} with {classifier} on {dataset_file_name}")
                        print(train_results)
                        print("-" * 50)
                      except Exception as e:
                        print(f"Error training {name} with {classifier} on {dataset_file_name}: {e}")
                        continue

Training ATIS with svm_tfidf on Llama3_8B_ATIS_5_augmented_data_svm_tfidf_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/Llama3/ATIS/svm_tfidf/Llama3_8B_ATIS_5_augmented_data_svm_tfidf_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training svm_tfidf...

Validation Results:
Accuracy: 0.6707
Precision: 0.8934
Recall: 0.6707
F1 Score: 0.7354
Accuracy: 0.6930
Precision: 0.8575
Recall: 0.6930
F1 Score: 0.7358
Model saved.
Finished training ATIS with svm_tfidf on Llama3_8B_ATIS_5_augmented_data_svm_tfidf_filtered
{'classifier': 'svm_tfidf', 'test_accuracy': 0.693, 'precision': 0.8575, 'recall': 0.693, 'f1_score': 0.7358, 'training_time': 0.15, 'dataset': 'Llama3_8B_ATIS_5_augmented_data_svm_tfidf_filtered', 'method': 'Lambada', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training ATIS with svm_glove on Llama3_8B_A

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Validation Results:
Accuracy: 0.6415
Precision: 0.8506
Recall: 0.6415
F1 Score: 0.7090
Accuracy: 0.6261
Precision: 0.8045
Recall: 0.6261
F1 Score: 0.6777
Model saved.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Finished training ATIS with svm_glove on Llama3_8B_ATIS_5_augmented_data_svm_glove_filtered
{'classifier': 'svm_glove', 'test_accuracy': 0.6261, 'precision': 0.8045, 'recall': 0.6261, 'f1_score': 0.6777, 'training_time': 8.63, 'dataset': 'Llama3_8B_ATIS_5_augmented_data_svm_glove_filtered', 'method': 'Lambada', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training ATIS with lstm_glove on Llama3_8B_ATIS_5_augmented_data_lstm_glove_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/Llama3/ATIS/lstm_glove/Llama3_8B_ATIS_5_augmented_data_lstm_glove_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training lstm_glove...
Epoch 1/20, Loss: 3.0082, Accuracy: 0.0523
Accuracy: 0.0223
Precision: 0.0038
Recall: 0.0223
F1 Score: 0.0065
Validation - Accuracy: 0.0223, Precision: 0.0038, Recall: 0.0223, F1 Score: 0.0065
Epoch 2/20,

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1938
Precision: 0.6550
Recall: 0.1938
F1 Score: 0.2650
Validation - Accuracy: 0.1938, Precision: 0.6550, Recall: 0.1938, F1 Score: 0.2650
Epoch 3/20, Loss: 2.5854, Accuracy: 0.2092
Accuracy: 0.4082
Precision: 0.6670
Recall: 0.4082
F1 Score: 0.4865
Validation - Accuracy: 0.4082, Precision: 0.6670, Recall: 0.4082, F1 Score: 0.4865


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4/20, Loss: 2.5547, Accuracy: 0.1961
Accuracy: 0.4065
Precision: 0.6622
Recall: 0.4065
F1 Score: 0.4813
Validation - Accuracy: 0.4065, Precision: 0.6622, Recall: 0.4065, F1 Score: 0.4813
Epoch 5/20, Loss: 2.3559, Accuracy: 0.3203


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4340
Precision: 0.6496
Recall: 0.4340
F1 Score: 0.5011
Validation - Accuracy: 0.4340, Precision: 0.6496, Recall: 0.4340, F1 Score: 0.5011
Epoch 6/20, Loss: 2.2145, Accuracy: 0.3660
Accuracy: 0.4477
Precision: 0.6489
Recall: 0.4477
F1 Score: 0.5179
Validation - Accuracy: 0.4477, Precision: 0.6489, Recall: 0.4477, F1 Score: 0.5179
Epoch 7/20, Loss: 2.0224, Accuracy: 0.4052
Accuracy: 0.4597
Precision: 0.7089
Recall: 0.4597
F1 Score: 0.5363
Validation - Accuracy: 0.4597, Precision: 0.7089, Recall: 0.4597, F1 Score: 0.5363
Epoch 8/20, Loss: 1.8851, Accuracy: 0.5163


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4614
Precision: 0.7104
Recall: 0.4614
F1 Score: 0.5395
Validation - Accuracy: 0.4614, Precision: 0.7104, Recall: 0.4614, F1 Score: 0.5395
Epoch 9/20, Loss: 1.6975, Accuracy: 0.5229
Accuracy: 0.4803
Precision: 0.7817
Recall: 0.4803
F1 Score: 0.5585
Validation - Accuracy: 0.4803, Precision: 0.7817, Recall: 0.4803, F1 Score: 0.5585


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 10/20, Loss: 1.5019, Accuracy: 0.5817
Accuracy: 0.5077
Precision: 0.7875
Recall: 0.5077
F1 Score: 0.5901
Validation - Accuracy: 0.5077, Precision: 0.7875, Recall: 0.5077, F1 Score: 0.5901
Epoch 11/20, Loss: 1.4011, Accuracy: 0.6471


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5746
Precision: 0.7352
Recall: 0.5746
F1 Score: 0.6361
Validation - Accuracy: 0.5746, Precision: 0.7352, Recall: 0.5746, F1 Score: 0.6361
Epoch 12/20, Loss: 1.2737, Accuracy: 0.6993
Accuracy: 0.5883
Precision: 0.8357
Recall: 0.5883
F1 Score: 0.6544
Validation - Accuracy: 0.5883, Precision: 0.8357, Recall: 0.5883, F1 Score: 0.6544
Epoch 13/20, Loss: 1.1490, Accuracy: 0.7386
Accuracy: 0.5540
Precision: 0.8556
Recall: 0.5540
F1 Score: 0.6370
Validation - Accuracy: 0.5540, Precision: 0.8556, Recall: 0.5540, F1 Score: 0.6370
Epoch 14/20, Loss: 1.0053, Accuracy: 0.7712


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5780
Precision: 0.8352
Recall: 0.5780
F1 Score: 0.6664
Validation - Accuracy: 0.5780, Precision: 0.8352, Recall: 0.5780, F1 Score: 0.6664
Epoch 15/20, Loss: 0.9675, Accuracy: 0.8039
Accuracy: 0.6244
Precision: 0.8490
Recall: 0.6244
F1 Score: 0.7083
Validation - Accuracy: 0.6244, Precision: 0.8490, Recall: 0.6244, F1 Score: 0.7083


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 16/20, Loss: 0.8578, Accuracy: 0.7843
Accuracy: 0.6655
Precision: 0.8446
Recall: 0.6655
F1 Score: 0.7345
Validation - Accuracy: 0.6655, Precision: 0.8446, Recall: 0.6655, F1 Score: 0.7345
Epoch 17/20, Loss: 0.7573, Accuracy: 0.8627


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.6346
Precision: 0.8640
Recall: 0.6346
F1 Score: 0.7156
Validation - Accuracy: 0.6346, Precision: 0.8640, Recall: 0.6346, F1 Score: 0.7156
Epoch 18/20, Loss: 0.6904, Accuracy: 0.8758
Accuracy: 0.6278
Precision: 0.8653
Recall: 0.6278
F1 Score: 0.7099
Validation - Accuracy: 0.6278, Precision: 0.8653, Recall: 0.6278, F1 Score: 0.7099
Epoch 19/20, Loss: 0.5924, Accuracy: 0.9216
Accuracy: 0.6055
Precision: 0.8700
Recall: 0.6055
F1 Score: 0.6946
Validation - Accuracy: 0.6055, Precision: 0.8700, Recall: 0.6055, F1 Score: 0.6946
Epoch 20/20, Loss: 0.5390, Accuracy: 0.9346


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.6604
Precision: 0.8670
Recall: 0.6604
F1 Score: 0.7326
Validation - Accuracy: 0.6604, Precision: 0.8670, Recall: 0.6604, F1 Score: 0.7326
Accuracy: 0.6123
Precision: 0.8174
Recall: 0.6123
F1 Score: 0.6790


  _warn_prf(average, modifier, msg_start, len(result))


Model and embeddings saved.
Finished training ATIS with lstm_glove on Llama3_8B_ATIS_5_augmented_data_lstm_glove_filtered
{'classifier': 'lstm_glove', 'test_accuracy': 0.6123, 'precision': 0.8174, 'recall': 0.6123, 'f1_score': 0.679, 'training_time': 37.76, 'dataset': 'Llama3_8B_ATIS_5_augmented_data_lstm_glove_filtered', 'method': 'Lambada', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training ATIS with bert_ktrain on Llama3_8B_ATIS_5_augmented_data_bert_ktrain_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/Llama3/ATIS/bert_ktrain/Llama3_8B_ATIS_5_augmented_data_bert_ktrain_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training bert_ktrain...
preprocessing train...
language: en
train sequence lengths:
	mean : 10
	95percentile : 16
	99percentile : 20


Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 11
	95percentile : 19
	99percentile : 24




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 00006: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 7/20
Epoch 8/20
Epoch 00008: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Restoring model weights from the end of the best epoch: 4.
Epoch 8: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.89%
Precision: 0.93
Recall: 0.89
F1 Score: 0.90
Model saved to final_results/models/Llama3_8B_ATIS_5_augmented_data_bert_ktrain_filtered/bert_ktrain
Finished training ATIS with bert_ktrain on Llama3_8B_ATIS_5_augmented_data_bert_ktrain_filtered
{'classifier': 'bert_ktrain', 'test_accuracy': 0.8902, 'precision': 0.9273, 'recall': 0.8902, 'f1_score': 0.9014, 'training_time': 29.58, 'dataset': 'Llama3_8B_ATIS_5_augmented_data_bert_ktrain_filtered', 'method': 'Lambada', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training ATIS with svm_tfidf on Mistral_7B_ATIS_5_augmented_data_svm_tfidf_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/Mistral/ATIS/svm_tfidf/Mistral_7B_ATIS_5_augmented_data_svm_tfidf_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training svm_tfidf...

Validation Results:
Accuracy: 0.737

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Validation Results:
Accuracy: 0.6398
Precision: 0.8468
Recall: 0.6398
F1 Score: 0.7126
Accuracy: 0.6449
Precision: 0.8115
Recall: 0.6449
F1 Score: 0.7015


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model saved.
Finished training ATIS with svm_glove on Mistral_7B_ATIS_5_augmented_data_svm_glove_filtered
{'classifier': 'svm_glove', 'test_accuracy': 0.6449, 'precision': 0.8115, 'recall': 0.6449, 'f1_score': 0.7015, 'training_time': 8.6, 'dataset': 'Mistral_7B_ATIS_5_augmented_data_svm_glove_filtered', 'method': 'Lambada', 'model': 'Mistral', 'subset_size': 5}
--------------------------------------------------
Training ATIS with lstm_glove on Mistral_7B_ATIS_5_augmented_data_lstm_glove_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/Mistral/ATIS/lstm_glove/Mistral_7B_ATIS_5_augmented_data_lstm_glove_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training lstm_glove...
Epoch 1/20, Loss: 3.1102, Accuracy: 0.0260
Accuracy: 0.0652
Precision: 0.7590
Recall: 0.0652
F1 Score: 0.0237
Validation - Accuracy: 0.0652, Precision: 0.7590, Recall: 0.0652, F1 Score: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0738
Precision: 0.7490
Recall: 0.0738
F1 Score: 0.0238
Validation - Accuracy: 0.0738, Precision: 0.7490, Recall: 0.0738, F1 Score: 0.0238
Epoch 3/20, Loss: 2.4571, Accuracy: 0.3182
Accuracy: 0.0789
Precision: 0.0313
Recall: 0.0789
F1 Score: 0.0391
Validation - Accuracy: 0.0789, Precision: 0.0313, Recall: 0.0789, F1 Score: 0.0391


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4/20, Loss: 2.4087, Accuracy: 0.2468
Accuracy: 0.1269
Precision: 0.7608
Recall: 0.1269
F1 Score: 0.0963
Validation - Accuracy: 0.1269, Precision: 0.7608, Recall: 0.1269, F1 Score: 0.0963
Epoch 5/20, Loss: 2.1728, Accuracy: 0.4026


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2007
Precision: 0.7203
Recall: 0.2007
F1 Score: 0.2088
Validation - Accuracy: 0.2007, Precision: 0.7203, Recall: 0.2007, F1 Score: 0.2088
Epoch 6/20, Loss: 1.9978, Accuracy: 0.4870
Accuracy: 0.2367
Precision: 0.7124
Recall: 0.2367
F1 Score: 0.2688
Validation - Accuracy: 0.2367, Precision: 0.7124, Recall: 0.2367, F1 Score: 0.2688
Epoch 7/20, Loss: 1.7512, Accuracy: 0.5455
Accuracy: 0.3362
Precision: 0.7219
Recall: 0.3362
F1 Score: 0.3996
Validation - Accuracy: 0.3362, Precision: 0.7219, Recall: 0.3362, F1 Score: 0.3996
Epoch 8/20, Loss: 1.5155, Accuracy: 0.6039


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4391
Precision: 0.7282
Recall: 0.4391
F1 Score: 0.5163
Validation - Accuracy: 0.4391, Precision: 0.7282, Recall: 0.4391, F1 Score: 0.5163
Epoch 9/20, Loss: 1.3462, Accuracy: 0.6623
Accuracy: 0.4700
Precision: 0.8011
Recall: 0.4700
F1 Score: 0.5468
Validation - Accuracy: 0.4700, Precision: 0.8011, Recall: 0.4700, F1 Score: 0.5468


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 10/20, Loss: 1.2382, Accuracy: 0.6948
Accuracy: 0.4957
Precision: 0.7478
Recall: 0.4957
F1 Score: 0.5741
Validation - Accuracy: 0.4957, Precision: 0.7478, Recall: 0.4957, F1 Score: 0.5741
Epoch 11/20, Loss: 1.0807, Accuracy: 0.7792


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4871
Precision: 0.7620
Recall: 0.4871
F1 Score: 0.5729
Validation - Accuracy: 0.4871, Precision: 0.7620, Recall: 0.4871, F1 Score: 0.5729
Epoch 12/20, Loss: 0.9176, Accuracy: 0.8182
Accuracy: 0.5077
Precision: 0.7834
Recall: 0.5077
F1 Score: 0.5921
Validation - Accuracy: 0.5077, Precision: 0.7834, Recall: 0.5077, F1 Score: 0.5921
Epoch 13/20, Loss: 0.8230, Accuracy: 0.8247
Accuracy: 0.5403
Precision: 0.7950
Recall: 0.5403
F1 Score: 0.6146
Validation - Accuracy: 0.5403, Precision: 0.7950, Recall: 0.5403, F1 Score: 0.6146
Epoch 14/20, Loss: 0.7903, Accuracy: 0.8442


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5352
Precision: 0.8267
Recall: 0.5352
F1 Score: 0.6155
Validation - Accuracy: 0.5352, Precision: 0.8267, Recall: 0.5352, F1 Score: 0.6155
Epoch 15/20, Loss: 0.6329, Accuracy: 0.9026
Accuracy: 0.5660
Precision: 0.8191
Recall: 0.5660
F1 Score: 0.6479
Validation - Accuracy: 0.5660, Precision: 0.8191, Recall: 0.5660, F1 Score: 0.6479


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 16/20, Loss: 0.6374, Accuracy: 0.8831
Accuracy: 0.6089
Precision: 0.8260
Recall: 0.6089
F1 Score: 0.6840
Validation - Accuracy: 0.6089, Precision: 0.8260, Recall: 0.6089, F1 Score: 0.6840
Epoch 17/20, Loss: 0.5433, Accuracy: 0.9221
Accuracy: 0.6141
Precision: 0.8373
Recall: 0.6141
F1 Score: 0.6881
Validation - Accuracy: 0.6141, Precision: 0.8373, Recall: 0.6141, F1 Score: 0.6881
Epoch 18/20, Loss: 0.5342, Accuracy: 0.9221
Accuracy: 0.6226
Precision: 0.8312
Recall: 0.6226
F1 Score: 0.6913
Validation - Accuracy: 0.6226, Precision: 0.8312, Recall: 0.6226, F1 Score: 0.6913
Epoch 19/20, Loss: 0.4591, Accuracy: 0.9351
Accuracy: 0.6003
Precision: 0.8436
Recall: 0.6003
F1 Score: 0.6815
Validation - Accuracy: 0.6003, Precision: 0.8436, Recall: 0.6003, F1 Score: 0.6815
Epoch 20/20, Loss: 0.4368, Accuracy: 0.9416


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5901
Precision: 0.8497
Recall: 0.5901
F1 Score: 0.6750
Validation - Accuracy: 0.5901, Precision: 0.8497, Recall: 0.5901, F1 Score: 0.6750
Accuracy: 0.5643
Precision: 0.8030
Recall: 0.5643
F1 Score: 0.6414


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model and embeddings saved.
Finished training ATIS with lstm_glove on Mistral_7B_ATIS_5_augmented_data_lstm_glove_filtered
{'classifier': 'lstm_glove', 'test_accuracy': 0.5643, 'precision': 0.803, 'recall': 0.5643, 'f1_score': 0.6414, 'training_time': 38.64, 'dataset': 'Mistral_7B_ATIS_5_augmented_data_lstm_glove_filtered', 'method': 'Lambada', 'model': 'Mistral', 'subset_size': 5}
--------------------------------------------------
Training ATIS with bert_ktrain on Mistral_7B_ATIS_5_augmented_data_bert_ktrain_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/Mistral/ATIS/bert_ktrain/Mistral_7B_ATIS_5_augmented_data_bert_ktrain_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training bert_ktrain...
preprocessing train...
language: en
train sequence lengths:
	mean : 10
	95percentile : 16
	99percentile : 20


Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 11
	95percentile : 19
	99percentile : 24




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 00007: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 8/20
Epoch 9/20
Epoch 00009: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Restoring model weights from the end of the best epoch: 5.
Epoch 9: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.89%
Precision: 0.92
Recall: 0.89
F1 Score: 0.90
Model saved to final_results/models/Mistral_7B_ATIS_5_augmented_data_bert_ktrain_filtered/bert_ktrain
Finished training ATIS with bert_ktrain on Mistral_7B_ATIS_5_augmented_data_bert_ktrain_filtered
{'classifier': 'bert_ktrain', 'test_accuracy': 0.8851, 'precision': 0.9243, 'recall': 0.8851, 'f1_score': 0.8987, 'training_time': 31.43, 'dataset': 'Mistral_7B_ATIS_5_augmented_data_bert_ktrain_filtered', 'method': 'Lambada', 'model': 'Mistral', 'subset_size': 5}
--------------------------------------------------
Training ATIS with svm_tfidf on GPT2_ATIS_5_augmented_data_svm_tfidf_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/GPT2/ATIS/svm_tfidf/GPT2_ATIS_5_augmented_data_svm_tfidf_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training svm_tfidf...

Validation Results:
Accuracy: 0.4357
Precision

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Validation Results:
Accuracy: 0.5746
Precision: 0.8199
Recall: 0.5746
F1 Score: 0.6543
Accuracy: 0.5609
Precision: 0.7969
Recall: 0.5609
F1 Score: 0.6274
Model saved.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Finished training ATIS with svm_glove on GPT2_ATIS_5_augmented_data_svm_glove_filtered
{'classifier': 'svm_glove', 'test_accuracy': 0.5609, 'precision': 0.7969, 'recall': 0.5609, 'f1_score': 0.6274, 'training_time': 8.61, 'dataset': 'GPT2_ATIS_5_augmented_data_svm_glove_filtered', 'method': 'Lambada', 'model': 'GPT2', 'subset_size': 5}
--------------------------------------------------
Training ATIS with lstm_glove on GPT2_ATIS_5_augmented_data_lstm_glove_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/GPT2/ATIS/lstm_glove/GPT2_ATIS_5_augmented_data_lstm_glove_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training lstm_glove...
Epoch 1/20, Loss: 2.9853, Accuracy: 0.0602
Accuracy: 0.0240
Precision: 0.0068
Recall: 0.0240
F1 Score: 0.0101
Validation - Accuracy: 0.0240, Precision: 0.0068, Recall: 0.0240, F1 Score: 0.0101
Epoch 2/20, Loss: 2.7203, Accuracy:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0583
Precision: 0.0081
Recall: 0.0583
F1 Score: 0.0142
Validation - Accuracy: 0.0583, Precision: 0.0081, Recall: 0.0583, F1 Score: 0.0142
Epoch 3/20, Loss: 2.4464, Accuracy: 0.2530
Accuracy: 0.0926
Precision: 0.5136
Recall: 0.0926
F1 Score: 0.0457
Validation - Accuracy: 0.0926, Precision: 0.5136, Recall: 0.0926, F1 Score: 0.0457


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4/20, Loss: 2.2865, Accuracy: 0.2651
Accuracy: 0.2367
Precision: 0.6622
Recall: 0.2367
F1 Score: 0.2971
Validation - Accuracy: 0.2367, Precision: 0.6622, Recall: 0.2367, F1 Score: 0.2971
Epoch 5/20, Loss: 2.3426, Accuracy: 0.3313


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2779
Precision: 0.7084
Recall: 0.2779
F1 Score: 0.3629
Validation - Accuracy: 0.2779, Precision: 0.7084, Recall: 0.2779, F1 Score: 0.3629
Epoch 6/20, Loss: 2.1995, Accuracy: 0.3916
Accuracy: 0.2058
Precision: 0.7023
Recall: 0.2058
F1 Score: 0.2915
Validation - Accuracy: 0.2058, Precision: 0.7023, Recall: 0.2058, F1 Score: 0.2915
Epoch 7/20, Loss: 2.0277, Accuracy: 0.4458
Accuracy: 0.1561
Precision: 0.7295
Recall: 0.1561
F1 Score: 0.2076
Validation - Accuracy: 0.1561, Precision: 0.7295, Recall: 0.1561, F1 Score: 0.2076
Epoch 8/20, Loss: 1.8537, Accuracy: 0.4819


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1784
Precision: 0.7648
Recall: 0.1784
F1 Score: 0.2244
Validation - Accuracy: 0.1784, Precision: 0.7648, Recall: 0.1784, F1 Score: 0.2244
Epoch 9/20, Loss: 1.5982, Accuracy: 0.5602
Accuracy: 0.2075
Precision: 0.7887
Recall: 0.2075
F1 Score: 0.2676
Validation - Accuracy: 0.2075, Precision: 0.7887, Recall: 0.2075, F1 Score: 0.2676


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 10/20, Loss: 1.6428, Accuracy: 0.5602
Accuracy: 0.2333
Precision: 0.7635
Recall: 0.2333
F1 Score: 0.3108
Validation - Accuracy: 0.2333, Precision: 0.7635, Recall: 0.2333, F1 Score: 0.3108
Epoch 11/20, Loss: 1.4023, Accuracy: 0.6807


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2967
Precision: 0.7779
Recall: 0.2967
F1 Score: 0.3960
Validation - Accuracy: 0.2967, Precision: 0.7779, Recall: 0.2967, F1 Score: 0.3960
Epoch 12/20, Loss: 1.2412, Accuracy: 0.6506
Accuracy: 0.2933
Precision: 0.7575
Recall: 0.2933
F1 Score: 0.3815
Validation - Accuracy: 0.2933, Precision: 0.7575, Recall: 0.2933, F1 Score: 0.3815
Epoch 13/20, Loss: 1.1344, Accuracy: 0.7169
Accuracy: 0.2967
Precision: 0.7375
Recall: 0.2967
F1 Score: 0.3624
Validation - Accuracy: 0.2967, Precision: 0.7375, Recall: 0.2967, F1 Score: 0.3624
Epoch 14/20, Loss: 1.0805, Accuracy: 0.7711


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2882
Precision: 0.7434
Recall: 0.2882
F1 Score: 0.3573
Validation - Accuracy: 0.2882, Precision: 0.7434, Recall: 0.2882, F1 Score: 0.3573
Epoch 15/20, Loss: 0.9676, Accuracy: 0.7952
Accuracy: 0.3122
Precision: 0.7503
Recall: 0.3122
F1 Score: 0.3953
Validation - Accuracy: 0.3122, Precision: 0.7503, Recall: 0.3122, F1 Score: 0.3953


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 16/20, Loss: 0.8755, Accuracy: 0.8012
Accuracy: 0.3173
Precision: 0.7547
Recall: 0.3173
F1 Score: 0.3976
Validation - Accuracy: 0.3173, Precision: 0.7547, Recall: 0.3173, F1 Score: 0.3976
Epoch 17/20, Loss: 0.7901, Accuracy: 0.8373


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3396
Precision: 0.7697
Recall: 0.3396
F1 Score: 0.4225
Validation - Accuracy: 0.3396, Precision: 0.7697, Recall: 0.3396, F1 Score: 0.4225
Epoch 18/20, Loss: 0.7617, Accuracy: 0.8253
Accuracy: 0.3842
Precision: 0.7829
Recall: 0.3842
F1 Score: 0.4741
Validation - Accuracy: 0.3842, Precision: 0.7829, Recall: 0.3842, F1 Score: 0.4741
Epoch 19/20, Loss: 0.6987, Accuracy: 0.8554
Accuracy: 0.5506
Precision: 0.7794
Recall: 0.5506
F1 Score: 0.6235
Validation - Accuracy: 0.5506, Precision: 0.7794, Recall: 0.5506, F1 Score: 0.6235
Epoch 20/20, Loss: 0.6019, Accuracy: 0.9096


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5660
Precision: 0.8267
Recall: 0.5660
F1 Score: 0.6523
Validation - Accuracy: 0.5660, Precision: 0.8267, Recall: 0.5660, F1 Score: 0.6523
Accuracy: 0.5163
Precision: 0.7618
Recall: 0.5163
F1 Score: 0.5935


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model and embeddings saved.
Finished training ATIS with lstm_glove on GPT2_ATIS_5_augmented_data_lstm_glove_filtered
{'classifier': 'lstm_glove', 'test_accuracy': 0.5163, 'precision': 0.7618, 'recall': 0.5163, 'f1_score': 0.5935, 'training_time': 37.72, 'dataset': 'GPT2_ATIS_5_augmented_data_lstm_glove_filtered', 'method': 'Lambada', 'model': 'GPT2', 'subset_size': 5}
--------------------------------------------------
Training ATIS with bert_ktrain on GPT2_ATIS_5_augmented_data_bert_ktrain_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/GPT2/ATIS/bert_ktrain/GPT2_ATIS_5_augmented_data_bert_ktrain_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training bert_ktrain...
preprocessing train...
language: en
train sequence lengths:
	mean : 10
	95percentile : 20
	99percentile : 34


Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 11
	95percentile : 19
	99percentile : 24




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 00008: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 00011: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Epoch 12/20
Epoch 13/20
Epoch 00013: Reducing Max LR on Plateau: new max lr will be 6.25e-06 (if not early_stopping).
Restoring model weights from the end of the best epoch: 9.
Epoch 13: early stopping
Weights from best epoch have been loaded into model.
Accuracy: 0.72%
Precision: 0.89
Recall: 0.72
F1 Score: 0.76
Model saved to final_results/models/GPT2_ATIS_5_augmented_data_bert_ktrain_filtered/bert_ktrain
Finished training ATIS with bert_ktrain on GPT2_ATIS_5_augmented_data_bert_ktrain_filtered
{'classifier': 'bert_ktrain', 'test_accuracy': 0.7153, 'precision': 0.8932, 'recall': 0.7153, 'f1_score': 0.7

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Validation Results:
Accuracy: 0.5901
Precision: 0.8351
Recall: 0.5901
F1 Score: 0.6744
Accuracy: 0.5678
Precision: 0.7877
Recall: 0.5678
F1 Score: 0.6369
Model saved.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Finished training ATIS with svm_glove on Llama3_8B_lambada_plus_ATIS_5_augmented_data_svm_glove_filtered
{'classifier': 'svm_glove', 'test_accuracy': 0.5678, 'precision': 0.7877, 'recall': 0.5678, 'f1_score': 0.6369, 'training_time': 8.5, 'dataset': 'Llama3_8B_lambada_plus_ATIS_5_augmented_data_svm_glove_filtered', 'method': 'Lambada+', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training ATIS with lstm_glove on Llama3_8B_lambada_plus_ATIS_5_augmented_data_lstm_glove_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+/Llama3/ATIS/lstm_glove/Llama3_8B_lambada_plus_ATIS_5_augmented_data_lstm_glove_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training lstm_glove...
Epoch 1/20, Loss: 3.0709, Accuracy: 0.0360
Accuracy: 0.0463
Precision: 0.0042
Recall: 0.0463
F1 Score: 0.0077
Validation - Accuracy: 0.0463, Precision:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0686
Precision: 0.0090
Recall: 0.0686
F1 Score: 0.0155
Validation - Accuracy: 0.0686, Precision: 0.0090, Recall: 0.0686, F1 Score: 0.0155
Epoch 3/20, Loss: 2.6089, Accuracy: 0.1871
Accuracy: 0.0738
Precision: 0.0123
Recall: 0.0738
F1 Score: 0.0200
Validation - Accuracy: 0.0738, Precision: 0.0123, Recall: 0.0738, F1 Score: 0.0200


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4/20, Loss: 2.5292, Accuracy: 0.3022
Accuracy: 0.0600
Precision: 0.0193
Recall: 0.0600
F1 Score: 0.0267
Validation - Accuracy: 0.0600, Precision: 0.0193, Recall: 0.0600, F1 Score: 0.0267
Epoch 5/20, Loss: 2.4005, Accuracy: 0.3094


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0617
Precision: 0.5701
Recall: 0.0617
F1 Score: 0.0345
Validation - Accuracy: 0.0617, Precision: 0.5701, Recall: 0.0617, F1 Score: 0.0345
Epoch 6/20, Loss: 2.1627, Accuracy: 0.4245
Accuracy: 0.2024
Precision: 0.7264
Recall: 0.2024
F1 Score: 0.2400
Validation - Accuracy: 0.2024, Precision: 0.7264, Recall: 0.2024, F1 Score: 0.2400
Epoch 7/20, Loss: 2.0298, Accuracy: 0.3741
Accuracy: 0.1973
Precision: 0.7217
Recall: 0.1973
F1 Score: 0.2241
Validation - Accuracy: 0.1973, Precision: 0.7217, Recall: 0.1973, F1 Score: 0.2241
Epoch 8/20, Loss: 1.9405, Accuracy: 0.4245


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2882
Precision: 0.7262
Recall: 0.2882
F1 Score: 0.3526
Validation - Accuracy: 0.2882, Precision: 0.7262, Recall: 0.2882, F1 Score: 0.3526
Epoch 9/20, Loss: 1.7909, Accuracy: 0.4820
Accuracy: 0.3516
Precision: 0.7412
Recall: 0.3516
F1 Score: 0.4210
Validation - Accuracy: 0.3516, Precision: 0.7412, Recall: 0.3516, F1 Score: 0.4210


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 10/20, Loss: 1.5397, Accuracy: 0.5827
Accuracy: 0.3448
Precision: 0.7576
Recall: 0.3448
F1 Score: 0.4139
Validation - Accuracy: 0.3448, Precision: 0.7576, Recall: 0.3448, F1 Score: 0.4139
Epoch 11/20, Loss: 1.4955, Accuracy: 0.5396


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3722
Precision: 0.7708
Recall: 0.3722
F1 Score: 0.4470
Validation - Accuracy: 0.3722, Precision: 0.7708, Recall: 0.3722, F1 Score: 0.4470
Epoch 12/20, Loss: 1.3987, Accuracy: 0.5971
Accuracy: 0.4031
Precision: 0.7819
Recall: 0.4031
F1 Score: 0.4784
Validation - Accuracy: 0.4031, Precision: 0.7819, Recall: 0.4031, F1 Score: 0.4784
Epoch 13/20, Loss: 1.2664, Accuracy: 0.6331
Accuracy: 0.4545
Precision: 0.7707
Recall: 0.4545
F1 Score: 0.5376
Validation - Accuracy: 0.4545, Precision: 0.7707, Recall: 0.4545, F1 Score: 0.5376
Epoch 14/20, Loss: 1.2513, Accuracy: 0.6691


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4271
Precision: 0.8594
Recall: 0.4271
F1 Score: 0.5142
Validation - Accuracy: 0.4271, Precision: 0.8594, Recall: 0.4271, F1 Score: 0.5142
Epoch 15/20, Loss: 1.0166, Accuracy: 0.7554
Accuracy: 0.3877
Precision: 0.8469
Recall: 0.3877
F1 Score: 0.4804
Validation - Accuracy: 0.3877, Precision: 0.8469, Recall: 0.3877, F1 Score: 0.4804


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 16/20, Loss: 0.9122, Accuracy: 0.7626
Accuracy: 0.4768
Precision: 0.8539
Recall: 0.4768
F1 Score: 0.5637
Validation - Accuracy: 0.4768, Precision: 0.8539, Recall: 0.4768, F1 Score: 0.5637
Epoch 17/20, Loss: 0.9974, Accuracy: 0.8058


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5111
Precision: 0.8555
Recall: 0.5111
F1 Score: 0.6039
Validation - Accuracy: 0.5111, Precision: 0.8555, Recall: 0.5111, F1 Score: 0.6039
Epoch 18/20, Loss: 0.7727, Accuracy: 0.8201
Accuracy: 0.5369
Precision: 0.8467
Recall: 0.5369
F1 Score: 0.6310
Validation - Accuracy: 0.5369, Precision: 0.8467, Recall: 0.5369, F1 Score: 0.6310
Epoch 19/20, Loss: 0.7413, Accuracy: 0.8345
Accuracy: 0.5094
Precision: 0.8561
Recall: 0.5094
F1 Score: 0.6015
Validation - Accuracy: 0.5094, Precision: 0.8561, Recall: 0.5094, F1 Score: 0.6015
Epoch 20/20, Loss: 0.7024, Accuracy: 0.8489


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4768
Precision: 0.8737
Recall: 0.4768
F1 Score: 0.5838
Validation - Accuracy: 0.4768, Precision: 0.8737, Recall: 0.4768, F1 Score: 0.5838
Accuracy: 0.4528
Precision: 0.7974
Recall: 0.4528
F1 Score: 0.5468


  _warn_prf(average, modifier, msg_start, len(result))


Model and embeddings saved.
Finished training ATIS with lstm_glove on Llama3_8B_lambada_plus_ATIS_5_augmented_data_lstm_glove_filtered
{'classifier': 'lstm_glove', 'test_accuracy': 0.4528, 'precision': 0.7974, 'recall': 0.4528, 'f1_score': 0.5468, 'training_time': 37.91, 'dataset': 'Llama3_8B_lambada_plus_ATIS_5_augmented_data_lstm_glove_filtered', 'method': 'Lambada+', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training ATIS with bert_ktrain on Llama3_8B_lambada_plus_ATIS_5_augmented_data_bert_ktrain_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+/Llama3/ATIS/bert_ktrain/Llama3_8B_lambada_plus_ATIS_5_augmented_data_bert_ktrain_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training bert_ktrain...
preprocessing train...
language: en
train sequence lengths:
	mean : 11
	95percentile : 19
	99percentile : 25


Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 11
	95percentile : 19
	99percentile : 24




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 00010: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 11/20
Epoch 12/20
Epoch 00012: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Restoring model weights from the end of the best epoch: 8.
Epoch 12: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.69%
Precision: 0.92
Recall: 0.69
F1 Score: 0.77
Model saved to final_results/models/Llama3_8B_lambada_plus_ATIS_5_augmented_data_bert_ktrain_filtered/bert_ktrain
Finished training ATIS with bert_ktrain on Llama3_8B_lambada_plus_ATIS_5_augmented_data_bert_ktrain_filtered
{'classifier': 'bert_ktrain', 'test_accuracy': 0.6913, 'precision': 0.922, 'recall': 0.6913, 'f1_score': 0.7677, 'training_time': 35.35, 'dataset': 'Llama3_8B_lambada_plus_ATIS_5_augmented_data_bert_ktrain_filtered', 'method': 'Lambada+', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training ATIS with svm_tfidf on Mistral_7B_lambada_plus_ATIS_5_augmented_data_svm_tfidf_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+/Mistral/ATIS/svm_tfidf/Mistral_7B_lambada_plus_ATIS_5_augmented_data_svm_tfidf_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.t

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Validation Results:
Accuracy: 0.4563
Precision: 0.8260
Recall: 0.4563
F1 Score: 0.5583
Accuracy: 0.4734
Precision: 0.7974
Recall: 0.4734
F1 Score: 0.5535
Model saved.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Finished training ATIS with svm_glove on Mistral_7B_lambada_plus_ATIS_5_augmented_data_svm_glove_filtered
{'classifier': 'svm_glove', 'test_accuracy': 0.4734, 'precision': 0.7974, 'recall': 0.4734, 'f1_score': 0.5535, 'training_time': 8.53, 'dataset': 'Mistral_7B_lambada_plus_ATIS_5_augmented_data_svm_glove_filtered', 'method': 'Lambada+', 'model': 'Mistral', 'subset_size': 5}
--------------------------------------------------
Training ATIS with lstm_glove on Mistral_7B_lambada_plus_ATIS_5_augmented_data_lstm_glove_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+/Mistral/ATIS/lstm_glove/Mistral_7B_lambada_plus_ATIS_5_augmented_data_lstm_glove_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training lstm_glove...
Epoch 1/20, Loss: 3.3767, Accuracy: 0.0584
Accuracy: 0.3070
Precision: 0.6133
Recall: 0.3070
F1 Score: 0.3842
Validation - Accuracy: 0.3070, Pre

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.6158
Precision: 0.6084
Recall: 0.6158
F1 Score: 0.6076
Validation - Accuracy: 0.6158, Precision: 0.6084, Recall: 0.6158, F1 Score: 0.6076
Epoch 3/20, Loss: 2.6344, Accuracy: 0.2208
Accuracy: 0.4082
Precision: 0.6432
Recall: 0.4082
F1 Score: 0.4869
Validation - Accuracy: 0.4082, Precision: 0.6432, Recall: 0.4082, F1 Score: 0.4869


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4/20, Loss: 2.4839, Accuracy: 0.3247
Accuracy: 0.3276
Precision: 0.6801
Recall: 0.3276
F1 Score: 0.4215
Validation - Accuracy: 0.3276, Precision: 0.6801, Recall: 0.3276, F1 Score: 0.4215
Epoch 5/20, Loss: 2.3517, Accuracy: 0.3506


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3070
Precision: 0.6907
Recall: 0.3070
F1 Score: 0.3871
Validation - Accuracy: 0.3070, Precision: 0.6907, Recall: 0.3070, F1 Score: 0.3871
Epoch 6/20, Loss: 2.1467, Accuracy: 0.3766
Accuracy: 0.3087
Precision: 0.6731
Recall: 0.3087
F1 Score: 0.3736
Validation - Accuracy: 0.3087, Precision: 0.6731, Recall: 0.3087, F1 Score: 0.3736
Epoch 7/20, Loss: 2.0936, Accuracy: 0.3961
Accuracy: 0.3310
Precision: 0.6627
Recall: 0.3310
F1 Score: 0.3865
Validation - Accuracy: 0.3310, Precision: 0.6627, Recall: 0.3310, F1 Score: 0.3865
Epoch 8/20, Loss: 1.8570, Accuracy: 0.5195


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3396
Precision: 0.6668
Recall: 0.3396
F1 Score: 0.3953
Validation - Accuracy: 0.3396, Precision: 0.6668, Recall: 0.3396, F1 Score: 0.3953
Epoch 9/20, Loss: 1.7577, Accuracy: 0.4935
Accuracy: 0.3448
Precision: 0.6719
Recall: 0.3448
F1 Score: 0.3967
Validation - Accuracy: 0.3448, Precision: 0.6719, Recall: 0.3448, F1 Score: 0.3967


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 10/20, Loss: 1.6160, Accuracy: 0.5714
Accuracy: 0.3791
Precision: 0.6782
Recall: 0.3791
F1 Score: 0.4347
Validation - Accuracy: 0.3791, Precision: 0.6782, Recall: 0.3791, F1 Score: 0.4347
Epoch 11/20, Loss: 1.4607, Accuracy: 0.6169


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3894
Precision: 0.6930
Recall: 0.3894
F1 Score: 0.4475
Validation - Accuracy: 0.3894, Precision: 0.6930, Recall: 0.3894, F1 Score: 0.4475
Epoch 12/20, Loss: 1.3294, Accuracy: 0.6429
Accuracy: 0.4305
Precision: 0.7948
Recall: 0.4305
F1 Score: 0.5017
Validation - Accuracy: 0.4305, Precision: 0.7948, Recall: 0.4305, F1 Score: 0.5017
Epoch 13/20, Loss: 1.2523, Accuracy: 0.6558
Accuracy: 0.5214
Precision: 0.8243
Recall: 0.5214
F1 Score: 0.5888
Validation - Accuracy: 0.5214, Precision: 0.8243, Recall: 0.5214, F1 Score: 0.5888
Epoch 14/20, Loss: 1.1034, Accuracy: 0.7143


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4974
Precision: 0.8098
Recall: 0.4974
F1 Score: 0.5582
Validation - Accuracy: 0.4974, Precision: 0.8098, Recall: 0.4974, F1 Score: 0.5582
Epoch 15/20, Loss: 1.0281, Accuracy: 0.7532
Accuracy: 0.4923
Precision: 0.8223
Recall: 0.4923
F1 Score: 0.5569
Validation - Accuracy: 0.4923, Precision: 0.8223, Recall: 0.4923, F1 Score: 0.5569


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 16/20, Loss: 0.8793, Accuracy: 0.8117
Accuracy: 0.5609
Precision: 0.8418
Recall: 0.5609
F1 Score: 0.6325
Validation - Accuracy: 0.5609, Precision: 0.8418, Recall: 0.5609, F1 Score: 0.6325
Epoch 17/20, Loss: 0.8047, Accuracy: 0.8377


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5626
Precision: 0.8482
Recall: 0.5626
F1 Score: 0.6411
Validation - Accuracy: 0.5626, Precision: 0.8482, Recall: 0.5626, F1 Score: 0.6411
Epoch 18/20, Loss: 0.7217, Accuracy: 0.8117
Accuracy: 0.5403
Precision: 0.8242
Recall: 0.5403
F1 Score: 0.6168
Validation - Accuracy: 0.5403, Precision: 0.8242, Recall: 0.5403, F1 Score: 0.6168
Epoch 19/20, Loss: 0.6570, Accuracy: 0.8571
Accuracy: 0.5643
Precision: 0.8428
Recall: 0.5643
F1 Score: 0.6388
Validation - Accuracy: 0.5643, Precision: 0.8428, Recall: 0.5643, F1 Score: 0.6388
Epoch 20/20, Loss: 0.6270, Accuracy: 0.8766


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5455
Precision: 0.8248
Recall: 0.5455
F1 Score: 0.6188
Validation - Accuracy: 0.5455, Precision: 0.8248, Recall: 0.5455, F1 Score: 0.6188
Accuracy: 0.5214
Precision: 0.7456
Recall: 0.5214
F1 Score: 0.5794


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model and embeddings saved.
Finished training ATIS with lstm_glove on Mistral_7B_lambada_plus_ATIS_5_augmented_data_lstm_glove_filtered
{'classifier': 'lstm_glove', 'test_accuracy': 0.5214, 'precision': 0.7456, 'recall': 0.5214, 'f1_score': 0.5794, 'training_time': 37.42, 'dataset': 'Mistral_7B_lambada_plus_ATIS_5_augmented_data_lstm_glove_filtered', 'method': 'Lambada+', 'model': 'Mistral', 'subset_size': 5}
--------------------------------------------------
Training ATIS with bert_ktrain on Mistral_7B_lambada_plus_ATIS_5_augmented_data_bert_ktrain_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+/Mistral/ATIS/bert_ktrain/Mistral_7B_lambada_plus_ATIS_5_augmented_data_bert_ktrain_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training bert_ktrain...
preprocessing train...
language: en
train sequence lengths:
	mean : 10
	95percentile : 16
	99percentile : 

Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 11
	95percentile : 19
	99percentile : 24




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 00007: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 8/20
Epoch 9/20
Epoch 00009: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Restoring model weights from the end of the best epoch: 5.
Epoch 9: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.82%
Precision: 0.93
Recall: 0.82
F1 Score: 0.86
Model saved to final_results/models/Mistral_7B_lambada_plus_ATIS_5_augmented_data_bert_ktrain_filtered/bert_ktrain
Finished training ATIS with bert_ktrain on Mistral_7B_lambada_plus_ATIS_5_augmented_data_bert_ktrain_filtered
{'classifier': 'bert_ktrain', 'test_accuracy': 0.8233, 'precision': 0.931, 'recall': 0.8233, 'f1_score': 0.8629, 'training_time': 31.41, 'dataset': 'Mistral_7B_lambada_plus_ATIS_5_augmented_data_bert_ktrain_filtered', 'method': 'Lambada+', 'model': 'Mistral', 'subset_size': 5}
--------------------------------------------------
Training ATIS with svm_tfidf on Llama_8B_lambada_plus_instruct_ATIS_5_augmented_data_svm_tfidf_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+Instruct/Llama3/ATIS/svm_tfidf/Llama_8B_lambada_plus_instruct_ATIS_5_augmented_data_svm_tfidf_filtered.csv
	Test path: NLPProject

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Validation Results:
Accuracy: 0.5437
Precision: 0.8296
Recall: 0.5437
F1 Score: 0.6315
Accuracy: 0.5214
Precision: 0.7956
Recall: 0.5214
F1 Score: 0.6018
Model saved.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Finished training ATIS with svm_glove on Llama_8B_lambada_plus_instruct_ATIS_5_augmented_data_svm_glove_filtered
{'classifier': 'svm_glove', 'test_accuracy': 0.5214, 'precision': 0.7956, 'recall': 0.5214, 'f1_score': 0.6018, 'training_time': 8.65, 'dataset': 'Llama_8B_lambada_plus_instruct_ATIS_5_augmented_data_svm_glove_filtered', 'method': 'Lambada+Instruct', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training ATIS with lstm_glove on Llama_8B_lambada_plus_instruct_ATIS_5_augmented_data_lstm_glove_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+Instruct/Llama3/ATIS/lstm_glove/Llama_8B_lambada_plus_instruct_ATIS_5_augmented_data_lstm_glove_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training lstm_glove...
Epoch 1/20, Loss: 3.1322, Accuracy: 0.0909
Accuracy: 0.0617
Precision: 0.7560
Recall: 0.0617
F1 Score:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0703
Precision: 0.7469
Recall: 0.0703
F1 Score: 0.0268
Validation - Accuracy: 0.0703, Precision: 0.7469, Recall: 0.0703, F1 Score: 0.0268
Epoch 3/20, Loss: 2.5106, Accuracy: 0.2121
Accuracy: 0.1732
Precision: 0.6973
Recall: 0.1732
F1 Score: 0.1770
Validation - Accuracy: 0.1732, Precision: 0.6973, Recall: 0.1732, F1 Score: 0.1770


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4/20, Loss: 2.4309, Accuracy: 0.2652
Accuracy: 0.3259
Precision: 0.6934
Recall: 0.3259
F1 Score: 0.4010
Validation - Accuracy: 0.3259, Precision: 0.6934, Recall: 0.3259, F1 Score: 0.4010
Epoch 5/20, Loss: 2.2540, Accuracy: 0.2727


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3328
Precision: 0.7153
Recall: 0.3328
F1 Score: 0.4141
Validation - Accuracy: 0.3328, Precision: 0.7153, Recall: 0.3328, F1 Score: 0.4141
Epoch 6/20, Loss: 2.3326, Accuracy: 0.3182
Accuracy: 0.3019
Precision: 0.7327
Recall: 0.3019
F1 Score: 0.3985
Validation - Accuracy: 0.3019, Precision: 0.7327, Recall: 0.3019, F1 Score: 0.3985
Epoch 7/20, Loss: 2.1777, Accuracy: 0.3485
Accuracy: 0.3070
Precision: 0.7349
Recall: 0.3070
F1 Score: 0.4034
Validation - Accuracy: 0.3070, Precision: 0.7349, Recall: 0.3070, F1 Score: 0.4034
Epoch 8/20, Loss: 2.1623, Accuracy: 0.4167


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3208
Precision: 0.7330
Recall: 0.3208
F1 Score: 0.4137
Validation - Accuracy: 0.3208, Precision: 0.7330, Recall: 0.3208, F1 Score: 0.4137
Epoch 9/20, Loss: 1.8097, Accuracy: 0.4242
Accuracy: 0.3448
Precision: 0.7467
Recall: 0.3448
F1 Score: 0.4465
Validation - Accuracy: 0.3448, Precision: 0.7467, Recall: 0.3448, F1 Score: 0.4465


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 10/20, Loss: 1.8133, Accuracy: 0.5227
Accuracy: 0.4683
Precision: 0.7417
Recall: 0.4683
F1 Score: 0.5540
Validation - Accuracy: 0.4683, Precision: 0.7417, Recall: 0.4683, F1 Score: 0.5540
Epoch 11/20, Loss: 1.5500, Accuracy: 0.4848


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5403
Precision: 0.8104
Recall: 0.5403
F1 Score: 0.6171
Validation - Accuracy: 0.5403, Precision: 0.8104, Recall: 0.5403, F1 Score: 0.6171
Epoch 12/20, Loss: 1.5856, Accuracy: 0.5985
Accuracy: 0.5129
Precision: 0.7384
Recall: 0.5129
F1 Score: 0.5835
Validation - Accuracy: 0.5129, Precision: 0.7384, Recall: 0.5129, F1 Score: 0.5835
Epoch 13/20, Loss: 1.4473, Accuracy: 0.6136
Accuracy: 0.5472
Precision: 0.7500
Recall: 0.5472
F1 Score: 0.6047
Validation - Accuracy: 0.5472, Precision: 0.7500, Recall: 0.5472, F1 Score: 0.6047
Epoch 14/20, Loss: 1.4742, Accuracy: 0.5833


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5540
Precision: 0.8350
Recall: 0.5540
F1 Score: 0.6323
Validation - Accuracy: 0.5540, Precision: 0.8350, Recall: 0.5540, F1 Score: 0.6323
Epoch 15/20, Loss: 1.2464, Accuracy: 0.6591
Accuracy: 0.5918
Precision: 0.7981
Recall: 0.5918
F1 Score: 0.6639
Validation - Accuracy: 0.5918, Precision: 0.7981, Recall: 0.5918, F1 Score: 0.6639


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 16/20, Loss: 1.1450, Accuracy: 0.7121
Accuracy: 0.5283
Precision: 0.7903
Recall: 0.5283
F1 Score: 0.6121
Validation - Accuracy: 0.5283, Precision: 0.7903, Recall: 0.5283, F1 Score: 0.6121
Epoch 17/20, Loss: 1.1808, Accuracy: 0.6894


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5232
Precision: 0.8002
Recall: 0.5232
F1 Score: 0.6072
Validation - Accuracy: 0.5232, Precision: 0.8002, Recall: 0.5232, F1 Score: 0.6072
Epoch 18/20, Loss: 1.0478, Accuracy: 0.7500
Accuracy: 0.5214
Precision: 0.8059
Recall: 0.5214
F1 Score: 0.6104
Validation - Accuracy: 0.5214, Precision: 0.8059, Recall: 0.5214, F1 Score: 0.6104
Epoch 19/20, Loss: 0.9155, Accuracy: 0.7803
Accuracy: 0.4889
Precision: 0.7960
Recall: 0.4889
F1 Score: 0.5702
Validation - Accuracy: 0.4889, Precision: 0.7960, Recall: 0.4889, F1 Score: 0.5702
Epoch 20/20, Loss: 0.7891, Accuracy: 0.7727


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5678
Precision: 0.7988
Recall: 0.5678
F1 Score: 0.6335
Validation - Accuracy: 0.5678, Precision: 0.7988, Recall: 0.5678, F1 Score: 0.6335
Accuracy: 0.5300
Precision: 0.7624
Recall: 0.5300
F1 Score: 0.5986


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Model and embeddings saved.
Finished training ATIS with lstm_glove on Llama_8B_lambada_plus_instruct_ATIS_5_augmented_data_lstm_glove_filtered
{'classifier': 'lstm_glove', 'test_accuracy': 0.53, 'precision': 0.7624, 'recall': 0.53, 'f1_score': 0.5986, 'training_time': 37.96, 'dataset': 'Llama_8B_lambada_plus_instruct_ATIS_5_augmented_data_lstm_glove_filtered', 'method': 'Lambada+Instruct', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training ATIS with bert_ktrain on Llama_8B_lambada_plus_instruct_ATIS_5_augmented_data_bert_ktrain_filtered...
	Full train path: NLPProject2024/datasets/ATIS/sampled_subsets/ver1/atis_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+Instruct/Llama3/ATIS/bert_ktrain/Llama_8B_lambada_plus_instruct_ATIS_5_augmented_data_bert_ktrain_filtered.csv
	Test path: NLPProject2024/datasets/ATIS/atis.test.csv
Training bert_ktrain...
preprocessing train...
language: en
train sequence lengths:
	mean : 

Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 11
	95percentile : 19
	99percentile : 24




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 00008: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 00014: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Epoch 15/20
Epoch 16/20
Epoch 00016: Reducing Max LR on Plateau: new max lr will be 6.25e-06 (if not early_stopping).
Restoring model weights from the end of the best epoch: 12.
Epoch 16: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.79%
Precision: 0.91
Recall: 0.79
F1 Score: 0.83
Model saved to final_results/models/Llama_8B_lambada_plus_instruct_ATIS_5_augmented_data_bert_ktrain_filtered/bert_ktrain
Finished training ATIS with bert_ktrain on Llama_8B_lambada_plus_instruct_ATIS_5_augmented_data_bert_ktrain_filtered
{'classifier': 'bert_ktrain', 'test_accuracy': 0.7942, 'precision': 0.9064, 'recall': 0.7942, 'f1_score': 0.8329, 'training_time': 38.72, 'dataset': 'Llama_8B_lambada_plus_instruct_ATIS_5_augmented_data_bert_ktrain_filtered', 'method': 'Lambada+Instruct', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------


In [9]:
augmented_atis_df_result.sort_values(by=['classifier', 'test_accuracy']).head(50)

Unnamed: 0,dataset,classifier,test_accuracy,model,subset_size,method,precision,recall,f1_score,training_time
15,Llama3_8B_lambada_plus_ATIS_5_augmented_data_b...,bert_ktrain,0.6913,Llama3,5,Lambada+,0.922,0.6913,0.7677,35.35
11,GPT2_ATIS_5_augmented_data_bert_ktrain_filtered,bert_ktrain,0.7153,GPT2,5,Lambada,0.8932,0.7153,0.7641,33.99
23,Llama_8B_lambada_plus_instruct_ATIS_5_augmente...,bert_ktrain,0.7942,Llama3,5,Lambada+Instruct,0.9064,0.7942,0.8329,38.72
19,Mistral_7B_lambada_plus_ATIS_5_augmented_data_...,bert_ktrain,0.8233,Mistral,5,Lambada+,0.931,0.8233,0.8629,31.41
7,Mistral_7B_ATIS_5_augmented_data_bert_ktrain_f...,bert_ktrain,0.8851,Mistral,5,Lambada,0.9243,0.8851,0.8987,31.43
3,Llama3_8B_ATIS_5_augmented_data_bert_ktrain_fi...,bert_ktrain,0.8902,Llama3,5,Lambada,0.9273,0.8902,0.9014,29.58
14,Llama3_8B_lambada_plus_ATIS_5_augmented_data_l...,lstm_glove,0.4528,Llama3,5,Lambada+,0.7974,0.4528,0.5468,37.91
10,GPT2_ATIS_5_augmented_data_lstm_glove_filtered,lstm_glove,0.5163,GPT2,5,Lambada,0.7618,0.5163,0.5935,37.72
18,Mistral_7B_lambada_plus_ATIS_5_augmented_data_...,lstm_glove,0.5214,Mistral,5,Lambada+,0.7456,0.5214,0.5794,37.42
22,Llama_8B_lambada_plus_instruct_ATIS_5_augmente...,lstm_glove,0.53,Llama3,5,Lambada+Instruct,0.7624,0.53,0.5986,37.96


In [10]:
augmented_trec_df_result = pd.DataFrame(columns=['dataset', 'classifier', 'test_accuracy', 'model', 'subset_size', 'method'])

# Train on subset + matching augmented data
for name in ['TREC']:
    for subset_size in [5]:
        subset_path = f'NLPProject2024/datasets/{name}/sampled_subsets/ver1/{name.lower()}_{subset_size}_subset.csv'

        for lambada in lambadas:
            for llm in llms:
                if lambada == 'Lambada+Instruct' and llm == 'Mistral':
                    continue
                if lambada == 'Lambada+' and llm == 'GPT2':
                    continue
                if lambada == 'Lambada+Instruct' and llm == 'GPT2':
                    continue

                for classifier in classifiers:
                    directory_path = f'NLPProject2024/filtered_datasets/{lambada}/{llm}/{name}/{classifier}'

                    data_files = glob.glob(os.path.join(directory_path, '*.csv'))

                    for file in data_files:
                      # Hack to train with only matching augmented data
                      if f"{name.lower()}_{subset_size}_" not in file.lower():
                        continue
                      try:
                        dataset_file_name = file.replace(f'{directory_path}', "").replace('.csv', "").replace('/', "")
                        model_path = f'final_results/models/{dataset_file_name}'
                        os.makedirs(model_path, exist_ok=True)
                        print(f"Training {name} with {classifier} on {dataset_file_name}...")
                        X_train, y_train, X_test, y_test, le, X_val, y_val = load_data(subset_path,
                                                                                generated_train_path=file,
                                                                                test_path=f'NLPProject2024/datasets/{name}/{name.lower()}.test.csv',
                                                                                val_path=f'NLPProject2024/datasets/{name}/{name.lower()}.valid.csv')
                        joblib.dump(le, f'{model_path}/label_encoder.pkl')

                        trainer = ModelTrainer(classifier, glove_file, len(le.classes_))
                        train_results = trainer.train_classifier(X_train, y_train, model_path, X_test=X_test, y_test=y_test, X_val=X_val, y_val=y_val)

                        train_results["dataset"] = dataset_file_name
                        train_results["method"] = lambada
                        train_results["model"] = llm
                        train_results["subset_size"] = subset_size
                        augmented_trec_df_result = pd.concat([augmented_trec_df_result, pd.DataFrame([train_results])], ignore_index=True)
                        print(f"Finished training {name} with {classifier} on {dataset_file_name}")
                        print(train_results)
                        print("-" * 50)
                      except Exception as e:
                        print(f"Error training {name} with {classifier} on {dataset_file_name}: {e}")
                        continue

Training TREC with svm_tfidf on Llama3_8B_TREC_5_augmented_data_svm_tfidf_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/Llama3/TREC/svm_tfidf/Llama3_8B_TREC_5_augmented_data_svm_tfidf_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training svm_tfidf...

Validation Results:
Accuracy: 0.4639
Precision: 0.6540
Recall: 0.4639
F1 Score: 0.4726


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4956
Precision: 0.7240
Recall: 0.4956
F1 Score: 0.5090
Model saved.
Finished training TREC with svm_tfidf on Llama3_8B_TREC_5_augmented_data_svm_tfidf_filtered
{'classifier': 'svm_tfidf', 'test_accuracy': 0.4956, 'precision': 0.724, 'recall': 0.4956, 'f1_score': 0.509, 'training_time': 1.06, 'dataset': 'Llama3_8B_TREC_5_augmented_data_svm_tfidf_filtered', 'method': 'Lambada', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training TREC with svm_glove on Llama3_8B_TREC_5_augmented_data_svm_glove_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/Llama3/TREC/svm_glove/Llama3_8B_TREC_5_augmented_data_svm_glove_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training svm_glove...

Validation Results:
Accuracy: 0.3273
Precision: 0.4691
Recall: 0.3273
F1 Score: 0.3350


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3377
Precision: 0.4810
Recall: 0.3377
F1 Score: 0.3504
Model saved.
Finished training TREC with svm_glove on Llama3_8B_TREC_5_augmented_data_svm_glove_filtered
{'classifier': 'svm_glove', 'test_accuracy': 0.3377, 'precision': 0.481, 'recall': 0.3377, 'f1_score': 0.3504, 'training_time': 9.63, 'dataset': 'Llama3_8B_TREC_5_augmented_data_svm_glove_filtered', 'method': 'Lambada', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training TREC with lstm_glove on Llama3_8B_TREC_5_augmented_data_lstm_glove_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/Llama3/TREC/lstm_glove/Llama3_8B_TREC_5_augmented_data_lstm_glove_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training lstm_glove...


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/20, Loss: 4.0665, Accuracy: 0.0283


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1379
Precision: 0.1771
Recall: 0.1379
F1 Score: 0.1333
Validation - Accuracy: 0.1379, Precision: 0.1771, Recall: 0.1379, F1 Score: 0.1333
Epoch 2/20, Loss: 3.7680, Accuracy: 0.0813


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2063
Precision: 0.2367
Recall: 0.2063
F1 Score: 0.2133
Validation - Accuracy: 0.2063, Precision: 0.2367, Recall: 0.2063, F1 Score: 0.2133
Epoch 3/20, Loss: 3.3869, Accuracy: 0.1678


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2414
Precision: 0.3331
Recall: 0.2414
F1 Score: 0.2397
Validation - Accuracy: 0.2414, Precision: 0.3331, Recall: 0.2414, F1 Score: 0.2397
Epoch 4/20, Loss: 2.9127, Accuracy: 0.3269


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2984
Precision: 0.4023
Recall: 0.2984
F1 Score: 0.2977
Validation - Accuracy: 0.2984, Precision: 0.4023, Recall: 0.2984, F1 Score: 0.2977
Epoch 5/20, Loss: 2.5792, Accuracy: 0.4081


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3110
Precision: 0.3721
Recall: 0.3110
F1 Score: 0.2926
Validation - Accuracy: 0.3110, Precision: 0.3721, Recall: 0.3110, F1 Score: 0.2926
Epoch 6/20, Loss: 2.1549, Accuracy: 0.5548


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3266
Precision: 0.4881
Recall: 0.3266
F1 Score: 0.3214
Validation - Accuracy: 0.3266, Precision: 0.4881, Recall: 0.3266, F1 Score: 0.3214
Epoch 7/20, Loss: 1.8755, Accuracy: 0.6007


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3868
Precision: 0.4893
Recall: 0.3868
F1 Score: 0.3740
Validation - Accuracy: 0.3868, Precision: 0.4893, Recall: 0.3868, F1 Score: 0.3740
Epoch 8/20, Loss: 1.6714, Accuracy: 0.6590


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3925
Precision: 0.4628
Recall: 0.3925
F1 Score: 0.3673
Validation - Accuracy: 0.3925, Precision: 0.4628, Recall: 0.3925, F1 Score: 0.3673
Epoch 9/20, Loss: 1.4466, Accuracy: 0.7085


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4006
Precision: 0.5005
Recall: 0.4006
F1 Score: 0.3960
Validation - Accuracy: 0.4006, Precision: 0.5005, Recall: 0.4006, F1 Score: 0.3960
Epoch 10/20, Loss: 1.2602, Accuracy: 0.7385


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4069
Precision: 0.5261
Recall: 0.4069
F1 Score: 0.4095
Validation - Accuracy: 0.4069, Precision: 0.5261, Recall: 0.4069, F1 Score: 0.4095
Epoch 11/20, Loss: 1.1494, Accuracy: 0.7703


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3981
Precision: 0.5082
Recall: 0.3981
F1 Score: 0.3863
Validation - Accuracy: 0.3981, Precision: 0.5082, Recall: 0.3981, F1 Score: 0.3863
Epoch 12/20, Loss: 1.0496, Accuracy: 0.8021


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4113
Precision: 0.5505
Recall: 0.4113
F1 Score: 0.4072
Validation - Accuracy: 0.4113, Precision: 0.5505, Recall: 0.4113, F1 Score: 0.4072
Epoch 13/20, Loss: 0.9530, Accuracy: 0.8039


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4395
Precision: 0.5604
Recall: 0.4395
F1 Score: 0.4430
Validation - Accuracy: 0.4395, Precision: 0.5604, Recall: 0.4395, F1 Score: 0.4430
Epoch 14/20, Loss: 0.8463, Accuracy: 0.8481


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4263
Precision: 0.5580
Recall: 0.4263
F1 Score: 0.4273
Validation - Accuracy: 0.4263, Precision: 0.5580, Recall: 0.4263, F1 Score: 0.4273
Epoch 15/20, Loss: 0.7274, Accuracy: 0.8498


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4420
Precision: 0.5782
Recall: 0.4420
F1 Score: 0.4407
Validation - Accuracy: 0.4420, Precision: 0.5782, Recall: 0.4420, F1 Score: 0.4407
Epoch 16/20, Loss: 0.6496, Accuracy: 0.8852


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4451
Precision: 0.5790
Recall: 0.4451
F1 Score: 0.4485
Validation - Accuracy: 0.4451, Precision: 0.5790, Recall: 0.4451, F1 Score: 0.4485
Epoch 17/20, Loss: 0.6317, Accuracy: 0.8799


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4671
Precision: 0.5907
Recall: 0.4671
F1 Score: 0.4742
Validation - Accuracy: 0.4671, Precision: 0.5907, Recall: 0.4671, F1 Score: 0.4742
Epoch 18/20, Loss: 0.5726, Accuracy: 0.9064


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4564
Precision: 0.5989
Recall: 0.4564
F1 Score: 0.4675
Validation - Accuracy: 0.4564, Precision: 0.5989, Recall: 0.4564, F1 Score: 0.4675
Epoch 19/20, Loss: 0.5539, Accuracy: 0.9028


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4364
Precision: 0.5992
Recall: 0.4364
F1 Score: 0.4459
Validation - Accuracy: 0.4364, Precision: 0.5992, Recall: 0.4364, F1 Score: 0.4459
Epoch 20/20, Loss: 0.4900, Accuracy: 0.9064


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4483
Precision: 0.5896
Recall: 0.4483
F1 Score: 0.4540
Validation - Accuracy: 0.4483, Precision: 0.5896, Recall: 0.4483, F1 Score: 0.4540


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4806
Precision: 0.6227
Recall: 0.4806
F1 Score: 0.4898
Model and embeddings saved.
Finished training TREC with lstm_glove on Llama3_8B_TREC_5_augmented_data_lstm_glove_filtered
{'classifier': 'lstm_glove', 'test_accuracy': 0.4806, 'precision': 0.6227, 'recall': 0.4806, 'f1_score': 0.4898, 'training_time': 44.66, 'dataset': 'Llama3_8B_TREC_5_augmented_data_lstm_glove_filtered', 'method': 'Lambada', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training TREC with bert_ktrain on Llama3_8B_TREC_5_augmented_data_bert_ktrain_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/Llama3/TREC/bert_ktrain/Llama3_8B_TREC_5_augmented_data_bert_ktrain_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training bert_ktrain...
preprocessing train...
language: en
train sequence lengths:
	mean : 12
	95percentile : 21
	99percen

Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 10
	95percentile : 17
	99percentile : 21




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 00007: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 8/20
Epoch 9/20
Epoch 00009: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Restoring model weights from the end of the best epoch: 5.
Epoch 9: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.57%
Precision: 0.73
Recall: 0.57
F1 Score: 0.57
Model saved to final_results/models/Llama3_8B_TREC_5_augmented_data_bert_ktrain_filtered/bert_ktrain
Finished training TREC with bert_ktrain on Llama3_8B_TREC_5_augmented_data_bert_ktrain_filtered
{'classifier': 'bert_ktrain', 'test_accuracy': 0.567, 'precision': 0.7301, 'recall': 0.567, 'f1_score': 0.5666, 'training_time': 56.85, 'dataset': 'Llama3_8B_TREC_5_augmented_data_bert_ktrain_filtered', 'method': 'Lambada', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training TREC with svm_tfidf on Mistral_7B_TREC_5_augmented_data_svm_tfidf_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/Mistral/TREC/svm_tfidf/Mistral_7B_TREC_5_augmented_data_svm_tfidf_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training svm_tfidf...

Validation Results:
Accuracy: 0.4063


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4254
Precision: 0.5984
Recall: 0.4254
F1 Score: 0.4250
Model saved.
Finished training TREC with svm_tfidf on Mistral_7B_TREC_5_augmented_data_svm_tfidf_filtered
{'classifier': 'svm_tfidf', 'test_accuracy': 0.4254, 'precision': 0.5984, 'recall': 0.4254, 'f1_score': 0.425, 'training_time': 0.97, 'dataset': 'Mistral_7B_TREC_5_augmented_data_svm_tfidf_filtered', 'method': 'Lambada', 'model': 'Mistral', 'subset_size': 5}
--------------------------------------------------
Training TREC with svm_glove on Mistral_7B_TREC_5_augmented_data_svm_glove_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/Mistral/TREC/svm_glove/Mistral_7B_TREC_5_augmented_data_svm_glove_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training svm_glove...

Validation Results:
Accuracy: 0.3185
Precision: 0.4654
Recall: 0.3185
F1 Score: 0.3344
Accuracy: 0.3315
Precision: 0.4893
R

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/20, Loss: 4.0953, Accuracy: 0.0387


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1210
Precision: 0.1311
Recall: 0.1210
F1 Score: 0.1040
Validation - Accuracy: 0.1210, Precision: 0.1311, Recall: 0.1210, F1 Score: 0.1040
Epoch 2/20, Loss: 3.7158, Accuracy: 0.1160


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1455
Precision: 0.1596
Recall: 0.1455
F1 Score: 0.1226
Validation - Accuracy: 0.1455, Precision: 0.1596, Recall: 0.1455, F1 Score: 0.1226
Epoch 3/20, Loss: 3.3630, Accuracy: 0.1849


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1975
Precision: 0.1985
Recall: 0.1975
F1 Score: 0.1686
Validation - Accuracy: 0.1975, Precision: 0.1985, Recall: 0.1975, F1 Score: 0.1686
Epoch 4/20, Loss: 2.7961, Accuracy: 0.3529


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2232
Precision: 0.2609
Recall: 0.2232
F1 Score: 0.1997
Validation - Accuracy: 0.2232, Precision: 0.2609, Recall: 0.2232, F1 Score: 0.1997
Epoch 5/20, Loss: 2.3616, Accuracy: 0.4622


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2533
Precision: 0.2822
Recall: 0.2533
F1 Score: 0.2286
Validation - Accuracy: 0.2533, Precision: 0.2822, Recall: 0.2533, F1 Score: 0.2286
Epoch 6/20, Loss: 2.0327, Accuracy: 0.5731


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2821
Precision: 0.3451
Recall: 0.2821
F1 Score: 0.2483
Validation - Accuracy: 0.2821, Precision: 0.3451, Recall: 0.2821, F1 Score: 0.2483
Epoch 7/20, Loss: 1.7565, Accuracy: 0.6387


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3091
Precision: 0.3698
Recall: 0.3091
F1 Score: 0.2914
Validation - Accuracy: 0.3091, Precision: 0.3698, Recall: 0.3091, F1 Score: 0.2914
Epoch 8/20, Loss: 1.5476, Accuracy: 0.6655


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3179
Precision: 0.4087
Recall: 0.3179
F1 Score: 0.3104
Validation - Accuracy: 0.3179, Precision: 0.4087, Recall: 0.3179, F1 Score: 0.3104
Epoch 9/20, Loss: 1.4232, Accuracy: 0.6840


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3298
Precision: 0.4476
Recall: 0.3298
F1 Score: 0.3275
Validation - Accuracy: 0.3298, Precision: 0.4476, Recall: 0.3298, F1 Score: 0.3275
Epoch 10/20, Loss: 1.2030, Accuracy: 0.7462


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3335
Precision: 0.4146
Recall: 0.3335
F1 Score: 0.3206
Validation - Accuracy: 0.3335, Precision: 0.4146, Recall: 0.3335, F1 Score: 0.3206
Epoch 11/20, Loss: 1.1069, Accuracy: 0.7479


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3423
Precision: 0.4520
Recall: 0.3423
F1 Score: 0.3388
Validation - Accuracy: 0.3423, Precision: 0.4520, Recall: 0.3423, F1 Score: 0.3388
Epoch 12/20, Loss: 0.9902, Accuracy: 0.7983


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3574
Precision: 0.4494
Recall: 0.3574
F1 Score: 0.3433
Validation - Accuracy: 0.3574, Precision: 0.4494, Recall: 0.3574, F1 Score: 0.3433
Epoch 13/20, Loss: 0.8624, Accuracy: 0.8168


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3492
Precision: 0.4390
Recall: 0.3492
F1 Score: 0.3404
Validation - Accuracy: 0.3492, Precision: 0.4390, Recall: 0.3492, F1 Score: 0.3404
Epoch 14/20, Loss: 0.7702, Accuracy: 0.8588


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3674
Precision: 0.4828
Recall: 0.3674
F1 Score: 0.3622
Validation - Accuracy: 0.3674, Precision: 0.4828, Recall: 0.3674, F1 Score: 0.3622
Epoch 15/20, Loss: 0.6871, Accuracy: 0.8622


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3962
Precision: 0.4783
Recall: 0.3962
F1 Score: 0.3943
Validation - Accuracy: 0.3962, Precision: 0.4783, Recall: 0.3962, F1 Score: 0.3943
Epoch 16/20, Loss: 0.6492, Accuracy: 0.8739


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3755
Precision: 0.4912
Recall: 0.3755
F1 Score: 0.3820
Validation - Accuracy: 0.3755, Precision: 0.4912, Recall: 0.3755, F1 Score: 0.3820
Epoch 17/20, Loss: 0.5904, Accuracy: 0.8840


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4044
Precision: 0.4809
Recall: 0.4044
F1 Score: 0.4059
Validation - Accuracy: 0.4044, Precision: 0.4809, Recall: 0.4044, F1 Score: 0.4059
Epoch 18/20, Loss: 0.5094, Accuracy: 0.9109


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4125
Precision: 0.4925
Recall: 0.4125
F1 Score: 0.4113
Validation - Accuracy: 0.4125, Precision: 0.4925, Recall: 0.4125, F1 Score: 0.4113
Epoch 19/20, Loss: 0.4549, Accuracy: 0.9126


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3969
Precision: 0.4917
Recall: 0.3969
F1 Score: 0.3960
Validation - Accuracy: 0.3969, Precision: 0.4917, Recall: 0.3969, F1 Score: 0.3960
Epoch 20/20, Loss: 0.4139, Accuracy: 0.9361


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4157
Precision: 0.5057
Recall: 0.4157
F1 Score: 0.4256
Validation - Accuracy: 0.4157, Precision: 0.5057, Recall: 0.4157, F1 Score: 0.4256


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4511
Precision: 0.5539
Recall: 0.4511
F1 Score: 0.4623
Model and embeddings saved.
Finished training TREC with lstm_glove on Mistral_7B_TREC_5_augmented_data_lstm_glove_filtered
{'classifier': 'lstm_glove', 'test_accuracy': 0.4511, 'precision': 0.5539, 'recall': 0.4511, 'f1_score': 0.4623, 'training_time': 44.87, 'dataset': 'Mistral_7B_TREC_5_augmented_data_lstm_glove_filtered', 'method': 'Lambada', 'model': 'Mistral', 'subset_size': 5}
--------------------------------------------------
Training TREC with bert_ktrain on Mistral_7B_TREC_5_augmented_data_bert_ktrain_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/Mistral/TREC/bert_ktrain/Mistral_7B_TREC_5_augmented_data_bert_ktrain_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training bert_ktrain...
preprocessing train...
language: en
train sequence lengths:
	mean : 10
	95percentile : 15
	99

Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 10
	95percentile : 17
	99percentile : 21




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 00010: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 11/20
Epoch 12/20
Epoch 00012: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Restoring model weights from the end of the best epoch: 8.
Epoch 12: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.56%
Precision: 0.68
Recall: 0.56
F1 Score: 0.55
Model saved to final_results/models/Mistral_7B_TREC_5_augmented_data_bert_ktrain_filtered/bert_ktrain
Finished training TREC with bert_ktrain on Mistral_7B_TREC_5_augmented_data_bert_ktrain_filtered
{'classifier': 'bert_ktrain', 'test_accuracy': 0.5645, 'precision': 0.6779, 'recall': 0.5645, 'f1_score': 0.5524, 'training_time': 68.38, 'dataset': 'Mistral_7B_TREC_5_augmented_data_bert_ktrain_filtered', 'method': 'Lambada', 'model': 'Mistral', 'subset_size': 5}
--------------------------------------------------
Training TREC with svm_tfidf on GPT2_TREC_5_augmented_data_svm_tfidf_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/GPT2/TREC/svm_tfidf/GPT2_TREC_5_augmented_data_svm_tfidf_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training svm_tfidf...

Validation Results:
Accuracy: 0.4251
Precision

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3327
Precision: 0.4808
Recall: 0.3327
F1 Score: 0.3509
Model saved.
Finished training TREC with svm_glove on GPT2_TREC_5_augmented_data_svm_glove_filtered
{'classifier': 'svm_glove', 'test_accuracy': 0.3327, 'precision': 0.4808, 'recall': 0.3327, 'f1_score': 0.3509, 'training_time': 9.36, 'dataset': 'GPT2_TREC_5_augmented_data_svm_glove_filtered', 'method': 'Lambada', 'model': 'GPT2', 'subset_size': 5}
--------------------------------------------------
Training TREC with lstm_glove on GPT2_TREC_5_augmented_data_lstm_glove_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/GPT2/TREC/lstm_glove/GPT2_TREC_5_augmented_data_lstm_glove_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training lstm_glove...


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/20, Loss: 4.1030, Accuracy: 0.0220


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0470
Precision: 0.0530
Recall: 0.0470
F1 Score: 0.0324
Validation - Accuracy: 0.0470, Precision: 0.0530, Recall: 0.0470, F1 Score: 0.0324
Epoch 2/20, Loss: 3.8754, Accuracy: 0.0440


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1041
Precision: 0.2399
Recall: 0.1041
F1 Score: 0.1014
Validation - Accuracy: 0.1041, Precision: 0.2399, Recall: 0.1041, F1 Score: 0.1014
Epoch 3/20, Loss: 3.6948, Accuracy: 0.0857


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1285
Precision: 0.3048
Recall: 0.1285
F1 Score: 0.1180
Validation - Accuracy: 0.1285, Precision: 0.3048, Recall: 0.1285, F1 Score: 0.1180
Epoch 4/20, Loss: 3.4052, Accuracy: 0.1868


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1455
Precision: 0.3246
Recall: 0.1455
F1 Score: 0.1434
Validation - Accuracy: 0.1455, Precision: 0.3246, Recall: 0.1455, F1 Score: 0.1434
Epoch 5/20, Loss: 3.1086, Accuracy: 0.2879


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1887
Precision: 0.2546
Recall: 0.1887
F1 Score: 0.1697
Validation - Accuracy: 0.1887, Precision: 0.2546, Recall: 0.1887, F1 Score: 0.1697
Epoch 6/20, Loss: 2.7347, Accuracy: 0.3538


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2602
Precision: 0.3180
Recall: 0.2602
F1 Score: 0.2365
Validation - Accuracy: 0.2602, Precision: 0.3180, Recall: 0.2602, F1 Score: 0.2365
Epoch 7/20, Loss: 2.4402, Accuracy: 0.4044


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2583
Precision: 0.3482
Recall: 0.2583
F1 Score: 0.2451
Validation - Accuracy: 0.2583, Precision: 0.3482, Recall: 0.2583, F1 Score: 0.2451
Epoch 8/20, Loss: 2.2326, Accuracy: 0.5055


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2777
Precision: 0.3829
Recall: 0.2777
F1 Score: 0.2575
Validation - Accuracy: 0.2777, Precision: 0.3829, Recall: 0.2777, F1 Score: 0.2575
Epoch 9/20, Loss: 2.0632, Accuracy: 0.5341


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3110
Precision: 0.4029
Recall: 0.3110
F1 Score: 0.2893
Validation - Accuracy: 0.3110, Precision: 0.4029, Recall: 0.3110, F1 Score: 0.2893
Epoch 10/20, Loss: 1.8911, Accuracy: 0.5604


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3310
Precision: 0.4028
Recall: 0.3310
F1 Score: 0.3106
Validation - Accuracy: 0.3310, Precision: 0.4028, Recall: 0.3310, F1 Score: 0.3106
Epoch 11/20, Loss: 1.6289, Accuracy: 0.6352


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3455
Precision: 0.3942
Recall: 0.3455
F1 Score: 0.3280
Validation - Accuracy: 0.3455, Precision: 0.3942, Recall: 0.3455, F1 Score: 0.3280
Epoch 12/20, Loss: 1.5604, Accuracy: 0.6725


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3818
Precision: 0.4268
Recall: 0.3818
F1 Score: 0.3625
Validation - Accuracy: 0.3818, Precision: 0.4268, Recall: 0.3818, F1 Score: 0.3625
Epoch 13/20, Loss: 1.4271, Accuracy: 0.6769


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3693
Precision: 0.4668
Recall: 0.3693
F1 Score: 0.3543
Validation - Accuracy: 0.3693, Precision: 0.4668, Recall: 0.3693, F1 Score: 0.3543
Epoch 14/20, Loss: 1.2692, Accuracy: 0.7363


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3818
Precision: 0.4360
Recall: 0.3818
F1 Score: 0.3688
Validation - Accuracy: 0.3818, Precision: 0.4360, Recall: 0.3818, F1 Score: 0.3688
Epoch 15/20, Loss: 1.2370, Accuracy: 0.7451


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3868
Precision: 0.4625
Recall: 0.3868
F1 Score: 0.3712
Validation - Accuracy: 0.3868, Precision: 0.4625, Recall: 0.3868, F1 Score: 0.3712
Epoch 16/20, Loss: 1.1169, Accuracy: 0.7516


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3824
Precision: 0.4433
Recall: 0.3824
F1 Score: 0.3763
Validation - Accuracy: 0.3824, Precision: 0.4433, Recall: 0.3824, F1 Score: 0.3763
Epoch 17/20, Loss: 1.0070, Accuracy: 0.7956


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3875
Precision: 0.4477
Recall: 0.3875
F1 Score: 0.3833
Validation - Accuracy: 0.3875, Precision: 0.4477, Recall: 0.3875, F1 Score: 0.3833
Epoch 18/20, Loss: 0.9602, Accuracy: 0.7934


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4188
Precision: 0.4783
Recall: 0.4188
F1 Score: 0.4083
Validation - Accuracy: 0.4188, Precision: 0.4783, Recall: 0.4188, F1 Score: 0.4083
Epoch 19/20, Loss: 0.8823, Accuracy: 0.8220


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4251
Precision: 0.5015
Recall: 0.4251
F1 Score: 0.4183
Validation - Accuracy: 0.4251, Precision: 0.5015, Recall: 0.4251, F1 Score: 0.4183
Epoch 20/20, Loss: 0.7565, Accuracy: 0.8725


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4063
Precision: 0.4954
Recall: 0.4063
F1 Score: 0.4040
Validation - Accuracy: 0.4063, Precision: 0.4954, Recall: 0.4063, F1 Score: 0.4040
Accuracy: 0.4110
Precision: 0.5350
Recall: 0.4110
F1 Score: 0.4217
Model and embeddings saved.
Finished training TREC with lstm_glove on GPT2_TREC_5_augmented_data_lstm_glove_filtered
{'classifier': 'lstm_glove', 'test_accuracy': 0.411, 'precision': 0.535, 'recall': 0.411, 'f1_score': 0.4217, 'training_time': 43.83, 'dataset': 'GPT2_TREC_5_augmented_data_lstm_glove_filtered', 'method': 'Lambada', 'model': 'GPT2', 'subset_size': 5}
--------------------------------------------------
Training TREC with bert_ktrain on GPT2_TREC_5_augmented_data_bert_ktrain_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada/GPT2/TREC/bert_ktrain/GPT2_TREC_5_augmented_data_bert_ktrain_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv


Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 10
	95percentile : 17
	99percentile : 21




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 00009: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 00013: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Epoch 14/20
Epoch 15/20
Epoch 00015: Reducing Max LR on Plateau: new max lr will be 6.25e-06 (if not early_stopping).
Restoring model weights from the end of the best epoch: 11.
Epoch 15: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.46%
Precision: 0.57
Recall: 0.46
F1 Score: 0.44
Model saved to final_results/models/GPT2_TREC_5_augmented_data_bert_ktrain_filtered/bert_ktrain
Finished training TREC with bert_ktrain on GPT2_TREC_5_augmented_data_bert_ktrain_filtered
{'classifier': 'bert_ktrain', 'test_accuracy': 0.4561, 'precision': 0.5651, 'recall': 0.4561, 'f1_score': 0.4403, 'training_time': 72.65, 'dataset': 'GPT2_TREC_5_augmented_data_bert_ktrain_filtered', 'method': 'Lambada', 'model': 'GPT2', 'subset_size': 5}
--------------------------------------------------
Training TREC with svm_tfidf on Llama3_8B_lambada_plus_TREC_5_augmented_data_svm_tfidf_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+/Llama3/TREC/svm_tfidf/Llama3_8B_lambada_plus_TREC_5_augmented_data_svm_tfidf_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training svm_tfidf...

Validation Results:
Accuracy

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4712
Precision: 0.6564
Recall: 0.4712
F1 Score: 0.4816
Model saved.
Finished training TREC with svm_tfidf on Llama3_8B_lambada_plus_TREC_5_augmented_data_svm_tfidf_filtered
{'classifier': 'svm_tfidf', 'test_accuracy': 0.4712, 'precision': 0.6564, 'recall': 0.4712, 'f1_score': 0.4816, 'training_time': 1.06, 'dataset': 'Llama3_8B_lambada_plus_TREC_5_augmented_data_svm_tfidf_filtered', 'method': 'Lambada+', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training TREC with svm_glove on Llama3_8B_lambada_plus_TREC_5_augmented_data_svm_glove_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+/Llama3/TREC/svm_glove/Llama3_8B_lambada_plus_TREC_5_augmented_data_svm_glove_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training svm_glove...

Validation Results:
Accuracy: 0.3191
Precision: 0.4881
Recall: 0.3191
F1 S

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3233
Precision: 0.5032
Recall: 0.3233
F1 Score: 0.3388
Model saved.
Finished training TREC with svm_glove on Llama3_8B_lambada_plus_TREC_5_augmented_data_svm_glove_filtered
{'classifier': 'svm_glove', 'test_accuracy': 0.3233, 'precision': 0.5032, 'recall': 0.3233, 'f1_score': 0.3388, 'training_time': 9.51, 'dataset': 'Llama3_8B_lambada_plus_TREC_5_augmented_data_svm_glove_filtered', 'method': 'Lambada+', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training TREC with lstm_glove on Llama3_8B_lambada_plus_TREC_5_augmented_data_lstm_glove_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+/Llama3/TREC/lstm_glove/Llama3_8B_lambada_plus_TREC_5_augmented_data_lstm_glove_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training lstm_glove...


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/20, Loss: 4.0113, Accuracy: 0.0225


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1191
Precision: 0.2717
Recall: 0.1191
F1 Score: 0.1266
Validation - Accuracy: 0.1191, Precision: 0.2717, Recall: 0.1191, F1 Score: 0.1266
Epoch 2/20, Loss: 3.6630, Accuracy: 0.0763


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1799
Precision: 0.2065
Recall: 0.1799
F1 Score: 0.1541
Validation - Accuracy: 0.1799, Precision: 0.2065, Recall: 0.1799, F1 Score: 0.1541
Epoch 3/20, Loss: 3.3490, Accuracy: 0.1854


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2044
Precision: 0.3562
Recall: 0.2044
F1 Score: 0.1946
Validation - Accuracy: 0.2044, Precision: 0.3562, Recall: 0.2044, F1 Score: 0.1946
Epoch 4/20, Loss: 2.9390, Accuracy: 0.3016


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2364
Precision: 0.3514
Recall: 0.2364
F1 Score: 0.2351
Validation - Accuracy: 0.2364, Precision: 0.3514, Recall: 0.2364, F1 Score: 0.2351
Epoch 5/20, Loss: 2.6654, Accuracy: 0.3761


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3009
Precision: 0.3889
Recall: 0.3009
F1 Score: 0.2841
Validation - Accuracy: 0.3009, Precision: 0.3889, Recall: 0.3009, F1 Score: 0.2841
Epoch 6/20, Loss: 2.2129, Accuracy: 0.4697


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3335
Precision: 0.4310
Recall: 0.3335
F1 Score: 0.3407
Validation - Accuracy: 0.3335, Precision: 0.4310, Recall: 0.3335, F1 Score: 0.3407
Epoch 7/20, Loss: 2.0387, Accuracy: 0.5251


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3699
Precision: 0.4528
Recall: 0.3699
F1 Score: 0.3630
Validation - Accuracy: 0.3699, Precision: 0.4528, Recall: 0.3699, F1 Score: 0.3630
Epoch 8/20, Loss: 1.9262, Accuracy: 0.6031


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3386
Precision: 0.5201
Recall: 0.3386
F1 Score: 0.3336
Validation - Accuracy: 0.3386, Precision: 0.5201, Recall: 0.3386, F1 Score: 0.3336
Epoch 9/20, Loss: 1.6748, Accuracy: 0.6101


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3799
Precision: 0.5599
Recall: 0.3799
F1 Score: 0.3856
Validation - Accuracy: 0.3799, Precision: 0.5599, Recall: 0.3799, F1 Score: 0.3856
Epoch 10/20, Loss: 1.5001, Accuracy: 0.6724


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3755
Precision: 0.5498
Recall: 0.3755
F1 Score: 0.3733
Validation - Accuracy: 0.3755, Precision: 0.5498, Recall: 0.3755, F1 Score: 0.3733
Epoch 11/20, Loss: 1.3078, Accuracy: 0.7054


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3975
Precision: 0.5179
Recall: 0.3975
F1 Score: 0.3983
Validation - Accuracy: 0.3975, Precision: 0.5179, Recall: 0.3975, F1 Score: 0.3983
Epoch 12/20, Loss: 1.1408, Accuracy: 0.7470


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4031
Precision: 0.6193
Recall: 0.4031
F1 Score: 0.4072
Validation - Accuracy: 0.4031, Precision: 0.6193, Recall: 0.4031, F1 Score: 0.4072
Epoch 13/20, Loss: 1.0589, Accuracy: 0.7782


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4063
Precision: 0.5320
Recall: 0.4063
F1 Score: 0.4091
Validation - Accuracy: 0.4063, Precision: 0.5320, Recall: 0.4063, F1 Score: 0.4091
Epoch 14/20, Loss: 1.0031, Accuracy: 0.7834


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3969
Precision: 0.5359
Recall: 0.3969
F1 Score: 0.4109
Validation - Accuracy: 0.3969, Precision: 0.5359, Recall: 0.3969, F1 Score: 0.4109
Epoch 15/20, Loss: 0.9051, Accuracy: 0.8059


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4207
Precision: 0.5514
Recall: 0.4207
F1 Score: 0.4230
Validation - Accuracy: 0.4207, Precision: 0.5514, Recall: 0.4207, F1 Score: 0.4230
Epoch 16/20, Loss: 0.9008, Accuracy: 0.8076


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4044
Precision: 0.5391
Recall: 0.4044
F1 Score: 0.4084
Validation - Accuracy: 0.4044, Precision: 0.5391, Recall: 0.4044, F1 Score: 0.4084
Epoch 17/20, Loss: 0.8702, Accuracy: 0.7972


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4125
Precision: 0.5083
Recall: 0.4125
F1 Score: 0.4153
Validation - Accuracy: 0.4125, Precision: 0.5083, Recall: 0.4125, F1 Score: 0.4153
Epoch 18/20, Loss: 0.6594, Accuracy: 0.8596


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4307
Precision: 0.5789
Recall: 0.4307
F1 Score: 0.4372
Validation - Accuracy: 0.4307, Precision: 0.5789, Recall: 0.4307, F1 Score: 0.4372
Epoch 19/20, Loss: 0.6029, Accuracy: 0.8787


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4307
Precision: 0.5625
Recall: 0.4307
F1 Score: 0.4296
Validation - Accuracy: 0.4307, Precision: 0.5625, Recall: 0.4307, F1 Score: 0.4296
Epoch 20/20, Loss: 0.5568, Accuracy: 0.8943


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4238
Precision: 0.5771
Recall: 0.4238
F1 Score: 0.4286
Validation - Accuracy: 0.4238, Precision: 0.5771, Recall: 0.4238, F1 Score: 0.4286


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4593
Precision: 0.6618
Recall: 0.4593
F1 Score: 0.4704
Model and embeddings saved.
Finished training TREC with lstm_glove on Llama3_8B_lambada_plus_TREC_5_augmented_data_lstm_glove_filtered
{'classifier': 'lstm_glove', 'test_accuracy': 0.4593, 'precision': 0.6618, 'recall': 0.4593, 'f1_score': 0.4704, 'training_time': 45.06, 'dataset': 'Llama3_8B_lambada_plus_TREC_5_augmented_data_lstm_glove_filtered', 'method': 'Lambada+', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training TREC with bert_ktrain on Llama3_8B_lambada_plus_TREC_5_augmented_data_bert_ktrain_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+/Llama3/TREC/bert_ktrain/Llama3_8B_lambada_plus_TREC_5_augmented_data_bert_ktrain_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training bert_ktrain...
preprocessing train...
language: en
train seq

Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 10
	95percentile : 17
	99percentile : 21




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 00008: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 9/20
Epoch 10/20
Epoch 00010: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Restoring model weights from the end of the best epoch: 6.
Epoch 10: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.60%
Precision: 0.73
Recall: 0.60
F1 Score: 0.59
Model saved to final_results/models/Llama3_8B_lambada_plus_TREC_5_augmented_data_bert_ktrain_filtered/bert_ktrain
Finished training TREC with bert_ktrain on Llama3_8B_lambada_plus_TREC_5_augmented_data_bert_ktrain_filtered
{'classifier': 'bert_ktrain', 'test_accuracy': 0.5971, 'precision': 0.7345, 'recall': 0.5971, 'f1_score': 0.5879, 'training_time': 61.29, 'dataset': 'Llama3_8B_lambada_plus_TREC_5_augmented_data_bert_ktrain_filtered', 'method': 'Lambada+', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training TREC with svm_tfidf on Mistral_7B_lambada_plus_TREC_5_augmented_data_svm_tfidf_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+/Mistral/TREC/svm_tfidf/Mistral_7B_lambada_plus_TREC_5_augmented_data_svm_tfidf_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4868
Precision: 0.6722
Recall: 0.4868
F1 Score: 0.4916
Model saved.
Finished training TREC with svm_tfidf on Mistral_7B_lambada_plus_TREC_5_augmented_data_svm_tfidf_filtered
{'classifier': 'svm_tfidf', 'test_accuracy': 0.4868, 'precision': 0.6722, 'recall': 0.4868, 'f1_score': 0.4916, 'training_time': 1.05, 'dataset': 'Mistral_7B_lambada_plus_TREC_5_augmented_data_svm_tfidf_filtered', 'method': 'Lambada+', 'model': 'Mistral', 'subset_size': 5}
--------------------------------------------------
Training TREC with svm_glove on Mistral_7B_lambada_plus_TREC_5_augmented_data_svm_glove_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+/Mistral/TREC/svm_glove/Mistral_7B_lambada_plus_TREC_5_augmented_data_svm_glove_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training svm_glove...

Validation Results:
Accuracy: 0.3116
Precision: 0.4745
Recall: 0.311

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3202
Precision: 0.4942
Recall: 0.3202
F1 Score: 0.3359
Model saved.
Finished training TREC with svm_glove on Mistral_7B_lambada_plus_TREC_5_augmented_data_svm_glove_filtered
{'classifier': 'svm_glove', 'test_accuracy': 0.3202, 'precision': 0.4942, 'recall': 0.3202, 'f1_score': 0.3359, 'training_time': 9.37, 'dataset': 'Mistral_7B_lambada_plus_TREC_5_augmented_data_svm_glove_filtered', 'method': 'Lambada+', 'model': 'Mistral', 'subset_size': 5}
--------------------------------------------------
Training TREC with lstm_glove on Mistral_7B_lambada_plus_TREC_5_augmented_data_lstm_glove_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+/Mistral/TREC/lstm_glove/Mistral_7B_lambada_plus_TREC_5_augmented_data_lstm_glove_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training lstm_glove...


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/20, Loss: 4.1181, Accuracy: 0.0256


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1091
Precision: 0.2100
Recall: 0.1091
F1 Score: 0.1272
Validation - Accuracy: 0.1091, Precision: 0.2100, Recall: 0.1091, F1 Score: 0.1272
Epoch 2/20, Loss: 3.7948, Accuracy: 0.0558


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2069
Precision: 0.3099
Recall: 0.2069
F1 Score: 0.2024
Validation - Accuracy: 0.2069, Precision: 0.3099, Recall: 0.2069, F1 Score: 0.2024
Epoch 3/20, Loss: 3.2916, Accuracy: 0.1976


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2056
Precision: 0.2953
Recall: 0.2056
F1 Score: 0.1895
Validation - Accuracy: 0.2056, Precision: 0.2953, Recall: 0.2056, F1 Score: 0.1895
Epoch 4/20, Loss: 2.7049, Accuracy: 0.3861


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2909
Precision: 0.4412
Recall: 0.2909
F1 Score: 0.2968
Validation - Accuracy: 0.2909, Precision: 0.4412, Recall: 0.2909, F1 Score: 0.2968
Epoch 5/20, Loss: 2.2541, Accuracy: 0.5038


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3204
Precision: 0.4426
Recall: 0.3204
F1 Score: 0.3316
Validation - Accuracy: 0.3204, Precision: 0.4426, Recall: 0.3204, F1 Score: 0.3316
Epoch 6/20, Loss: 1.8900, Accuracy: 0.6094


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3561
Precision: 0.4614
Recall: 0.3561
F1 Score: 0.3681
Validation - Accuracy: 0.3561, Precision: 0.4614, Recall: 0.3561, F1 Score: 0.3681
Epoch 7/20, Loss: 1.6356, Accuracy: 0.6697


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3586
Precision: 0.4803
Recall: 0.3586
F1 Score: 0.3602
Validation - Accuracy: 0.3586, Precision: 0.4803, Recall: 0.3586, F1 Score: 0.3602
Epoch 8/20, Loss: 1.4013, Accuracy: 0.7225


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3850
Precision: 0.4899
Recall: 0.3850
F1 Score: 0.3929
Validation - Accuracy: 0.3850, Precision: 0.4899, Recall: 0.3850, F1 Score: 0.3929
Epoch 9/20, Loss: 1.2133, Accuracy: 0.7511


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4226
Precision: 0.5183
Recall: 0.4226
F1 Score: 0.4299
Validation - Accuracy: 0.4226, Precision: 0.5183, Recall: 0.4226, F1 Score: 0.4299
Epoch 10/20, Loss: 1.0479, Accuracy: 0.7919


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4119
Precision: 0.5575
Recall: 0.4119
F1 Score: 0.4204
Validation - Accuracy: 0.4119, Precision: 0.5575, Recall: 0.4119, F1 Score: 0.4204
Epoch 11/20, Loss: 0.8914, Accuracy: 0.8431


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4263
Precision: 0.5435
Recall: 0.4263
F1 Score: 0.4251
Validation - Accuracy: 0.4263, Precision: 0.5435, Recall: 0.4263, F1 Score: 0.4251
Epoch 12/20, Loss: 0.8152, Accuracy: 0.8597


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4307
Precision: 0.5746
Recall: 0.4307
F1 Score: 0.4326
Validation - Accuracy: 0.4307, Precision: 0.5746, Recall: 0.4307, F1 Score: 0.4326
Epoch 13/20, Loss: 0.7489, Accuracy: 0.8718


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4470
Precision: 0.5589
Recall: 0.4470
F1 Score: 0.4473
Validation - Accuracy: 0.4470, Precision: 0.5589, Recall: 0.4470, F1 Score: 0.4473
Epoch 14/20, Loss: 0.6403, Accuracy: 0.8884


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4589
Precision: 0.5700
Recall: 0.4589
F1 Score: 0.4629
Validation - Accuracy: 0.4589, Precision: 0.5700, Recall: 0.4589, F1 Score: 0.4629
Epoch 15/20, Loss: 0.5893, Accuracy: 0.8974


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4621
Precision: 0.5717
Recall: 0.4621
F1 Score: 0.4606
Validation - Accuracy: 0.4621, Precision: 0.5717, Recall: 0.4621, F1 Score: 0.4606
Epoch 16/20, Loss: 0.5027, Accuracy: 0.9306


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4652
Precision: 0.5696
Recall: 0.4652
F1 Score: 0.4704
Validation - Accuracy: 0.4652, Precision: 0.5696, Recall: 0.4652, F1 Score: 0.4704
Epoch 17/20, Loss: 0.4370, Accuracy: 0.9457


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4577
Precision: 0.5809
Recall: 0.4577
F1 Score: 0.4607
Validation - Accuracy: 0.4577, Precision: 0.5809, Recall: 0.4577, F1 Score: 0.4607
Epoch 18/20, Loss: 0.4235, Accuracy: 0.9427


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4715
Precision: 0.5774
Recall: 0.4715
F1 Score: 0.4744
Validation - Accuracy: 0.4715, Precision: 0.5774, Recall: 0.4715, F1 Score: 0.4744
Epoch 19/20, Loss: 0.3904, Accuracy: 0.9397


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4740
Precision: 0.5887
Recall: 0.4740
F1 Score: 0.4829
Validation - Accuracy: 0.4740, Precision: 0.5887, Recall: 0.4740, F1 Score: 0.4829
Epoch 20/20, Loss: 0.3128, Accuracy: 0.9638


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4834
Precision: 0.5915
Recall: 0.4834
F1 Score: 0.4916
Validation - Accuracy: 0.4834, Precision: 0.5915, Recall: 0.4834, F1 Score: 0.4916


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5063
Precision: 0.6283
Recall: 0.5063
F1 Score: 0.5167
Model and embeddings saved.
Finished training TREC with lstm_glove on Mistral_7B_lambada_plus_TREC_5_augmented_data_lstm_glove_filtered
{'classifier': 'lstm_glove', 'test_accuracy': 0.5063, 'precision': 0.6283, 'recall': 0.5063, 'f1_score': 0.5167, 'training_time': 44.47, 'dataset': 'Mistral_7B_lambada_plus_TREC_5_augmented_data_lstm_glove_filtered', 'method': 'Lambada+', 'model': 'Mistral', 'subset_size': 5}
--------------------------------------------------
Training TREC with bert_ktrain on Mistral_7B_lambada_plus_TREC_5_augmented_data_bert_ktrain_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+/Mistral/TREC/bert_ktrain/Mistral_7B_lambada_plus_TREC_5_augmented_data_bert_ktrain_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training bert_ktrain...
preprocessing train...
language: en
tra

Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 10
	95percentile : 17
	99percentile : 21




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 00006: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 7/20
Epoch 8/20
Epoch 00008: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Restoring model weights from the end of the best epoch: 4.
Epoch 8: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.61%
Precision: 0.72
Recall: 0.61
F1 Score: 0.60
Model saved to final_results/models/Mistral_7B_lambada_plus_TREC_5_augmented_data_bert_ktrain_filtered/bert_ktrain
Finished training TREC with bert_ktrain on Mistral_7B_lambada_plus_TREC_5_augmented_data_bert_ktrain_filtered
{'classifier': 'bert_ktrain', 'test_accuracy': 0.6096, 'precision': 0.7239, 'recall': 0.6096, 'f1_score': 0.6, 'training_time': 53.18, 'dataset': 'Mistral_7B_lambada_plus_TREC_5_augmented_data_bert_ktrain_filtered', 'method': 'Lambada+', 'model': 'Mistral', 'subset_size': 5}
--------------------------------------------------
Training TREC with svm_tfidf on Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_svm_tfidf_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+Instruct/Llama3/TREC/svm_tfidf/Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_svm_tfidf_filtered.csv
	Test path: NLPProject20

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5125
Precision: 0.6824
Recall: 0.5125
F1 Score: 0.5362
Model saved.
Finished training TREC with svm_tfidf on Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_svm_tfidf_filtered
{'classifier': 'svm_tfidf', 'test_accuracy': 0.5125, 'precision': 0.6824, 'recall': 0.5125, 'f1_score': 0.5362, 'training_time': 1.07, 'dataset': 'Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_svm_tfidf_filtered', 'method': 'Lambada+Instruct', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training TREC with svm_glove on Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_svm_glove_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+Instruct/Llama3/TREC/svm_glove/Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_svm_glove_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training svm_glove...

Validation Results:
Accura

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3365
Precision: 0.4963
Recall: 0.3365
F1 Score: 0.3538
Model saved.
Finished training TREC with svm_glove on Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_svm_glove_filtered
{'classifier': 'svm_glove', 'test_accuracy': 0.3365, 'precision': 0.4963, 'recall': 0.3365, 'f1_score': 0.3538, 'training_time': 9.34, 'dataset': 'Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_svm_glove_filtered', 'method': 'Lambada+Instruct', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training TREC with lstm_glove on Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_lstm_glove_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+Instruct/Llama3/TREC/lstm_glove/Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_lstm_glove_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training lstm_glove...


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/20, Loss: 4.0392, Accuracy: 0.0413


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.0571
Precision: 0.0825
Recall: 0.0571
F1 Score: 0.0315
Validation - Accuracy: 0.0571, Precision: 0.0825, Recall: 0.0571, F1 Score: 0.0315
Epoch 2/20, Loss: 3.7422, Accuracy: 0.0929


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1009
Precision: 0.2091
Recall: 0.1009
F1 Score: 0.0801
Validation - Accuracy: 0.1009, Precision: 0.2091, Recall: 0.1009, F1 Score: 0.0801
Epoch 3/20, Loss: 3.5352, Accuracy: 0.1360


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1661
Precision: 0.2662
Recall: 0.1661
F1 Score: 0.1591
Validation - Accuracy: 0.1661, Precision: 0.2662, Recall: 0.1661, F1 Score: 0.1591
Epoch 4/20, Loss: 3.0512, Accuracy: 0.2616


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.2295
Precision: 0.2731
Recall: 0.2295
F1 Score: 0.2003
Validation - Accuracy: 0.2295, Precision: 0.2731, Recall: 0.2295, F1 Score: 0.2003
Epoch 5/20, Loss: 2.7183, Accuracy: 0.3580


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3317
Precision: 0.4426
Recall: 0.3317
F1 Score: 0.3255
Validation - Accuracy: 0.3317, Precision: 0.4426, Recall: 0.3317, F1 Score: 0.3255
Epoch 6/20, Loss: 2.3083, Accuracy: 0.4664


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3643
Precision: 0.4294
Recall: 0.3643
F1 Score: 0.3576
Validation - Accuracy: 0.3643, Precision: 0.4294, Recall: 0.3643, F1 Score: 0.3576
Epoch 7/20, Loss: 2.0400, Accuracy: 0.5250


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.3787
Precision: 0.5030
Recall: 0.3787
F1 Score: 0.3808
Validation - Accuracy: 0.3787, Precision: 0.5030, Recall: 0.3787, F1 Score: 0.3808
Epoch 8/20, Loss: 1.7811, Accuracy: 0.5886


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4094
Precision: 0.5008
Recall: 0.4094
F1 Score: 0.4022
Validation - Accuracy: 0.4094, Precision: 0.5008, Recall: 0.4094, F1 Score: 0.4022
Epoch 9/20, Loss: 1.6213, Accuracy: 0.6627


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4100
Precision: 0.5500
Recall: 0.4100
F1 Score: 0.4117
Validation - Accuracy: 0.4100, Precision: 0.5500, Recall: 0.4100, F1 Score: 0.4117
Epoch 10/20, Loss: 1.5181, Accuracy: 0.7005


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4169
Precision: 0.5551
Recall: 0.4169
F1 Score: 0.4109
Validation - Accuracy: 0.4169, Precision: 0.5551, Recall: 0.4169, F1 Score: 0.4109
Epoch 11/20, Loss: 1.2862, Accuracy: 0.7435


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4069
Precision: 0.5974
Recall: 0.4069
F1 Score: 0.4098
Validation - Accuracy: 0.4069, Precision: 0.5974, Recall: 0.4069, F1 Score: 0.4098
Epoch 12/20, Loss: 1.1698, Accuracy: 0.7780


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4307
Precision: 0.6185
Recall: 0.4307
F1 Score: 0.4314
Validation - Accuracy: 0.4307, Precision: 0.6185, Recall: 0.4307, F1 Score: 0.4314
Epoch 13/20, Loss: 1.0020, Accuracy: 0.7986


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4169
Precision: 0.5927
Recall: 0.4169
F1 Score: 0.4135
Validation - Accuracy: 0.4169, Precision: 0.5927, Recall: 0.4169, F1 Score: 0.4135
Epoch 14/20, Loss: 0.9890, Accuracy: 0.7900


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4420
Precision: 0.6650
Recall: 0.4420
F1 Score: 0.4415
Validation - Accuracy: 0.4420, Precision: 0.6650, Recall: 0.4420, F1 Score: 0.4415
Epoch 15/20, Loss: 0.8403, Accuracy: 0.8365


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4445
Precision: 0.6498
Recall: 0.4445
F1 Score: 0.4480
Validation - Accuracy: 0.4445, Precision: 0.6498, Recall: 0.4445, F1 Score: 0.4480
Epoch 16/20, Loss: 0.7993, Accuracy: 0.8279


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4326
Precision: 0.6398
Recall: 0.4326
F1 Score: 0.4328
Validation - Accuracy: 0.4326, Precision: 0.6398, Recall: 0.4326, F1 Score: 0.4328
Epoch 17/20, Loss: 0.6843, Accuracy: 0.8589


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4470
Precision: 0.6261
Recall: 0.4470
F1 Score: 0.4530
Validation - Accuracy: 0.4470, Precision: 0.6261, Recall: 0.4470, F1 Score: 0.4530
Epoch 18/20, Loss: 0.6361, Accuracy: 0.8812


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4608
Precision: 0.6159
Recall: 0.4608
F1 Score: 0.4583
Validation - Accuracy: 0.4608, Precision: 0.6159, Recall: 0.4608, F1 Score: 0.4583
Epoch 19/20, Loss: 0.6055, Accuracy: 0.8864


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4433
Precision: 0.6309
Recall: 0.4433
F1 Score: 0.4436
Validation - Accuracy: 0.4433, Precision: 0.6309, Recall: 0.4433, F1 Score: 0.4436
Epoch 20/20, Loss: 0.5358, Accuracy: 0.8916


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4470
Precision: 0.6265
Recall: 0.4470
F1 Score: 0.4515
Validation - Accuracy: 0.4470, Precision: 0.6265, Recall: 0.4470, F1 Score: 0.4515


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4505
Precision: 0.6548
Recall: 0.4505
F1 Score: 0.4594
Model and embeddings saved.
Finished training TREC with lstm_glove on Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_lstm_glove_filtered
{'classifier': 'lstm_glove', 'test_accuracy': 0.4505, 'precision': 0.6548, 'recall': 0.4505, 'f1_score': 0.4594, 'training_time': 44.53, 'dataset': 'Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_lstm_glove_filtered', 'method': 'Lambada+Instruct', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------
Training TREC with bert_ktrain on Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_bert_ktrain_filtered...
	Full train path: NLPProject2024/datasets/TREC/sampled_subsets/ver1/trec_5_subset.csv
	Generated train path: NLPProject2024/filtered_datasets/Lambada+Instruct/Llama3/TREC/bert_ktrain/Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_bert_ktrain_filtered.csv
	Test path: NLPProject2024/datasets/TREC/trec.test.csv
Training bert_ktrain.

Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 10
	95percentile : 17
	99percentile : 21




begin training using triangular learning rate policy with max lr of 5e-05...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 00008: Reducing Max LR on Plateau: new max lr will be 2.5e-05 (if not early_stopping).
Epoch 9/20
Epoch 10/20
Epoch 00010: Reducing Max LR on Plateau: new max lr will be 1.25e-05 (if not early_stopping).
Restoring model weights from the end of the best epoch: 6.
Epoch 10: early stopping
Weights from best epoch have been loaded into model.


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.50%
Precision: 0.66
Recall: 0.50
F1 Score: 0.49
Model saved to final_results/models/Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_bert_ktrain_filtered/bert_ktrain
Finished training TREC with bert_ktrain on Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_bert_ktrain_filtered
{'classifier': 'bert_ktrain', 'test_accuracy': 0.5019, 'precision': 0.6555, 'recall': 0.5019, 'f1_score': 0.4878, 'training_time': 60.43, 'dataset': 'Llama_8B_lambada_plus_instruct_TREC_5_augmented_data_bert_ktrain_filtered', 'method': 'Lambada+Instruct', 'model': 'Llama3', 'subset_size': 5}
--------------------------------------------------


In [11]:
augmented_trec_df_result.sort_values(by=['classifier', 'test_accuracy']).head(40)

Unnamed: 0,dataset,classifier,test_accuracy,model,subset_size,method,precision,recall,f1_score,training_time
11,GPT2_TREC_5_augmented_data_bert_ktrain_filtered,bert_ktrain,0.4561,GPT2,5,Lambada,0.5651,0.4561,0.4403,72.65
23,Llama_8B_lambada_plus_instruct_TREC_5_augmente...,bert_ktrain,0.5019,Llama3,5,Lambada+Instruct,0.6555,0.5019,0.4878,60.43
7,Mistral_7B_TREC_5_augmented_data_bert_ktrain_f...,bert_ktrain,0.5645,Mistral,5,Lambada,0.6779,0.5645,0.5524,68.38
3,Llama3_8B_TREC_5_augmented_data_bert_ktrain_fi...,bert_ktrain,0.567,Llama3,5,Lambada,0.7301,0.567,0.5666,56.85
15,Llama3_8B_lambada_plus_TREC_5_augmented_data_b...,bert_ktrain,0.5971,Llama3,5,Lambada+,0.7345,0.5971,0.5879,61.29
19,Mistral_7B_lambada_plus_TREC_5_augmented_data_...,bert_ktrain,0.6096,Mistral,5,Lambada+,0.7239,0.6096,0.6,53.18
10,GPT2_TREC_5_augmented_data_lstm_glove_filtered,lstm_glove,0.411,GPT2,5,Lambada,0.535,0.411,0.4217,43.83
22,Llama_8B_lambada_plus_instruct_TREC_5_augmente...,lstm_glove,0.4505,Llama3,5,Lambada+Instruct,0.6548,0.4505,0.4594,44.53
6,Mistral_7B_TREC_5_augmented_data_lstm_glove_fi...,lstm_glove,0.4511,Mistral,5,Lambada,0.5539,0.4511,0.4623,44.87
14,Llama3_8B_lambada_plus_TREC_5_augmented_data_l...,lstm_glove,0.4593,Llama3,5,Lambada+,0.6618,0.4593,0.4704,45.06


In [16]:
df_result = pd.concat([full_df_result, subsets_df_result, augmented_atis_df_result, augmented_trec_df_result], ignore_index=True)
df_result.to_csv("final_results/test_results.csv")

In [18]:
df_result.head(50)

Unnamed: 0,dataset,model,test_accuracy,precision,recall,f1_score,classifier,training_time,subset_size,method
0,atis_full,,0.9588,0.9516,0.9588,0.9532,svm_tfidf,6.87,,
1,atis_full,,0.928,0.9121,0.928,0.917,svm_glove,11.51,,
2,atis_full,,0.9811,0.9811,0.9811,0.9805,lstm_glove,60.87,,
3,atis_full,,0.9897,0.9888,0.9897,0.9891,bert_ktrain,262.77,,
4,trec_full,,0.9599,0.9611,0.9599,0.9598,svm_tfidf,81.7,,
5,trec_full,,0.8177,0.8224,0.8177,0.8168,svm_glove,40.77,,
6,trec_full,,0.963,0.9649,0.963,0.9631,lstm_glove,109.84,,
7,trec_full,,0.9831,0.9838,0.9831,0.9831,bert_ktrain,875.9,,
8,atis_5_subset_train,,0.6055,0.8235,0.6055,0.6602,svm_tfidf,0.06,5.0,
9,atis_5_subset_train,,0.5815,0.7998,0.5815,0.6436,svm_glove,8.44,5.0,
