# Experiments of Different Model Arcitectures

This notebook runs all experiments for the final report. It uses the modular code from the `.py` modules to:
1.  Define multiple model configurations.
2.  Train and evaluate each configuration on multiple languages (English, Spanish, German).
3.  Collect and display dataset statistics (vocab size, tag count, etc.).
4.  Collate all performance metrics into summary tables.
5.  Run qualitative error analysis on a saved model.

## 1. Setup and Imports

In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

import keras
import os
import pickle
import json
import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report, f1_score

from data.preprocessor import DataPreprocessor, DataPreprocessorConfig
from models.base_model import ModelConfig
from models.lstm_model import LSTMModel
from trainer.trainer import TrainerConfig, Trainer
from evaluator.evaluator import Evaluator
from inference.predictor import Predictor
from utils import load_data

keras.utils.set_random_seed(50)



## 2. Experiment Configuration

This is the main control panel. We can define all the languages and model architectures we want to test.

In [2]:
LANGUAGES = ["english", "spanish", "german"]

preprocessor_config = DataPreprocessorConfig(
    padding_type="post",
    truncation_type="post",
    remove_long_sentences=True,
    max_sequence_length=100, # As per your config
)

base_training_config = TrainerConfig(
    epochs=20,
    batch_size=64,
    early_stopping_patience=3,
    learning_rate=1e-3,
    model_dir="saved_models_experiment",
    save_best_only=True,
)


EXPERIMENT_CONFIGS = {
    "LSTM_Embed_80_LSTM_64": ModelConfig(
        embedding_dim=80,
        lstm_units=64,
        bidirectional=False,
        dropout_rate=0.3,
        training_config=base_training_config
    ),
    "LSTM_Stacked_Embed_80_LSTM_64": ModelConfig(
        embedding_dim=80,
        lstm_units=64,
        bidirectional=False,
        dropout_rate=0.3,
        training_config=base_training_config,
        lstm_layers = 2
    ),
    "LSTM_Embed_80_LSTM_128": ModelConfig(
        embedding_dim=80,
        lstm_units=128,
        bidirectional=False,
        dropout_rate=0.3,
        training_config=base_training_config
    ),
    "LSTM_Stacked_Embed_80_LSTM_128": ModelConfig(
        embedding_dim=80,
        lstm_units=128,
        bidirectional=False,
        dropout_rate=0.3,
        training_config=base_training_config,
        lstm_layers = 2
    ),
    "LSTM_Embed_128_LSTM_64": ModelConfig(
        embedding_dim=128,
        lstm_units=64,
        bidirectional=False,
        dropout_rate=0.3,
        training_config=base_training_config
    ),
    "LSTM_Stacked_Embed_128_LSTM_64": ModelConfig(
        embedding_dim=128,
        lstm_units=64,
        bidirectional=False,
        dropout_rate=0.3,
        training_config=base_training_config,
        lstm_layers = 2
    ),
    "LSTM_Embed_128_LSTM_128": ModelConfig(
        embedding_dim=128,
        lstm_units=128,
        bidirectional=False,
        dropout_rate=0.3,
        training_config=base_training_config
    ),
    "LSTM_Stacked_Embed_128_LSTM_128": ModelConfig(
        embedding_dim=128,
        lstm_units=128,
        bidirectional=False,
        dropout_rate=0.3,
        training_config=base_training_config,
        lstm_layers=2
    ),
    "BiLSTM_Embed_80_LSTM_64": ModelConfig(
        embedding_dim=80,
        lstm_units=64,
        bidirectional=True,
        dropout_rate=0.3,
        training_config=base_training_config
    ),
    "BiLSTM_Stacked_Embed_80_LSTM_64": ModelConfig(
        embedding_dim=80,
        lstm_units=64,
        bidirectional=True,
        dropout_rate=0.3,
        training_config=base_training_config,
        lstm_layers=2
    ),
    "BiLSTM_Embed_80_LSTM_128": ModelConfig(
        embedding_dim=80,
        lstm_units=128,
        bidirectional=True,
        dropout_rate=0.3,
        training_config=base_training_config
    ),
    "BiLSTM_Stacked_Embed_80_LSTM_128": ModelConfig(
        embedding_dim=80,
        lstm_units=128,
        bidirectional=True,
        dropout_rate=0.3,
        training_config=base_training_config,
        lstm_layers=2
    ),
    "BiLSTM_Embed_128_LSTM_64": ModelConfig(
        embedding_dim=128,
        lstm_units=64,
        bidirectional=True,
        dropout_rate=0.3,
        training_config=base_training_config
    ),
    "BiLSTM_Stacked_Embed_128_LSTM_64": ModelConfig(
        embedding_dim=128,
        lstm_units=64,
        bidirectional=True,
        dropout_rate=0.3,
        training_config=base_training_config,
        lstm_layers=2
    ),
    "BiLSTM_Embed_128_LSTM_128": ModelConfig(
        embedding_dim=128,
        lstm_units=128,
        bidirectional=True,
        dropout_rate=0.3,
        training_config=base_training_config
    ),
    "BiLSTM_Stacked_Embed_128_LSTM_128": ModelConfig(
        embedding_dim=128,
        lstm_units=128,
        bidirectional=True,
        dropout_rate=0.3,
        training_config=base_training_config,
        lstm_layers=2
    )
}

all_results = {}

if not os.path.exists(base_training_config.model_dir):
    os.makedirs(base_training_config.model_dir)

## 3. Main Experiment Loop

This loop will iterate through every model configuration and train/evaluate it on every language. All results will be saved.

In [3]:
for config_name, model_config in EXPERIMENT_CONFIGS.items():
    
    all_results[config_name] = {}
    
    for language in LANGUAGES:
        print(f"\n{'='*60}")
        print(f"RUNNING EXPERIMENT: Model='{config_name}', Language='{language}'")
        print(f"{'='*60}")
        
        all_results[config_name][language] = {}

        # 1. Load Data
        print("Loading data...")
        train_data, dev_data, test_data = load_data(language)
        all_results[config_name][language]['n_train_sents'] = len(train_data)
        all_results[config_name][language]['n_dev_sents'] = len(dev_data)
        all_results[config_name][language]['n_test_sents'] = len(test_data)

        # 2. Preprocess Data
        # We create a new preprocessor for each language to build language-specific vocabs
        print("Preprocessing data...")
        preprocessor = DataPreprocessor(preprocessor_config)

        X_train, y_train = preprocessor.process_data_to_pad_sequences(
            train_data, is_train_dataset=True
        )
        X_dev, y_dev = preprocessor.process_data_to_pad_sequences(
            dev_data, is_train_dataset=False
        )
        X_test, y_test = preprocessor.process_data_to_pad_sequences(
            test_data, is_train_dataset=False
        )
        
        # Save dataset stats for the report
        print(f"Vocab Size: {preprocessor.vocab_size}")
        print(f"Num Tags: {preprocessor.num_tags}")
        all_results[config_name][language]['vocab_size'] = preprocessor.vocab_size
        all_results[config_name][language]['num_tags'] = preprocessor.num_tags

        # 3. Initialize Model
        print("Initializing model...")
        model = LSTMModel(
            model_config,
            preprocessor.vocab_size,
            preprocessor.num_tags,
            preprocessor_config.max_sequence_length,
        )
        
        model.build_model()
        model.compile_model()
        print(model.get_model().summary()) # For trainable param count

        # 4. Train Model
        print("Training model...")
        trainer = Trainer(model_config.training_config, model, preprocessor)
        history = trainer.train((X_train, y_train), (X_dev, y_dev), language)
        print("Training completed.")
        
        # 5. Evaluate Model
        print("Evaluating model on test set...")
        # Note: The trainer auto-restores the best model. We use that for evaluation.
        evaluator = Evaluator(model, preprocessor)
        test_metrics = evaluator.evaluate(X_test, y_test, "Test")
        
        # Save key results
        all_results[config_name][language]['test_accuracy'] = test_metrics['accuracy']
        all_results[config_name][language]['test_f1_macro'] = f1_score(test_metrics['y_true'], test_metrics['y_pred'], average='macro')
        all_results[config_name][language]['test_f1_weighted'] = f1_score(test_metrics['y_true'], test_metrics['y_pred'], average='weighted')
        all_results[config_name][language]['classification_report'] = classification_report(test_metrics['y_true'], test_metrics['y_pred'], zero_division=0, output_dict=True)

        # 6. Save Model and Preprocessor for inference
        model_name = f"{config_name}_{language}"
        preprocessor_path = os.path.join(base_training_config.model_dir, f"{model_name}_preprocessor.pkl")
        
        with open(preprocessor_path, 'wb') as f:
            pickle.dump(preprocessor, f)
        
        print(f"--- Experiment for {config_name} on {language} FINISHED ---")

print("\n\nALL EXPERIMENTS COMPLETED.")

# Save results to a JSON file for persistence
results_path = os.path.join(base_training_config.model_dir, "all_experiment_results.json")
with open(results_path, 'w') as f:
    # We can't save the classification_report dict easily, let's simplify
    # A more robust way would be to flatten it, but this is fine for now.
    temp_results = all_results.copy()
    for config_name in temp_results:
        for language in temp_results[config_name]:
            if 'classification_report' in temp_results[config_name][language]:
                del temp_results[config_name][language]['classification_report']
                
    json.dump(temp_results, f, indent=4)

print(f"All results saved to {results_path}")


RUNNING EXPERIMENT: Model='LSTM_Embed_80_LSTM_64', Language='english'
Loading data...
Preprocessing data...
Vocab Size: 19676
Num Tags: 19
Initializing model...


None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - _masked_accuracy: 0.3491 - loss: 2.2972
Epoch 1: val_loss improved from None to 0.81489, saving model to saved_models_experiment/LSTM_Embed80_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 57ms/step - _masked_accuracy: 0.5271 - loss: 1.6781 - val__masked_accuracy: 0.7709 - val_loss: 0.8149
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - _masked_accuracy: 0.8338 - loss: 0.6127
Epoch 2: val_loss improved from 0.81489 to 0.47868, saving model to saved_models_experiment/LSTM_Embed80_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 67ms/step - _masked_accuracy: 0.8650 - loss: 0.5033 - val__masked_accuracy: 0.8592 - val_loss: 0.4787
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - _masked_accuracy: 0.9144 - loss: 0.3143
Epo

None
Training model...
Epoch 1/20
[1m221/222[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 55ms/step - _masked_accuracy: 0.4454 - loss: 2.0027
Epoch 1: val_loss improved from None to 0.57353, saving model to saved_models_experiment/LSTM_Embed80_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 59ms/step - _masked_accuracy: 0.6226 - loss: 1.3508 - val__masked_accuracy: 0.8426 - val_loss: 0.5735
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - _masked_accuracy: 0.8811 - loss: 0.4576
Epoch 2: val_loss improved from 0.57353 to 0.28777, saving model to saved_models_experiment/LSTM_Embed80_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 67ms/step - _masked_accuracy: 0.9070 - loss: 0.3702 - val__masked_accuracy: 0.9213 - val_loss: 0.2878
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - _masked_accuracy: 0.9465 - loss: 0.2198
Epo

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - _masked_accuracy: 0.3737 - loss: 2.1691
Epoch 1: val_loss improved from None to 0.88630, saving model to saved_models_experiment/LSTM_Embed80_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 58ms/step - _masked_accuracy: 0.5320 - loss: 1.5819 - val__masked_accuracy: 0.7460 - val_loss: 0.8863
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - _masked_accuracy: 0.8241 - loss: 0.6184
Epoch 2: val_loss improved from 0.88630 to 0.42675, saving model to saved_models_experiment/LSTM_Embed80_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 67ms/step - _masked_accuracy: 0.8655 - loss: 0.4899 - val__masked_accuracy: 0.8791 - val_loss: 0.4268
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - _masked_accuracy: 0.9377 - loss: 0.2557
Epoch

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - _masked_accuracy: 0.1815 - loss: 2.6111
Epoch 1: val_loss improved from None to 1.57344, saving model to saved_models_experiment/LSTM_Stacked_Embed80_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 63ms/step - _masked_accuracy: 0.2843 - loss: 2.2874 - val__masked_accuracy: 0.5074 - val_loss: 1.5734
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - _masked_accuracy: 0.5702 - loss: 1.3707
Epoch 2: val_loss improved from 1.57344 to 0.89754, saving model to saved_models_experiment/LSTM_Stacked_Embed80_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 66ms/step - _masked_accuracy: 0.6473 - loss: 1.1937 - val__masked_accuracy: 0.7697 - val_loss: 0.8975
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - _masked_accuracy: 0.7962 - 

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - _masked_accuracy: 0.2694 - loss: 2.3278
Epoch 1: val_loss improved from None to 1.06703, saving model to saved_models_experiment/LSTM_Stacked_Embed80_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 72ms/step - _masked_accuracy: 0.4188 - loss: 1.8440 - val__masked_accuracy: 0.6510 - val_loss: 1.0670
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - _masked_accuracy: 0.7016 - loss: 0.9438
Epoch 2: val_loss improved from 1.06703 to 0.57520, saving model to saved_models_experiment/LSTM_Stacked_Embed80_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 74ms/step - _masked_accuracy: 0.7562 - loss: 0.8124 - val__masked_accuracy: 0.8589 - val_loss: 0.5752
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - _masked_accuracy: 0.8734 - 

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - _masked_accuracy: 0.2413 - loss: 2.4656
Epoch 1: val_loss improved from None to 1.50101, saving model to saved_models_experiment/LSTM_Stacked_Embed80_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 71ms/step - _masked_accuracy: 0.3604 - loss: 2.0818 - val__masked_accuracy: 0.5048 - val_loss: 1.5010
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - _masked_accuracy: 0.6387 - loss: 1.1440
Epoch 2: val_loss improved from 1.50101 to 0.79263, saving model to saved_models_experiment/LSTM_Stacked_Embed80_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 74ms/step - _masked_accuracy: 0.7143 - loss: 0.9684 - val__masked_accuracy: 0.8172 - val_loss: 0.7926
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - _masked_accuracy: 0.8739 - lo

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - _masked_accuracy: 0.3478 - loss: 2.2197
Epoch 1: val_loss improved from None to 0.73381, saving model to saved_models_experiment/LSTM_Embed80_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 109ms/step - _masked_accuracy: 0.5402 - loss: 1.5712 - val__masked_accuracy: 0.7858 - val_loss: 0.7338
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - _masked_accuracy: 0.8443 - loss: 0.5433
Epoch 2: val_loss improved from 0.73381 to 0.44168, saving model to saved_models_experiment/LSTM_Embed80_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 106ms/step - _masked_accuracy: 0.8737 - loss: 0.4468 - val__masked_accuracy: 0.8698 - val_loss: 0.4417
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - _masked_accuracy: 0.9211 - loss: 0.2

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step - _masked_accuracy: 0.4661 - loss: 1.8720
Epoch 1: val_loss improved from None to 0.48106, saving model to saved_models_experiment/LSTM_Embed80_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 106ms/step - _masked_accuracy: 0.6452 - loss: 1.2238 - val__masked_accuracy: 0.8588 - val_loss: 0.4811
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - _masked_accuracy: 0.8908 - loss: 0.3928
Epoch 2: val_loss improved from 0.48106 to 0.26237, saving model to saved_models_experiment/LSTM_Embed80_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 105ms/step - _masked_accuracy: 0.9123 - loss: 0.3231 - val__masked_accuracy: 0.9254 - val_loss: 0.2624
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - _masked_accuracy: 0.9481 - loss: 0.1

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - _masked_accuracy: 0.3863 - loss: 2.0720
Epoch 1: val_loss improved from None to 0.75015, saving model to saved_models_experiment/LSTM_Embed80_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 106ms/step - _masked_accuracy: 0.5532 - loss: 1.4577 - val__masked_accuracy: 0.7867 - val_loss: 0.7501
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - _masked_accuracy: 0.8431 - loss: 0.5319
Epoch 2: val_loss improved from 0.75015 to 0.40686, saving model to saved_models_experiment/LSTM_Embed80_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 105ms/step - _masked_accuracy: 0.8777 - loss: 0.4263 - val__masked_accuracy: 0.8807 - val_loss: 0.4069
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - _masked_accuracy: 0.9402 - loss: 0.232

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step - _masked_accuracy: 0.2349 - loss: 2.4681
Epoch 1: val_loss improved from None to 1.00626, saving model to saved_models_experiment/LSTM_Stacked_Embed80_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 153ms/step - _masked_accuracy: 0.3942 - loss: 1.9551 - val__masked_accuracy: 0.6933 - val_loss: 1.0063
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step - _masked_accuracy: 0.7696 - loss: 0.7879
Epoch 2: val_loss improved from 1.00626 to 0.57706, saving model to saved_models_experiment/LSTM_Stacked_Embed80_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 158ms/step - _masked_accuracy: 0.8122 - loss: 0.6570 - val__masked_accuracy: 0.8386 - val_loss: 0.5771
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step - _masked_accuracy: 0.

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - _masked_accuracy: 0.3184 - loss: 2.1841
Epoch 1: val_loss improved from None to 0.82637, saving model to saved_models_experiment/LSTM_Stacked_Embed80_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m300s[0m 1s/step - _masked_accuracy: 0.4899 - loss: 1.6293 - val__masked_accuracy: 0.7426 - val_loss: 0.8264
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step - _masked_accuracy: 0.7957 - loss: 0.6871
Epoch 2: val_loss improved from 0.82637 to 0.38370, saving model to saved_models_experiment/LSTM_Stacked_Embed80_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 170ms/step - _masked_accuracy: 0.8440 - loss: 0.5588 - val__masked_accuracy: 0.8956 - val_loss: 0.3837
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step - _masked_accuracy: 0.9208 

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step - _masked_accuracy: 0.2701 - loss: 2.3178
Epoch 1: val_loss improved from None to 1.24191, saving model to saved_models_experiment/LSTM_Stacked_Embed80_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 161ms/step - _masked_accuracy: 0.4043 - loss: 1.8625 - val__masked_accuracy: 0.5681 - val_loss: 1.2419
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step - _masked_accuracy: 0.7129 - loss: 0.8925
Epoch 2: val_loss improved from 1.24191 to 0.56488, saving model to saved_models_experiment/LSTM_Stacked_Embed80_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 170ms/step - _masked_accuracy: 0.7806 - loss: 0.7148 - val__masked_accuracy: 0.8474 - val_loss: 0.5649
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166ms/step - _masked_accuracy: 0.89

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - _masked_accuracy: 0.4100 - loss: 2.1781
Epoch 1: val_loss improved from None to 0.70499, saving model to saved_models_experiment/LSTM_Embed128_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 73ms/step - _masked_accuracy: 0.5899 - loss: 1.5128 - val__masked_accuracy: 0.8076 - val_loss: 0.7050
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - _masked_accuracy: 0.8669 - loss: 0.5086
Epoch 2: val_loss improved from 0.70499 to 0.44802, saving model to saved_models_experiment/LSTM_Embed128_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 80ms/step - _masked_accuracy: 0.8895 - loss: 0.4188 - val__masked_accuracy: 0.8658 - val_loss: 0.4480
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - _masked_accuracy: 0.9261 - loss: 0.2648
E

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - _masked_accuracy: 0.5013 - loss: 1.8824
Epoch 1: val_loss improved from None to 0.48519, saving model to saved_models_experiment/LSTM_Embed128_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 78ms/step - _masked_accuracy: 0.6711 - loss: 1.2262 - val__masked_accuracy: 0.8778 - val_loss: 0.4852
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - _masked_accuracy: 0.9054 - loss: 0.3887
Epoch 2: val_loss improved from 0.48519 to 0.26600, saving model to saved_models_experiment/LSTM_Embed128_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 84ms/step - _masked_accuracy: 0.9231 - loss: 0.3172 - val__masked_accuracy: 0.9270 - val_loss: 0.2660
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - _masked_accuracy: 0.9530 - loss: 0.1904
E

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - _masked_accuracy: 0.4265 - loss: 2.0640
Epoch 1: val_loss improved from None to 0.71237, saving model to saved_models_experiment/LSTM_Embed128_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 79ms/step - _masked_accuracy: 0.5865 - loss: 1.4356 - val__masked_accuracy: 0.8023 - val_loss: 0.7124
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - _masked_accuracy: 0.8576 - loss: 0.4994
Epoch 2: val_loss improved from 0.71237 to 0.37575, saving model to saved_models_experiment/LSTM_Embed128_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 84ms/step - _masked_accuracy: 0.8905 - loss: 0.3955 - val__masked_accuracy: 0.8924 - val_loss: 0.3758
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step - _masked_accuracy: 0.9495 - loss: 0.2059
Epo

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - _masked_accuracy: 0.2063 - loss: 2.5568
Epoch 1: val_loss improved from None to 1.50617, saving model to saved_models_experiment/LSTM_Stacked_Embed128_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 81ms/step - _masked_accuracy: 0.3050 - loss: 2.2021 - val__masked_accuracy: 0.5366 - val_loss: 1.5062
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - _masked_accuracy: 0.6314 - loss: 1.2626
Epoch 2: val_loss improved from 1.50617 to 0.78179, saving model to saved_models_experiment/LSTM_Stacked_Embed128_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 84ms/step - _masked_accuracy: 0.6928 - loss: 1.0682 - val__masked_accuracy: 0.7887 - val_loss: 0.7818
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - _masked_accuracy: 0.8433 

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step - _masked_accuracy: 0.2939 - loss: 2.2853
Epoch 1: val_loss improved from None to 0.99502, saving model to saved_models_experiment/LSTM_Stacked_Embed128_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 92ms/step - _masked_accuracy: 0.4531 - loss: 1.7642 - val__masked_accuracy: 0.6724 - val_loss: 0.9950
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - _masked_accuracy: 0.7551 - loss: 0.8368
Epoch 2: val_loss improved from 0.99502 to 0.42932, saving model to saved_models_experiment/LSTM_Stacked_Embed128_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 94ms/step - _masked_accuracy: 0.8227 - loss: 0.6828 - val__masked_accuracy: 0.9079 - val_loss: 0.4293
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - _masked_accuracy: 0.9256 

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step - _masked_accuracy: 0.2746 - loss: 2.3889
Epoch 1: val_loss improved from None to 1.30149, saving model to saved_models_experiment/LSTM_Stacked_Embed128_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 92ms/step - _masked_accuracy: 0.4121 - loss: 1.9124 - val__masked_accuracy: 0.5701 - val_loss: 1.3015
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - _masked_accuracy: 0.7032 - loss: 0.9664
Epoch 2: val_loss improved from 1.30149 to 0.68244, saving model to saved_models_experiment/LSTM_Stacked_Embed128_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 94ms/step - _masked_accuracy: 0.7746 - loss: 0.7925 - val__masked_accuracy: 0.8156 - val_loss: 0.6824
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step - _masked_accuracy: 0.9051 - 

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step - _masked_accuracy: 0.4027 - loss: 2.1093
Epoch 1: val_loss improved from None to 0.61666, saving model to saved_models_experiment/LSTM_Embed128_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 121ms/step - _masked_accuracy: 0.5950 - loss: 1.4191 - val__masked_accuracy: 0.8334 - val_loss: 0.6167
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step - _masked_accuracy: 0.8773 - loss: 0.4449
Epoch 2: val_loss improved from 0.61666 to 0.40101, saving model to saved_models_experiment/LSTM_Embed128_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 115ms/step - _masked_accuracy: 0.8978 - loss: 0.3704 - val__masked_accuracy: 0.8864 - val_loss: 0.4010
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step - _masked_accuracy: 0.9296 - loss: 0

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step - _masked_accuracy: 0.5068 - loss: 1.7797
Epoch 1: val_loss improved from None to 0.40794, saving model to saved_models_experiment/LSTM_Embed128_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 120ms/step - _masked_accuracy: 0.6827 - loss: 1.1211 - val__masked_accuracy: 0.8881 - val_loss: 0.4079
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step - _masked_accuracy: 0.9117 - loss: 0.3322
Epoch 2: val_loss improved from 0.40794 to 0.24360, saving model to saved_models_experiment/LSTM_Embed128_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 120ms/step - _masked_accuracy: 0.9281 - loss: 0.2750 - val__masked_accuracy: 0.9308 - val_loss: 0.2436
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - _masked_accuracy: 0.9551 - loss: 0

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - _masked_accuracy: 0.4298 - loss: 1.9516
Epoch 1: val_loss improved from None to 0.60547, saving model to saved_models_experiment/LSTM_Embed128_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 120ms/step - _masked_accuracy: 0.6027 - loss: 1.3105 - val__masked_accuracy: 0.8349 - val_loss: 0.6055
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step - _masked_accuracy: 0.8764 - loss: 0.4245
Epoch 2: val_loss improved from 0.60547 to 0.36488, saving model to saved_models_experiment/LSTM_Embed128_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 118ms/step - _masked_accuracy: 0.9039 - loss: 0.3385 - val__masked_accuracy: 0.8837 - val_loss: 0.3649
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step - _masked_accuracy: 0.9538 - loss: 0.1

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step - _masked_accuracy: 0.2512 - loss: 2.4311
Epoch 1: val_loss improved from None to 1.00585, saving model to saved_models_experiment/LSTM_Stacked_Embed128_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 160ms/step - _masked_accuracy: 0.4121 - loss: 1.9203 - val__masked_accuracy: 0.7033 - val_loss: 1.0058
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step - _masked_accuracy: 0.7863 - loss: 0.7732
Epoch 2: val_loss improved from 1.00585 to 0.50274, saving model to saved_models_experiment/LSTM_Stacked_Embed128_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 164ms/step - _masked_accuracy: 0.8340 - loss: 0.6178 - val__masked_accuracy: 0.8677 - val_loss: 0.5027
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step - _masked_accuracy: 

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step - _masked_accuracy: 0.3528 - loss: 2.0932
Epoch 1: val_loss improved from None to 0.65992, saving model to saved_models_experiment/LSTM_Stacked_Embed128_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 174ms/step - _masked_accuracy: 0.5449 - loss: 1.4860 - val__masked_accuracy: 0.8159 - val_loss: 0.6599
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step - _masked_accuracy: 0.8547 - loss: 0.5482
Epoch 2: val_loss improved from 0.65992 to 0.31384, saving model to saved_models_experiment/LSTM_Stacked_Embed128_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 181ms/step - _masked_accuracy: 0.8882 - loss: 0.4412 - val__masked_accuracy: 0.9211 - val_loss: 0.3138
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step - _masked_accuracy: 

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step - _masked_accuracy: 0.2920 - loss: 2.2414
Epoch 1: val_loss improved from None to 1.06466, saving model to saved_models_experiment/LSTM_Stacked_Embed128_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 172ms/step - _masked_accuracy: 0.4552 - loss: 1.7052 - val__masked_accuracy: 0.6855 - val_loss: 1.0647
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step - _masked_accuracy: 0.7520 - loss: 0.8121
Epoch 2: val_loss improved from 1.06466 to 0.59254, saving model to saved_models_experiment/LSTM_Stacked_Embed128_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 177ms/step - _masked_accuracy: 0.7979 - loss: 0.6740 - val__masked_accuracy: 0.8321 - val_loss: 0.5925
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9s/step - _masked_accuracy: 0.900

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - _masked_accuracy: 0.3924 - loss: 2.1431
Epoch 1: val_loss improved from None to 0.60390, saving model to saved_models_experiment/BiLSTM_Embed80_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 76ms/step - _masked_accuracy: 0.5846 - loss: 1.4507 - val__masked_accuracy: 0.8276 - val_loss: 0.6039
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - _masked_accuracy: 0.8852 - loss: 0.4177
Epoch 2: val_loss improved from 0.60390 to 0.35914, saving model to saved_models_experiment/BiLSTM_Embed80_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 86ms/step - _masked_accuracy: 0.9102 - loss: 0.3324 - val__masked_accuracy: 0.8918 - val_loss: 0.3591
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - _masked_accuracy: 0.9497 - loss: 0.1911

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 475ms/step - _masked_accuracy: 0.4611 - loss: 1.8710
Epoch 1: val_loss improved from None to 0.41367, saving model to saved_models_experiment/BiLSTM_Embed80_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 479ms/step - _masked_accuracy: 0.6535 - loss: 1.1914 - val__masked_accuracy: 0.8860 - val_loss: 0.4137
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step - _masked_accuracy: 0.9104 - loss: 0.3321
Epoch 2: val_loss improved from 0.41367 to 0.21228, saving model to saved_models_experiment/BiLSTM_Embed80_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 92ms/step - _masked_accuracy: 0.9281 - loss: 0.2666 - val__masked_accuracy: 0.9372 - val_loss: 0.2123
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - _masked_accuracy: 0.9584 - loss: 0.1

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - _masked_accuracy: 0.3911 - loss: 2.0262
Epoch 1: val_loss improved from None to 0.60576, saving model to saved_models_experiment/BiLSTM_Embed80_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 84ms/step - _masked_accuracy: 0.5792 - loss: 1.3647 - val__masked_accuracy: 0.8187 - val_loss: 0.6058
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - _masked_accuracy: 0.8786 - loss: 0.4087
Epoch 2: val_loss improved from 0.60576 to 0.31873, saving model to saved_models_experiment/BiLSTM_Embed80_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m783s[0m 4s/step - _masked_accuracy: 0.9084 - loss: 0.3184 - val__masked_accuracy: 0.9004 - val_loss: 0.3187
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step - _masked_accuracy: 0.9587 - loss: 0.1583
Epoc

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step - _masked_accuracy: 0.2892 - loss: 2.2951
Epoch 1: val_loss improved from None to 0.70893, saving model to saved_models_experiment/BiLSTM_Stacked_Embed80_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 148ms/step - _masked_accuracy: 0.4822 - loss: 1.6825 - val__masked_accuracy: 0.7972 - val_loss: 0.7089
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step - _masked_accuracy: 0.8439 - loss: 0.5485
Epoch 2: val_loss improved from 0.70893 to 0.43262, saving model to saved_models_experiment/BiLSTM_Stacked_Embed80_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 183ms/step - _masked_accuracy: 0.8756 - loss: 0.4453 - val__masked_accuracy: 0.8785 - val_loss: 0.4326
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 184ms/step - _masked_accuracy: 

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 164ms/step - _masked_accuracy: 0.3713 - loss: 1.9914
Epoch 1: val_loss improved from None to 0.43241, saving model to saved_models_experiment/BiLSTM_Stacked_Embed80_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 171ms/step - _masked_accuracy: 0.5933 - loss: 1.3076 - val__masked_accuracy: 0.8819 - val_loss: 0.4324
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step - _masked_accuracy: 0.9028 - loss: 0.3679
Epoch 2: val_loss improved from 0.43241 to 0.23150, saving model to saved_models_experiment/BiLSTM_Stacked_Embed80_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 201ms/step - _masked_accuracy: 0.9230 - loss: 0.2947 - val__masked_accuracy: 0.9355 - val_loss: 0.2315
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step - _masked_accuracy: 

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step - _masked_accuracy: 0.2990 - loss: 2.1783
Epoch 1: val_loss improved from None to 0.69446, saving model to saved_models_experiment/BiLSTM_Stacked_Embed80_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 171ms/step - _masked_accuracy: 0.4862 - loss: 1.5761 - val__masked_accuracy: 0.7992 - val_loss: 0.6945
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step - _masked_accuracy: 0.8587 - loss: 0.4827
Epoch 2: val_loss improved from 0.69446 to 0.38873, saving model to saved_models_experiment/BiLSTM_Stacked_Embed80_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 207ms/step - _masked_accuracy: 0.8944 - loss: 0.3755 - val__masked_accuracy: 0.8897 - val_loss: 0.3887
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 203ms/step - _masked_accuracy: 0.

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step - _masked_accuracy: 0.3885 - loss: 2.0718
Epoch 1: val_loss improved from None to 0.54977, saving model to saved_models_experiment/BiLSTM_Embed80_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 164ms/step - _masked_accuracy: 0.5944 - loss: 1.3688 - val__masked_accuracy: 0.8429 - val_loss: 0.5498
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 196ms/step - _masked_accuracy: 0.8932 - loss: 0.3849
Epoch 2: val_loss improved from 0.54977 to 0.32250, saving model to saved_models_experiment/BiLSTM_Embed80_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 200ms/step - _masked_accuracy: 0.9160 - loss: 0.3064 - val__masked_accuracy: 0.9092 - val_loss: 0.3225
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 195ms/step - _masked_accuracy: 0.9516 - loss:

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - _masked_accuracy: 0.4742 - loss: 1.7541
Epoch 1: val_loss improved from None to 0.34017, saving model to saved_models_experiment/BiLSTM_Embed80_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 168ms/step - _masked_accuracy: 0.6779 - loss: 1.0725 - val__masked_accuracy: 0.9016 - val_loss: 0.3402
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 202ms/step - _masked_accuracy: 0.9206 - loss: 0.2802
Epoch 2: val_loss improved from 0.34017 to 0.19562, saving model to saved_models_experiment/BiLSTM_Embed80_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 205ms/step - _masked_accuracy: 0.9359 - loss: 0.2285 - val__masked_accuracy: 0.9415 - val_loss: 0.1956
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 202ms/step - _masked_accuracy: 0.9615 - loss:

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - _masked_accuracy: 0.4111 - loss: 1.9311
Epoch 1: val_loss improved from None to 0.51645, saving model to saved_models_experiment/BiLSTM_Embed80_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 168ms/step - _masked_accuracy: 0.6028 - loss: 1.2633 - val__masked_accuracy: 0.8431 - val_loss: 0.5164
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 202ms/step - _masked_accuracy: 0.8884 - loss: 0.3614
Epoch 2: val_loss improved from 0.51645 to 0.28365, saving model to saved_models_experiment/BiLSTM_Embed80_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 205ms/step - _masked_accuracy: 0.9149 - loss: 0.2841 - val__masked_accuracy: 0.9140 - val_loss: 0.2836
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - _masked_accuracy: 0.9611 - loss: 0.14

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 312ms/step - _masked_accuracy: 0.3277 - loss: 2.1538
Epoch 1: val_loss improved from None to 0.56918, saving model to saved_models_experiment/BiLSTM_Stacked_Embed80_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 328ms/step - _masked_accuracy: 0.5387 - loss: 1.4812 - val__masked_accuracy: 0.8337 - val_loss: 0.5692
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375ms/step - _masked_accuracy: 0.8829 - loss: 0.4133
Epoch 2: val_loss improved from 0.56918 to 0.35710, saving model to saved_models_experiment/BiLSTM_Stacked_Embed80_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 387ms/step - _masked_accuracy: 0.9067 - loss: 0.3319 - val__masked_accuracy: 0.8977 - val_loss: 0.3571
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 379ms/step - _masked_accuracy

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 334ms/step - _masked_accuracy: 0.4198 - loss: 1.8223
Epoch 1: val_loss improved from None to 0.33102, saving model to saved_models_experiment/BiLSTM_Stacked_Embed80_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 345ms/step - _masked_accuracy: 0.6459 - loss: 1.1242 - val__masked_accuracy: 0.9065 - val_loss: 0.3310
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 431ms/step - _masked_accuracy: 0.9170 - loss: 0.2945
Epoch 2: val_loss improved from 0.33102 to 0.20828, saving model to saved_models_experiment/BiLSTM_Stacked_Embed80_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 439ms/step - _masked_accuracy: 0.9340 - loss: 0.2381 - val__masked_accuracy: 0.9396 - val_loss: 0.2083
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 421ms/step - _masked_accuracy

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 347ms/step - _masked_accuracy: 0.3486 - loss: 2.0041
Epoch 1: val_loss improved from None to 0.54656, saving model to saved_models_experiment/BiLSTM_Stacked_Embed80_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 358ms/step - _masked_accuracy: 0.5576 - loss: 1.3394 - val__masked_accuracy: 0.8423 - val_loss: 0.5466
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 435ms/step - _masked_accuracy: 0.8855 - loss: 0.3742
Epoch 2: val_loss improved from 0.54656 to 0.42078, saving model to saved_models_experiment/BiLSTM_Stacked_Embed80_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 440ms/step - _masked_accuracy: 0.9132 - loss: 0.2922 - val__masked_accuracy: 0.8847 - val_loss: 0.4208
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 433ms/step - _masked_accuracy: 

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step - _masked_accuracy: 0.4145 - loss: 2.0525
Epoch 1: val_loss improved from None to 0.50992, saving model to saved_models_experiment/BiLSTM_Embed128_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 105ms/step - _masked_accuracy: 0.6209 - loss: 1.3277 - val__masked_accuracy: 0.8624 - val_loss: 0.5099
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step - _masked_accuracy: 0.9065 - loss: 0.3471
Epoch 2: val_loss improved from 0.50992 to 0.32780, saving model to saved_models_experiment/BiLSTM_Embed128_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 121ms/step - _masked_accuracy: 0.9255 - loss: 0.2786 - val__masked_accuracy: 0.9062 - val_loss: 0.3278
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step - _masked_accuracy: 0.9550 - loss: 

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step - _masked_accuracy: 0.5181 - loss: 1.7410
Epoch 1: val_loss improved from None to 0.32807, saving model to saved_models_experiment/BiLSTM_Embed128_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 109ms/step - _masked_accuracy: 0.6984 - loss: 1.0577 - val__masked_accuracy: 0.9071 - val_loss: 0.3281
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - _masked_accuracy: 0.9286 - loss: 0.2617
Epoch 2: val_loss improved from 0.32807 to 0.19310, saving model to saved_models_experiment/BiLSTM_Embed128_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 132ms/step - _masked_accuracy: 0.9420 - loss: 0.2126 - val__masked_accuracy: 0.9428 - val_loss: 0.1931
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - _masked_accuracy: 0.9645 - loss:

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - _masked_accuracy: 0.4332 - loss: 1.9253
Epoch 1: val_loss improved from None to 0.49990, saving model to saved_models_experiment/BiLSTM_Embed128_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 111ms/step - _masked_accuracy: 0.6221 - loss: 1.2483 - val__masked_accuracy: 0.8563 - val_loss: 0.4999
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - _masked_accuracy: 0.9068 - loss: 0.3308
Epoch 2: val_loss improved from 0.49990 to 0.28188, saving model to saved_models_experiment/BiLSTM_Embed128_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 132ms/step - _masked_accuracy: 0.9300 - loss: 0.2547 - val__masked_accuracy: 0.9155 - val_loss: 0.2819
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - _masked_accuracy: 0.9673 - loss: 0

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step - _masked_accuracy: 0.3174 - loss: 2.2177
Epoch 1: val_loss improved from None to 0.57759, saving model to saved_models_experiment/BiLSTM_Stacked_Embed128_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 185ms/step - _masked_accuracy: 0.5258 - loss: 1.5510 - val__masked_accuracy: 0.8395 - val_loss: 0.5776
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 219ms/step - _masked_accuracy: 0.8848 - loss: 0.4235
Epoch 2: val_loss improved from 0.57759 to 0.36179, saving model to saved_models_experiment/BiLSTM_Stacked_Embed128_LSTM64_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 225ms/step - _masked_accuracy: 0.9093 - loss: 0.3360 - val__masked_accuracy: 0.8982 - val_loss: 0.3618
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 218ms/step - _masked_accuracy

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 190ms/step - _masked_accuracy: 0.3962 - loss: 1.9250
Epoch 1: val_loss improved from None to 0.39191, saving model to saved_models_experiment/BiLSTM_Stacked_Embed128_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 197ms/step - _masked_accuracy: 0.6162 - loss: 1.2427 - val__masked_accuracy: 0.8909 - val_loss: 0.3919
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 234ms/step - _masked_accuracy: 0.9162 - loss: 0.3182
Epoch 2: val_loss improved from 0.39191 to 0.22999, saving model to saved_models_experiment/BiLSTM_Stacked_Embed128_LSTM64_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 238ms/step - _masked_accuracy: 0.9346 - loss: 0.2510 - val__masked_accuracy: 0.9307 - val_loss: 0.2300
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step - _masked_accuracy

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step - _masked_accuracy: 0.3349 - loss: 2.0945
Epoch 1: val_loss improved from None to 0.59318, saving model to saved_models_experiment/BiLSTM_Stacked_Embed128_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 197ms/step - _masked_accuracy: 0.5347 - loss: 1.4531 - val__masked_accuracy: 0.8238 - val_loss: 0.5932
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step - _masked_accuracy: 0.8811 - loss: 0.4069
Epoch 2: val_loss improved from 0.59318 to 0.35148, saving model to saved_models_experiment/BiLSTM_Stacked_Embed128_LSTM64_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 234ms/step - _masked_accuracy: 0.9131 - loss: 0.3097 - val__masked_accuracy: 0.8961 - val_loss: 0.3515
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step - _masked_accuracy: 

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step - _masked_accuracy: 0.4234 - loss: 1.9647
Epoch 1: val_loss improved from None to 0.47160, saving model to saved_models_experiment/BiLSTM_Embed128_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 179ms/step - _masked_accuracy: 0.6385 - loss: 1.2254 - val__masked_accuracy: 0.8592 - val_loss: 0.4716
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 201ms/step - _masked_accuracy: 0.9154 - loss: 0.3051
Epoch 2: val_loss improved from 0.47160 to 0.32571, saving model to saved_models_experiment/BiLSTM_Embed128_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 207ms/step - _masked_accuracy: 0.9322 - loss: 0.2468 - val__masked_accuracy: 0.8973 - val_loss: 0.3257
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step - _masked_accuracy: 0.9592 - los

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 179ms/step - _masked_accuracy: 0.5425 - loss: 1.6394
Epoch 1: val_loss improved from None to 0.28233, saving model to saved_models_experiment/BiLSTM_Embed128_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 187ms/step - _masked_accuracy: 0.7233 - loss: 0.9599 - val__masked_accuracy: 0.9195 - val_loss: 0.2823
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 219ms/step - _masked_accuracy: 0.9337 - loss: 0.2340
Epoch 2: val_loss improved from 0.28233 to 0.18427, saving model to saved_models_experiment/BiLSTM_Embed128_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 223ms/step - _masked_accuracy: 0.9462 - loss: 0.1916 - val__masked_accuracy: 0.9456 - val_loss: 0.1843
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step - _masked_accuracy: 0.9667 - los

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step - _masked_accuracy: 0.4605 - loss: 1.8332
Epoch 1: val_loss improved from None to 0.42989, saving model to saved_models_experiment/BiLSTM_Embed128_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 183ms/step - _masked_accuracy: 0.6457 - loss: 1.1499 - val__masked_accuracy: 0.8689 - val_loss: 0.4299
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step - _masked_accuracy: 0.9095 - loss: 0.2993
Epoch 2: val_loss improved from 0.42989 to 0.27067, saving model to saved_models_experiment/BiLSTM_Embed128_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 201ms/step - _masked_accuracy: 0.9326 - loss: 0.2324 - val__masked_accuracy: 0.9183 - val_loss: 0.2707
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step - _masked_accuracy: 0.9682 - loss:

None
Training model...
Epoch 1/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 328ms/step - _masked_accuracy: 0.3497 - loss: 2.0884
Epoch 1: val_loss improved from None to 0.52060, saving model to saved_models_experiment/BiLSTM_Stacked_Embed128_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 346ms/step - _masked_accuracy: 0.5720 - loss: 1.3816 - val__masked_accuracy: 0.8508 - val_loss: 0.5206
Epoch 2/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 403ms/step - _masked_accuracy: 0.9000 - loss: 0.3505
Epoch 2: val_loss improved from 0.52060 to 0.38722, saving model to saved_models_experiment/BiLSTM_Stacked_Embed128_LSTM128_english.keras
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 415ms/step - _masked_accuracy: 0.9215 - loss: 0.2780 - val__masked_accuracy: 0.8909 - val_loss: 0.3872
Epoch 3/20
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 394ms/step - _masked_accura

None
Training model...
Epoch 1/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 364ms/step - _masked_accuracy: 0.4476 - loss: 1.7426
Epoch 1: val_loss improved from None to 0.29239, saving model to saved_models_experiment/BiLSTM_Stacked_Embed128_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 376ms/step - _masked_accuracy: 0.6780 - loss: 1.0332 - val__masked_accuracy: 0.9182 - val_loss: 0.2924
Epoch 2/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 447ms/step - _masked_accuracy: 0.9288 - loss: 0.2566
Epoch 2: val_loss improved from 0.29239 to 0.21340, saving model to saved_models_experiment/BiLSTM_Stacked_Embed128_LSTM128_spanish.keras
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 454ms/step - _masked_accuracy: 0.9435 - loss: 0.2068 - val__masked_accuracy: 0.9379 - val_loss: 0.2134
Epoch 3/20
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 444ms/step - _masked_accur

None
Training model...
Epoch 1/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 359ms/step - _masked_accuracy: 0.3781 - loss: 1.9259
Epoch 1: val_loss improved from None to 0.47385, saving model to saved_models_experiment/BiLSTM_Stacked_Embed128_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 367ms/step - _masked_accuracy: 0.5912 - loss: 1.2467 - val__masked_accuracy: 0.8678 - val_loss: 0.4738
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 448ms/step - _masked_accuracy: 0.9001 - loss: 0.3314
Epoch 2: val_loss improved from 0.47385 to 0.30636, saving model to saved_models_experiment/BiLSTM_Stacked_Embed128_LSTM128_german.keras
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 454ms/step - _masked_accuracy: 0.9259 - loss: 0.2560 - val__masked_accuracy: 0.9099 - val_loss: 0.3064
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 431ms/step - _masked_accuracy

## 4. Collate and Display Results

This section formats the collected data from the `all_results` dictionary into markdown tables, ready to be copied into your report.

In [10]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import json
import os

# Set seaborn style for better-looking plots
sns.set_theme(style="whitegrid")

# --- 1. Load and Flatten Data ---
print("Loading all_experiment_results.json...")
# Ensure the file is in the same directory as this notebook
with open('saved_models_experiment/all_experiment_results.json', 'r') as f:
    data = json.load(f)

# Flatten the nested JSON into a list of dictionaries
flat_data = []
for model_name, languages in data.items():
    for language, metrics in languages.items():
        # Create a new record for each model-language pair
        record = {
            'model': model_name,
            'language': language,
            **metrics  # Add all metric key-value pairs
        }
        flat_data.append(record)

# Convert to Pandas DataFrame
df = pd.DataFrame(flat_data)
print("Data loaded and flattened successfully.")

# Feature Engineering
# Extract key hyperparameters from the model name
df['Direction'] = df['model'].apply(lambda x: 'BiLSTM' if 'BiLSTM' in x else 'LSTM')
df['Embedding Dim'] = df['model'].apply(lambda x: 128 if 'Embed_128' in x else 80).astype(int)
df['LSTM Units'] = df['model'].apply(lambda x: 128 if 'LSTM_128' in x else 64).astype(int)
df['Stacked'] = df['model'].apply(lambda x: 'Stacked' if 'Stacked' in x else 'Single')


# Create a cleaner label for plotting
df['Model Label'] = df['model'].str.replace('_', ' ').str.replace('LSTM', ' LSTM')

# Directory to save plots
plot_dir = "seaborn_plots"
if not os.path.exists(plot_dir):
    os.makedirs(plot_dir)
print(f"Plots will be saved to '{plot_dir}/'")

# --- 3. Generate Plots ---
plot_files = []

# Plot 1: Dataset Statistics (Vocabulary Size)
print("Generating Plot 1: Vocabulary Size by Language...")
plt.figure(figsize=(8, 5))
# We only need one entry per language, so we drop duplicates
df_stats = df.drop_duplicates(subset='language')
sns.barplot(data=df_stats, x='language', y='vocab_size', palette='viridis')
plt.title('Vocabulary Size by Language', fontsize=16)
plt.ylabel('Vocabulary Count', fontsize=12)
plt.xlabel('Language', fontsize=12)
plot_path = os.path.join(plot_dir, '1_vocab_size_by_language.png')
plt.savefig(plot_path)
plot_files.append(plot_path)
plt.close()

# Plot 2: Key Insight - BiLSTM vs. Unidirectional LSTM
print("Generating Plot 2: Directionality Comparison...")
g = sns.catplot(
    data=df, 
    x='language', 
    y='test_accuracy', 
    hue='Direction', 
    kind='bar', 
    palette='colorblind',
    aspect=1.5,
    legend_out=False
)
g.fig.suptitle('BiLSTM vs. Unidirectional LSTM (Test Accuracy)', fontsize=16, y=1.03)
g.set_axis_labels('Language', 'Test Accuracy', fontsize=12)
plt.ylim(0.85, 0.95) # Emphasize the difference
g.ax.legend(title='Direction', loc='upper right')
plot_path = os.path.join(plot_dir, '2_directionality_comparison.png')
plt.savefig(plot_path, bbox_inches='tight')
plot_files.append(plot_path)
plt.close()

# Plot 3: Key Insight - Accuracy vs. F1-Macro (Best Model)
print("Generating Plot 3: Accuracy vs. F1-Macro...")
# Find the best model overall (highest mean F1-Macro)
best_model_name = df.groupby('model')['test_f1_macro'].mean().idxmax()
df_best_model = df[df['model'] == best_model_name]

# Melt the dataframe to plot multiple metrics
df_melted = df_best_model.melt(
    id_vars='language', 
    value_vars=['test_accuracy', 'test_f1_macro'], 
    var_name='Metric', 
    value_name='Score'
)
# Make labels cleaner
df_melted['Metric'] = df_melted['Metric'].map({
    'test_accuracy': 'Accuracy',
    'test_f1_macro': 'F1-Macro'
})

g = sns.catplot(
    data=df_melted, 
    x='language', 
    y='Score', 
    hue='Metric', 
    kind='bar', 
    palette='Pastel1',
    aspect=1.5
)
g.fig.suptitle(f'Accuracy vs. F1-Macro (Best Model: {best_model_name})', fontsize=16, y=1.03)
g.set_axis_labels('Language', 'Score', fontsize=12)
plt.ylim(0.7, 1.0)
plot_path = os.path.join(plot_dir, '3_accuracy_vs_f1_macro.png')
plt.savefig(plot_path, bbox_inches='tight')
plot_files.append(plot_path)
plt.close()

# Plot 4: Model Complexity Heatmap (English BiLSTMs)
print("Generating Plot 4: Complexity Heatmap (English BiLSTMs)...")
df_eng_bilstm = df[(df['language'] == 'english') & (df['Direction'] == 'BiLSTM')]
pivot_table = df_eng_bilstm.pivot_table(
    index='Embedding Dim', 
    columns='LSTM Units', 
    values='test_accuracy'
)

plt.figure(figsize=(7, 5))
sns.heatmap(pivot_table, annot=True, fmt=".4f", cmap="viridis", linewidths=.5)
plt.title('English BiLSTM: Test Accuracy Heatmap', fontsize=16)
plt.xlabel('LSTM Hidden Units', fontsize=12)
plt.ylabel('Embedding Dimension', fontsize=12)
plot_path = os.path.join(plot_dir, '4_english_bilstm_heatmap.png')
plt.savefig(plot_path, bbox_inches='tight')
plot_files.append(plot_path)
plt.close()

# Plot 5: Full Model Comparison (All Models, All Languages)
print("Generating Plot 5: Full Model Comparison...")
df_sorted = df.sort_values(by='test_accuracy', ascending=False)
plt.figure(figsize=(12, 10))
sns.barplot(
    data=df_sorted, 
    x='test_accuracy', 
    y='Model Label', 
    hue='language', 
    palette='muted'
)
plt.title('Overall Model Performance Comparison (Test Accuracy)', fontsize=16)
plt.xlabel('Test Accuracy', fontsize=12)
plt.ylabel('Model Configuration', fontsize=12)
plt.legend(title='Language', bbox_to_anchor=(1.02, 1), loc='upper left')
plt.axvline(x=0.9, color='r', linestyle='--', label='90% Accuracy') # Add a reference line
plt.xlim(0.85, 0.95)
plot_path = os.path.join(plot_dir, '5_full_model_comparison.png')
plt.savefig(plot_path, bbox_inches='tight')
plot_files.append(plot_path)
plt.close()

print("\n--- All Plots Generated ---")

Loading all_experiment_results.json...
Data loaded and flattened successfully.
Plots will be saved to 'seaborn_plots/'
Generating Plot 1: Vocabulary Size by Language...
Generating Plot 2: Directionality Comparison...



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=df_stats, x='language', y='vocab_size', palette='viridis')


Generating Plot 3: Accuracy vs. F1-Macro...
Generating Plot 4: Complexity Heatmap (English BiLSTMs)...
Generating Plot 5: Full Model Comparison...

--- All Plots Generated ---


In [9]:
print("Generating Plot 6: Staked Comparison...")
g = sns.catplot(
    data=df, 
    x='language', 
    y='test_accuracy', 
    hue='Stacked', 
    kind='bar', 
    palette='colorblind',
    aspect=1.5,
    legend_out=False
)
g.fig.suptitle('Stacked vs. Single LSTM (Test Accuracy)', fontsize=16, y=1.03)
g.set_axis_labels('Language', 'Test Accuracy', fontsize=12)
plt.ylim(0.85, 0.95) # Emphasize the difference
g.ax.legend(title='Stacked', loc='upper right')
plot_path = os.path.join(plot_dir, '6_stacked_comparison.png')
plt.savefig(plot_path, bbox_inches='tight')
plot_files.append(plot_path)
plt.close()

Generating Plot 6: Staked Comparison...


## 5. Qualitative Error Analysis

Here, we load our best-performing model (e.g., `BiLSTM_Embed_128` for English) and its corresponding preprocessor to run inference on new, custom sentences. This allows us to find specific examples of 'good', 'bad', and 'ugly' predictions for the report.

In [5]:
# --- 1. Load the Model and Preprocessor ---

# Define which saved model we want to test
TARGET_CONFIG = "BiLSTM_Embed_128_LSTM_128"
TARGET_LANGUAGE = "english"

model_name = f"{TARGET_CONFIG}_{TARGET_LANGUAGE}"
model_path = os.path.join(base_training_config.model_dir, f"{model_name}.keras")
preprocessor_path = os.path.join(base_training_config.model_dir, f"{model_name}_preprocessor.pkl")

print(f"Loading model from: {model_path}")

try:
    loaded_model = keras.models.load_model(model_path)
    
    print(f"Loading preprocessor from: {preprocessor_path}")
    with open(preprocessor_path, 'rb') as f:
        loaded_preprocessor = pickle.load(f)
        
    # 2. Create Predictor Instance
    predictor = Predictor(loaded_model, loaded_preprocessor)

    print("\n--- Predictor Ready. Running Qualitative Tests. ---\n")
    
    # --- 3. Run Test Cases ---
    
    test_sentences = {
        "The 'Good' (Simple Case)": [
            "Today it is cloudy",
            "The quick brown fox jumps over the lazy dog ."
        ],
        "The 'Bad' (Ambiguity)": [
            "The leaves are falling .", # leaves = NOUN
            "He leaves tomorrow .", # leaves = VERB
            "I bought an apple .", # apple = NOUN
            "I work at Apple ." # Apple = PROPN
        ],
        "The 'Ugly' (OOV & Typos)": [
            "I googled this supercalifragilisticexpialidocious wrd .",
            "This sentance has twoo mispellings ."
        ]
    }

    for category, sentences in test_sentences.items():
        print(f"\n--- {category} ---")
        for sentence in sentences:
            predicted_tags = predictor.predict_sentence(sentence)
            print(f"  Sentence: {sentence}")
            print(f"  Tags:     {' '.join(predicted_tags)}")
            # Optional: Print word-tag pairs
            # print(f"  Result:   {list(zip(sentence.split(), predicted_tags))}")

except FileNotFoundError:
    print(f"Error: Model file not found at {model_path}")
    print("Please ensure the main experiment loop (Section 3) has been run successfully.")
except Exception as e:
    print(f"An error occurred: {e}")


Loading model from: saved_models_experiment/BiLSTM_Embed_128_LSTM_128_english.keras
An error occurred: Could not locate class 'method'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': 'builtins', 'class_name': 'method', 'config': '_masked_accuracy', 'registered_name': 'method'}
