# LSTM Models with 3 different languages

### Create the configuration for the experiment

All the languages with use the same model configuration for comparing performances across them

In [1]:
%pip install conllu

Note: you may need to restart the kernel to use updated packages.


In [2]:
import keras

keras.utils.set_random_seed(50)



In [3]:
from data.preprocessor import DataPreprocessor, DataPreprocessorConfig
from trainer.trainer import TrainerConfig, Trainer
from models.base_model import ModelConfig

preprocessor_config = DataPreprocessorConfig(
    padding_type="post",
    truncation_type="post",
    remove_long_sentences=True,
    max_sequence_length=100,
)

training_config = TrainerConfig(
    epochs=20,
    batch_size=64,
    early_stopping_patience=3,
    learning_rate=1e-3,
    model_dir="saved_models",
    save_best_only=True,
)

model_config = ModelConfig(
    embedding_dim=80,
    lstm_units=128,
    bidirectional=False,
    dropout_rate=0.3,
    training_config=training_config,
)

## English

### Preprocesses the data

In [4]:
from utils import load_data

train_data_en, dev_data_en, test_data_en = load_data("english")

In [5]:
preprocessor_en = DataPreprocessor(preprocessor_config)

In [6]:
X_train_en, y_train_en = preprocessor.process_data_to_pad_sequences(
    train_data_en, is_train_dataset=True
)
X_dev_en, y_dev_en = preprocessor.process_data_to_pad_sequences(
    dev_data, is_train_dataset=False
)
X_test_en, y_test_en = preprocessor.process_data_to_pad_sequences(
    test_data, is_train_dataset=False
)

NameError: name 'preprocessor' is not defined

### Initilize the model with the config

In [None]:
from models.lstm_model import LSTMModel

lstm_en = LSTMModel(
    model_config,
    preprocessor_en.vocab_size,
    preprocessor_en.num_tags,
    preprocessor_config.max_sequence_length,
)
lstm_en.build_model()
lstm_en.compile_model()

print("Model summary:\n")
print(lstm_en.get_model().summary())

### Training the model

In [None]:
# Initialize trainer
trainer_en = Trainer(training_config, lstm_en, preprocessor)

In [None]:
# Train the model
print("Training model...\n")
trainer_en.train((X_train_en, y_train_en), (X_dev_en, y_dev_en))
print("Training completed.\n")

### Testing the model

In [None]:
from evaluator.evaluator import Evaluator

evaluator_en = Evaluator(lstm_en, preprocessor_en)

In [None]:
test_metrics_en = evaluator_en.evaluate(X_test_en, y_test_en, "Test")

### Inference on new sentences

In [None]:
from inference.predictor import Predictor

# Create predictor for inference
predictor+en = Predictor(lstm_en.get_model(), preprocessor)

In [None]:
example_sentence_en = "Today it is cloudy"
predicted_tags_en = predictor.predict_sentence(example_sentence)
print(f"\nExample prediction:")
print(f"Sentence: {example_sentence_en}")
print(f"Predicted tags: {' '.join(predicted_tags_en)}")

## Spanish

In [None]:
from utils import load_data

train_data_es, dev_data_es, test_data_es = load_data("spanish")

In [None]:
preprocessor_es = DataPreprocessor(preprocessor_config)

In [None]:
X_train_es, y_train_es = preprocessor_es.process_data_to_pad_sequences(
    train_data_es, is_train_dataset=True
)
X_dev_es, y_dev_es = preprocessor_es.process_data_to_pad_sequences(
    dev_data_es, is_train_dataset=False
)
X_test_es, y_test_es = preprocessor_es.process_data_to_pad_sequences(
    test_data_es, is_train_dataset=False
)

### Initilize the model with the config

In [None]:
from models.lstm_model import LSTMModel

es_lstm = LSTMModel(
    model_config,
    preprocessor_es.vocab_size,
    preprocessor_es.num_tags,
    preprocessor_config.max_sequence_length,
)
es_lstm.build_model()
es_lstm.compile_model()

print("Model summary:\n")
print(es_lstm.get_model().summary())

In [None]:
trainer = Trainer(training_config, es_lstm, preprocessor_es)

In [None]:
# Train the model
print("Training model...\n")
trainer.train((X_train_es, y_train_es), (X_dev_es, y_dev_es))
print("Training completed.\n")

In [None]:
from evaluator.evaluator import Evaluator

evaluator_es = Evaluator(es_lstm, preprocessor_es)

In [None]:
test_metrics_es = evaluator_es.evaluate(X_test_es, y_test_es, "Test")

### Inference on new sentences

In [None]:
from inference.predictor import Predictor

# Create predictor for inference
predictor_es = Predictor(es_lstm.get_model(), preprocessor_es)

In [None]:
example_sentence = "Hoy el tiempo es soleado ."
predicted_tags = predictor_es.predict_sentence(example_sentence)
print(f"\nExample prediction:")
print(f"Sentence: {example_sentence}")
print(f"Predicted tags: {' '.join(predicted_tags)}")

## German

### Preprocess the data

In [None]:
from utils import load_data

train_data_ge, dev_data_ge, test_data_ge = load_data("german")

In [None]:
preprocessor_ge = DataPreprocessor(preprocessor_config)

In [None]:
X_train_ge, y_train_ge = preprocessor_ge.process_data_to_pad_sequences(
    train_data_ge, is_train_dataset=True
)
X_dev_ge, y_dev_ge = preprocessor_ge.process_data_to_pad_sequences(
    dev_data_ge, is_train_dataset=False
)
X_test_ge, y_test_ge = preprocessor_ge.process_data_to_pad_sequences(
    test_data_ge, is_train_dataset=False
)

### Initilize the model with the config

In [None]:
from models.lstm_model import LSTMModel

ge_lstm = LSTMModel(
    model_config,
    preprocessor_ge.vocab_size,
    preprocessor_ge.num_tags,
    preprocessor_config.max_sequence_length,
)
ge_lstm.build_model()
ge_lstm.compile_model()

print("Model summary:\n")
print(ge_lstm.get_model().summary())

In [None]:
trainer_ge = Trainer(training_config, ge_lstm, preprocessor_ge)

In [None]:
# Train the model
print("Training model...\n")
trainer_ge.train((X_train_ge, y_train_ge), (X_dev_ge, y_dev_ge))
print("Training completed.\n")

In [None]:
from evaluator.evaluator import Evaluator

evaluator_ge = Evaluator(ge_lstm, preprocessor_ge)

In [None]:
test_metrics_ge = evaluator_ge.evaluate(X_test_ge, y_test_ge, "Test")

### Inference on new sentences

In [None]:
from inference.predictor import Predictor

# Create predictor for inference
predictor_ge = Predictor(ge_lstm.get_model(), preprocessor_ge)

In [None]:
example_sentence = "Heute ist es wolkig ."
predicted_tags = predictor_es.predict_sentence(example_sentence)
print(f"\nExample prediction:")
print(f"Sentence: {example_sentence}")
print(f"Predicted tags: {' '.join(predicted_tags)}")