// AllenNLP Jsonnet config: CoNLL-2003 NER with a CRF tagger on top of a
// pretrained transformer embedder. Hyperparameters arrive through external
// variables, which makes the config reusable for Optuna hyperparameter tuning.

local transformer_dim = 512;  // max sequence length for the tokenizer/indexer
local transformer_emb_dim = 768;  // output dimension of the transformer embedder
local cuda_device = 0;

// Returns the value of an external variable, falling back to a default when it
// is set but empty. Note that std.extVar raises an error if the variable is
// not defined at all, so this only handles the empty-string case.
local env_or_default(env_name, default_value) =
  local env_value = std.extVar(env_name);
  if env_value == "" then default_value else env_value;

// Helper for boolean-valued external variables (currently unused).
local stringToBool(s) =
  if s == "true" then true
  else if s == "false" then false
  else error "invalid boolean: " + std.manifestJson(s);

local transformer_model = std.extVar("transformer_model");
local batch_size = std.parseInt(env_or_default("BATCH_SIZE", "4"));
local epochs = std.parseInt(env_or_default("EPOCHS", "10"));

//local encoder_type = std.extVar("encoder_type");
local encoder_type = "pt_encoder";

local lstm_encoder = {
  "type": "lstm",
  "bidirectional": true,
  "dropout": 0.5,
  "hidden_size": 200,
  "input_size": transformer_emb_dim,
  "num_layers": 2
};

local pt_encoder = {
  "type": "pass_through",
  "input_dim": transformer_emb_dim
};

local encoder = if encoder_type == "pt_encoder" then pt_encoder else lstm_encoder;

local dropout = std.parseJson(std.extVar("dropout"));
local weight_decay = std.parseJson(std.extVar("weight_decay"));
local learning_rate = std.parseJson(std.extVar("lr"));
local gradient_accumulation_steps = std.parseInt(env_or_default("gradient_accumulation_steps", "4"));
local wandb_name = std.extVar("WANDB_NAME");  // read but not referenced elsewhere in this config

// "optuna_pruner" is the trial-pruning callback registered by the
// allennlp-optuna plugin.
local tuning_callbacks = [
  { "type": "optuna_pruner" }
];
local callbacks = tuning_callbacks;

local tokenizer_kwargs = { "max_len": transformer_dim };

local token_indexer = {
  "type": "pretrained_transformer_mismatched",
  "max_length": transformer_dim,
  "model_name": transformer_model,
  "tokenizer_kwargs": tokenizer_kwargs
};

local conll_reader = {
  "type": "conll2003",
  "coding_scheme": "BIOUL",
  "tag_label": "ner",
  "token_indexers": {
    "tokens": token_indexer
  }
};

local seed = std.parseJson(std.extVar("seed"));
local dataset_reader = conll_reader;

local train_path = "/media/pedro/repositorios/entidades/dataset/conll2003/train.conll";
local dev_path = "/media/pedro/repositorios/entidades/dataset/conll2003/dev.conll";
local test_path = "/media/pedro/repositorios/entidades/dataset/conll2003/test.conll";
local evaluate_on_test = true;

local scheduler = std.extVar("scheduler");

local slanted_triangular_scheduler = {
  "type": "slanted_triangular",
  "cut_frac": 0.06
};

// Number of training instances in the CoNLL-2003 train split, used to derive
// the number of optimizer steps per epoch. With the defaults (batch_size 4,
// 4 gradient accumulation steps) this gives ceil(14041 / 4 / 4) = 878 steps
// per epoch, so a 10-epoch run with 10% warmup warms up for
// ceil(878 * 10 * 0.1) = 878 steps.
local instances = 14041;

// Linear warmup/decay with 10% of total training steps spent warming up.
local linear_with_warmup_scheduler_01 = {
  "type": "linear_with_warmup",
  "num_epochs": epochs,
  "num_steps_per_epoch": std.ceil(instances / batch_size / gradient_accumulation_steps),
  "warmup_steps": std.ceil(self.num_steps_per_epoch * epochs * 0.1)
};

// Same, with 3% warmup.
local linear_with_warmup_scheduler_003 = {
  "type": "linear_with_warmup",
  "num_epochs": epochs,
  "num_steps_per_epoch": std.ceil(instances / batch_size / gradient_accumulation_steps),
  "warmup_steps": std.ceil(self.num_steps_per_epoch * epochs * 0.03)
};

local learning_rate_scheduler =
  if scheduler == "slanted_triangular" then slanted_triangular_scheduler
  else if scheduler == "linear_with_warmup_01" then linear_with_warmup_scheduler_01
  else linear_with_warmup_scheduler_003;
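// How the external variables get their values: when this file is loaded by
// `allennlp train`, AllenNLP exposes the process environment to Jsonnet as
// external variables, so every std.extVar above reads a shell environment
// variable. Required: transformer_model, dropout, weight_decay, lr, seed,
// scheduler, WANDB_NAME. Optional, with defaults supplied through
// env_or_default: BATCH_SIZE ("4"), EPOCHS ("10"),
// gradient_accumulation_steps ("4").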
{
  "numpy_seed": seed,
  "pytorch_seed": seed,
  "random_seed": seed,
  "dataset_reader": dataset_reader,
  "train_data_path": train_path,
  "validation_data_path": dev_path,
  "test_data_path": test_path,
  "evaluate_on_test": evaluate_on_test,
  "model": {
    "type": "crf_tagger",
    "calculate_span_f1": true,
    "constrain_crf_decoding": true,
    "include_start_end_transitions": false,
    "label_encoding": "BIOUL",
    "dropout": dropout,
    "text_field_embedder": {
      "token_embedders": {
        "tokens": {
          "type": "pretrained_transformer_mismatched",
          "max_length": transformer_dim,
          "model_name": transformer_model,
          "tokenizer_kwargs": tokenizer_kwargs
        }
      }
    },
    "encoder": encoder,
    "regularizer": {
      "regexes": [
        [ "scalar_parameters", { "type": "l2", "alpha": 0.1 } ]
      ]
    }
  },
  "data_loader": {
    "batch_sampler": {
      "type": "bucket",
      "batch_size": batch_size,
      "sorting_keys": [ "tokens" ]
    }
  },
  "trainer": {
    "optimizer": {
      "type": "huggingface_adamw",
      "weight_decay": weight_decay,
      "lr": learning_rate,
      // Standard AdamW practice: exempt LayerNorm parameters and biases from
      // weight decay. The first regex matches transformer parameters whose
      // dotted path contains no segment starting with "LayerNorm" or "bias";
      // the second matches the LayerNorm/bias parameters and zeroes their decay.
      "parameter_groups": [
        [ ["^text_field_embedder(?:\\.(?!(LayerNorm|bias))[^.]+)+$"], { "weight_decay": weight_decay, "lr": learning_rate } ],
        [ ["^text_field_embedder\\.[\\S]+(LayerNorm[\\S]+|bias)$"], { "weight_decay": 0, "lr": learning_rate } ]
      ]
    },
    "callbacks": callbacks,
    "learning_rate_scheduler": learning_rate_scheduler,
    "num_gradient_accumulation_steps": gradient_accumulation_steps,
    "cuda_device": cuda_device,
    "num_epochs": epochs,
    "checkpointer": {
      "keep_most_recent_by_count": 1
    },
    "patience": 5,
    "validation_metric": "+f1-measure-overall"
  }
}
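// A sketch of how this config might be launched; the model name, variable
// values, config filename, and output directory below are hypothetical, and
// the "optuna_pruner" callback assumes the allennlp-optuna plugin is
// installed and discoverable (e.g. via a .allennlp_plugins file):
//
//   transformer_model=bert-base-cased dropout=0.5 weight_decay=0.01 lr=3e-5 \
//   seed=13 scheduler=linear_with_warmup_01 WANDB_NAME=conll-crf-baseline \
//   allennlp train ner_crf.jsonnet -s /tmp/conll_crf_run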