In [1]:
%load_ext autoreload
%autoreload 2


In [2]:
import logging
import shutil
from omegaconf import OmegaConf
from hydra import initialize, compose
from hydra.core.config_store import ConfigStore
import mlflow
from src.experiments.sner import sner
from pathlib import Path

import os

# Configure the root logger once for the whole notebook: INFO and above,
# rendered as "<time> <LEVEL>:<message>".
# NOTE(review): "%I" is the 12-hour clock and no "%p" (AM/PM) is emitted, so
# timestamps are ambiguous across noon/midnight — confirm this is intended.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%I:%M:%S",
    format="%(asctime)s %(levelname)s:%(message)s",
)

# Named logger used by this notebook's own messages.
logger = logging.getLogger("training")

In [3]:
# Display the tracking URI the MLflow client is currently configured with, so
# the reader can see which server the runs below are looked up on.
mlflow.get_tracking_uri()

'https://mlflow-uvl.ifi.uni-heidelberg.de'

In [4]:
sentence_length = 106


def env_flag(name: str, default: bool) -> bool:
    """Read a boolean switch from the environment.

    Args:
        name: Name of the environment variable.
        default: Value returned when the variable is not set.

    Returns:
        True when the variable is set to "1", "true", "yes" or "on"
        (case-insensitive, surrounding whitespace ignored); False for any
        other value; ``default`` when the variable is unset.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in {"1", "true", "yes", "on"}


# Whether the training functions are actually invoked by the cells below.
run_experiments = env_flag("UVL_BERT_RUN_EXPERIMENTS", default=True)

# Forwarded to get_run_id(..., pin_commit=...) when looking up MLflow runs.
# The previous expression compared the value against the literal "FALSE", so
# the flag was False by default and became True only for UVL_BERT_PIN_COMMITS=FALSE.
# The effective default (False) is kept; the variable now enables pinning
# when set to a truthy value.
pin_commits = env_flag("UVL_BERT_PIN_COMMITS", default=False)

print(f"{run_experiments=}")
print(f"{pin_commits=}")

run_experiments=True
pin_commits=False


In [5]:
from tooling.config import Experiment, Transformation

# Experiment settings shared by every model cell below (each cell deep-copies
# this object and renames it).
# Use iterations=1 for split training/testing on datasets and set `dataset` to
# the desired dataset split; see tooling.py for the available split options.
base_experiment_config = Experiment(
    name="Base Config 1",
    dataset="all",
    iterations=1,
    lower_case=True,
    force=False,
)

# Both label schemes map the system-internal concepts to "System_Level".
_system_level_fields = dict(
    software="System_Level",
    internal_action="System_Level",
    internal_data="System_Level",
    system_level="System_Level",
)

# Reduced scheme: every concept is mapped to one of the three level labels.
levels_transformation_config = Transformation(
    description="Levels",
    type="Reduced",
    # Domain-level concepts
    task="Domain_Level",
    domain_data="Domain_Level",
    activity="Domain_Level",
    stakeholder="Domain_Level",
    interaction_data="Domain_Level",
    # Interaction-level concepts
    system_function="Interaction_Level",
    interaction="Interaction_Level",
    workspace="Interaction_Level",
    **_system_level_fields,
)

# Full scheme: domain and interaction concepts keep their own label.
label_transformation_config = Transformation(
    description="None",
    type="Full",
    task="Task",
    domain_data="Domain_Data",
    activity="Activity",
    stakeholder="Stakeholder",
    system_function="System_Function",
    interaction="Interaction",
    interaction_data="Interaction_Data",
    workspace="Workspace",
    **_system_level_fields,
)

In [6]:
from tooling.transformation import get_hint_transformation
import pickle

# Directory where the label maps are written.
models_dir = Path("./src/service/models/")
models_dir.mkdir(parents=True, exist_ok=True)


def dump_pickle(obj, filename: str) -> None:
    """Pickle ``obj`` to ``models_dir / filename``.

    The file is opened in a ``with`` block so the handle is flushed and closed
    as soon as the dump finishes (the previous code passed a bare ``open(...)``
    to ``pickle.dump`` and never closed it).

    Args:
        obj: Any picklable object.
        filename: File name inside ``models_dir``.
    """
    with open(models_dir / filename, "wb") as handle:
        pickle.dump(obj, handle)


# Label <-> id maps for the reduced "Levels" transformation.
hint_transformation = get_hint_transformation(
    transformation_cfg=OmegaConf.structured(levels_transformation_config)
)
hint_label2id = hint_transformation["label2id"]
hint_id2label = {idx: label for label, idx in hint_label2id.items()}
dump_pickle(hint_label2id, "hint_label2id.pickle")
dump_pickle(hint_id2label, "hint_id2label.pickle")

# Label <-> id maps for the full label transformation.
transformation = get_hint_transformation(
    transformation_cfg=OmegaConf.structured(label_transformation_config)
)
label2id = transformation["label2id"]
id2label = {idx: label for label, idx in label2id.items()}
dump_pickle(label2id, "label2id.pickle")
dump_pickle(id2label, "id2label.pickle")

06:09:21 INFO:Hint Label2Id: hint_label2id={'0': 0, 'System_Level': 1, 'Domain_Level': 2, 'Interaction_Level': 3}
06:09:21 INFO:Hint Label2Id: hint_label2id={'0': 0, 'Task': 1, 'Domain_Data': 2, 'Activity': 3, 'Stakeholder': 4, 'System_Function': 5, 'Interaction': 6, 'Interaction_Data': 7, 'Workspace': 8, 'System_Level': 9}


## Train BiLSTM First Stage Model


In [9]:
# DISABLED CELL: the whole body is a single triple-quoted string literal, so
# Python only evaluates a string here and none of the statements inside run.
# The wrapped code builds the first-stage BiLSTM config on the "Levels"
# transformation, calls bilstm() when run_experiments is set, then downloads
# the run's `0_model` artifact and renames it to ./src/service/models/bilstm.
# It is also the only place where `BiLSTM` is imported and `bilstm_cfg` is
# assigned. To re-enable, delete the opening and closing triple-quote lines.
'''
from tooling.observability import get_run_id
from tooling.config import BiLSTMConfig, BiLSTM

from copy import deepcopy

bilstm_experiment_config = deepcopy(base_experiment_config)
bilstm_experiment_config.name = "Production"

bilstm_config = BiLSTM(
    batch_size=16,
    learning_rate=0.006,
    number_epochs=5,
    weighted_classes=False,
    sentence_length=106,
)

bilstm_cfg: BiLSTMConfig = OmegaConf.structured(
    BiLSTMConfig(
        bilstm=bilstm_config,
        experiment=bilstm_experiment_config,
        transformation=levels_transformation_config,
    )
)

# bilstm_cfg.experiment.force = True

if run_experiments:
    from experiments.bilstm import bilstm

    bilstm(bilstm_cfg)


bilstm_run_id = get_run_id(bilstm_cfg, pin_commit=pin_commits)

print(bilstm_run_id)
print(mlflow.get_artifact_uri())

bilstm_run = mlflow.get_run(bilstm_run_id)
mlflow.artifacts.download_artifacts(
    f"{bilstm_run.info.artifact_uri}/0_model",
    dst_path=Path("./src/service/models/"),
)
try:
    shutil.rmtree(Path("./src/service/models/bilstm"))
except FileNotFoundError:
    pass
Path("./src/service/models/0_model").rename("./src/service/models/bilstm")
'''

SyntaxError: unterminated triple-quoted string literal (detected at line 48) (87726336.py, line 1)

## Train SNER First Stage Model


In [13]:
# DISABLED CELL: the whole body is a single triple-quoted string literal, so
# Python only evaluates a string here and none of the statements inside run.
# The wrapped code builds the first-stage SNER config (SNER() defaults) on the
# "Levels" transformation, calls sner() when run_experiments is set, then
# downloads the run's `0_model.ser.gz` artifact and renames it to
# ./src/service/models/sner.ser.gz. It is the only place where `sner_cfg` is
# assigned.
# NOTE(review): the string imports `sner` from `experiments.sner`, whereas the
# notebook's import cell uses `src.experiments.sner` — confirm which package
# root is correct before re-enabling.
'''
from tooling.observability import (get_run_id)
from tooling.config import SNERConfig, SNER

from copy import deepcopy

sner_experiment_config = deepcopy(base_experiment_config)
sner_experiment_config.name = "Production"

sner_config = SNER()

sner_cfg = OmegaConf.structured(
    SNERConfig(
        sner=sner_config,
        experiment=sner_experiment_config,
        transformation=levels_transformation_config,
    )
)

if run_experiments:
    from experiments.sner import sner

    sner(OmegaConf.create(sner_cfg))

sner_run_id = get_run_id(sner_cfg, pin_commit=pin_commits)
print(mlflow.get_artifact_uri())
print(sner_run_id)

sner_run = mlflow.get_run(sner_run_id)
mlflow.artifacts.download_artifacts(
    f"{sner_run.info.artifact_uri}/0_model.ser.gz",
    dst_path=Path("./src/service/models/"),
)
try:
    Path("./src/service/models/sner.ser.gz").unlink()
except FileNotFoundError:
    pass
Path("./src/service/models/0_model.ser.gz").rename(
    "./src/service/models/sner.ser.gz"
)
'''

SyntaxError: unterminated triple-quoted string literal (detected at line 40) (2848408053.py, line 1)

## Train BERT First Stage Model


In [14]:
# DISABLED CELL: the whole body is a single triple-quoted string literal, so
# Python only evaluates a string here and none of the statements inside run.
# The wrapped code builds the first-stage BERT config on the "Levels"
# transformation, calls bert() when run_experiments is set, then downloads the
# run's `0_model` artifact and renames it to ./src/service/models/bert_1.
# It is the only place where `bert_1_cfg` is assigned.
# NOTE(review): the last line inside the string is a "## Train BERT Second
# Stage Model for BERT First Stage" heading; it looks like a section title
# that was swallowed by the string rather than code — confirm and move it out
# when re-enabling.
'''
from tooling.observability import get_run_id
from tooling.config import BERTConfig, BERT

from copy import deepcopy

bert_1_experiment_config = deepcopy(base_experiment_config)
bert_1_experiment_config.name = "Production"
bert_1_experiment_config.force = False

bert_1_config = BERT(
    max_len=123,
    number_epochs=11,
    train_batch_size=8,
    weight_decay=0.01,
    weighted_classes=True,
    learning_rate_bert=3e-05,
    learning_rate_classifier=0.0005,
    validation_batch_size=64,
)


bert_1_cfg = OmegaConf.structured(
    BERTConfig(
        bert=bert_1_config,
        experiment=bert_1_experiment_config,
        transformation=levels_transformation_config,
    )
)

if run_experiments:
    from experiments.bert import bert

    bert(OmegaConf.create(bert_1_cfg))

bert_1_run_id = get_run_id(bert_1_cfg, pin_commit=pin_commits)

print(bert_1_run_id)

bert_1_run = mlflow.get_run(bert_1_run_id)
mlflow.artifacts.download_artifacts(
    f"{bert_1_run.info.artifact_uri}/0_model",
    dst_path=Path("./src/service/models/"),
)
try:
    shutil.rmtree(Path("./src/service/models/bert_1"))
except FileNotFoundError:
    pass
Path("./src/service/models/0_model").rename("./src/service/models/bert_1")
## Train BERT Second Stage Model for BERT First Stage
'''

02:26:05 INFO:
bert:
  model: bert-large-uncased
  type: BERT
  max_len: 123
  train_batch_size: 8
  validation_batch_size: 64
  number_epochs: 11
  learning_rate_bert: 3.0e-05
  learning_rate_classifier: 0.0005
  weight_decay: 0.01
  weighted_classes: true
experiment:
  name: Production
  description: ''
  random_state: 125
  folds: 5
  iterations: 1
  average: macro
  dataset: all
  lower_case: true
  force: false
  pin_commit: false
  smote: false
  smote_k_neighbors: 5
  smote_sampling_strategy: not majority
  smote_balance_to_average: false
transformation:
  description: Levels
  type: Reduced
  task: Domain_Level
  goals: null
  domain_data: Domain_Level
  activity: Domain_Level
  stakeholder: Domain_Level
  system_function: Interaction_Level
  interaction: Interaction_Level
  interaction_data: Domain_Level
  workspace: Interaction_Level
  software: System_Level
  internal_action: System_Level
  internal_data: System_Level
  system_level: System_Level

02:26:05 INFO:New experimen

Map:   0%|          | 0/2488 [00:00<?, ? examples/s]

Map:   0%|          | 0/622 [00:00<?, ? examples/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(
02:27:27 INFO:Logged iteration result res.precision=0.6903478382818348 res.recall=0.8199129858857586


{'eval_loss': 0.5139986276626587, 'eval_step': 0, 'eval_precision': 0.6903478382818348, 'eval_recall': 0.8199129858857586, 'eval_f1': 0.749572720589389, 'eval_label_count': 4, 'eval_runtime': 5.1846, 'eval_samples_per_second': 119.97, 'eval_steps_per_second': 1.929, 'epoch': 1.0}
{'loss': 0.5443, 'grad_norm': 8.23720645904541, 'learning_rate': 2.561531715872552e-05, 'epoch': 1.607717041800643}


02:29:11 INFO:Logged iteration result res.precision=0.6877753591285063 res.recall=0.8166917752797052


{'eval_loss': 0.5168285369873047, 'eval_step': 2, 'eval_precision': 0.6877753591285063, 'eval_recall': 0.8166917752797052, 'eval_f1': 0.7467102021623674, 'eval_label_count': 4, 'eval_runtime': 5.4536, 'eval_samples_per_second': 114.052, 'eval_steps_per_second': 1.834, 'epoch': 2.0}


02:30:52 INFO:Logged iteration result res.precision=0.7362654921933249 res.recall=0.8218482052384194


{'eval_loss': 0.6212430000305176, 'eval_step': 4, 'eval_precision': 0.7362654921933249, 'eval_recall': 0.8218482052384194, 'eval_f1': 0.7767064423288953, 'eval_label_count': 4, 'eval_runtime': 4.7963, 'eval_samples_per_second': 129.682, 'eval_steps_per_second': 2.085, 'epoch': 3.0}
{'loss': 0.2434, 'grad_norm': 8.56639575958252, 'learning_rate': 2.1230634317451038e-05, 'epoch': 3.215434083601286}


02:32:30 INFO:Logged iteration result res.precision=0.7427709508452707 res.recall=0.8222790813185519


{'eval_loss': 0.7375434041023254, 'eval_step': 6, 'eval_precision': 0.7427709508452707, 'eval_recall': 0.8222790813185519, 'eval_f1': 0.7805054184072554, 'eval_label_count': 4, 'eval_runtime': 4.8435, 'eval_samples_per_second': 128.42, 'eval_steps_per_second': 2.065, 'epoch': 4.0}
{'loss': 0.096, 'grad_norm': 2.142723321914673, 'learning_rate': 1.6845951476176556e-05, 'epoch': 4.823151125401929}


02:34:14 INFO:Logged iteration result res.precision=0.7838924154342816 res.recall=0.8161155553895505


{'eval_loss': 1.024376630783081, 'eval_step': 8, 'eval_precision': 0.7838924154342816, 'eval_recall': 0.8161155553895505, 'eval_f1': 0.7996795086694526, 'eval_label_count': 4, 'eval_runtime': 5.0918, 'eval_samples_per_second': 122.156, 'eval_steps_per_second': 1.964, 'epoch': 5.0}


02:35:55 INFO:Logged iteration result res.precision=0.7964285161725284 res.recall=0.806883709845793


{'eval_loss': 1.2160838842391968, 'eval_step': 10, 'eval_precision': 0.7964285161725284, 'eval_recall': 0.806883709845793, 'eval_f1': 0.8016220238676411, 'eval_label_count': 4, 'eval_runtime': 4.7572, 'eval_samples_per_second': 130.748, 'eval_steps_per_second': 2.102, 'epoch': 6.0}
{'loss': 0.0386, 'grad_norm': 0.5252747535705566, 'learning_rate': 1.2461268634902075e-05, 'epoch': 6.430868167202572}


02:37:39 INFO:Logged iteration result res.precision=0.79783988502216 res.recall=0.8124556297509433


{'eval_loss': 1.4016085863113403, 'eval_step': 12, 'eval_precision': 0.79783988502216, 'eval_recall': 0.8124556297509433, 'eval_f1': 0.8050814279482538, 'eval_label_count': 4, 'eval_runtime': 4.6271, 'eval_samples_per_second': 134.425, 'eval_steps_per_second': 2.161, 'epoch': 7.0}


02:39:20 INFO:Logged iteration result res.precision=0.808643779778418 res.recall=0.8112349876206547


{'eval_loss': 1.5024405717849731, 'eval_step': 14, 'eval_precision': 0.808643779778418, 'eval_recall': 0.8112349876206547, 'eval_f1': 0.8099373112117005, 'eval_label_count': 4, 'eval_runtime': 4.7047, 'eval_samples_per_second': 132.208, 'eval_steps_per_second': 2.126, 'epoch': 8.0}
{'loss': 0.0198, 'grad_norm': 1.4681686162948608, 'learning_rate': 8.076585793627593e-06, 'epoch': 8.038585209003216}


02:40:59 INFO:Logged iteration result res.precision=0.7989473567090808 res.recall=0.8135639564070826


{'eval_loss': 1.5148842334747314, 'eval_step': 16, 'eval_precision': 0.7989473567090808, 'eval_recall': 0.8135639564070826, 'eval_f1': 0.8061894105153427, 'eval_label_count': 4, 'eval_runtime': 4.8719, 'eval_samples_per_second': 127.671, 'eval_steps_per_second': 2.053, 'epoch': 9.0}
{'loss': 0.0102, 'grad_norm': 0.036630794405937195, 'learning_rate': 3.691902952353113e-06, 'epoch': 9.646302250803858}


02:42:37 INFO:Logged iteration result res.precision=0.8093775536293907 res.recall=0.8117250397806778


{'eval_loss': 1.660742998123169, 'eval_step': 18, 'eval_precision': 0.8093775536293907, 'eval_recall': 0.8117250397806778, 'eval_f1': 0.810549597031228, 'eval_label_count': 4, 'eval_runtime': 4.5929, 'eval_samples_per_second': 135.426, 'eval_steps_per_second': 2.177, 'epoch': 10.0}


02:44:23 INFO:Logged iteration result res.precision=0.8050577289903632 res.recall=0.8133762651366054


{'eval_loss': 1.6525459289550781, 'eval_step': 20, 'eval_precision': 0.8050577289903632, 'eval_recall': 0.8133762651366054, 'eval_f1': 0.8091956189770539, 'eval_label_count': 4, 'eval_runtime': 4.5014, 'eval_samples_per_second': 138.18, 'eval_steps_per_second': 2.222, 'epoch': 11.0}
{'train_runtime': 1118.9819, 'train_samples_per_second': 24.458, 'train_steps_per_second': 3.057, 'train_loss': 0.1395734454832102, 'epoch': 11.0}


02:45:22 INFO:Logged iteration result res.precision=0.8050577289903632 res.recall=0.8133762651366054
02:45:23 INFO:Logged iteration result res.precision=0.8050577289903632 res.recall=0.8133762651366054
02:45:23 INFO:Finished iteration=0
02:45:23 INFO:Logging model artifact (might take a while)
02:49:11 ERROR:API request to https://mlflow-uvl.ifi.uni-heidelberg.de/api/2.0/mlflow-artifacts/artifacts/6/2fd7eb8bb98240a2bc79aaa6986e4487/artifacts/0_model/model.safetensors failed with exception HTTPSConnectionPool(host='mlflow-uvl.ifi.uni-heidelberg.de', port=443): Max retries exceeded with url: /api/2.0/mlflow-artifacts/artifacts/6/2fd7eb8bb98240a2bc79aaa6986e4487/artifacts/0_model/model.safetensors (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2384)')))


MlflowException: API request to https://mlflow-uvl.ifi.uni-heidelberg.de/api/2.0/mlflow-artifacts/artifacts/6/2fd7eb8bb98240a2bc79aaa6986e4487/artifacts/0_model/model.safetensors failed with exception HTTPSConnectionPool(host='mlflow-uvl.ifi.uni-heidelberg.de', port=443): Max retries exceeded with url: /api/2.0/mlflow-artifacts/artifacts/6/2fd7eb8bb98240a2bc79aaa6986e4487/artifacts/0_model/model.safetensors (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2384)')))

In [15]:
# DISABLED CELL: the whole body is a single triple-quoted string literal, so
# Python only evaluates a string here and none of the statements inside run.
# The wrapped code builds a second-stage StagedBERT config on the full label
# transformation with `first_model_bert=bert_1_cfg`, calls dual_stage_bert()
# when run_experiments is set, then downloads the run's `0_model` artifact and
# renames it to ./src/service/models/bert_2_bert.
# Re-enabling requirement: `bert_1_cfg` is only assigned inside the (also
# disabled) first-stage BERT cell, so that cell must be re-enabled and run first.
'''
from tooling.observability import get_run_id
from tooling.config import DualModelStagedBERTConfig, StagedBERT

from copy import deepcopy

bert_2_bert_experiment_config = deepcopy(base_experiment_config)
bert_2_bert_experiment_config.name = "Production"

bert_2_bert_config = StagedBERT(
    max_len=123,
    layers=[],
    learning_rate_bert=3e-05,
    learning_rate_classifier=0.16,
    number_epochs=8,
    train_batch_size=8,
    weight_decay=0.1,
    weighted_classes=True,
    validation_batch_size=64,
)

cfg = DualModelStagedBERTConfig(
    bert=bert_2_bert_config,
    experiment=bert_2_bert_experiment_config,
    transformation=label_transformation_config,
    first_model_bert=bert_1_cfg,
)

bert_2_bert_cfg: DualModelStagedBERTConfig = OmegaConf.structured(cfg)

if run_experiments:
    from experiments.dual_model_staged_bert import dual_stage_bert

    dual_stage_bert(OmegaConf.create(bert_2_bert_cfg))

bert_2_bert_run_id = get_run_id(bert_2_bert_cfg, pin_commit=pin_commits)

print(bert_2_bert_run_id)

bert_2_bert_run = mlflow.get_run(bert_2_bert_run_id)
mlflow.artifacts.download_artifacts(
    f"{bert_2_bert_run.info.artifact_uri}/0_model",
    dst_path=Path("./src/service/models/"),
)
try:
    shutil.rmtree(Path("./src/service/models/bert_2_bert"))
except FileNotFoundError:
    pass
Path("./src/service/models/0_model").rename("./src/service/models/bert_2_bert")
'''

02:49:22 INFO:
first_model_bert:
  bert:
    model: bert-large-uncased
    type: BERT
    max_len: 123
    train_batch_size: 8
    validation_batch_size: 64
    number_epochs: 11
    learning_rate_bert: 3.0e-05
    learning_rate_classifier: 0.0005
    weight_decay: 0.01
    weighted_classes: true
  experiment:
    name: Production
    description: ''
    random_state: 125
    folds: 5
    iterations: 1
    average: macro
    dataset: all
    lower_case: true
    force: false
    pin_commit: false
    smote: false
    smote_k_neighbors: 5
    smote_sampling_strategy: not majority
    smote_balance_to_average: false
  transformation:
    description: Levels
    type: Reduced
    task: Domain_Level
    goals: null
    domain_data: Domain_Level
    activity: Domain_Level
    stakeholder: Domain_Level
    system_function: Interaction_Level
    interaction: Interaction_Level
    interaction_data: Domain_Level
    workspace: Interaction_Level
    software: System_Level
    internal_action: Sy

Map:   0%|          | 0/2488 [00:00<?, ? examples/s]

Map:   0%|          | 0/622 [00:00<?, ? examples/s]

02:51:52 INFO:Logged iteration result res.precision=0.6819089064097362 res.recall=0.8139693008422187


{'eval_loss': 0.5252214670181274, 'eval_step': 0, 'eval_precision': 0.6819089064097362, 'eval_recall': 0.8139693008422187, 'eval_f1': 0.7421097694953261, 'eval_label_count': 4, 'eval_runtime': 30.8961, 'eval_samples_per_second': 20.132, 'eval_steps_per_second': 0.324, 'epoch': 1.0}
{'loss': 0.5513, 'grad_norm': 6.163368225097656, 'learning_rate': 2.561531715872552e-05, 'epoch': 1.607717041800643}


02:55:48 INFO:Logged iteration result res.precision=0.6819854791943949 res.recall=0.816880522414842


{'eval_loss': 0.5075342059135437, 'eval_step': 2, 'eval_precision': 0.6819854791943949, 'eval_recall': 0.816880522414842, 'eval_f1': 0.7433628542184961, 'eval_label_count': 4, 'eval_runtime': 12.7502, 'eval_samples_per_second': 48.783, 'eval_steps_per_second': 0.784, 'epoch': 2.0}


02:59:01 INFO:Logged iteration result res.precision=0.7345653655128026 res.recall=0.8148828902452926


{'eval_loss': 0.6649463176727295, 'eval_step': 4, 'eval_precision': 0.7345653655128026, 'eval_recall': 0.8148828902452926, 'eval_f1': 0.7726424498510202, 'eval_label_count': 4, 'eval_runtime': 10.0529, 'eval_samples_per_second': 61.872, 'eval_steps_per_second': 0.995, 'epoch': 3.0}
{'loss': 0.246, 'grad_norm': 8.337251663208008, 'learning_rate': 2.1230634317451038e-05, 'epoch': 3.215434083601286}


03:01:50 INFO:Logged iteration result res.precision=0.7438307465065683 res.recall=0.8224610526881617


{'eval_loss': 0.7602126598358154, 'eval_step': 6, 'eval_precision': 0.7438307465065683, 'eval_recall': 0.8224610526881617, 'eval_f1': 0.7811722172179418, 'eval_label_count': 4, 'eval_runtime': 9.3547, 'eval_samples_per_second': 66.491, 'eval_steps_per_second': 1.069, 'epoch': 4.0}
{'loss': 0.1023, 'grad_norm': 1.979170322418213, 'learning_rate': 1.6845951476176556e-05, 'epoch': 4.823151125401929}


03:04:28 INFO:Logged iteration result res.precision=0.80370847713164 res.recall=0.8025923754966897


{'eval_loss': 1.19338858127594, 'eval_step': 8, 'eval_precision': 0.80370847713164, 'eval_recall': 0.8025923754966897, 'eval_f1': 0.8031500385652393, 'eval_label_count': 4, 'eval_runtime': 12.6699, 'eval_samples_per_second': 49.093, 'eval_steps_per_second': 0.789, 'epoch': 5.0}


03:07:23 INFO:Logged iteration result res.precision=0.8072358208343874 res.recall=0.8005684124172876


{'eval_loss': 1.2415218353271484, 'eval_step': 10, 'eval_precision': 0.8072358208343874, 'eval_recall': 0.8005684124172876, 'eval_f1': 0.8038882920774002, 'eval_label_count': 4, 'eval_runtime': 10.6913, 'eval_samples_per_second': 58.178, 'eval_steps_per_second': 0.935, 'epoch': 6.0}
{'loss': 0.0451, 'grad_norm': 0.20919625461101532, 'learning_rate': 1.2461268634902075e-05, 'epoch': 6.430868167202572}


03:11:40 INFO:Logged iteration result res.precision=0.8020785151963441 res.recall=0.8042910763600086


{'eval_loss': 1.3368412256240845, 'eval_step': 12, 'eval_precision': 0.8020785151963441, 'eval_recall': 0.8042910763600086, 'eval_f1': 0.8031832720233293, 'eval_label_count': 4, 'eval_runtime': 10.323, 'eval_samples_per_second': 60.254, 'eval_steps_per_second': 0.969, 'epoch': 7.0}


03:16:15 INFO:Logged iteration result res.precision=0.7978905381131066 res.recall=0.801171775113673


{'eval_loss': 1.4603686332702637, 'eval_step': 14, 'eval_precision': 0.7978905381131066, 'eval_recall': 0.801171775113673, 'eval_f1': 0.7995277901041035, 'eval_label_count': 4, 'eval_runtime': 10.6033, 'eval_samples_per_second': 58.661, 'eval_steps_per_second': 0.943, 'epoch': 8.0}
{'loss': 0.0249, 'grad_norm': 3.590575933456421, 'learning_rate': 8.076585793627593e-06, 'epoch': 8.038585209003216}


03:20:37 INFO:Logged iteration result res.precision=0.8057829846779383 res.recall=0.7988204940197272


{'eval_loss': 1.5697133541107178, 'eval_step': 16, 'eval_precision': 0.8057829846779383, 'eval_recall': 0.7988204940197272, 'eval_f1': 0.8022866339733274, 'eval_label_count': 4, 'eval_runtime': 8.5961, 'eval_samples_per_second': 72.358, 'eval_steps_per_second': 1.163, 'epoch': 9.0}
{'loss': 0.0118, 'grad_norm': 0.05620058253407478, 'learning_rate': 3.691902952353113e-06, 'epoch': 9.646302250803858}


03:23:56 INFO:Logged iteration result res.precision=0.8018761819997918 res.recall=0.8040091055955558


{'eval_loss': 1.6771281957626343, 'eval_step': 18, 'eval_precision': 0.8018761819997918, 'eval_recall': 0.8040091055955558, 'eval_f1': 0.8029412273319085, 'eval_label_count': 4, 'eval_runtime': 10.7302, 'eval_samples_per_second': 57.967, 'eval_steps_per_second': 0.932, 'epoch': 10.0}


03:26:36 INFO:Logged iteration result res.precision=0.8044535315969952 res.recall=0.8009834272072174


{'eval_loss': 1.7390738725662231, 'eval_step': 20, 'eval_precision': 0.8044535315969952, 'eval_recall': 0.8009834272072174, 'eval_f1': 0.8027147291382264, 'eval_label_count': 4, 'eval_runtime': 29.1447, 'eval_samples_per_second': 21.342, 'eval_steps_per_second': 0.343, 'epoch': 11.0}
{'train_runtime': 2244.684, 'train_samples_per_second': 12.192, 'train_steps_per_second': 1.524, 'train_loss': 0.14421657963964332, 'epoch': 11.0}


03:28:12 INFO:Logged iteration result res.precision=0.8044535315969952 res.recall=0.8009834272072174
03:28:14 INFO:Logged iteration result res.precision=0.8044535315969952 res.recall=0.8009834272072174
03:28:14 INFO:Finished iteration=0
03:28:14 INFO:Logging model artifact (might take a while)
03:32:23 ERROR:API request to https://mlflow-uvl.ifi.uni-heidelberg.de/api/2.0/mlflow-artifacts/artifacts/6/ea07e5979b2142128114559b5c743eb4/artifacts/0_model/model.safetensors failed with exception HTTPSConnectionPool(host='mlflow-uvl.ifi.uni-heidelberg.de', port=443): Max retries exceeded with url: /api/2.0/mlflow-artifacts/artifacts/6/ea07e5979b2142128114559b5c743eb4/artifacts/0_model/model.safetensors (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2384)')))
03:32:25 ERROR:API request to https://mlflow-uvl.ifi.uni-heidelberg.de/api/2.0/mlflow-artifacts/artifacts/6/ea07e5979b2142128114559b5c743eb4/artifacts/0_model/model.safetensors failed with exception HTTPS

MlflowException: API request to https://mlflow-uvl.ifi.uni-heidelberg.de/api/2.0/mlflow-artifacts/artifacts/6/ea07e5979b2142128114559b5c743eb4/artifacts/0_model/model.safetensors failed with exception HTTPSConnectionPool(host='mlflow-uvl.ifi.uni-heidelberg.de', port=443): Max retries exceeded with url: /api/2.0/mlflow-artifacts/artifacts/6/ea07e5979b2142128114559b5c743eb4/artifacts/0_model/model.safetensors (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2384)')))

## Train BERT Second Stage Model for SNER First Stage


In [None]:
# DISABLED CELL: the whole body is a single triple-quoted string literal, so
# Python only evaluates a string here and none of the statements inside run
# (including the TOKENIZERS_PARALLELISM assignment).
# The wrapped code builds a second-stage StagedBERT config on the full label
# transformation with `first_model_sner=sner_cfg`, calls dual_stage_bert()
# when run_experiments is set, then downloads the run's `0_model` artifact and
# renames it to ./src/service/models/bert_2_sner.
# Re-enabling requirement: `sner_cfg` is only assigned inside the (also
# disabled) first-stage SNER cell, so that cell must be re-enabled and run first.
'''
from tooling.observability import get_run_id
from tooling.config import DualModelStagedBERTConfig, StagedBERT

from copy import deepcopy

import os

os.environ["TOKENIZERS_PARALLELISM"] = "true"

bert_2_sner_experiment_config = deepcopy(base_experiment_config)
bert_2_sner_experiment_config.name = "Production"

bert_2_sner_config = StagedBERT(
    max_len=123,
    layers=[],
    learning_rate_bert=3e-05,
    learning_rate_classifier=0.16,
    number_epochs=5,
    train_batch_size=8,
    weight_decay=0.1,
    weighted_classes=True,
)

bert_2_sner_cfg = OmegaConf.structured(
    DualModelStagedBERTConfig(
        bert=bert_2_sner_config,
        experiment=bert_2_sner_experiment_config,
        transformation=label_transformation_config,
        first_model_sner=sner_cfg,
    )
)

if run_experiments:
    from experiments.dual_model_staged_bert import dual_stage_bert

    dual_stage_bert(OmegaConf.create(bert_2_sner_cfg))

bert_2_sner_run_id = get_run_id(bert_2_sner_cfg, pin_commit=pin_commits)

print(bert_2_sner_run_id)

bert_2_sner_run = mlflow.get_run(bert_2_sner_run_id)
mlflow.artifacts.download_artifacts(
    f"{bert_2_sner_run.info.artifact_uri}/0_model",
    dst_path=Path("./src/service/models/"),
)
try:
    shutil.rmtree(Path("./src/service/models/bert_2_sner"))
except FileNotFoundError:
    pass

Path("./src/service/models/0_model").rename("./src/service/models/bert_2_sner")
'''

## Train BERT Second Stage Model for BiLSTM First Stage


In [None]:
# DISABLED CELL: the whole body is a single triple-quoted string literal, so
# Python only evaluates a string here and none of the statements inside run.
# The wrapped code builds a second-stage StagedBERT config on the full label
# transformation with `first_model_bilstm=bilstm_cfg`, calls dual_stage_bert()
# when run_experiments is set, then downloads the run's `0_model` artifact and
# renames it to ./src/service/models/bert_2_bilstm.
# Re-enabling requirements: the string uses `BiLSTM` without importing it and
# reads `bilstm_cfg`; both are only introduced inside the (also disabled)
# first-stage BiLSTM cell, so that cell must be re-enabled and run first.
'''
from tooling.observability import get_run_id
from tooling.config import DualModelStagedBERTConfig, StagedBERT

from copy import deepcopy

bilstm_config = BiLSTM(
    batch_size=16,
    learning_rate=0.006,
    number_epochs=5,
    weighted_classes=False,
    sentence_length=106,
)


bert_2_bilstm_experiment_config = deepcopy(base_experiment_config)
bert_2_bilstm_experiment_config.name = "Production"

bert_2_bilstm_config = StagedBERT(
    max_len=123,
    layers=[],
    learning_rate_bert=3e-05,
    learning_rate_classifier=0.16,
    number_epochs=5,
    train_batch_size=8,
    weight_decay=0.1,
    weighted_classes=True,
)

bert_2_bilstm_cfg = OmegaConf.structured(
    DualModelStagedBERTConfig(
        bert=bert_2_bilstm_config,
        experiment=bert_2_bilstm_experiment_config,
        transformation=label_transformation_config,
        first_model_bilstm=bilstm_cfg,
    )
)
bert_2_bilstm_cfg.first_model_bilstm.bilstm.sentence_length = 106


if run_experiments:
    from experiments.dual_model_staged_bert import dual_stage_bert

    dual_stage_bert(OmegaConf.create(bert_2_bilstm_cfg))

bert_2_bilstm_run_id = get_run_id(bert_2_bilstm_cfg, pin_commit=pin_commits)

print(bert_2_bilstm_run_id)

bert_2_bilstm_run = mlflow.get_run(bert_2_bilstm_run_id)
mlflow.artifacts.download_artifacts(
    f"{bert_2_bilstm_run.info.artifact_uri}/0_model",
    dst_path=Path("./src/service/models/"),
)
try:
    shutil.rmtree(Path("./src/service/models/bert_2_bilstm"))
except FileNotFoundError:
    pass
Path("./src/service/models/0_model").rename(
    "./src/service/models/bert_2_bilstm"
)
'''

## Train BERT E2E Model


In [7]:
from copy import deepcopy

from tooling.observability import get_run_id
from tooling.config import BERTConfig, BERT

# Start from the shared experiment settings; only the name differs.
bert_experiment_config = deepcopy(base_experiment_config)
bert_experiment_config.name = "Production"

# Hyperparameters of the end-to-end BERT model.
# NOTE(review): number_epochs=1 looks like a smoke-test value — the disabled
# RoBERTa cell uses 11 with otherwise matching hyperparameters. Confirm before
# treating the resulting model as production.
bert_config = BERT(
    max_len=123,
    number_epochs=1,
    train_batch_size=8,
    learning_rate_bert=6e-05,
    learning_rate_classifier=0.1,
    weight_decay=0.01,
    weighted_classes=False,
)

# The end-to-end model is configured with the full label transformation.
bert_cfg = OmegaConf.structured(
    BERTConfig(
        transformation=label_transformation_config,
        experiment=bert_experiment_config,
        bert=bert_config,
    )
)

if run_experiments:
    # Imported lazily so the training module is only loaded when it is needed.
    from experiments.bert import bert

    bert(OmegaConf.create(bert_cfg))

# Look up the MLflow run id for this configuration.
bert_run_id = get_run_id(bert_cfg, pin_commit=pin_commits)
print(bert_run_id)

# Download the run's `0_model` artifact into the service's model directory.
run = mlflow.get_run(bert_run_id)
mlflow.artifacts.download_artifacts(
    f"{run.info.artifact_uri}/0_model",
    dst_path=Path("./src/service/models/"),
)

# Remove a previously installed model (if any), then rename the fresh
# download to the `bert` directory.
try:
    shutil.rmtree(Path("./src/service/models/bert"))
except FileNotFoundError:
    pass
Path("./src/service/models/0_model").rename("./src/service/models/bert")

06:09:31 INFO:PyTorch version 2.3.0+cu118 available.
06:09:31 INFO:TensorFlow version 2.15.1 available.

06:09:35 INFO:
bert:
  model: bert-large-uncased
  type: BERT
  max_len: 123
  train_batch_size: 8
  validation_batch_size: 32
  number_epochs: 1
  learning_rate_bert: 6.0e-05
  learning_rate_classifier: 0.1
  weight_decay: 0.01
  weighted_classes: false
experiment:
  name: Production
  description: ''
  random_state: 125
  folds: 5
  iterations: 1
  average: macro
  dataset: all
  lower_case: true
  force: false
  pin_commit: false
  smote: false
  smote_k_neighbors: 5
  smote_sampling_strategy: not majority
  smote_balance_to_average: false
transformation:
  description: None
  type: Full
  task: Task
  goals: null
  domain_data: Domain_Data
  activity: Activity
  stakeholder: Stakeholder
  system_function: System_Function
  interaction: Interaction
  interaction_data: Interaction_Data
  workspace: Workspace
  software: System_Level
  internal_action: System_Level
  internal_data:

MlflowException: API request to https://mlflow-uvl.ifi.uni-heidelberg.de/api/2.0/mlflow-artifacts/artifacts/6/9f0a9d8307da45f29d017f024a5348c4/artifacts/imported_dataset.csv failed with exception HTTPSConnectionPool(host='mlflow-uvl.ifi.uni-heidelberg.de', port=443): Max retries exceeded with url: /api/2.0/mlflow-artifacts/artifacts/6/9f0a9d8307da45f29d017f024a5348c4/artifacts/imported_dataset.csv (Caused by ResponseError('too many 500 error responses'))

In [None]:
## Train RoBERTa E2E Model


In [14]:
# DISABLED CELL: the whole body is a single triple-quoted string literal, so
# Python only evaluates a string here and none of the statements inside run.
# The wrapped code builds an end-to-end RoBERTa config on the full label
# transformation, calls roberta() when run_experiments is set, then downloads
# the run's `0_model` artifact and renames it to ./src/service/models/roberta.
# NOTE(review): the wrapped code assigns the generic name `run`, which the
# BERT end-to-end cell also assigns; consider a model-specific name when
# re-enabling so the two cells do not overwrite each other's variable.
'''
from tooling.observability import get_run_id
from tooling.config import RoBERTaConfig, RoBERTa

from copy import deepcopy

roberta_experiment_config = deepcopy(base_experiment_config)
roberta_experiment_config.name = "Production"

roberta_config = RoBERTa(
    learning_rate_roberta=6e-05,
    learning_rate_classifier=0.1,
    max_len=123,
    number_epochs=11,
    train_batch_size=8,
    weight_decay=0.01,
    weighted_classes=False,
)

roberta_cfg = OmegaConf.structured(
    RoBERTaConfig(
        roberta=roberta_config,
        experiment=roberta_experiment_config,
        transformation=label_transformation_config,
    )
)

if run_experiments:
    from experiments.roberta import roberta

    roberta(OmegaConf.create(roberta_cfg))

roberta_run_id = get_run_id(roberta_cfg, pin_commit=pin_commits)

print(roberta_run_id)

run = mlflow.get_run(roberta_run_id)
mlflow.artifacts.download_artifacts(
    f"{run.info.artifact_uri}/0_model", dst_path=Path("./src/service/models/")
)
try:
    shutil.rmtree(Path("./src/service/models/roberta"))
except FileNotFoundError:
    pass
Path("./src/service/models/0_model").rename("./src/service/models/roberta")
'''

01:44:16 INFO:
roberta:
  model: Jean-Baptiste/roberta-large-ner-english
  type: RoBERTa
  max_len: 123
  train_batch_size: 8
  validation_batch_size: 32
  number_epochs: 11
  learning_rate_roberta: 6.0e-05
  learning_rate_classifier: 0.1
  weight_decay: 0.01
  weighted_classes: false
experiment:
  name: Production
  description: ''
  random_state: 125
  folds: 5
  iterations: 1
  average: macro
  dataset: all
  lower_case: true
  force: false
  pin_commit: false
  smote: false
  smote_k_neighbors: 5
  smote_sampling_strategy: not majority
transformation:
  description: None
  type: Full
  task: Task
  goals: null
  domain_data: Domain_Data
  activity: Activity
  stakeholder: Stakeholder
  system_function: System_Function
  interaction: Interaction
  interaction_data: Interaction_Data
  workspace: Workspace
  software: System_Level
  internal_action: System_Level
  internal_data: System_Level
  system_level: System_Level

01:44:16 INFO:New experiment. Running
01:44:16 INFO:Entering mlf

Map:   0%|          | 0/2488 [00:00<?, ? examples/s]

Map:   0%|          | 0/622 [00:00<?, ? examples/s]

01:45:21 INFO:Logged iteration result res.precision=0.5793384603014975 res.recall=0.5294931132606071


{'eval_loss': 0.606141209602356, 'eval_step': 0, 'eval_precision': 0.5793384603014975, 'eval_recall': 0.5294931132606071, 'eval_f1': 0.5532954369096804, 'eval_label_count': 10, 'eval_runtime': 3.4457, 'eval_samples_per_second': 180.515, 'eval_steps_per_second': 5.804, 'epoch': 1.0}
{'loss': 1.2792, 'grad_norm': 15.624954223632812, 'learning_rate': 5.123063431745104e-05, 'epoch': 1.607717041800643}


01:46:59 INFO:Logged iteration result res.precision=0.6080609820590207 res.recall=0.4686796187106605


{'eval_loss': 0.6071531176567078, 'eval_step': 2, 'eval_precision': 0.6080609820590207, 'eval_recall': 0.4686796187106605, 'eval_f1': 0.5293490168765562, 'eval_label_count': 10, 'eval_runtime': 3.4508, 'eval_samples_per_second': 180.249, 'eval_steps_per_second': 5.796, 'epoch': 2.0}


01:48:43 INFO:Logged iteration result res.precision=0.6848862486107135 res.recall=0.5565420222272148


{'eval_loss': 0.4957864582538605, 'eval_step': 4, 'eval_precision': 0.6848862486107135, 'eval_recall': 0.5565420222272148, 'eval_f1': 0.6140797446801176, 'eval_label_count': 10, 'eval_runtime': 3.4562, 'eval_samples_per_second': 179.968, 'eval_steps_per_second': 5.787, 'epoch': 3.0}
{'loss': 0.6808, 'grad_norm': 13.241116523742676, 'learning_rate': 4.2461268634902076e-05, 'epoch': 3.215434083601286}


01:50:20 INFO:Logged iteration result res.precision=0.6507461365221903 res.recall=0.6693099147848478


{'eval_loss': 0.4820605218410492, 'eval_step': 6, 'eval_precision': 0.6507461365221903, 'eval_recall': 0.6693099147848478, 'eval_f1': 0.6598974956419169, 'eval_label_count': 10, 'eval_runtime': 3.46, 'eval_samples_per_second': 179.767, 'eval_steps_per_second': 5.78, 'epoch': 4.0}
{'loss': 0.5049, 'grad_norm': 7.3944220542907715, 'learning_rate': 3.369190295235311e-05, 'epoch': 4.823151125401929}


01:51:56 INFO:Logged iteration result res.precision=0.6895372990852613 res.recall=0.6271393081541824


{'eval_loss': 0.5652507543563843, 'eval_step': 8, 'eval_precision': 0.6895372990852613, 'eval_recall': 0.6271393081541824, 'eval_f1': 0.6568597669574816, 'eval_label_count': 10, 'eval_runtime': 3.4651, 'eval_samples_per_second': 179.504, 'eval_steps_per_second': 5.772, 'epoch': 5.0}


01:53:34 INFO:Logged iteration result res.precision=0.6742795448771706 res.recall=0.6181710754031103


{'eval_loss': 0.6192948222160339, 'eval_step': 10, 'eval_precision': 0.6742795448771706, 'eval_recall': 0.6181710754031103, 'eval_f1': 0.6450074066096988, 'eval_label_count': 10, 'eval_runtime': 3.5492, 'eval_samples_per_second': 175.249, 'eval_steps_per_second': 5.635, 'epoch': 6.0}
{'loss': 0.324, 'grad_norm': 18.10057258605957, 'learning_rate': 2.492253726980415e-05, 'epoch': 6.430868167202572}


01:55:12 INFO:Logged iteration result res.precision=0.6873868514132714 res.recall=0.660922580138302


{'eval_loss': 0.5512689352035522, 'eval_step': 12, 'eval_precision': 0.6873868514132714, 'eval_recall': 0.660922580138302, 'eval_f1': 0.6738949988155228, 'eval_label_count': 10, 'eval_runtime': 3.4693, 'eval_samples_per_second': 179.286, 'eval_steps_per_second': 5.765, 'epoch': 7.0}


01:56:43 INFO:Logged iteration result res.precision=0.6969054031299542 res.recall=0.6630879913521743


{'eval_loss': 0.5906326770782471, 'eval_step': 14, 'eval_precision': 0.6969054031299542, 'eval_recall': 0.6630879913521743, 'eval_f1': 0.6795762476477104, 'eval_label_count': 10, 'eval_runtime': 3.4785, 'eval_samples_per_second': 178.813, 'eval_steps_per_second': 5.75, 'epoch': 8.0}
{'loss': 0.2004, 'grad_norm': 7.203489780426025, 'learning_rate': 1.6153171587255187e-05, 'epoch': 8.038585209003216}


01:58:20 INFO:Logged iteration result res.precision=0.6929995630182143 res.recall=0.6776944016612059


{'eval_loss': 0.7513129711151123, 'eval_step': 16, 'eval_precision': 0.6929995630182143, 'eval_recall': 0.6776944016612059, 'eval_f1': 0.6852615336654618, 'eval_label_count': 10, 'eval_runtime': 3.5576, 'eval_samples_per_second': 174.838, 'eval_steps_per_second': 5.622, 'epoch': 9.0}
{'loss': 0.0924, 'grad_norm': 11.683978080749512, 'learning_rate': 7.383805904706226e-06, 'epoch': 9.646302250803858}


02:00:00 INFO:Logged iteration result res.precision=0.6888084658467657 res.recall=0.6815572697029866


{'eval_loss': 0.9262818694114685, 'eval_step': 18, 'eval_precision': 0.6888084658467657, 'eval_recall': 0.6815572697029866, 'eval_f1': 0.6851636831717619, 'eval_label_count': 10, 'eval_runtime': 3.6713, 'eval_samples_per_second': 169.423, 'eval_steps_per_second': 5.448, 'epoch': 10.0}


02:01:44 INFO:Logged iteration result res.precision=0.6904512276980231 res.recall=0.6926464362913085


{'eval_loss': 0.9886234998703003, 'eval_step': 20, 'eval_precision': 0.6904512276980231, 'eval_recall': 0.6926464362913085, 'eval_f1': 0.6915470899120589, 'eval_label_count': 10, 'eval_runtime': 3.5215, 'eval_samples_per_second': 176.631, 'eval_steps_per_second': 5.679, 'epoch': 11.0}
{'train_runtime': 1083.2292, 'train_samples_per_second': 25.265, 'train_steps_per_second': 3.158, 'train_loss': 0.45614679554546483, 'epoch': 11.0}


02:02:35 INFO:Logged iteration result res.precision=0.6904512276980231 res.recall=0.6926464362913085
02:02:35 INFO:Logged iteration result res.precision=0.6904512276980231 res.recall=0.6926464362913085
02:02:35 INFO:Finished iteration=0
02:02:35 INFO:Logging model artifact (might take a while)
02:02:38 INFO:Breaking early after iteration=0 of 5 folds
02:02:38 INFO:Logged experiment result res.mean_precision=0.6904512276980231 res.mean_recall=0.6926464362913085
02:02:39 INFO:Left mlflow context


73c3721d2c3e41e6b0539148725132df


Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

WindowsPath('src/service/models/roberta')