In [1]:
'''
Imports
'''

# General
import json
import numpy as np
from collections import Counter
import pandas as pd

try:
  import wandb
except:
  ! pip install wandb
  import wandb

try:
  from datasets import Dataset
except:
  ! pip install datasets
  from datasets import Dataset

# Metrics
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score as sk_precision_score
from sklearn.metrics import recall_score as sk_recall_score
from sklearn.metrics import f1_score as sk_f1_score

try:
  from seqeval.metrics import classification_report as seqeval_classification_report
  from seqeval.metrics import f1_score, precision_score, recall_score
except:
  ! pip install seqeval
  from seqeval.metrics import classification_report as seqeval_classification_report
  from seqeval.metrics import f1_score, precision_score, recall_score

# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset

# Transformers
try:
    from transformers import (
        DataCollatorForTokenClassification,
        DistilBertConfig,
        DistilBertForTokenClassification,
        DistilBertTokenizerFast,
        Trainer,
        TrainingArguments,
    )
except:
    ! pip install transformers
    from transformers import (
        DataCollatorForTokenClassification,
        DistilBertConfig,
        DistilBertForTokenClassification,
        DistilBertTokenizerFast,
        Trainer,
        TrainingArguments,
    )

! pip install accelerate



In [2]:
import os
#os.environ["WANDB_MODE"] = "offline"
from google.colab import drive
if "COLAB_GPU" in os.environ:
        drive.mount('/content/drive')

output_dir = "/content/drive/MyDrive/distilbert_outputs"
os.makedirs(output_dir, exist_ok=True)


Mounted at /content/drive


In [3]:
"""
    Preprocess a list of email chains for token classification with BIO tagging.

    For each email chain, this function:
    - Concatenates all email bodies into a single string.
    - Extracts entity values from the tenant profile.
    - Tokenizes the email text.
    - Labels each token using the BIO format based on entity spans.
    - Converts labels to IDs using the provided label2id mapping.
    - Masks padding token positions with -100 for loss computation.

    Args:
        data (list): List of email chains, each as a dictionary with an email_chain and tenant_profile.
        tokenizer (PreTrainedTokenizer): Hugging Face tokenizer.
        label2id (dict): Mapping from BIO label strings to integer IDs.
        max_length (int, optional): Maximum sequence length for tokenization. Defaults to 512.

    Returns:
        A tuple of two lists:
            - List of tokenized input dictionaries (with input_ids, attention_mask, etc.)
            - List of corresponding label ID sequences
"""

def preprocess_for_token_classification(data, tokenizer, label2id, max_length=512):
    inputs, all_labels = [], []

    for item in data:
        email_text = " ".join(email["body"].replace("\n", " ") for email in item["email_chain"])
        email_text_lower = email_text.lower()

        profile = item.get("tenant_profile", {})
        prefs = profile.get("Property Preferences", {})

        # Get all entities
        raw_entities = {
            "FirstName": profile.get("Tenant Representative Details", {}).get("First Name", ""),
            "LastName": profile.get("Tenant Representative Details", {}).get("Last Name", ""),
            "Email": profile.get("Tenant Representative Details", {}).get("Email", ""),
            "Phone": profile.get("Tenant Representative Details", {}).get("Phone", ""),
            "CompanyName": profile.get("Company Details", {}).get("Company Name", ""),
            "Industry": profile.get("Company Details", {}).get("Industry", ""),
            "CompanySize": profile.get("Company Details", {}).get("Company Size", ""),
            "GrowthStage": profile.get("Company Details", {}).get("Growth Stage", ""),
            "CurrentNeighborhood": profile.get("Company Details", {}).get("Current Neighborhood", []),
            "FirstInteraction": profile.get("First Interaction", ""),
            "LastInteraction": profile.get("Last Interaction", ""),
            "DecisionMakerRole": profile.get("Decision-Maker Role", ""),
            "PropertyType": prefs.get("Property Type", ""),
            "PreferredNeighborhood": prefs.get("Preferred Neighborhood", []),
            "Budget": prefs.get("Estimated or Stated Budget", ""),
            "MustHaves": prefs.get("Must-Haves", []),
            "NiceToHaves": prefs.get("Nice-to-Haves", []),
            "SpaceSize": prefs.get("Space Size", ""),
            "PreferredLeaseTerm": prefs.get("Preferred Lease Term", ""),
            "MovingTerm": prefs.get("Moving Term", ""),
            "min_months": prefs.get("Moving Timing", {}).get("min_months", ""),
            "max_months": prefs.get("Moving Timing", {}).get("max_months", ""),
            "MovingTimeline": profile.get("Moving Timeline", ""),
            "PainPoint": profile.get("Pain Points", []),
            "UrgencyScore": str(profile.get("Urgency Score", "")),
            "Outcome": profile.get("Outcome", ""),
            "Personality": profile.get("Tenant Personality", "")
        }

        # Normalize all values to a list of values
        entities = {}
        for key, val in raw_entities.items():
            if isinstance(val, list):
                entities[key] = [str(v).strip() for v in val if v]
            elif val:
                entities[key] = [str(val).strip()]
            else:
                entities[key] = []

        # Tokenize input
        encoding = tokenizer(
            email_text,
            truncation=True,
            padding="max_length",
            max_length=max_length,
            return_offsets_mapping=True
        )
        offset_mapping = encoding.pop("offset_mapping")
        input_ids = encoding["input_ids"]

        # Set default label to "O"
        label_list = ["O"] * len(input_ids)

        # Tag tokens with BIO
        for entity_type, values in entities.items():
            for value in values:
                start = email_text_lower.find(value.lower())
                if start == -1:
                    continue
                end = start + len(value)

                inside = False
                for i, (token_start, token_end) in enumerate(offset_mapping):
                    if token_start == 0 and token_end == 0:
                        continue
                    if token_start >= end or token_end <= start:
                        continue
                    tag = f"I-{entity_type}" if inside else f"B-{entity_type}"
                    label_list[i] = tag
                    inside = True

        # Convert tags to IDs and mask padding
        label_ids = [label2id.get(tag, label2id["O"]) for tag in label_list]
        label_ids = [
            label_id if input_id != tokenizer.pad_token_id else -100
            for label_id, input_id in zip(label_ids, input_ids)
        ]

        inputs.append(encoding)
        all_labels.append(label_ids)

    return inputs, all_labels


In [4]:
"""
    Creates a PyTorch Dataset from the pre-tokenized input encodings and their corresponding label sequences.

    Args:
        encodings (list of dict): List of dictionaries containing tokenized inputs.
        labels (list of list): List of label ID sequences aligned with the tokenized inputs.

    Methods:
        __getitem__(index): Returns a single item from the dataset as a dictionary
                          with input tensors and corresponding label tensor.
        __len__(): Returns the number of items in the dataset.
"""
class EmailNERDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, index):
        item = {
            key: torch.tensor(val)
            for key, val in self.encodings[index].items()
        }
        item["labels"] = torch.tensor(self.labels[index])
        return item

    def __len__(self):
        return len(self.labels)


In [5]:
'''
Define tokenizer and import pre-trained model
'''
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [6]:
'''
Select ENTITY_TAGS for BIO tagging, having "O" as the default tag
'''

ENTITY_TAGS = [
    "O",
    "B-FirstName", "I-FirstName",
    "B-LastName", "I-LastName",
    "B-Email", "I-Email",
    "B-Phone", "I-Phone",
    "B-CompanyName", "I-CompanyName",
    "B-Industry", "I-Industry",
    "B-CompanySize", "I-CompanySize",
    "B-GrowthStage", "I-GrowthStage",
    "B-CurrentNeighborhood", "I-CurrentNeighborhood",
    "B-FirstInteraction", "I-FirstInteraction",
    "B-LastInteraction", "I-LastInteraction",
    "B-DecisionMakerRole", "I-DecisionMakerRole",
    "B-PropertyType", "I-PropertyType",
    "B-PreferredNeighborhood", "I-PreferredNeighborhood",
    "B-Budget", "I-Budget",
    "B-SpaceSize", "I-SpaceSize",
    "B-PreferredLeaseTerm", "I-PreferredLeaseTerm",
    "B-MovingTerm", "I-MovingTerm",
    "B-min_months", "I-min_months",
    "B-max_months", "I-max_months",
    "B-MovingTimeline", "I-MovingTimeline",
    "B-MustHaves", "I-MustHaves",
    "B-NiceToHaves", "I-NiceToHaves",
    "B-PainPoint", "I-PainPoint",
    "B-UrgencyScore", "I-UrgencyScore",
    "B-Outcome", "I-Outcome",
    "B-Personality", "I-Personality"
]

# Define label to id mappings
label2id = {label: i for i, label in enumerate(ENTITY_TAGS)}
id2label = {i: label for label, i in label2id.items()}

# Set inicial label weights
label_weights = torch.tensor([0.05] + [1.0] * (len(ENTITY_TAGS) - 1), dtype=torch.float)

In [7]:
'''
Initialize model
'''

model = DistilBertForTokenClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=len(ENTITY_TAGS),
    id2label=id2label,
    label2id=label2id
)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
'''
Import data and preprocess it
'''

with open("train.json") as f:  # Change path according to the file location
    train_data = json.load(f)
with open("val.json") as f:  # Change path according to the file location
    val_data = json.load(f)
with open("test.json") as f:  # Change path according to the file location
    test_data = json.load(f)

'''
For running with github repo.:
with open("../3. Data Split/train.json") as f:
    train_data = json.load(f)
with open("../3. Data Split/val.json") as f:
    val_data = json.load(f)
with open("../3. Data Split/test.json") as f:
    test_data = json.load(f)
'''

# Tokenize
train_encodings, train_labels = preprocess_for_token_classification(train_data, tokenizer, label2id)
val_encodings, val_labels = preprocess_for_token_classification(val_data, tokenizer, label2id)
test_encodings, test_labels = preprocess_for_token_classification(test_data, tokenizer, label2id)

# Create Datasets
train_data = EmailNERDataset(train_encodings, train_labels)
val_data = EmailNERDataset(val_encodings, val_labels)
test_data = EmailNERDataset(test_encodings, test_labels)


In [9]:
# Check tokenized outputs
for name, dataset in zip(["Validation", "Test", "Train"], [val_data, test_data, train_data]):
  print(f"\nLabel distribution for {name} dataset:")
  all_labels = sum([item['labels'].tolist() for item in dataset], [])
  label_counts = Counter(all_labels)
  print({(id2label[k] if k != -100 else 'IGNORED'): v for k, v in label_counts.items()})


Label distribution for Validation dataset:
{'O': 12598, 'B-FirstName': 30, 'B-CompanyName': 25, 'I-CompanyName': 37, 'B-MustHaves': 60, 'I-MustHaves': 60, 'B-UrgencyScore': 21, 'IGNORED': 2406, 'B-LastName': 12, 'B-Industry': 13, 'B-CurrentNeighborhood': 2, 'I-CurrentNeighborhood': 2, 'B-PropertyType': 27, 'B-PreferredNeighborhood': 9, 'I-PreferredNeighborhood': 8, 'B-NiceToHaves': 10, 'I-NiceToHaves': 13, 'B-PreferredLeaseTerm': 4, 'I-PreferredLeaseTerm': 5, 'B-Personality': 2, 'B-DecisionMakerRole': 7, 'I-FirstName': 2, 'B-Outcome': 2, 'B-SpaceSize': 1, 'I-SpaceSize': 4}

Label distribution for Test dataset:
{'O': 12209, 'B-FirstName': 30, 'B-CompanyName': 26, 'I-CompanyName': 38, 'B-PreferredNeighborhood': 13, 'I-PreferredNeighborhood': 12, 'B-PropertyType': 26, 'B-MustHaves': 49, 'I-MustHaves': 60, 'B-UrgencyScore': 20, 'IGNORED': 2818, 'B-PreferredLeaseTerm': 3, 'I-PreferredLeaseTerm': 3, 'B-LastName': 8, 'B-NiceToHaves': 10, 'I-NiceToHaves': 10, 'B-Industry': 12, 'B-DecisionMake

In [10]:
"""
  A subclass of DistilBertForTokenClassification that incorporates class weights into the loss function.

  This model is designed for token classification tasks where class imbalance may exist.
  It uses a cross-entropy loss function with class weights and `ignore_index=-100`
  to ignore padding tokens.

  Args:
      - config: Model configuration.
      - class_weights (torch.Tensor, optional): 1D tensor of weights for each class. If None, standard (unweighted) loss is used.
"""

class WeightedDistilBertForTokenClassification(DistilBertForTokenClassification):
    def __init__(self, config, class_weights=None):
        super().__init__(config)
        self.class_weights = class_weights

    def forward(self, input_ids=None, attention_mask=None, num_items_in_batch=None, labels=None, **kwargs):
        '''
          Forward Args:
              - input_ids (torch.LongTensor): Token IDs with shape (batch_size, sequence_length).
              - attention_mask (torch.LongTensor): Mask to avoid performing attention on padding tokens.
              - labels (torch.LongTensor, optional): Token-level labels for computing the loss.
              - num_items_in_batch (optional): Unused placeholder for potential logging/debugging.
              - **kwargs: Additional keyword arguments for base model.

          Returns:
              - dict: A dictionary with:
                  - "loss" (torch.FloatTensor, optional): The weighted cross-entropy loss, if labels are provided.
                  - "logits" (torch.FloatTensor): The predicted logits of shape (batch_size, sequence_length, num_labels).
        '''
        outputs = super().forward(input_ids=input_ids, attention_mask=attention_mask, labels=None, **kwargs)
        logits = outputs.logits
        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss(weight=self.class_weights.to(logits.dtype).to(logits.device), ignore_index=-100)
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        return {"loss": loss, "logits": logits} if loss is not None else {"logits": logits}


In [11]:
'''
Initialize model
'''
# Load configuration with entities, labels and label ids
config = DistilBertConfig.from_pretrained(
    "distilbert-base-uncased",
    num_labels=len(ENTITY_TAGS),
    id2label=id2label,
    label2id=label2id
)

# Load model using weighted class
model = WeightedDistilBertForTokenClassification.from_pretrained(
    "distilbert-base-uncased",
    config=config,
    class_weights=label_weights
)

Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
'''
Define metrics for model evaluation
'''

def compute_metrics(p):
    predictions, labels = p
    preds = predictions.argmax(axis=-1)

    true_labels = []
    pred_labels = []

    for pred, label in zip(preds, labels):
        true_seq = []
        pred_seq = []
        for p_, l_ in zip(pred, label):
            if l_ != -100: # Ignore padding
                true_seq.append(id2label[l_])
                pred_seq.append(id2label[p_])
        true_labels.append(true_seq)
        pred_labels.append(pred_seq)

    return {
        "precision": precision_score(true_labels, pred_labels, zero_division=1),
        "recall": recall_score(true_labels, pred_labels, zero_division=1),
        "f1": f1_score(true_labels, pred_labels, zero_division=1)
    }


In [13]:
"""
Extract contiguous entities from BIO-tagged tokens.

Args:
    tokens (List[str]): List of tokens.
    labels (List[str]): Corresponding BIO labels (e.g., B-Budget, I-Budget, O).

Returns:
    Dict[str, str]: A mapping from field name to extracted string value.
"""
def extract_entities_from_tokens(tokens, labels):

    entity_dict = {}
    current_entity = None
    current_tokens = []

    for token, label in zip(tokens, labels):
        if label == "O" or token in tokenizer.all_special_tokens:
            if current_entity:
                value = tokenizer.convert_tokens_to_string(current_tokens).replace(" ##", "")
                entity_dict.setdefault(current_entity.lower(), value.strip())
                current_entity, current_tokens = None, []
            continue

        tag, field = label.split("-", 1)

        if tag == "B":
            if current_entity:
                value = tokenizer.convert_tokens_to_string(current_tokens).replace(" ##", "")
                entity_dict.setdefault(current_entity.lower(), value.strip())
            current_entity = field
            current_tokens = [token]
        elif tag == "I" and field == current_entity:
            current_tokens.append(token)
        else:
            if current_entity:
                value = tokenizer.convert_tokens_to_string(current_tokens).replace(" ##", "")
                entity_dict.setdefault(current_entity.lower(), value.strip())
            current_entity = None
            current_tokens = []

    if current_entity and current_tokens:
        value = tokenizer.convert_tokens_to_string(current_tokens).replace(" ##", "")
        entity_dict.setdefault(current_entity.lower(), value.strip())

    return entity_dict


In [14]:
"""
Convert BERT predictions on token-level to structured tenant profiles.

Args:
    dataset: EmailNERDataset instance
    predictions: raw logits from trainer.predict()
    id2label: mapping from label id to BIO label
    tokenizer: tokenizer used for encoding

Returns:
    List[Dict]: structured tenant profiles
"""
def convert_predictions_to_profiles(dataset, predictions, id2label, tokenizer):

    predicted_ids = predictions.argmax(axis=-1)
    profiles = []

    for i in range(len(dataset)):
        input_ids = dataset[i]["input_ids"]
        tokens = tokenizer.convert_ids_to_tokens(input_ids)
        label_ids = predicted_ids[i]


        tokens_filtered = []
        labels_filtered = []
        for token, label_id in zip(tokens, label_ids):
            if label_id == -100 or token == tokenizer.pad_token: # Ignore padding
                continue
            tokens_filtered.append(token)
            labels_filtered.append(id2label[label_id])

        entities = extract_entities_from_tokens(tokens_filtered, labels_filtered)

        # Fill with all expected keys
        structured_profile = {
            key.lower(): entities.get(key.lower(), None)
            for key in [
                "FirstName", "LastName", "Email", "Phone",
                "CompanyName", "Industry", "CompanySize", "GrowthStage",
                "CurrentNeighborhood", "FirstInteraction", "LastInteraction",
                "DecisionMakerRole", "PropertyType", "PreferredNeighborhood",
                "Budget", "SpaceSize", "PreferredLeaseTerm", "MovingTerm",
                "min_months", "max_months", "MovingTimeline",
                "MustHaves", "NiceToHaves", "PainPoint",
                "UrgencyScore", "Outcome", "Personality"
            ]
        }
        profiles.append(structured_profile)

    return profiles


In [15]:
"""
Converts a seqeval classification report DataFrame into a flat results dictionary.

Args:
    report_df (pd.DataFrame): Output of seqeval classification_report with output_dict=True (transposed to DataFrame).

Returns:
    dict: Dictionary with precision, recall, f1 for each entity label + macro averages.
"""

def get_entity_metrics(report_df):

    results = {}
    macro_p, macro_r, macro_f1 = [], [], []

    for label in report_df.index:
        # skip aggregate rows if present
        if label in ["accuracy", "macro avg", "weighted avg"]:
            continue

        p = report_df.loc[label, "precision"]
        r = report_df.loc[label, "recall"]
        f1 = report_df.loc[label, "f1-score"]

        results[f"{label}_precision"] = round(p, 4)
        results[f"{label}_recall"] = round(r, 4)
        results[f"{label}_f1"] = round(f1, 4)

        macro_p.append(p)
        macro_r.append(r)
        macro_f1.append(f1)

    # macro averages (mean of individual labels)
    results["macro_precision"] = round(np.mean(macro_p), 4) if macro_p else 0.0
    results["macro_recall"] = round(np.mean(macro_r), 4) if macro_r else 0.0
    results["macro_f1"] = round(np.mean(macro_f1), 4) if macro_f1 else 0.0

    print("Structured Field-Level Metrics:", results)
    return results


In [16]:
"""
  Evaluates a token classification model on the given dataset and prints a classification report.

  This function assumes that the dataframe contains one column per entity field, with
  predictions and ground truths already aligned. It treats each cell as correct if
  the predicted value matches the true value exactly (case-insensitive and stripped).

  Args:
      - df (pandas.DataFrame): DataFrame containing one column per entity. Each column
                               should contain both ground truth and predicted values
                               already aligned row-wise.

  Returns:
     - dict: A dictionary with precision, recall, and F1 score for each field, as well
             as macro-averaged precision, recall, and F1 score across all fields.

"""
def evaluate_model(trainer, dataset, id2label, tokenizer):
    predictions, labels, _ = trainer.predict(dataset)
    preds = predictions.argmax(axis=-1)

    true_labels = []
    pred_labels = []

    for pred, label in zip(preds, labels):
        true_seq = []
        pred_seq = []
        for p, l in zip(pred, label):
            if l != -100:
                true_seq.append(id2label[l])
                pred_seq.append(id2label[p])
        true_labels.append(true_seq)
        pred_labels.append(pred_seq)

    profiles = convert_predictions_to_profiles(test_data, predictions, id2label, tokenizer)
    df_profiles = pd.DataFrame(profiles)
    report_dict = seqeval_classification_report(true_labels, pred_labels, output_dict=True)
    df_report = pd.DataFrame(report_dict).transpose()
    print(df_report)


    print(seqeval_classification_report(true_labels, pred_labels))

    return df_profiles, df_report


In [17]:
'''
Initialize wandb
'''

wandb.init()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mlidianeh[0m ([33mlidianeh-none[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [18]:
'''
Define sweep configurations and set up sweep agent
'''

sweep_config = {
    "method": "bayes",
    "metric": {
        "name": "eval_f1",
        "goal": "maximize"
    },
    "parameters": {
        "learning_rate": {
            "distribution": "uniform",
            "min": 1e-5,
            "max": 4e-5
        },
        "per_device_train_batch_size": {
            "values": [8, 16]
        },
        "num_train_epochs": {
            "values": [6, 8, 10]
        },
        "weight_decay": {
            "distribution": "uniform",
            "min": 0.05,
            "max": 0.2
        },
        "warmup_ratio": {
            "distribution": "uniform",
            "min": 0.05,
            "max": 0.2
        },
        "adam_epsilon": {
            "values": [1e-8, 1e-6]
        },
        "seed": {
            "values": [42, 2024]
        }
    }
}


sweep_id = wandb.sweep(sweep_config, project="distilbert_final")

Create sweep with ID: hnfvkr9h
Sweep URL: https://wandb.ai/lidianeh-none/distilbert_final/sweeps/hnfvkr9h


In [19]:
"""
Trains and evaluates DistilBERT-based token classification model using a W&B sweep configuration.

This function initializes wandb, builds a model and training pipeline using hyperparameters from sweep_config,
training and evaluating the model on the provided datasets, and logging evaluation metrics to W&B.

Args:
    - config (dict, optional): Sweep configuration containing training hyperparameters.
      If None, wandb will use the default sweep config.

Raises:
    - Exception: Prints the error and finishes the wandb run if training fails.
"""


def train_with_wandb_sweep(config=None):

    with wandb.init(config=config) as run:
        config = wandb.config

        model_config = DistilBertConfig.from_pretrained(
            "distilbert-base-uncased",
            num_labels=len(ENTITY_TAGS),
            id2label=id2label,
            label2id=label2id
        )

        model = WeightedDistilBertForTokenClassification.from_pretrained(
            "distilbert-base-uncased",
            config=model_config,
            class_weights=label_weights
        )

        training_args = TrainingArguments(
            output_dir="./results",
            num_train_epochs=config.num_train_epochs,
            per_device_train_batch_size=config.per_device_train_batch_size,
            per_device_eval_batch_size=config.per_device_train_batch_size,
            learning_rate=config.learning_rate,
            weight_decay=config.weight_decay,
            warmup_ratio=config.warmup_ratio,
            adam_epsilon=config.adam_epsilon,
            seed=config.seed,
            eval_strategy="epoch",
            save_strategy="epoch",
            logging_dir="./logs",
            logging_strategy="epoch",
            logging_steps=10,
            report_to="wandb",
            load_best_model_at_end=True,
            metric_for_best_model="eval_f1",
            greater_is_better=True
        )


        # Initialize trainer
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_data,
            eval_dataset=val_data,
            tokenizer=tokenizer,
            compute_metrics=compute_metrics
        )

        # Train and evaluate
        try:
            trainer.train()
            trainer.evaluate()
            metrics = trainer.evaluate(test_data)
            print(metrics)
            df, entity_metrics = evaluate_model(trainer, test_data, id2label, tokenizer)
            print(df)
            wandb_table = wandb.Table(dataframe=df)
            wandb.log({f"{run.name} Predictions": wandb_table})

            entity_metrics = get_entity_metrics(entity_metrics)
            wandb.log({**{f"test_{k}": float(v) for k, v in entity_metrics.items()}})

            # Save all predictions to Google Drive
            pred_path = os.path.join(output_dir, f"{wandb.run.name}_predictions.json")
            df.to_json(pred_path, orient="records", indent=2)
            print(f"Predictions saved to: {pred_path}")

        except Exception as e:
            print(f"Training failed: {e}")
            return


In [20]:
''' Launch the sweep agent '''
wandb.agent(sweep_id, function=train_with_wandb_sweep, count=25)

[34m[1mwandb[0m: Agent Starting Run: gntvpmor with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 1.1128649479482749e-05
[34m[1mwandb[0m: 	num_train_epochs: 6
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 2024
[34m[1mwandb[0m: 	warmup_ratio: 0.055473817189145874
[34m[1mwandb[0m: 	weight_decay: 0.10711851287866034


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.872,3.428867,0.0,0.0,0.0
2,2.9225,2.359738,1.0,0.0,0.0
3,2.1012,1.920579,1.0,0.0,0.0
4,1.821,1.763306,1.0,0.0,0.0
5,1.7005,1.673992,1.0,0.0,0.0
6,1.6335,1.642412,1.0,0.0,0.0


{'eval_loss': 3.4286720752716064, 'eval_precision': 0.0013020833333333333, 'eval_recall': 0.004830917874396135, 'eval_f1': 0.0020512820512820513, 'eval_runtime': 0.1819, 'eval_samples_per_second': 164.883, 'eval_steps_per_second': 10.992, 'epoch': 6.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
Budget                  0.000000  0.000000  0.000000      0.0
CompanyName             0.000000  0.000000  0.000000     27.0
CompanySize             0.000000  0.000000  0.000000      0.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
Email                   0.000000  0.000000  0.000000      0.0
FirstName               0.000000  0.000000  0.000000     30.0
GrowthStage             0.000000  0.000000  0.000000      0.0
Industry                0.038462  0.083333  0.052632     12.0
LastInteraction         0.000000  0.000000  0.000000      0.0
LastName                0.000000  0.000000  0.000000      8.0
MovingTerm              0.000000  0.000000  0.000000      0.0
MovingTimeline          0.000000  0.000000  0.000000      0.0
MustHaves               0.000000  0.000000  0.000000     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
Outcome 

0,1
eval/f1,▁▁▁▁▁▁▁█
eval/loss,█▄▂▁▁▁██
eval/precision,▁█████▁▁
eval/recall,▁▁▁▁▁▁▁█
eval/runtime,▆▁▁▁▁█▄▂
eval/samples_per_second,▃████▁▅▇
eval/steps_per_second,▃████▁▅▇
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.00205
eval/loss,3.42867
eval/precision,0.0013
eval/recall,0.00483
eval/runtime,0.1819
eval/samples_per_second,164.883
eval/steps_per_second,10.992
test/f1,0.00205
test/loss,3.42867
test/precision,0.0013


[34m[1mwandb[0m: Agent Starting Run: if0w6bvp with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-08
[34m[1mwandb[0m: 	learning_rate: 2.357034328072062e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.05077446629549874
[34m[1mwandb[0m: 	weight_decay: 0.13497433401379558


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.8113,2.957187,1.0,0.0,0.0
2,2.1993,1.820937,1.0,0.0,0.0
3,1.6305,1.490408,1.0,0.0,0.0
4,1.3723,1.230678,0.375,0.092511,0.14841
5,1.1355,1.032192,0.287356,0.330396,0.307377
6,0.971,0.899199,0.269912,0.537445,0.359352
7,0.8513,0.819724,0.271399,0.572687,0.368272
8,0.7878,0.771351,0.250936,0.590308,0.352168
9,0.7418,0.743587,0.248188,0.603524,0.351733
10,0.7229,0.734107,0.249548,0.60793,0.353846


{'eval_loss': 0.7467631697654724, 'eval_precision': 0.27555555555555555, 'eval_recall': 0.5990338164251208, 'eval_f1': 0.3774733637747337, 'eval_runtime': 0.1803, 'eval_samples_per_second': 166.405, 'eval_steps_per_second': 11.094, 'epoch': 10.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.322581  0.740741  0.449438     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.297872  0.933333  0.451613     30.0
Industry                0.000000  0.000000  0.000000     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.200000  0.780000  0.318367     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.666667  0.769231  0.714286     13.0
PropertyType            0.657143  0.884615  0.754098     26.0
UrgencyScore            0.081633  0.200000  0.115942     20.0
micro avg               0.275556  0.599034  0.377473    207.0
macro avg               0.171223  0.331378  0.215673    207.0
weighted

0,1
eval/f1,▁▁▁▄▇███████
eval/loss,█▄▃▃▂▂▁▁▁▁▁▁
eval/precision,███▂▁▁▁▁▁▁▁▁
eval/recall,▁▁▁▂▅▇██████
eval/runtime,▄▄▄▇▆▄▅▄█▆▅▁
eval/samples_per_second,▅▅▅▂▃▅▄▅▁▃▄█
eval/steps_per_second,▅▅▅▂▃▅▄▅▁▃▄█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.37747
eval/loss,0.74676
eval/precision,0.27556
eval/recall,0.59903
eval/runtime,0.1803
eval/samples_per_second,166.405
eval/steps_per_second,11.094
test/f1,0.37747
test/loss,0.74676
test/precision,0.27556


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ai15avbi with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 2.772825936567409e-05
[34m[1mwandb[0m: 	num_train_epochs: 6
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 2024
[34m[1mwandb[0m: 	warmup_ratio: 0.1844200332694338
[34m[1mwandb[0m: 	weight_decay: 0.1975888766328881


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.7841,3.046,0.0,0.0,0.0
2,2.3074,1.890649,1.0,0.0,0.0
3,1.7146,1.552648,1.0,0.0,0.0
4,1.4267,1.320908,0.111111,0.013216,0.023622
5,1.2401,1.180435,0.160714,0.15859,0.159645
6,1.1374,1.132055,0.216216,0.246696,0.230453


{'eval_loss': 1.0763312578201294, 'eval_precision': 0.21666666666666667, 'eval_recall': 0.25120772946859904, 'eval_f1': 0.232662192393736, 'eval_runtime': 0.1793, 'eval_samples_per_second': 167.326, 'eval_steps_per_second': 11.155, 'epoch': 6.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.120000  0.111111  0.115385     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.500000  0.700000  0.583333     30.0
Industry                0.000000  0.000000  0.000000     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.167665  0.560000  0.258065     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.000000  0.000000  0.000000     13.0
PropertyType            0.000000  0.000000  0.000000     26.0
UrgencyScore            0.000000  0.000000  0.000000     20.0
micro avg               0.216667  0.251208  0.232662    207.0
macro avg               0.060590  0.105470  0.073599    207.0
weighted

0,1
eval/f1,▁▁▁▂▆███
eval/loss,█▄▃▂▁▁▁▁
eval/precision,▁██▂▂▃▃▃
eval/recall,▁▁▁▁▅███
eval/runtime,▁▁▁▂▂▃█▁
eval/samples_per_second,███▇▇▆▁█
eval/steps_per_second,███▇▇▆▁█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.23266
eval/loss,1.07633
eval/precision,0.21667
eval/recall,0.25121
eval/runtime,0.1793
eval/samples_per_second,167.326
eval/steps_per_second,11.155
test/f1,0.23266
test/loss,1.07633
test/precision,0.21667


[34m[1mwandb[0m: Agent Starting Run: rb2j5q81 with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-08
[34m[1mwandb[0m: 	learning_rate: 3.8204764430410826e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	seed: 2024
[34m[1mwandb[0m: 	warmup_ratio: 0.10708419864193496
[34m[1mwandb[0m: 	weight_decay: 0.1309598037371757


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.2427,1.850741,1.0,0.0,0.0
2,1.4098,1.018722,0.287402,0.321586,0.303534
3,0.8433,0.690351,0.244526,0.590308,0.345806
4,0.5939,0.537672,0.226686,0.696035,0.341991
5,0.4547,0.473108,0.271647,0.704846,0.392157
6,0.3678,0.440689,0.242165,0.748899,0.365985
7,0.3309,0.432903,0.32282,0.76652,0.454308
8,0.298,0.438574,0.343373,0.753304,0.471724


{'eval_loss': 0.35404303669929504, 'eval_precision': 0.3744394618834081, 'eval_recall': 0.8067632850241546, 'eval_f1': 0.5114854517611026, 'eval_runtime': 0.1958, 'eval_samples_per_second': 153.211, 'eval_steps_per_second': 20.428, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.764706  0.962963  0.852459     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.750000  0.500000  0.600000      6.0
FirstName               0.519231  0.900000  0.658537     30.0
Industry                0.600000  1.000000  0.750000     12.0
LastName                0.500000  0.875000  0.636364      8.0
MustHaves               0.266234  0.820000  0.401961     50.0
NiceToHaves             0.333333  0.100000  0.153846     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.230769  1.000000  0.375000      3.0
PreferredNeighborhood   0.750000  0.923077  0.827586     13.0
PropertyType            0.533333  0.923077  0.676056     26.0
SpaceSize               0.000000  0.000000  0.000000      0.0
UrgencyScore            0.137500  0.550000  0.220000     20.0
micro avg               0.374439  0.806763  0.511485    207.0
macro av

0,1
eval/f1,▁▅▆▆▆▆▇▇▇█
eval/loss,█▄▃▂▂▁▁▁▁▁
eval/precision,█▂▁▁▁▁▂▂▂▂
eval/recall,▁▄▆▇▇▇████
eval/runtime,▂▂▃▆▅▅▁█▄▄
eval/samples_per_second,▇▇▅▃▄▄█▁▅▅
eval/steps_per_second,▇▇▅▃▄▄█▁▅▅
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.51149
eval/loss,0.35404
eval/precision,0.37444
eval/recall,0.80676
eval/runtime,0.1958
eval/samples_per_second,153.211
eval/steps_per_second,20.428
test/f1,0.51149
test/loss,0.35404
test/precision,0.37444


[34m[1mwandb[0m: Agent Starting Run: nava49qn with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 2.4261890777067035e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.095405778172583
[34m[1mwandb[0m: 	weight_decay: 0.1084644941979936


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.1915,1.915583,1.0,0.0,0.0
2,1.5729,1.296791,0.088235,0.013216,0.022989
3,1.1015,0.92158,0.285714,0.484581,0.359477
4,0.8178,0.723686,0.202144,0.581498,0.3
5,0.6568,0.63045,0.254386,0.638767,0.363864
6,0.561,0.579999,0.278937,0.647577,0.38992
7,0.5141,0.560218,0.272727,0.674009,0.388325
8,0.4873,0.549757,0.270833,0.687225,0.388543


{'eval_loss': 0.4962116479873657, 'eval_precision': 0.31069958847736623, 'eval_recall': 0.7294685990338164, 'eval_f1': 0.43578643578643583, 'eval_runtime': 0.1978, 'eval_samples_per_second': 151.649, 'eval_steps_per_second': 20.22, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.408163  0.740741  0.526316     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.437500  0.933333  0.595745     30.0
Industry                0.625000  0.416667  0.500000     12.0
LastName                0.350000  0.875000  0.500000      8.0
MustHaves               0.220513  0.860000  0.351020     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.750000  0.923077  0.827586     13.0
PropertyType            0.545455  0.923077  0.685714     26.0
SpaceSize               0.000000  0.000000  0.000000      0.0
UrgencyScore            0.141176  0.600000  0.228571     20.0
micro avg               0.310700  0.729469  0.435786    207.0
macro av

0,1
eval/f1,▁▁▇▆▇▇▇▇▇█
eval/loss,█▅▃▂▂▁▁▁▁▁
eval/precision,█▁▃▂▂▂▂▂▂▃
eval/recall,▁▁▆▇▇▇▇█▇█
eval/runtime,▃▁▅▃▁▄▅▃█▇
eval/samples_per_second,▆▇▄▆█▅▄▆▁▂
eval/steps_per_second,▆▇▄▆█▅▄▆▁▂
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.43579
eval/loss,0.49621
eval/precision,0.3107
eval/recall,0.72947
eval/runtime,0.1978
eval/samples_per_second,151.649
eval/steps_per_second,20.22
test/f1,0.43579
test/loss,0.49621
test/precision,0.3107


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6ts6x8hm with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-08
[34m[1mwandb[0m: 	learning_rate: 2.7075577345480815e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.12117330079198034
[34m[1mwandb[0m: 	weight_decay: 0.12896577646380605


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,4.093,3.35589,0.0,0.0,0.0
2,2.4119,1.813997,1.0,0.0,0.0
3,1.6215,1.455445,1.0,0.0,0.0
4,1.3172,1.178579,0.309091,0.14978,0.20178
5,1.0758,0.995284,0.302752,0.436123,0.357401
6,0.9277,0.885039,0.248485,0.54185,0.34072
7,0.8336,0.832214,0.236059,0.559471,0.332026
8,0.7955,0.813153,0.237918,0.563877,0.334641


{'eval_loss': 0.921531617641449, 'eval_precision': 0.34415584415584416, 'eval_recall': 0.5120772946859904, 'eval_f1': 0.4116504854368932, 'eval_runtime': 0.1792, 'eval_samples_per_second': 167.443, 'eval_steps_per_second': 11.163, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.276596  0.481481  0.351351     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.394366  0.933333  0.554455     30.0
Industry                0.000000  0.000000  0.000000     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.218310  0.620000  0.322917     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.733333  0.846154  0.785714     13.0
PropertyType            0.954545  0.807692  0.875000     26.0
UrgencyScore            0.181818  0.100000  0.129032     20.0
micro avg               0.344156  0.512077  0.411650    207.0
macro avg               0.212228  0.291435  0.232190    207.0
weighted

0,1
eval/f1,▁▁▁▄▇▇▇▇▇█
eval/loss,█▄▃▂▂▁▁▁▂▁
eval/precision,▁██▃▃▃▃▃▃▃
eval/recall,▁▁▁▃▆███▆▇
eval/runtime,▃▂▂▂▃▂▂▂█▁
eval/samples_per_second,▆▇▇▇▆▇▇▇▁█
eval/steps_per_second,▆▇▇▇▆▇▇▇▁█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.41165
eval/loss,0.92153
eval/precision,0.34416
eval/recall,0.51208
eval/runtime,0.1792
eval/samples_per_second,167.443
eval/steps_per_second,11.163
test/f1,0.41165
test/loss,0.92153
test/precision,0.34416


[34m[1mwandb[0m: Agent Starting Run: 871i1ph5 with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-08
[34m[1mwandb[0m: 	learning_rate: 2.7184894108615504e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.07140174335332633
[34m[1mwandb[0m: 	weight_decay: 0.16170546909295097


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.9636,2.886349,0.0,0.0,0.0
2,2.1853,1.756293,1.0,0.0,0.0
3,1.5736,1.418085,0.0,0.0,0.0
4,1.2855,1.154484,0.32,0.176211,0.227273
5,1.0577,0.983101,0.304094,0.45815,0.365554
6,0.9181,0.879721,0.244094,0.546256,0.337415
7,0.8291,0.829911,0.23694,0.559471,0.332896
8,0.7931,0.811847,0.235294,0.563877,0.332036


{'eval_loss': 0.9090259671211243, 'eval_precision': 0.34177215189873417, 'eval_recall': 0.5217391304347826, 'eval_f1': 0.4130019120458891, 'eval_runtime': 0.1815, 'eval_samples_per_second': 165.27, 'eval_steps_per_second': 11.018, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.294118  0.555556  0.384615     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.388889  0.933333  0.549020     30.0
Industry                0.000000  0.000000  0.000000     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.218310  0.620000  0.322917     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.733333  0.846154  0.785714     13.0
PropertyType            0.954545  0.807692  0.875000     26.0
UrgencyScore            0.142857  0.100000  0.117647     20.0
micro avg               0.341772  0.521739  0.413002    207.0
macro avg               0.210158  0.297133  0.233455    207.0
weighted

0,1
eval/f1,▁▁▁▅▇▇▇▇▇█
eval/loss,█▄▃▂▂▁▁▁▂▁
eval/precision,▁█▁▃▃▃▃▃▃▃
eval/recall,▁▁▁▃▇███▇▇
eval/runtime,▁▂▁▁▅▃▂▄█▁
eval/samples_per_second,█▇██▄▆▇▅▁█
eval/steps_per_second,█▇██▄▆▇▅▁█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.413
eval/loss,0.90903
eval/precision,0.34177
eval/recall,0.52174
eval/runtime,0.1815
eval/samples_per_second,165.27
eval/steps_per_second,11.018
test/f1,0.413
test/loss,0.90903
test/precision,0.34177


[34m[1mwandb[0m: Agent Starting Run: kjajvhwu with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 1.8998621798329924e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.17349310280911223
[34m[1mwandb[0m: 	weight_decay: 0.10215660830999083


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,4.2433,3.976261,0.0,0.0,0.0
2,3.4194,2.499381,1.0,0.0,0.0
3,2.0522,1.770505,1.0,0.0,0.0
4,1.6189,1.477401,1.0,0.0,0.0
5,1.3554,1.251379,0.185185,0.022026,0.03937
6,1.1618,1.080416,0.336364,0.325991,0.331096
7,1.0133,0.971603,0.295082,0.475771,0.36425
8,0.922,0.90003,0.264444,0.524229,0.351551
9,0.8655,0.861951,0.257862,0.54185,0.349432
10,0.8349,0.848859,0.265957,0.550661,0.35868


{'eval_loss': 0.8994320034980774, 'eval_precision': 0.3353658536585366, 'eval_recall': 0.5314009661835749, 'eval_f1': 0.4112149532710281, 'eval_runtime': 0.1796, 'eval_samples_per_second': 167.06, 'eval_steps_per_second': 11.137, 'epoch': 10.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.320755  0.629630  0.425000     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.388889  0.933333  0.549020     30.0
Industry                0.000000  0.000000  0.000000     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.205298  0.620000  0.308458     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.666667  0.769231  0.714286     13.0
PropertyType            0.956522  0.846154  0.897959     26.0
SpaceSize               0.000000  0.000000  0.000000      0.0
UrgencyScore            0.153846  0.100000  0.121212     20.0
micro avg               0.335366  0.531401  0.411215    207.0
macro av

0,1
eval/f1,▁▁▁▁▂▇▇▇▇▇▇█
eval/loss,█▅▃▂▂▂▁▁▁▁▁▁
eval/precision,▁███▂▃▃▃▃▃▃▃
eval/recall,▁▁▁▁▁▅▇███▇█
eval/runtime,█▂▁▁▁▁▂▂▂▂▆▁
eval/samples_per_second,▁▇▇███▇▇▇▇▃█
eval/steps_per_second,▁▇▇███▇▇▇▇▃█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.41121
eval/loss,0.89943
eval/precision,0.33537
eval/recall,0.5314
eval/runtime,0.1796
eval/samples_per_second,167.06
eval/steps_per_second,11.137
test/f1,0.41121
test/loss,0.89943
test/precision,0.33537


[34m[1mwandb[0m: Agent Starting Run: vdymknxp with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 1.5459177463138774e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.11167627726074604
[34m[1mwandb[0m: 	weight_decay: 0.17701019921701744


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.8793,3.436171,0.0,0.0,0.0
2,2.8194,2.23925,1.0,0.0,0.0
3,1.9911,1.857397,1.0,0.0,0.0
4,1.7312,1.631842,1.0,0.0,0.0
5,1.5337,1.472355,1.0,0.0,0.0
6,1.3975,1.35063,0.130435,0.013216,0.024
7,1.296,1.281676,0.271429,0.0837,0.127946
8,1.2507,1.257448,0.2375,0.0837,0.123779


{'eval_loss': 1.2268346548080444, 'eval_precision': 0.14925373134328357, 'eval_recall': 0.04830917874396135, 'eval_f1': 0.072992700729927, 'eval_runtime': 0.1798, 'eval_samples_per_second': 166.883, 'eval_steps_per_second': 11.126, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.000000  0.000000  0.000000     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.000000  0.000000  0.000000     30.0
Industry                0.000000  0.000000  0.000000     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.151515  0.200000  0.172414     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.000000  0.000000  0.000000     13.0
PropertyType            0.000000  0.000000  0.000000     26.0
UrgencyScore            0.000000  0.000000  0.000000     20.0
micro avg               0.149254  0.048309  0.072993    207.0
macro avg               0.011655  0.015385  0.013263    207.0
weighted

0,1
eval/f1,▁▁▁▁▁▂███▅
eval/loss,█▄▃▂▂▁▁▁▁▁
eval/precision,▁████▂▃▃▃▂
eval/recall,▁▁▁▁▁▂███▅
eval/runtime,▅█▄▁▅▂▄▆▃▁
eval/samples_per_second,▄▁▅▇▄▇▅▃▆█
eval/steps_per_second,▄▁▅▇▄▇▅▃▆█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.07299
eval/loss,1.22683
eval/precision,0.14925
eval/recall,0.04831
eval/runtime,0.1798
eval/samples_per_second,166.883
eval/steps_per_second,11.126
test/f1,0.07299
test/loss,1.22683
test/precision,0.14925


[34m[1mwandb[0m: Agent Starting Run: zfqohv03 with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 2.3771149316657312e-05
[34m[1mwandb[0m: 	num_train_epochs: 6
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.09551466044456997
[34m[1mwandb[0m: 	weight_decay: 0.15300996087601934


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,4.0226,3.146833,0.0,0.0,0.0
2,2.3748,1.856341,1.0,0.0,0.0
3,1.6872,1.554773,1.0,0.0,0.0
4,1.4443,1.352384,0.0,0.0,0.0
5,1.2718,1.22819,0.25,0.048458,0.081181
6,1.1898,1.184087,0.2875,0.101322,0.149837


{'eval_loss': 1.1209046840667725, 'eval_precision': 0.3448275862068966, 'eval_recall': 0.0966183574879227, 'eval_f1': 0.15094339622641512, 'eval_runtime': 0.1788, 'eval_samples_per_second': 167.808, 'eval_steps_per_second': 11.187, 'epoch': 6.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.666667  0.074074  0.133333     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.000000  0.000000  0.000000     30.0
Industry                0.000000  0.000000  0.000000     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.303030  0.200000  0.240964     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.071429  0.076923  0.074074     13.0
PropertyType            0.875000  0.269231  0.411765     26.0
UrgencyScore            0.000000  0.000000  0.000000     20.0
micro avg               0.344828  0.096618  0.150943    207.0
macro avg               0.147394  0.047710  0.066164    207.0
weighted

0,1
eval/f1,▁▁▁▁▅███
eval/loss,█▄▂▂▁▁▁▁
eval/precision,▁██▁▃▃▃▃
eval/recall,▁▁▁▁▄███
eval/runtime,▂▂▂▂▂▂█▁
eval/samples_per_second,▇▇▇▇▆▇▁█
eval/steps_per_second,▇▇▇▇▆▇▁█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.15094
eval/loss,1.1209
eval/precision,0.34483
eval/recall,0.09662
eval/runtime,0.1788
eval/samples_per_second,167.808
eval/steps_per_second,11.187
test/f1,0.15094
test/loss,1.1209
test/precision,0.34483


[34m[1mwandb[0m: Agent Starting Run: bgk0rvdc with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 3.88242648513441e-05
[34m[1mwandb[0m: 	num_train_epochs: 6
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.1646324224478789
[34m[1mwandb[0m: 	weight_decay: 0.07079404399707998


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.918,2.974651,0.0,0.0,0.0
2,2.1197,1.744136,1.0,0.0,0.0
3,1.5846,1.425098,1.0,0.0,0.0
4,1.2913,1.141109,0.253623,0.154185,0.191781
5,1.0629,0.980261,0.308411,0.436123,0.361314
6,0.9465,0.928714,0.299145,0.462555,0.363322


{'eval_loss': 0.854259192943573, 'eval_precision': 0.32628398791540786, 'eval_recall': 0.5217391304347826, 'eval_f1': 0.4014869888475836, 'eval_runtime': 0.1793, 'eval_samples_per_second': 167.353, 'eval_steps_per_second': 11.157, 'epoch': 6.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.273973  0.740741  0.400000     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.551020  0.900000  0.683544     30.0
Industry                0.000000  0.000000  0.000000     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.192090  0.680000  0.299559     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.666667  0.615385  0.640000     13.0
PropertyType            0.950000  0.730769  0.826087     26.0
UrgencyScore            0.000000  0.000000  0.000000     20.0
micro avg               0.326284  0.521739  0.401487    207.0
macro avg               0.202596  0.282069  0.219169    207.0
weighted

0,1
eval/f1,▁▁▁▄▇▇▇█
eval/loss,█▄▃▂▁▁▁▁
eval/precision,▁██▃▃▃▃▃
eval/recall,▁▁▁▃▇▇▇█
eval/runtime,▅▇▄▃█▆▆▁
eval/samples_per_second,▄▂▅▆▁▃▃█
eval/steps_per_second,▄▂▅▆▁▃▃█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.40149
eval/loss,0.85426
eval/precision,0.32628
eval/recall,0.52174
eval/runtime,0.1793
eval/samples_per_second,167.353
eval/steps_per_second,11.157
test/f1,0.40149
test/loss,0.85426
test/precision,0.32628


[34m[1mwandb[0m: Agent Starting Run: 716quqb0 with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 2.8691898116880498e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.1884894726915351
[34m[1mwandb[0m: 	weight_decay: 0.08162298928076095


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,4.1132,3.760559,0.000472,0.017621,0.000919
2,3.0266,2.024872,1.0,0.0,0.0
3,1.8313,1.664765,1.0,0.0,0.0
4,1.5176,1.355027,0.212121,0.030837,0.053846
5,1.2246,1.077729,0.294118,0.242291,0.2657
6,0.9819,0.887443,0.263923,0.480176,0.340625
7,0.8192,0.784529,0.279343,0.524229,0.364472
8,0.727,0.716876,0.273859,0.581498,0.372355
9,0.6712,0.685129,0.273642,0.599119,0.375691
10,0.6381,0.673882,0.279352,0.60793,0.382802


{'eval_loss': 0.5914047360420227, 'eval_precision': 0.3045356371490281, 'eval_recall': 0.6811594202898551, 'eval_f1': 0.4208955223880597, 'eval_runtime': 0.181, 'eval_samples_per_second': 165.741, 'eval_steps_per_second': 11.049, 'epoch': 10.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.322034  0.703704  0.441860     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.388889  0.933333  0.549020     30.0
Industry                0.500000  0.250000  0.333333     12.0
LastName                0.315789  0.750000  0.444444      8.0
MustHaves               0.215385  0.840000  0.342857     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.705882  0.923077  0.800000     13.0
PropertyType            0.750000  0.923077  0.827586     26.0
SpaceSize               0.000000  0.000000  0.000000      0.0
UrgencyScore            0.118644  0.350000  0.177215     20.0
micro avg               0.304536  0.681159  0.420896    207.0
macro av

0,1
eval/f1,▁▁▁▂▅▇▇▇▇▇▇█
eval/loss,█▄▃▃▂▂▁▁▁▁▁▁
eval/precision,▁██▂▃▃▃▃▃▃▃▃
eval/recall,▁▁▁▁▃▆▆▇▇▇▇█
eval/runtime,█▁▂▂▁▂▂▂▂▂▂▁
eval/samples_per_second,▁█▇▇█▇▇▇▆▇▇█
eval/steps_per_second,▁█▇▇█▇▇▇▆▇▇█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.4209
eval/loss,0.5914
eval/precision,0.30454
eval/recall,0.68116
eval/runtime,0.181
eval/samples_per_second,165.741
eval/steps_per_second,11.049
test/f1,0.4209
test/loss,0.5914
test/precision,0.30454


[34m[1mwandb[0m: Agent Starting Run: he0eyd39 with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-08
[34m[1mwandb[0m: 	learning_rate: 1.6176872910426266e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	seed: 2024
[34m[1mwandb[0m: 	warmup_ratio: 0.15769547120136446
[34m[1mwandb[0m: 	weight_decay: 0.1873601163092351


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.7119,2.90381,1.0,0.0,0.0
2,2.1195,1.711367,1.0,0.0,0.0
3,1.4566,1.244985,0.156028,0.096916,0.119565
4,1.1166,0.981035,0.235443,0.409692,0.299035
5,0.9034,0.837043,0.240631,0.537445,0.332425
6,0.7728,0.765017,0.210191,0.581498,0.308772
7,0.7131,0.716376,0.260618,0.594714,0.362416
8,0.6688,0.704618,0.25046,0.599119,0.353247


{'eval_loss': 0.643658459186554, 'eval_precision': 0.2984749455337691, 'eval_recall': 0.6618357487922706, 'eval_f1': 0.4114114114114114, 'eval_runtime': 0.1985, 'eval_samples_per_second': 151.16, 'eval_steps_per_second': 20.155, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.392157  0.740741  0.512821     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.314607  0.933333  0.470588     30.0
Industry                0.666667  0.333333  0.444444     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.201878  0.860000  0.326996     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.750000  0.923077  0.827586     13.0
PropertyType            0.727273  0.923077  0.813559     26.0
SpaceSize               0.000000  0.000000  0.000000      0.0
UrgencyScore            0.122449  0.300000  0.173913     20.0
micro avg               0.298475  0.661836  0.411411    207.0
macro av

0,1
eval/f1,▁▁▃▆▇▆▇▇▇█
eval/loss,█▄▃▂▂▁▁▁▁▁
eval/precision,██▁▂▂▁▂▂▂▂
eval/recall,▁▁▂▅▇▇▇▇▇█
eval/runtime,▁▁▁▂▂▃▂▂█▃
eval/samples_per_second,███▇▇▆▇▇▁▆
eval/steps_per_second,███▇▇▆▇▇▁▆
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.41141
eval/loss,0.64366
eval/precision,0.29847
eval/recall,0.66184
eval/runtime,0.1985
eval/samples_per_second,151.16
eval/steps_per_second,20.155
test/f1,0.41141
test/loss,0.64366
test/precision,0.29847


[34m[1mwandb[0m: Agent Starting Run: rs3cjpyu with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 3.29807296563652e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 2024
[34m[1mwandb[0m: 	warmup_ratio: 0.18597549112923667
[34m[1mwandb[0m: 	weight_decay: 0.15436883524843426


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.7679,3.117513,0.0,0.0,0.0
2,2.2643,1.760338,1.0,0.0,0.0
3,1.5745,1.387214,1.0,0.0,0.0
4,1.2602,1.113987,0.214286,0.22467,0.219355
5,1.0283,0.935407,0.25,0.524229,0.338549
6,0.8745,0.823461,0.254132,0.54185,0.345992
7,0.7805,0.764655,0.241636,0.572687,0.339869
8,0.7347,0.746655,0.239927,0.577093,0.338939


{'eval_loss': 0.7331736087799072, 'eval_precision': 0.2824601366742597, 'eval_recall': 0.5990338164251208, 'eval_f1': 0.3839009287925697, 'eval_runtime': 0.1873, 'eval_samples_per_second': 160.145, 'eval_steps_per_second': 10.676, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.363636  0.740741  0.487805     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.301075  0.933333  0.455285     30.0
Industry                1.000000  0.083333  0.153846     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.168831  0.780000  0.277580     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.785714  0.846154  0.814815     13.0
PropertyType            0.800000  0.923077  0.857143     26.0
UrgencyScore            0.066667  0.050000  0.057143     20.0
micro avg               0.282460  0.599034  0.383901    207.0
macro avg               0.268148  0.335126  0.238740    207.0
weighted

0,1
eval/f1,▁▁▁▅▇▇▇▇▇█
eval/loss,█▄▃▂▂▁▁▁▁▁
eval/precision,▁██▃▃▃▃▃▃▃
eval/recall,▁▁▁▄▇▇██▇█
eval/runtime,▂▁▃▂▂▂▃▂█▃
eval/samples_per_second,▆█▆▇▇▇▆▇▁▆
eval/steps_per_second,▆█▆▇▇▇▆▇▁▆
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.3839
eval/loss,0.73317
eval/precision,0.28246
eval/recall,0.59903
eval/runtime,0.1873
eval/samples_per_second,160.145
eval/steps_per_second,10.676
test/f1,0.3839
test/loss,0.73317
test/precision,0.28246


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3oy6e0z0 with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-08
[34m[1mwandb[0m: 	learning_rate: 1.8872689843567752e-05
[34m[1mwandb[0m: 	num_train_epochs: 6
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	seed: 2024
[34m[1mwandb[0m: 	warmup_ratio: 0.1730111126981364
[34m[1mwandb[0m: 	weight_decay: 0.1458490019435844


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.4917,2.26672,1.0,0.0,0.0
2,1.7767,1.49786,1.0,0.0,0.0
3,1.2993,1.12749,0.216535,0.242291,0.22869
4,1.0355,0.930488,0.259912,0.519824,0.346549
5,0.8813,0.835439,0.22963,0.546256,0.323338
6,0.8033,0.804317,0.244048,0.54185,0.336525


{'eval_loss': 0.8514395356178284, 'eval_precision': 0.2896039603960396, 'eval_recall': 0.5652173913043478, 'eval_f1': 0.3829787234042553, 'eval_runtime': 0.191, 'eval_samples_per_second': 157.087, 'eval_steps_per_second': 20.945, 'epoch': 6.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.327586  0.703704  0.447059     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.282828  0.933333  0.434109     30.0
Industry                0.000000  0.000000  0.000000     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.186869  0.740000  0.298387     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.642857  0.692308  0.666667     13.0
PropertyType            0.888889  0.923077  0.905660     26.0
UrgencyScore            0.000000  0.000000  0.000000     20.0
micro avg               0.289604  0.565217  0.382979    207.0
macro avg               0.179156  0.307109  0.211683    207.0
weighted

0,1
eval/f1,▁▁▅▇▇▇▇█
eval/loss,█▄▃▂▁▁▂▁
eval/precision,██▁▁▁▁▁▂
eval/recall,▁▁▄▇██▇█
eval/runtime,▃▂█▃▄▂▂▁
eval/samples_per_second,▅▇▁▅▅▇▆█
eval/steps_per_second,▅▇▁▅▅▇▆█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.38298
eval/loss,0.85144
eval/precision,0.2896
eval/recall,0.56522
eval/runtime,0.191
eval/samples_per_second,157.087
eval/steps_per_second,20.945
test/f1,0.38298
test/loss,0.85144
test/precision,0.2896


[34m[1mwandb[0m: Agent Starting Run: 295cclj6 with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-08
[34m[1mwandb[0m: 	learning_rate: 3.381341771079718e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.11479631021957344
[34m[1mwandb[0m: 	weight_decay: 0.16371014024124247


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.3609,1.910253,1.0,0.0,0.0
2,1.4977,1.11781,0.296774,0.202643,0.240838
3,0.9113,0.727722,0.263918,0.563877,0.359551
4,0.6277,0.56436,0.215297,0.669604,0.325831
5,0.4869,0.508673,0.259934,0.69163,0.377858
6,0.4023,0.455529,0.277689,0.762115,0.407059
7,0.3578,0.451773,0.322394,0.735683,0.448322
8,0.337,0.452093,0.311111,0.740088,0.43807


{'eval_loss': 0.3822023272514343, 'eval_precision': 0.3470715835140998, 'eval_recall': 0.7729468599033816, 'eval_f1': 0.47904191616766467, 'eval_runtime': 0.1914, 'eval_samples_per_second': 156.727, 'eval_steps_per_second': 20.897, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.534884  0.851852  0.657143     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       1.000000  0.500000  0.666667      6.0
FirstName               0.509091  0.933333  0.658824     30.0
Industry                0.500000  0.833333  0.625000     12.0
LastName                0.368421  0.875000  0.518519      8.0
MustHaves               0.240000  0.840000  0.373333     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.166667  0.666667  0.266667      3.0
PreferredNeighborhood   0.705882  0.923077  0.800000     13.0
PropertyType            0.533333  0.923077  0.676056     26.0
SpaceSize               0.000000  0.000000  0.000000      0.0
UrgencyScore            0.150000  0.450000  0.225000     20.0
micro avg               0.347072  0.772947  0.479042    207.0
macro av

0,1
eval/f1,▁▅▆▆▇▇█▇██
eval/loss,█▄▃▂▂▁▁▁▁▁
eval/precision,█▂▁▁▁▂▂▂▂▂
eval/recall,▁▃▆▇▇█████
eval/runtime,▂▃▂█▄▄▂██▁
eval/samples_per_second,▇▆▆▁▅▅▆▁▁█
eval/steps_per_second,▇▆▆▁▅▅▆▁▁█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.47904
eval/loss,0.3822
eval/precision,0.34707
eval/recall,0.77295
eval/runtime,0.1914
eval/samples_per_second,156.727
eval/steps_per_second,20.897
test/f1,0.47904
test/loss,0.3822
test/precision,0.34707


[34m[1mwandb[0m: Agent Starting Run: zhfni1c7 with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-08
[34m[1mwandb[0m: 	learning_rate: 1.571314426339921e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 2024
[34m[1mwandb[0m: 	warmup_ratio: 0.052833062580020046
[34m[1mwandb[0m: 	weight_decay: 0.1551848542550971


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,4.0601,3.446084,0.0,0.0,0.0
2,2.7603,2.090016,1.0,0.0,0.0
3,1.8935,1.745114,1.0,0.0,0.0
4,1.6228,1.531681,1.0,0.0,0.0
5,1.448,1.381726,0.0,0.0,0.0
6,1.3125,1.27564,0.133333,0.008811,0.016529
7,1.2343,1.213124,0.294872,0.101322,0.15082
8,1.1887,1.192023,0.287234,0.118943,0.168224


{'eval_loss': 1.13032066822052, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.10628019323671498, 'eval_f1': 0.16117216117216118, 'eval_runtime': 0.1822, 'eval_samples_per_second': 164.698, 'eval_steps_per_second': 10.98, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.500000  0.074074  0.129032     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.000000  0.000000  0.000000     30.0
Industry                0.000000  0.000000  0.000000     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.297297  0.220000  0.252874     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.071429  0.076923  0.074074     13.0
PropertyType            0.727273  0.307692  0.432432     26.0
UrgencyScore            0.000000  0.000000  0.000000     20.0
micro avg               0.333333  0.106280  0.161172    207.0
macro avg               0.122769  0.052207  0.068339    207.0
weighted

0,1
eval/f1,▁▁▁▁▁▂▇███
eval/loss,█▄▃▂▂▁▁▁▁▁
eval/precision,▁███▁▂▃▃▃▃
eval/recall,▁▁▁▁▁▂▇██▇
eval/runtime,█▂▃▄▁▄▆▁▃▃
eval/samples_per_second,▁▇▆▅█▅▃█▆▆
eval/steps_per_second,▁▇▆▅█▅▃█▆▆
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.16117
eval/loss,1.13032
eval/precision,0.33333
eval/recall,0.10628
eval/runtime,0.1822
eval/samples_per_second,164.698
eval/steps_per_second,10.98
test/f1,0.16117
test/loss,1.13032
test/precision,0.33333


[34m[1mwandb[0m: Agent Starting Run: mfbynm52 with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-08
[34m[1mwandb[0m: 	learning_rate: 1.794351384102816e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 2024
[34m[1mwandb[0m: 	warmup_ratio: 0.1249884245830166
[34m[1mwandb[0m: 	weight_decay: 0.1824784746602227


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.7983,3.244312,0.0,0.0,0.0
2,2.4612,1.920227,1.0,0.0,0.0
3,1.7716,1.644257,1.0,0.0,0.0
4,1.5338,1.435833,1.0,0.0,0.0
5,1.3578,1.286162,0.090909,0.008811,0.016064
6,1.2247,1.177329,0.18254,0.101322,0.130312
7,1.1448,1.120817,0.212996,0.259912,0.234127
8,1.1038,1.099846,0.222973,0.290749,0.25239


{'eval_loss': 1.0317119359970093, 'eval_precision': 0.2792452830188679, 'eval_recall': 0.357487922705314, 'eval_f1': 0.31355932203389825, 'eval_runtime': 0.1796, 'eval_samples_per_second': 167.036, 'eval_steps_per_second': 11.136, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.354167  0.629630  0.453333     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.360000  0.900000  0.514286     30.0
Industry                0.000000  0.000000  0.000000     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.176471  0.480000  0.258065     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.000000  0.000000  0.000000     13.0
PropertyType            1.000000  0.230769  0.375000     26.0
UrgencyScore            0.000000  0.000000  0.000000     20.0
micro avg               0.279245  0.357488  0.313559    207.0
macro avg               0.145434  0.172338  0.123130    207.0
weighted

0,1
eval/f1,▁▁▁▁▁▄▆▇▇█
eval/loss,█▄▃▂▂▁▁▁▁▁
eval/precision,▁███▂▂▂▃▃▃
eval/recall,▁▁▁▁▁▃▆▇▇█
eval/runtime,▁▃▁▂▂▂▃▂█▁
eval/samples_per_second,▇▆█▆▇▇▆▇▁█
eval/steps_per_second,▇▆█▆▇▇▆▇▁█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.31356
eval/loss,1.03171
eval/precision,0.27925
eval/recall,0.35749
eval/runtime,0.1796
eval/samples_per_second,167.036
eval/steps_per_second,11.136
test/f1,0.31356
test/loss,1.03171
test/precision,0.27925


[34m[1mwandb[0m: Agent Starting Run: 2so15jty with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 2.914598344224308e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.09504806914492864
[34m[1mwandb[0m: 	weight_decay: 0.11149848312889447


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.6778,2.756124,1.0,0.0,0.0
2,2.0536,1.723375,1.0,0.0,0.0
3,1.5566,1.39193,1.0,0.0,0.0
4,1.2696,1.128056,0.209559,0.251101,0.228457
5,1.0416,0.946392,0.228745,0.497797,0.313454
6,0.8783,0.815437,0.219298,0.550661,0.313676
7,0.7611,0.738829,0.251923,0.577093,0.350736
8,0.6893,0.688591,0.245747,0.572687,0.343915
9,0.6463,0.663474,0.223301,0.60793,0.326627
10,0.6245,0.65615,0.235294,0.61674,0.340633


{'eval_loss': 0.6516163945198059, 'eval_precision': 0.2806652806652807, 'eval_recall': 0.6521739130434783, 'eval_f1': 0.3924418604651163, 'eval_runtime': 0.1878, 'eval_samples_per_second': 159.731, 'eval_steps_per_second': 10.649, 'epoch': 10.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.384615  0.740741  0.506329     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.373333  0.933333  0.533333     30.0
Industry                0.500000  0.083333  0.142857     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.190045  0.840000  0.309963     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.750000  0.923077  0.827586     13.0
PropertyType            0.705882  0.923077  0.800000     26.0
UrgencyScore            0.100000  0.400000  0.160000     20.0
micro avg               0.280665  0.652174  0.392442    207.0
macro avg               0.231067  0.372582  0.252313    207.0
weighted

0,1
eval/f1,▁▁▁▅▇▇▇▇▇▇▇█
eval/loss,█▅▃▃▂▂▁▁▁▁▁▁
eval/precision,███▁▁▁▁▁▁▁▁▂
eval/recall,▁▁▁▄▆▇▇▇██▇█
eval/runtime,▂▁▂▃▅▂▁▅▂▂█▃
eval/samples_per_second,▇█▇▆▄▇█▄▆▇▁▆
eval/steps_per_second,▇█▇▆▄▇█▄▆▇▁▆
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.39244
eval/loss,0.65162
eval/precision,0.28067
eval/recall,0.65217
eval/runtime,0.1878
eval/samples_per_second,159.731
eval/steps_per_second,10.649
test/f1,0.39244
test/loss,0.65162
test/precision,0.28067


[34m[1mwandb[0m: Agent Starting Run: dgfjyoji with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-08
[34m[1mwandb[0m: 	learning_rate: 2.6443577523784096e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 2024
[34m[1mwandb[0m: 	warmup_ratio: 0.07872553277244237
[34m[1mwandb[0m: 	weight_decay: 0.07558590647321581


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.6291,2.596981,1.0,0.0,0.0
2,2.1012,1.832845,1.0,0.0,0.0
3,1.6506,1.493016,1.0,0.0,0.0
4,1.3604,1.238799,0.206522,0.0837,0.119122
5,1.1432,1.059617,0.201102,0.321586,0.247458
6,0.996,0.953157,0.246719,0.414097,0.309211
7,0.9137,0.897525,0.251082,0.511013,0.33672
8,0.8645,0.87809,0.265909,0.515419,0.350825


{'eval_loss': 0.8110065460205078, 'eval_precision': 0.2925, 'eval_recall': 0.5652173913043478, 'eval_f1': 0.38550247116968694, 'eval_runtime': 0.185, 'eval_samples_per_second': 162.167, 'eval_steps_per_second': 10.811, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.344828  0.740741  0.470588     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.341463  0.933333  0.500000     30.0
Industry                0.000000  0.000000  0.000000     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.180180  0.800000  0.294118     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.714286  0.769231  0.740741     13.0
PropertyType            1.000000  0.730769  0.844444     26.0
UrgencyScore            0.000000  0.000000  0.000000     20.0
micro avg               0.292500  0.565217  0.385502    207.0
macro avg               0.198520  0.305698  0.219222    207.0
weighted

0,1
eval/f1,▁▁▁▃▅▇▇▇▇█
eval/loss,█▅▄▃▂▂▁▁▁▁
eval/precision,███▁▁▁▁▂▂▂
eval/recall,▁▁▁▂▅▆▇▇▇█
eval/runtime,▁▂▅▂▁▂▂▁█▂
eval/samples_per_second,█▇▄▇█▇▇█▁▇
eval/steps_per_second,█▇▄▇█▇▇█▁▇
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.3855
eval/loss,0.81101
eval/precision,0.2925
eval/recall,0.56522
eval/runtime,0.185
eval/samples_per_second,162.167
eval/steps_per_second,10.811
test/f1,0.3855
test/loss,0.81101
test/precision,0.2925


[34m[1mwandb[0m: Agent Starting Run: 861bv6g6 with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 1.741207676352433e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	seed: 2024
[34m[1mwandb[0m: 	warmup_ratio: 0.06725039771374533
[34m[1mwandb[0m: 	weight_decay: 0.1261615595594546


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.3383,2.040728,1.0,0.0,0.0
2,1.7121,1.460508,1.0,0.0,0.0
3,1.2685,1.097407,0.219178,0.352423,0.27027
4,0.9943,0.873049,0.23301,0.528634,0.32345
5,0.8057,0.735092,0.219931,0.563877,0.31644
6,0.6774,0.668961,0.176768,0.61674,0.274779
7,0.6162,0.614482,0.218182,0.634361,0.32469
8,0.5585,0.590163,0.250423,0.651982,0.361858
9,0.528,0.573696,0.247961,0.669604,0.361905
10,0.5112,0.570864,0.268166,0.682819,0.385093


{'eval_loss': 0.4890672564506531, 'eval_precision': 0.2901353965183752, 'eval_recall': 0.7246376811594203, 'eval_f1': 0.4143646408839779, 'eval_runtime': 0.1916, 'eval_samples_per_second': 156.575, 'eval_steps_per_second': 20.877, 'epoch': 10.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.375000  0.666667  0.480000     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.439394  0.966667  0.604167     30.0
Industry                0.500000  0.416667  0.454545     12.0
LastName                0.350000  0.875000  0.500000      8.0
MustHaves               0.210256  0.820000  0.334694     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.166667  0.333333  0.222222      3.0
PreferredNeighborhood   0.705882  0.923077  0.800000     13.0
PropertyType            0.571429  0.923077  0.705882     26.0
SpaceSize               0.000000  0.000000  0.000000      0.0
UrgencyScore            0.127451  0.650000  0.213115     20.0
micro avg               0.290135  0.724638  0.414365    207.0
macro av

0,1
eval/f1,▁▁▆▆▆▆▆▇▇███
eval/loss,█▅▄▃▂▂▂▁▁▁▁▁
eval/precision,██▁▁▁▁▁▂▂▂▂▂
eval/recall,▁▁▄▆▆▇▇▇▇███
eval/runtime,▃▃▆▄█▃▇▄▃▃▃▁
eval/samples_per_second,▆▆▃▅▁▆▂▅▆▆▆█
eval/steps_per_second,▆▆▃▅▁▆▂▅▆▆▆█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.41436
eval/loss,0.48907
eval/precision,0.29014
eval/recall,0.72464
eval/runtime,0.1916
eval/samples_per_second,156.575
eval/steps_per_second,20.877
test/f1,0.41436
test/loss,0.48907
test/precision,0.29014


[34m[1mwandb[0m: Agent Starting Run: quxwt2ao with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 1.967440547470033e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.07281068244594793
[34m[1mwandb[0m: 	weight_decay: 0.08952780520589279


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.9236,3.165771,0.0,0.0,0.0
2,2.427,1.920173,1.0,0.0,0.0
3,1.731,1.583752,1.0,0.0,0.0
4,1.475,1.375938,0.0,0.0,0.0
5,1.2997,1.21352,0.081081,0.013216,0.022727
6,1.1625,1.100694,0.289593,0.281938,0.285714
7,1.0683,1.042227,0.277955,0.38326,0.322222
8,1.0313,1.021936,0.277439,0.400881,0.327928


{'eval_loss': 0.9544509649276733, 'eval_precision': 0.3104575163398693, 'eval_recall': 0.45893719806763283, 'eval_f1': 0.37037037037037035, 'eval_runtime': 0.1896, 'eval_samples_per_second': 158.224, 'eval_steps_per_second': 10.548, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.301587  0.703704  0.422222     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.465517  0.900000  0.613636     30.0
Industry                0.000000  0.000000  0.000000     12.0
LastName                0.000000  0.000000  0.000000      8.0
MustHaves               0.184049  0.600000  0.281690     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.000000  0.000000  0.000000     13.0
PropertyType            0.904762  0.730769  0.808511     26.0
UrgencyScore            0.000000  0.000000  0.000000     20.0
micro avg               0.310458  0.458937  0.370370    207.0
macro avg               0.142763  0.225729  0.163543    207.0
weighted

0,1
eval/f1,▁▁▁▁▁▆▇▇▇█
eval/loss,█▄▃▂▂▁▁▁▁▁
eval/precision,▁██▁▂▃▃▃▃▃
eval/recall,▁▁▁▁▁▅▇▇▇█
eval/runtime,▁▂▁▂▁▃▃▂█▃
eval/samples_per_second,█▇█▇█▆▆▇▁▆
eval/steps_per_second,█▇█▇█▆▆▇▁▆
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.37037
eval/loss,0.95445
eval/precision,0.31046
eval/recall,0.45894
eval/runtime,0.1896
eval/samples_per_second,158.224
eval/steps_per_second,10.548
test/f1,0.37037
test/loss,0.95445
test/precision,0.31046


[34m[1mwandb[0m: Agent Starting Run: efu5t3kx with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 3.1841023640322293e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.08704576742821075
[34m[1mwandb[0m: 	weight_decay: 0.1414413257070451


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.376,1.85101,1.0,0.0,0.0
2,1.4595,1.108562,0.284404,0.273128,0.278652
3,0.9049,0.751103,0.292343,0.555066,0.382979
4,0.642,0.601071,0.204735,0.647577,0.311111
5,0.5054,0.544071,0.284387,0.674009,0.4
6,0.4186,0.494446,0.285965,0.718062,0.409034
7,0.3767,0.486507,0.294333,0.709251,0.416021
8,0.3518,0.481988,0.305085,0.713656,0.427441


{'eval_loss': 0.38571587204933167, 'eval_precision': 0.3142292490118577, 'eval_recall': 0.7681159420289855, 'eval_f1': 0.44600280504908835, 'eval_runtime': 0.1928, 'eval_samples_per_second': 155.563, 'eval_steps_per_second': 20.742, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.511628  0.814815  0.628571     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.430769  0.933333  0.589474     30.0
Industry                0.333333  0.750000  0.461538     12.0
LastName                0.350000  0.875000  0.500000      8.0
MustHaves               0.245614  0.840000  0.380090     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.230769  1.000000  0.375000      3.0
PreferredNeighborhood   0.800000  0.923077  0.857143     13.0
PropertyType            0.615385  0.923077  0.738462     26.0
SpaceSize               0.000000  0.000000  0.000000      0.0
UrgencyScore            0.115385  0.600000  0.193548     20.0
micro avg               0.314229  0.768116  0.446003    207.0
macro av

0,1
eval/f1,▁▅▇▆▇▇████
eval/loss,█▄▃▂▂▂▁▁▁▁
eval/precision,█▂▂▁▂▂▂▂▂▂
eval/recall,▁▃▆▇▇█▇███
eval/runtime,▄▇▄▅▂▂█▅▃▁
eval/samples_per_second,▅▂▅▄▇▇▁▄▆█
eval/steps_per_second,▅▂▅▄▇▇▁▄▆█
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.446
eval/loss,0.38572
eval/precision,0.31423
eval/recall,0.76812
eval/runtime,0.1928
eval/samples_per_second,155.563
eval/steps_per_second,20.742
test/f1,0.446
test/loss,0.38572
test/precision,0.31423


[34m[1mwandb[0m: Agent Starting Run: kqvm5hkr with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-06
[34m[1mwandb[0m: 	learning_rate: 2.1456444840473287e-05
[34m[1mwandb[0m: 	num_train_epochs: 8
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	warmup_ratio: 0.15652278094238398
[34m[1mwandb[0m: 	weight_decay: 0.16988406436552506


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.9589,2.885399,0.0,0.0,0.0
2,1.9819,1.495308,1.0,0.0,0.0
3,1.2497,1.02364,0.322097,0.378855,0.348178
4,0.8923,0.788931,0.207607,0.577093,0.305361
5,0.7096,0.673348,0.214171,0.585903,0.313679
6,0.5967,0.614782,0.254833,0.638767,0.364322
7,0.5413,0.587006,0.265455,0.643172,0.375804
8,0.5102,0.57605,0.262877,0.651982,0.374684


{'eval_loss': 0.49636584520339966, 'eval_precision': 0.28846153846153844, 'eval_recall': 0.7246376811594203, 'eval_f1': 0.41265474552957354, 'eval_runtime': 0.1988, 'eval_samples_per_second': 150.871, 'eval_steps_per_second': 20.116, 'epoch': 8.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.425532  0.740741  0.540541     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.000000  0.000000  0.000000      6.0
FirstName               0.394366  0.933333  0.554455     30.0
Industry                0.454545  0.416667  0.434783     12.0
LastName                0.350000  0.875000  0.500000      8.0
MustHaves               0.223404  0.840000  0.352941     50.0
NiceToHaves             0.000000  0.000000  0.000000     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.000000  0.000000  0.000000      3.0
PreferredNeighborhood   0.857143  0.923077  0.888889     13.0
PropertyType            0.500000  0.923077  0.648649     26.0
SpaceSize               0.000000  0.000000  0.000000      0.0
UrgencyScore            0.114286  0.600000  0.192000     20.0
micro avg               0.288462  0.724638  0.412655    207.0
macro av

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Structured Field-Level Metrics: {'CompanyName_precision': np.float64(0.4255), 'CompanyName_recall': np.float64(0.7407), 'CompanyName_f1': np.float64(0.5405), 'CurrentNeighborhood_precision': np.float64(0.0), 'CurrentNeighborhood_recall': np.float64(0.0), 'CurrentNeighborhood_f1': np.float64(0.0), 'DecisionMakerRole_precision': np.float64(0.0), 'DecisionMakerRole_recall': np.float64(0.0), 'DecisionMakerRole_f1': np.float64(0.0), 'FirstName_precision': np.float64(0.3944), 'FirstName_recall': np.float64(0.9333), 'FirstName_f1': np.float64(0.5545), 'Industry_precision': np.float64(0.4545), 'Industry_recall': np.float64(0.4167), 'Industry_f1': np.float64(0.4348), 'LastName_precision': np.float64(0.35), 'LastName_recall': np.float64(0.875), 'LastName_f1': np.float64(0.5), 'MustHaves_precision': np.float64(0.2234), 'MustHaves_recall': np.float64(0.84), 'MustHaves_f1': np.float64(0.3529), 'NiceToHaves_precision': np.float64(0.0), 'NiceToHaves_recall': np.float64(0.0), 'NiceToHaves_f1': np.floa

0,1
eval/f1,▁▁▇▆▆▇▇▇▇█
eval/loss,█▄▃▂▂▁▁▁▁▁
eval/precision,▁█▃▂▂▃▃▃▃▃
eval/recall,▁▁▅▇▇▇▇▇▇█
eval/runtime,▁▁▂▂▃▂▂█▇▃
eval/samples_per_second,██▇▇▆▇▇▁▂▆
eval/steps_per_second,██▇▇▆▇▇▁▂▆
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.41265
eval/loss,0.49637
eval/precision,0.28846
eval/recall,0.72464
eval/runtime,0.1988
eval/samples_per_second,150.871
eval/steps_per_second,20.116
test/f1,0.41265
test/loss,0.49637
test/precision,0.28846


[34m[1mwandb[0m: Agent Starting Run: ftq0b935 with config:
[34m[1mwandb[0m: 	adam_epsilon: 1e-08
[34m[1mwandb[0m: 	learning_rate: 3.490888792906094e-05
[34m[1mwandb[0m: 	num_train_epochs: 10
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	seed: 2024
[34m[1mwandb[0m: 	warmup_ratio: 0.11172000568333944
[34m[1mwandb[0m: 	weight_decay: 0.07853602858613586


Some weights of WeightedDistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,3.6188,2.009308,1.0,0.0,0.0
2,1.5391,1.128656,0.304348,0.123348,0.175549
3,0.9014,0.720431,0.21797,0.577093,0.316425
4,0.6062,0.558827,0.224368,0.665198,0.335556
5,0.4501,0.486472,0.260664,0.726872,0.383721
6,0.3575,0.446833,0.265941,0.753304,0.393103
7,0.3102,0.465991,0.356347,0.704846,0.473373
8,0.2684,0.439707,0.343434,0.748899,0.470914
9,0.2407,0.446318,0.345912,0.726872,0.46875
10,0.2306,0.442867,0.351579,0.735683,0.475783


{'eval_loss': 0.354266494512558, 'eval_precision': 0.3876146788990826, 'eval_recall': 0.8164251207729468, 'eval_f1': 0.5256609642301712, 'eval_runtime': 0.196, 'eval_samples_per_second': 153.061, 'eval_steps_per_second': 20.408, 'epoch': 10.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                       precision    recall  f1-score  support
CompanyName             0.609756  0.925926  0.735294     27.0
CurrentNeighborhood     0.000000  0.000000  0.000000      1.0
DecisionMakerRole       0.666667  0.666667  0.666667      6.0
FirstName               0.491228  0.933333  0.643678     30.0
Industry                0.478261  0.916667  0.628571     12.0
LastName                0.350000  0.875000  0.500000      8.0
MustHaves               0.307692  0.800000  0.444444     50.0
NiceToHaves             0.285714  0.200000  0.235294     10.0
PainPoint               0.000000  0.000000  0.000000      1.0
PreferredLeaseTerm      0.230769  1.000000  0.375000      3.0
PreferredNeighborhood   0.800000  0.923077  0.857143     13.0
PropertyType            0.585366  0.923077  0.716418     26.0
SpaceSize               0.000000  0.000000  0.000000      0.0
UrgencyScore            0.162500  0.650000  0.260000     20.0
micro avg               0.387615  0.816425  0.525661    207.0
macro av

0,1
eval/f1,▁▃▅▅▆▆▇▇▇▇▇█
eval/loss,█▄▃▂▂▁▁▁▁▁▁▁
eval/precision,█▂▁▁▁▁▂▂▂▂▂▃
eval/recall,▁▂▆▇▇▇▇▇▇▇▇█
eval/runtime,▁▁▃▂▃▂▁▂▁▅█▂
eval/samples_per_second,██▅▇▆▇█▇█▄▁▇
eval/steps_per_second,██▅▇▆▇█▇█▄▁▇
test/f1,▁
test/loss,▁
test/precision,▁

0,1
eval/f1,0.52566
eval/loss,0.35427
eval/precision,0.38761
eval/recall,0.81643
eval/runtime,0.196
eval/samples_per_second,153.061
eval/steps_per_second,20.408
test/f1,0.52566
test/loss,0.35427
test/precision,0.38761


Error in callback <bound method _WandbInit._post_run_cell_hook of <wandb.sdk.wandb_init._WandbInit object at 0x7e04f4aa2110>> (for post_run_cell):


BrokenPipeError: [Errno 32] Broken pipe

In [21]:
'''
Predict and Evaluate results on DistilBERT model without training
'''

def run_base_model():
    print("Running base DistilBERT model without training...")

    with wandb.init(project="distilbert-final", name="distilbert_base", reinit=True) as run:

        # Load pre-trained base model
        base_model = DistilBertForTokenClassification.from_pretrained(
            "distilbert-base-uncased",
            num_labels=len(ENTITY_TAGS),
            id2label=id2label,
            label2id=label2id
        ).to("cuda" if torch.cuda.is_available() else "cpu")

        # Setup trainer
        eval_args = TrainingArguments(
            output_dir="./base_model_results",
            per_device_eval_batch_size=8,
            report_to="wandb",
            logging_dir="./logs",
            seed=42
        )

        # Initiate trainer
        trainer = Trainer(
            model=base_model,
            args=eval_args,
            eval_dataset=test_data,
            tokenizer=tokenizer,
            compute_metrics=compute_metrics
        )

        # Evaluate trainer
        trainer.evaluate()
        metrics = trainer.evaluate(test_data)
        print(metrics)
        df, entity_metrics = evaluate_model(trainer, test_data, id2label, tokenizer)
        print(df)
        wandb_table = wandb.Table(dataframe=df)
        wandb.log({f"{run.name} Predictions": wandb_table})

        entity_metrics = get_entity_metrics(entity_metrics)
        wandb.log({**{f"test_{k}": float(v) for k, v in entity_metrics.items()}})

        # Save all predictions to Google Drive
        pred_path = os.path.join(output_dir, f"{wandb.run.name}_predictions.json")
        df.to_json(pred_path, orient="records", indent=2)
        print(f"Predictions saved to: {pred_path}")

#run_base_model()

Error in callback <bound method _WandbInit._pre_run_cell_hook of <wandb.sdk.wandb_init._WandbInit object at 0x7e04f4aa2110>> (for pre_run_cell):


BrokenPipeError: [Errno 32] Broken pipe

Error in callback <bound method _WandbInit._post_run_cell_hook of <wandb.sdk.wandb_init._WandbInit object at 0x7e04f4aa2110>> (for post_run_cell):


BrokenPipeError: [Errno 32] Broken pipe