<a href="https://colab.research.google.com/github/bksgupta/made-wit-ml-changes/blob/main/mlflow_run.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from collections import Counter, OrderedDict
import ipywidgets as widgets
import itertools
import json
import pandas as pd
from urllib.request import urlopen

In [2]:
# Load projects
url = "https://raw.githubusercontent.com/GokuMohandas/MadeWithML/main/datasets/projects.json"
projects = json.loads(urlopen(url).read())
print (json.dumps(projects[-305], indent=2))

{
  "id": 2106,
  "created_on": "2020-08-08 15:06:18",
  "title": "Fast NST for Videos (+ person segmentation) \ud83c\udfa5 + \u26a1\ud83d\udcbb + \ud83c\udfa8 = \u2764\ufe0f",
  "description": "Create NST videos and pick separate styles for the person in the video and for the background.",
  "tags": [
    "code",
    "tutorial",
    "video",
    "computer-vision",
    "style-transfer",
    "neural-style-transfer"
  ]
}


In [3]:
# Load tags
url = "https://raw.githubusercontent.com/GokuMohandas/MadeWithML/main/datasets/tags.json"
tags = json.loads(urlopen(url).read())
tags_dict = {}
for item in tags:
    key = item.pop("tag")
    tags_dict[key] = item
print (f"{len(tags_dict)} tags")

400 tags


In [4]:
# Create dataframe
df = pd.DataFrame(projects)
print (f"{len(df)} projects")
df.head(5)

2032 projects


Unnamed: 0,id,created_on,title,description,tags
0,1,2020-02-17 06:30:41,Machine Learning Basics,A practical set of notebooks on machine learni...,"[code, tutorial, keras, pytorch, tensorflow, d..."
1,2,2020-02-17 06:41:45,Deep Learning with Electronic Health Record (E...,A comprehensive look at recent machine learnin...,"[article, tutorial, deep-learning, health, ehr]"
2,3,2020-02-20 06:07:59,Automatic Parking Management using computer vi...,Detecting empty and parked spaces in car parki...,"[code, tutorial, video, python, machine-learni..."
3,4,2020-02-20 06:21:57,Easy street parking using region proposal netw...,Get a text on your phone whenever a nearby par...,"[code, tutorial, python, pytorch, machine-lear..."
4,5,2020-02-20 06:29:18,Deep Learning based parking management system ...,Fastai provides easy to use wrappers to quickl...,"[code, tutorial, fastai, deep-learning, parkin..."


In [5]:
df["text"] = df.title + " " + df.description

In [6]:
def filter(l, include=[], exclude=[]):
    """Filter a list using inclusion and exclusion lists of items."""
    filtered = [item for item in l if item in include and item not in exclude]
    return filtered

In [7]:
# Inclusion/exclusion criteria for tags
include = list(tags_dict.keys())
exclude = ["machine-learning", "deep-learning",  "data-science",
           "neural-networks", "python", "r", "visualization", "wandb"]

In [8]:
# Filter tags for each project
df.tags = df.tags.apply(filter, include=include, exclude=exclude)
tags = Counter(itertools.chain.from_iterable(df.tags.values))

In [9]:
# Filter tags that have fewer than <min_tag_freq> occurrences
min_tag_freq = 30
tags_above_freq = Counter(tag for tag in tags.elements() 
                          if tags[tag] >= min_tag_freq)
df.tags = df.tags.apply(filter, include=list(tags_above_freq.keys()))

In [10]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import re

In [11]:
# Remove projects with no more remaining relevant tags
df = df[df.tags.map(len) > 0]
print (f"{len(df)} projects")

1439 projects


In [12]:
nltk.download('stopwords')
STOPWORDS = stopwords.words('english')
porter = PorterStemmer()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [13]:
def preprocess(text, lower=True, stem=False, 
               filters="[!\"'#$%&()*\+,-./:;<=>?@\\\[\]^_`{|}~]", 
               stopwords=STOPWORDS):
    """Conditional preprocessing on our text unique to our task."""
    # Lower
    if lower: 
        text = text.lower()

    # Remove stopwords
    pattern = re.compile(r'\b(' + r'|'.join(stopwords) + r')\b\s*')
    text = pattern.sub('', text)

    # Spacing and filters
    text = re.sub(r"([-;;.,!?<=>])", r" \1 ", text)
    text = re.sub(filters, r"", text)
    text = re.sub('[^A-Za-z0-9]+', ' ', text) # remove non alphanumeric chars
    text = re.sub(' +', ' ', text)  # remove multiple spaces
    text = text.strip()

    # Remove links
    text = re.sub(r'http\S+', '', text)

    # Stemming
    if stem:
        text = " ".join([porter.stem(word) for word in text.split(' ')])

    return text

In [14]:
# Apply to dataframe
original_df = df.copy()
df.text = df.text.apply(preprocess, lower=True, stem=False)
print (f"{original_df.text.values[0]}\n{df.text.values[0]}")

Machine Learning Basics A practical set of notebooks on machine learning basics, implemented in both TF2.0 + Keras and PyTorch.
machine learning basics practical set notebooks machine learning basics implemented tf2 0 keras pytorch


In [15]:
all_tags = list(itertools.chain.from_iterable(df.tags.values))


In [16]:
import numpy as np
import random

In [17]:
# Set seeds for reproducibility
seed = 42
np.random.seed(seed)
random.seed(seed)

In [18]:
df = df.sample(frac=1).reset_index(drop=True)

In [19]:
# Get data
X = df.text.to_numpy()
y = df.tags

In [20]:
class LabelEncoder(object):
    """Label encoder for tag labels."""
    def __init__(self, class_to_index={}):
        self.class_to_index = class_to_index
        self.index_to_class = {v: k for k, v in self.class_to_index.items()}
        self.classes = list(self.class_to_index.keys())

    def __len__(self):
        return len(self.class_to_index)

    def __str__(self):
        return f"<LabelEncoder(num_classes={len(self)})>"

    def fit(self, y):
        classes = np.unique(list(itertools.chain.from_iterable(y)))
        for i, class_ in enumerate(classes):
            self.class_to_index[class_] = i
        self.index_to_class = {v: k for k, v in self.class_to_index.items()}
        self.classes = list(self.class_to_index.keys())
        return self

    def encode(self, y):
        y_one_hot = np.zeros((len(y), len(self.class_to_index)), dtype=int)
        for i, item in enumerate(y):
            for class_ in item:
                y_one_hot[i][self.class_to_index[class_]] = 1
        return y_one_hot

    def decode(self, y):
        classes = []
        for i, item in enumerate(y):
            indices = np.where(item == 1)[0]
            classes.append([self.index_to_class[index] for index in indices])
        return classes

    def save(self, fp):
        with open(fp, 'w') as fp:
            contents = {'class_to_index': self.class_to_index}
            json.dump(contents, fp, indent=4, sort_keys=False)

    @classmethod
    def load(cls, fp):
        with open(fp, 'r') as fp:
            kwargs = json.load(fp=fp)
        return cls(**kwargs)

In [21]:
!pip install scikit-multilearn==0.2.0 -q

[?25l[K     |███▊                            | 10kB 13.9MB/s eta 0:00:01[K     |███████▍                        | 20kB 20.4MB/s eta 0:00:01[K     |███████████                     | 30kB 17.6MB/s eta 0:00:01[K     |██████████████▊                 | 40kB 15.0MB/s eta 0:00:01[K     |██████████████████▍             | 51kB 8.6MB/s eta 0:00:01[K     |██████████████████████          | 61kB 8.9MB/s eta 0:00:01[K     |█████████████████████████▊      | 71kB 8.6MB/s eta 0:00:01[K     |█████████████████████████████▍  | 81kB 9.5MB/s eta 0:00:01[K     |████████████████████████████████| 92kB 6.2MB/s 
[?25h

In [22]:
from sklearn.model_selection import train_test_split
from skmultilearn.model_selection.measures import get_combination_wise_output_matrix

In [23]:
# Split sizes
train_size = 0.7
val_size = 0.15
test_size = 0.15

In [24]:
from skmultilearn.model_selection import IterativeStratification

In [25]:
def iterative_train_test_split(X, y, train_size):
    """Custom iterative train test split which 
    'maintains balanced representation with respect 
    to order-th label combinations.'
    """
    stratifier = IterativeStratification(
        n_splits=2, order=1, sample_distribution_per_fold=[1.0-train_size, train_size, ])
    train_indices, test_indices = next(stratifier.split(X, y))
    X_train, y_train = X[train_indices], y[train_indices]
    X_test, y_test = X[test_indices], y[test_indices]
    return X_train, X_test, y_train, y_test

In [26]:
from sklearn.metrics import precision_recall_fscore_support
import torch

In [27]:
def set_seeds(seed=1234):
    """Set seeds for reproducibility."""
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed) # multi-GPU

In [28]:
def get_data_splits(df, train_size=0.7):
    """"""
    # Get data
    X = df.text.to_numpy()
    y = df.tags

    # Binarize y
    label_encoder = LabelEncoder()
    label_encoder.fit(y)
    y = label_encoder.encode(y)

    # Split
    X_train, X_, y_train, y_ = iterative_train_test_split(
        X, y, train_size=train_size)
    X_val, X_test, y_val, y_test = iterative_train_test_split(
        X_, y_, train_size=0.5)
    
    return X_train, X_val, X_test, y_train, y_val, y_test, label_encoder

In [29]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

In [30]:
set_seeds()

In [31]:
class Tokenizer(object):
    def __init__(self, char_level, num_tokens=None, 
                 pad_token="<PAD>", oov_token="<UNK>",
                 token_to_index=None):
        self.char_level = char_level
        self.separator = '' if self.char_level else ' '
        if num_tokens: num_tokens -= 2 # pad + unk tokens
        self.num_tokens = num_tokens
        self.pad_token = pad_token
        self.oov_token = oov_token
        if not token_to_index:
            token_to_index = {pad_token: 0, oov_token: 1}
        self.token_to_index = token_to_index
        self.index_to_token = {v: k for k, v in self.token_to_index.items()}

    def __len__(self):
        return len(self.token_to_index)

    def __str__(self):
        return f"<Tokenizer(num_tokens={len(self)})>"

    def fit_on_texts(self, texts):
        if not self.char_level:
            texts = [text.split(" ") for text in texts]
        all_tokens = [token for text in texts for token in text]
        counts = Counter(all_tokens).most_common(self.num_tokens)
        self.min_token_freq = counts[-1][1]
        for token, count in counts:
            index = len(self)
            self.token_to_index[token] = index
            self.index_to_token[index] = token
        return self

    def texts_to_sequences(self, texts):
        sequences = []
        for text in texts:
            if not self.char_level:
                text = text.split(' ')
            sequence = []
            for token in text:
                sequence.append(self.token_to_index.get(
                    token, self.token_to_index[self.oov_token]))
            sequences.append(np.asarray(sequence))
        return sequences

    def sequences_to_texts(self, sequences):
        texts = []
        for sequence in sequences:
            text = []
            for index in sequence:
                text.append(self.index_to_token.get(index, self.oov_token))
            texts.append(self.separator.join([token for token in text]))
        return texts

    def save(self, fp):
        with open(fp, "w") as fp:
            contents = {
                "char_level": self.char_level,
                "oov_token": self.oov_token,
                "token_to_index": self.token_to_index
            }
            json.dump(contents, fp, indent=4, sort_keys=False)

    @classmethod
    def load(cls, fp):
        with open(fp, "r") as fp:
            kwargs = json.load(fp=fp)
        return cls(**kwargs)

In [32]:
def pad_sequences(sequences, max_seq_len=0):
    """Pad sequences to max length in sequence."""
    max_seq_len = max(max_seq_len, max(len(sequence) for sequence in sequences))
    padded_sequences = np.zeros((len(sequences), max_seq_len))
    for i, sequence in enumerate(sequences):
        padded_sequences[i][:len(sequence)] = sequence
    return padded_sequences

In [33]:
class CNNTextDataset(torch.utils.data.Dataset):
    def __init__(self, X, y, max_filter_size):
        self.X = X
        self.y = y
        self.max_filter_size = max_filter_size

    def __len__(self):
        return len(self.y)

    def __str__(self):
        return f"<Dataset(N={len(self)})>"

    def __getitem__(self, index):
        X = self.X[index]
        y = self.y[index]
        return [X, y]

    def collate_fn(self, batch):
        """Processing on a batch."""
        # Get inputs
        batch = np.array(batch, dtype=object)
        X = batch[:, 0]
        y = np.stack(batch[:, 1], axis=0)

        # Pad inputs
        X = pad_sequences(sequences=X, max_seq_len=self.max_filter_size)

        # Cast
        X = torch.LongTensor(X.astype(np.int32))
        y = torch.FloatTensor(y.astype(np.int32))

        return X, y

    def create_dataloader(self, batch_size, shuffle=False, drop_last=False):
        return torch.utils.data.DataLoader(
            dataset=self,
            batch_size=batch_size,
            collate_fn=self.collate_fn,
            shuffle=shuffle,
            drop_last=drop_last,
            pin_memory=True)

In [34]:
# Arguments
embedding_dim = 128
num_filters = 128
hidden_dim = 128
dropout_p = 0.5

In [35]:
class CNN(nn.Module):
    def __init__(self, embedding_dim, vocab_size, num_filters, filter_sizes,
                 hidden_dim, dropout_p, num_classes, padding_idx=0):
        super(CNN, self).__init__()

        # Initialize embeddings
        self.embeddings = nn.Embedding(
                embedding_dim=embedding_dim, num_embeddings=vocab_size,
                padding_idx=padding_idx)

        # Conv weights
        self.filter_sizes = filter_sizes
        self.conv = nn.ModuleList(
            [nn.Conv1d(in_channels=embedding_dim,
                       out_channels=num_filters,
                       kernel_size=f) for f in filter_sizes])

        # FC weights
        self.dropout = nn.Dropout(dropout_p)
        self.fc1 = nn.Linear(num_filters*len(filter_sizes), hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, inputs, channel_first=False):

        # Embed
        x_in, = inputs
        x_in = self.embeddings(x_in)
        if not channel_first:
            x_in = x_in.transpose(1, 2)  # (N, channels, sequence length)

        z = []
        max_seq_len = x_in.shape[2]
        for i, f in enumerate(self.filter_sizes):

            # `SAME` padding
            padding_left = int(
                (self.conv[i].stride[0]*(max_seq_len-1) - max_seq_len + self.filter_sizes[i])/2)
            padding_right = int(math.ceil(
                (self.conv[i].stride[0]*(max_seq_len-1) - max_seq_len + self.filter_sizes[i])/2))

            # Conv
            _z = self.conv[i](F.pad(x_in, (padding_left, padding_right)))

            # Pool
            _z = F.max_pool1d(_z, _z.size(2)).squeeze(2)
            z.append(_z)

        # Concat outputs
        z = torch.cat(z, 1)

        # FC
        z = self.fc1(z)
        z = self.dropout(z)
        z = self.fc2(z)

        return z

In [36]:
from pathlib import Path
from sklearn.metrics import precision_recall_curve

In [37]:
# Determining the best threshold
def find_best_threshold(y_true, y_prob):
    """Find the best threshold for maximum F1."""
    precisions, recalls, thresholds = precision_recall_curve(y_true, y_prob)
    f1s = (2 * precisions * recalls) / (precisions + recalls)
    return thresholds[np.argmax(f1s)]

In [38]:
!pip install mlflow pyngrok -q

[K     |████████████████████████████████| 14.2MB 204kB/s 
[K     |████████████████████████████████| 747kB 44.2MB/s 
[K     |████████████████████████████████| 645kB 46.6MB/s 
[K     |████████████████████████████████| 153kB 56.3MB/s 
[K     |████████████████████████████████| 1.1MB 52.0MB/s 
[K     |████████████████████████████████| 174kB 56.1MB/s 
[K     |████████████████████████████████| 61kB 10.6MB/s 
[K     |████████████████████████████████| 81kB 13.2MB/s 
[K     |████████████████████████████████| 71kB 11.9MB/s 
[K     |████████████████████████████████| 81kB 13.4MB/s 
[K     |████████████████████████████████| 71kB 12.0MB/s 
[?25h  Building wheel for pyngrok (setup.py) ... [?25l[?25hdone
  Building wheel for alembic (setup.py) ... [?25l[?25hdone
  Building wheel for prometheus-flask-exporter (setup.py) ... [?25l[?25hdone
  Building wheel for databricks-cli (setup.py) ... [?25l[?25hdone


In [39]:
from argparse import Namespace
import mlflow
from pathlib import Path

In [40]:
# Specify arguments
args = Namespace(
    char_level=True,
    filter_sizes=list(range(1, 11)),
    batch_size=64,
    embedding_dim=128, 
    num_filters=128,
    hidden_dim=128, 
    dropout_p=0.5,
    lr=2e-4,
    num_epochs=200,
    patience=10,
)

In [41]:
# Set tracking URI
MODEL_REGISTRY = Path("experiments")
Path(MODEL_REGISTRY).mkdir(exist_ok=True) # create experiments dir
mlflow.set_tracking_uri("file://" + str(MODEL_REGISTRY.absolute()))

In [42]:
# Trainer (modified for experiment tracking)
class Trainer(object):
    def __init__(self, model, device, loss_fn=None, 
                 optimizer=None, scheduler=None):

        # Set params
        self.model = model
        self.device = device
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.scheduler = scheduler

    def train_step(self, dataloader):
        """Train step."""
        # Set model to train mode
        self.model.train()
        loss = 0.0

        # Iterate over train batches
        for i, batch in enumerate(dataloader):
            # Step
            batch = [item.to(self.device) for item in batch]
            inputs, targets = batch[:-1], batch[-1]
            self.optimizer.zero_grad()  # Reset gradients
            z = self.model(inputs)  # Forward pass
            J = self.loss_fn(z, targets)  # Define loss
            J.backward()  # Backward pass
            self.optimizer.step()  # Update weights

            # Cumulative Metrics
            loss += (J.detach().item() - loss) / (i + 1)

        return loss

    def eval_step(self, dataloader):
        """Validation or test step."""
        # Set model to eval mode
        self.model.eval()
        loss = 0.0
        y_trues, y_probs = [], []

        # Iterate over val batches
        with torch.no_grad():
            for i, batch in enumerate(dataloader):

                # Step
                batch = [item.to(self.device) for item in batch]  # Set device
                inputs, y_true = batch[:-1], batch[-1]
                z = self.model(inputs)  # Forward pass
                J = self.loss_fn(z, y_true).item()

                # Cumulative Metrics
                loss += (J - loss) / (i + 1)

                # Store outputs
                y_prob = torch.sigmoid(z).cpu().numpy()
                y_probs.extend(y_prob)
                y_trues.extend(y_true.cpu().numpy())

        return loss, np.vstack(y_trues), np.vstack(y_probs)

    def predict_step(self, dataloader):
        """Prediction step."""
        # Set model to eval mode
        self.model.eval()
        y_probs = []

        # Iterate over val batches
        with torch.no_grad():
            for i, batch in enumerate(dataloader):

                # Forward pass w/ inputs
                inputs, targets = batch[:-1], batch[-1]
                y_prob = self.model(inputs)

                # Store outputs
                y_probs.extend(y_prob)

        return np.vstack(y_probs)
    
    def train(self, num_epochs, patience, train_dataloader, val_dataloader):
        best_val_loss = np.inf
        for epoch in range(num_epochs):
            # Steps
            train_loss = self.train_step(dataloader=train_dataloader)
            val_loss, _, _ = self.eval_step(dataloader=val_dataloader)
            self.scheduler.step(val_loss)

            # Early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model = self.model
                _patience = patience  # reset _patience
            else:
                _patience -= 1
            if not _patience:  # 0
                print("Stopping early!")
                break

            # Tracking
            mlflow.log_metrics(
                {"train_loss": train_loss, "val_loss": val_loss}, step=epoch
            )

            # Logging
            print(
                f"Epoch: {epoch+1} | "
                f"train_loss: {train_loss:.5f}, "
                f"val_loss: {val_loss:.5f}, "
                f"lr: {self.optimizer.param_groups[0]['lr']:.2E}, "
                f"_patience: {_patience}"
            )

        return best_model, best_val_loss

In [43]:
def train_cnn(args, df):
    """Train a CNN using specific arguments."""

    # Set seeds
    set_seeds()

    # Get data splits
    preprocessed_df = df.copy()
    preprocessed_df.text = preprocessed_df.text.apply(preprocess, lower=True)
    X_train, X_val, X_test, y_train, y_val, y_test, label_encoder = get_data_splits(preprocessed_df)
    num_classes = len(label_encoder)

    # Set device
    cuda = True
    device = torch.device("cuda" if (
        torch.cuda.is_available() and cuda) else "cpu")
    torch.set_default_tensor_type("torch.FloatTensor")
    if device.type == "cuda":
        torch.set_default_tensor_type("torch.cuda.FloatTensor")

    # Tokenize
    tokenizer = Tokenizer(char_level=args.char_level)
    tokenizer.fit_on_texts(texts=X_train)
    vocab_size = len(tokenizer)

    # Convert texts to sequences of indices
    X_train = np.array(tokenizer.texts_to_sequences(X_train))
    X_val = np.array(tokenizer.texts_to_sequences(X_val))
    X_test = np.array(tokenizer.texts_to_sequences(X_test))

    # Class weights
    counts = np.bincount([label_encoder.class_to_index[class_] for class_ in all_tags])
    class_weights = {i: 1.0/count for i, count in enumerate(counts)}

    # Create datasets
    train_dataset = CNNTextDataset(
        X=X_train, y=y_train, max_filter_size=max(args.filter_sizes))
    val_dataset = CNNTextDataset(
        X=X_val, y=y_val, max_filter_size=max(args.filter_sizes))
    test_dataset = CNNTextDataset(
        X=X_test, y=y_test, max_filter_size=max(args.filter_sizes))

    # Create dataloaders
    train_dataloader = train_dataset.create_dataloader(
        batch_size=args.batch_size)
    val_dataloader = val_dataset.create_dataloader(
        batch_size=args.batch_size)
    test_dataloader = test_dataset.create_dataloader(
        batch_size=args.batch_size)

    # Initialize model
    model = CNN(
        embedding_dim=args.embedding_dim, vocab_size=vocab_size,
        num_filters=args.num_filters, filter_sizes=args.filter_sizes,
        hidden_dim=args.hidden_dim, dropout_p=args.dropout_p,
        num_classes=num_classes)
    model = model.to(device)

    # Define loss
    class_weights_tensor = torch.Tensor(np.array(list(class_weights.values())))
    loss_fn = nn.BCEWithLogitsLoss(weight=class_weights_tensor)

    # Define optimizer & scheduler
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.1, patience=5)

    # Trainer module
    trainer = Trainer(
        model=model, device=device, loss_fn=loss_fn, 
        optimizer=optimizer, scheduler=scheduler)

    # Train
    best_model, best_val_loss = trainer.train(
        args.num_epochs, args.patience, train_dataloader, val_dataloader)

    # Best threshold for f1
    train_loss, y_true, y_prob = trainer.eval_step(dataloader=train_dataloader)
    precisions, recalls, thresholds = precision_recall_curve(y_true.ravel(), y_prob.ravel())
    threshold = find_best_threshold(y_true.ravel(), y_prob.ravel())

    # Determine predictions using threshold
    test_loss, y_true, y_prob = trainer.eval_step(dataloader=test_dataloader)
    y_pred = np.array([np.where(prob >= threshold, 1, 0) for prob in y_prob])

    # Evaluate (simple)
    metrics = precision_recall_fscore_support(y_test, y_pred, average="weighted")
    performance = {"precision": metrics[0], "recall": metrics[1], "f1": metrics[2]}
    threshold_dict={"thresh": str(threshold)}
    return {
        "args": args,
        "tokenizer": tokenizer,
        "label_encoder": label_encoder,
        "model": best_model,
        "performance": performance,
        "best_val_loss": best_val_loss,
        "threshold": threshold_dict,
    }

In [44]:
import tempfile

In [45]:
# Set experiment
mlflow.set_experiment(experiment_name="baselines")

INFO: 'baselines' does not exist. Creating a new experiment


In [46]:
def save_dict(d, filepath):
    """Save dict to a json file."""
    with open(filepath, "w") as fp:
        json.dump(d, indent=2, sort_keys=False, fp=fp)

In [47]:
# Tracking
with mlflow.start_run(run_name="cnn") as run:

    # Train & evaluate
    artifacts = train_cnn(args=args, df=df)    
    
    # Log key metrics
    #mlflow.log_metrics({"precision": artifacts["performance"]["overall"]["precision"]})
    mlflow.log_metrics({"precision": artifacts["performance"]["precision"]})
    #mlflow.log_metrics({"recall": artifacts["performance"]["overall"]["recall"]})
    mlflow.log_metrics({"recall": artifacts["performance"]["recall"]})
    #mlflow.log_metrics({"f1": artifacts["performance"]["overall"]["f1"]})
    mlflow.log_metrics({"f1": artifacts["performance"]["f1"]})

    # Log artifacts
    with tempfile.TemporaryDirectory() as dp:
        artifacts["tokenizer"].save(Path(dp, "tokenizer.json"))
        artifacts["label_encoder"].save(Path(dp, "label_encoder.json"))
        torch.save(artifacts["model"].state_dict(), Path(dp, "model.pt"))
        save_dict(artifacts["performance"], Path(dp, "performance.json"))
        save_dict(artifacts["threshold"]["thresh"], Path(dp, "threshold.json"))
        mlflow.log_artifacts(dp)

    # Log parameters
    mlflow.log_params(vars(artifacts["args"]))

  return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch: 1 | train_loss: 0.00533, val_loss: 0.00307, lr: 2.00E-04, _patience: 10
Epoch: 2 | train_loss: 0.00385, val_loss: 0.00287, lr: 2.00E-04, _patience: 10
Epoch: 3 | train_loss: 0.00347, val_loss: 0.00269, lr: 2.00E-04, _patience: 10
Epoch: 4 | train_loss: 0.00324, val_loss: 0.00265, lr: 2.00E-04, _patience: 10
Epoch: 5 | train_loss: 0.00307, val_loss: 0.00259, lr: 2.00E-04, _patience: 10
Epoch: 6 | train_loss: 0.00297, val_loss: 0.00253, lr: 2.00E-04, _patience: 10
Epoch: 7 | train_loss: 0.00285, val_loss: 0.00247, lr: 2.00E-04, _patience: 10
Epoch: 8 | train_loss: 0.00273, val_loss: 0.00240, lr: 2.00E-04, _patience: 10
Epoch: 9 | train_loss: 0.00263, val_loss: 0.00233, lr: 2.00E-04, _patience: 10
Epoch: 10 | train_loss: 0.00249, val_loss: 0.00224, lr: 2.00E-04, _patience: 10
Epoch: 11 | train_loss: 0.00238, val_loss: 0.00216, lr: 2.00E-04, _patience: 10
Epoch: 12 | train_loss: 0.00229, val_loss: 0.00211, lr: 2.00E-04, _patience: 10
Epoch: 13 | train_loss: 0.00218, val_loss: 0.0020

  _warn_prf(average, modifier, msg_start, len(result))


In [48]:
from pyngrok import ngrok

In [49]:
# https://stackoverflow.com/questions/61615818/setting-up-mlflow-on-google-colab
get_ipython().system_raw("mlflow server -h 0.0.0.0 -p 5000 --backend-store-uri $PWD/experiments/ &")
ngrok.kill()
ngrok.set_auth_token("")
ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
print("MLflow Tracking UI:", ngrok_tunnel.public_url)

MLflow Tracking UI: https://7cc201a91b7a.ngrok.io


In [50]:
def load_dict(filepath):
    """Load a dict from a json file."""
    with open(filepath, "r") as fp:
        d = json.load(fp)
    return d

In [51]:
# Load all runs from experiment
experiment_id = mlflow.get_experiment_by_name("baselines").experiment_id
all_runs = mlflow.search_runs(experiment_ids=experiment_id, order_by=["metrics.best_val_loss ASC"])
print (all_runs)

                             run_id  ... tags.mlflow.runName
0  c4afb73c0b504fabb18433f1718ac727  ...                 cnn

[1 rows x 25 columns]


In [52]:
# Best run
device = torch.device("cpu")
best_run_id = all_runs.iloc[0].run_id
best_run = mlflow.get_run(run_id=best_run_id)
client = mlflow.tracking.MlflowClient()
with tempfile.TemporaryDirectory() as dp:
    client.download_artifacts(run_id=best_run_id, path="", dst_path=dp)
    tokenizer = Tokenizer.load(fp=Path(dp, "tokenizer.json"))
    label_encoder = LabelEncoder.load(fp=Path(dp, "label_encoder.json"))
    model_state = torch.load(Path(dp, "model.pt"), map_location=device)
    performance = load_dict(filepath=Path(dp, "performance.json"))
    threshold = load_dict(filepath=Path(dp, "threshold.json"))

In [53]:
print (json.dumps(performance, indent=2))

{
  "precision": 0.7885779643550789,
  "recall": 0.5708154506437768,
  "f1": 0.6391557453268072
}


In [54]:
# Load artifacts
device = torch.device("cpu")
model = CNN(
    embedding_dim=args.embedding_dim, vocab_size=len(tokenizer),
    num_filters=args.num_filters, filter_sizes=args.filter_sizes,
    hidden_dim=args.hidden_dim, dropout_p=args.dropout_p,
    num_classes=len(label_encoder))
model.load_state_dict(model_state)
model.to(device)

CNN(
  (embeddings): Embedding(39, 128, padding_idx=0)
  (conv): ModuleList(
    (0): Conv1d(128, 128, kernel_size=(1,), stride=(1,))
    (1): Conv1d(128, 128, kernel_size=(2,), stride=(1,))
    (2): Conv1d(128, 128, kernel_size=(3,), stride=(1,))
    (3): Conv1d(128, 128, kernel_size=(4,), stride=(1,))
    (4): Conv1d(128, 128, kernel_size=(5,), stride=(1,))
    (5): Conv1d(128, 128, kernel_size=(6,), stride=(1,))
    (6): Conv1d(128, 128, kernel_size=(7,), stride=(1,))
    (7): Conv1d(128, 128, kernel_size=(8,), stride=(1,))
    (8): Conv1d(128, 128, kernel_size=(9,), stride=(1,))
    (9): Conv1d(128, 128, kernel_size=(10,), stride=(1,))
  )
  (dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=1280, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=34, bias=True)
)

In [55]:
trainer = Trainer(model=model, device=device)

In [56]:
# Dataloader
text = "Transfer learning with BERT for self-supervised learning"
X = np.array(tokenizer.texts_to_sequences([preprocess(text)]))
y_filler = label_encoder.encode([np.array([label_encoder.classes[0]]*len(X))])
dataset = CNNTextDataset(
    X=X, y=y_filler, max_filter_size=max(args.filter_sizes))
dataloader = dataset.create_dataloader(
    batch_size=args.batch_size)

In [57]:
print (json.dumps(threshold, indent=2))

"0.27691326"


In [58]:
import decimal
threshold_val = json.loads(threshold, parse_float = decimal.Decimal)

In [59]:
threshold_val

Decimal('0.27691326')

In [60]:
if 0.0279 > threshold_val:
  print ('he')

In [61]:
# Inference
y_prob = trainer.predict_step(dataloader)
y_pred = np.array([np.where(prob >= threshold_val, 1, 0) for prob in y_prob])
label_encoder.decode(y_pred)

[['natural-language-processing',
  'self-supervised-learning',
  'transfer-learning',
  'transformers']]

In [62]:
!pip install optuna numpyencoder -q

[K     |████████████████████████████████| 307kB 6.6MB/s 
[K     |████████████████████████████████| 81kB 12.7MB/s 
[K     |████████████████████████████████| 143kB 55.7MB/s 
[K     |████████████████████████████████| 51kB 8.9MB/s 
[K     |████████████████████████████████| 112kB 59.7MB/s 
[?25h  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [63]:
import optuna

In [64]:
from argparse import Namespace

In [65]:
# Specify arguments
args = Namespace(
    char_level=True,
    filter_sizes=list(range(1, 11)),
    batch_size=64,
    embedding_dim=128, 
    num_filters=128,
    hidden_dim=128, 
    dropout_p=0.5,
    lr=2e-4,
    num_epochs=200,
    patience=10,
)

In [66]:
# Trainer (modified for experiment tracking)
class Trainer(object):
    def __init__(self, model, device, loss_fn=None, 
                 optimizer=None, scheduler=None, trial=None):

        # Set params
        self.model = model
        self.device = device
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.trial = trial

    def train_step(self, dataloader):
        """Train step."""
        # Set model to train mode
        self.model.train()
        loss = 0.0

        # Iterate over train batches
        for i, batch in enumerate(dataloader):
            # Step
            batch = [item.to(self.device) for item in batch]
            inputs, targets = batch[:-1], batch[-1]
            self.optimizer.zero_grad()  # Reset gradients
            z = self.model(inputs)  # Forward pass
            J = self.loss_fn(z, targets)  # Define loss
            J.backward()  # Backward pass
            self.optimizer.step()  # Update weights

            # Cumulative Metrics
            loss += (J.detach().item() - loss) / (i + 1)

        return loss

    def eval_step(self, dataloader):
        """Validation or test step."""
        # Set model to eval mode
        self.model.eval()
        loss = 0.0
        y_trues, y_probs = [], []

        # Iterate over val batches
        with torch.no_grad():
            for i, batch in enumerate(dataloader):

                # Step
                batch = [item.to(self.device) for item in batch]  # Set device
                inputs, y_true = batch[:-1], batch[-1]
                z = self.model(inputs)  # Forward pass
                J = self.loss_fn(z, y_true).item()

                # Cumulative Metrics
                loss += (J - loss) / (i + 1)

                # Store outputs
                y_prob = torch.sigmoid(z).cpu().numpy()
                y_probs.extend(y_prob)
                y_trues.extend(y_true.cpu().numpy())

        return loss, np.vstack(y_trues), np.vstack(y_probs)

    def predict_step(self, dataloader):
        """Prediction step."""
        # Set model to eval mode
        self.model.eval()
        y_probs = []

        # Iterate over val batches
        with torch.no_grad():
            for i, batch in enumerate(dataloader):

                # Forward pass w/ inputs
                inputs, targets = batch[:-1], batch[-1]
                y_prob = self.model(inputs)

                # Store outputs
                y_probs.extend(y_prob)

        return np.vstack(y_probs)
    
    def train(self, num_epochs, patience, train_dataloader, val_dataloader):
        best_val_loss = np.inf
        for epoch in range(num_epochs):
            # Steps
            train_loss = self.train_step(dataloader=train_dataloader)
            val_loss, _, _ = self.eval_step(dataloader=val_dataloader)
            self.scheduler.step(val_loss)

            # Early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model = self.model
                _patience = patience  # reset _patience
            else:
                _patience -= 1
            if not _patience:  # 0
                print("Stopping early!")
                break

            # Logging
            print(
                f"Epoch: {epoch+1} | "
                f"train_loss: {train_loss:.5f}, "
                f"val_loss: {val_loss:.5f}, "
                f"lr: {self.optimizer.param_groups[0]['lr']:.2E}, "
                f"_patience: {_patience}"
            )

            # Pruning based on the intermediate value
            self.trial.report(val_loss, epoch)
            if self.trial.should_prune():
                raise optuna.TrialPruned()
                    
        return best_model, best_val_loss

In [67]:
def train_cnn(args, df, trial=None):
    """Train a CNN using specific arguments."""

    # Set seeds
    set_seeds()

    # Get data splits
    preprocessed_df = df.copy()
    preprocessed_df.text = preprocessed_df.text.apply(preprocess, lower=True)
    X_train, X_val, X_test, y_train, y_val, y_test, label_encoder = get_data_splits(preprocessed_df)
    num_classes = len(label_encoder)

    # Set device
    cuda = True
    device = torch.device("cuda" if (
        torch.cuda.is_available() and cuda) else "cpu")
    torch.set_default_tensor_type("torch.FloatTensor")
    if device.type == "cuda":
        torch.set_default_tensor_type("torch.cuda.FloatTensor")

    # Tokenize
    tokenizer = Tokenizer(char_level=args.char_level)
    tokenizer.fit_on_texts(texts=X_train)
    vocab_size = len(tokenizer)

    # Convert texts to sequences of indices
    X_train = np.array(tokenizer.texts_to_sequences(X_train))
    X_val = np.array(tokenizer.texts_to_sequences(X_val))
    X_test = np.array(tokenizer.texts_to_sequences(X_test))

    # Class weights
    counts = np.bincount([label_encoder.class_to_index[class_] for class_ in all_tags])
    class_weights = {i: 1.0/count for i, count in enumerate(counts)}

    # Create datasets
    train_dataset = CNNTextDataset(
        X=X_train, y=y_train, max_filter_size=max(args.filter_sizes))
    val_dataset = CNNTextDataset(
        X=X_val, y=y_val, max_filter_size=max(args.filter_sizes))
    test_dataset = CNNTextDataset(
        X=X_test, y=y_test, max_filter_size=max(args.filter_sizes))

    # Create dataloaders
    train_dataloader = train_dataset.create_dataloader(
        batch_size=args.batch_size)
    val_dataloader = val_dataset.create_dataloader(
        batch_size=args.batch_size)
    test_dataloader = test_dataset.create_dataloader(
        batch_size=args.batch_size)

    # Initialize model
    model = CNN(
        embedding_dim=args.embedding_dim, vocab_size=vocab_size,
        num_filters=args.num_filters, filter_sizes=args.filter_sizes,
        hidden_dim=args.hidden_dim, dropout_p=args.dropout_p,
        num_classes=num_classes)
    model = model.to(device)

    # Define loss
    class_weights_tensor = torch.Tensor(np.array(list(class_weights.values())))
    loss_fn = nn.BCEWithLogitsLoss(weight=class_weights_tensor)

    # Define optimizer & scheduler
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, patience=5)

    # Trainer module
    trainer = Trainer(
        model=model, device=device, loss_fn=loss_fn, 
        optimizer=optimizer, scheduler=scheduler, trial=trial)

    # Train
    best_model, best_val_loss = trainer.train(
        args.num_epochs, args.patience, train_dataloader, val_dataloader)

    # Best threshold for f1
    train_loss, y_true, y_prob = trainer.eval_step(dataloader=train_dataloader)
    precisions, recalls, thresholds = precision_recall_curve(y_true.ravel(), y_prob.ravel())
    threshold = find_best_threshold(y_true.ravel(), y_prob.ravel())

    # Determine predictions using threshold
    test_loss, y_true, y_prob = trainer.eval_step(dataloader=test_dataloader)
    y_pred = np.array([np.where(prob >= threshold, 1, 0) for prob in y_prob])

    # Evaluate (simple)
    metrics = precision_recall_fscore_support(y_test, y_pred, average="weighted")
    performance = {"precision": metrics[0], "recall": metrics[1], "f1": metrics[2]}

    return {
        "args": args,
        "tokenizer": tokenizer,
        "label_encoder": label_encoder,
        "model": best_model,
        "performance": performance,
        "best_val_loss": best_val_loss,
        "threshold": threshold,
    }

In [68]:
def objective(trial, args):
    """Objective function for optimization trials."""

    # Paramters (to tune)
    args.embedding_dim = trial.suggest_int("embedding_dim", 128, 512)
    args.num_filters = trial.suggest_int("num_filters", 128, 512)
    args.hidden_dim = trial.suggest_int("hidden_dim", 128, 512)
    args.dropout_p = trial.suggest_uniform("dropout_p", 0.3, 0.8)
    args.lr = trial.suggest_loguniform("lr", 5e-5, 5e-4)

    # Train & evaluate
    artifacts = train_cnn(args=args, df=df, trial=trial)

    # Set additional attributes
    trial.set_user_attr("precision", artifacts["performance"]["precision"])
    trial.set_user_attr("recall", artifacts["performance"]["recall"])
    trial.set_user_attr("f1", artifacts["performance"]["f1"])
    trial.set_user_attr("threshold", artifacts["threshold"])

    return artifacts["performance"]["f1"]

In [69]:
from numpyencoder import NumpyEncoder
from optuna.integration.mlflow import MLflowCallback

In [70]:
NUM_TRIALS = 50 # small sample for now

In [71]:
# Optimize
pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=5)
study = optuna.create_study(study_name="optimization", direction="maximize", pruner=pruner)
mlflow_callback = MLflowCallback(
    tracking_uri=mlflow.get_tracking_uri(), metric_name="f1")
study.optimize(lambda trial: objective(trial, args),
               n_trials=NUM_TRIALS,
               callbacks=[mlflow_callback])

[32m[I 2021-07-10 21:21:25,233][0m A new study created in memory with name: optimization[0m

MLflowCallback is experimental (supported from v1.4.0). The interface can change in the future.


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray



Epoch: 1 | train_loss: 0.00472, val_loss: 0.00348, lr: 1.26E-04, _patience: 10
Epoch: 2 | train_loss: 0.00362, val_loss: 0.00268, lr: 1.26E-04, _patience: 10
Epoch: 3 | train_loss: 0.00318, val_loss: 0.00258, lr: 1.26E-04, _patience: 10
Epoch: 4 | train_loss: 0.00298, val_loss: 0.00250, lr: 1.26E-04, _patience: 10
Epoch: 5 | train_loss: 0.00277, val_loss: 0.00238, lr: 1.26E-04, _patience: 10
Epoch: 6 | train_loss: 0.00263, val_loss: 0.00230, lr: 1.26E-04, _patience: 10
Epoch: 7 | train_loss: 0.00245, val_loss: 0.00219, lr: 1.26E-04, _patience: 10
Epoch: 8 | train_loss: 0.00230, val_loss: 0.00210, lr: 1.26E-04, _patience: 10
Epoch: 9 | train_loss: 0.00217, val_loss: 0.00199, lr: 1.26E-04, _patience: 10
Epoch: 10 | train_loss: 0.00202, val_loss: 0.00191, lr: 1.26E-04, _patience: 10
Epoch: 11 | train_loss: 0.00188, val_loss: 0.00185, lr: 1.26E-04, _patience: 10
Epoch: 12 | train_loss: 0.00178, val_loss: 0.00181, lr: 1.26E-04, _patience: 10
Epoch: 13 | train_loss: 0.00171, val_loss: 0.0017


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:22:39,265][0m Trial 0 finished with value: 0.6628035246740483 and parameters: {'embedding_dim': 225, 'num_filters': 287, 'hidden_dim': 240, 'dropout_p': 0.5093488542487961, 'lr': 0.00012613377731725462}. Best is trial 0 with value: 0.6628035246740483.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or

INFO: 'optimization' does not exist. Creating a new experiment
Epoch: 1 | train_loss: 0.00542, val_loss: 0.00346, lr: 7.75E-05, _patience: 10
Epoch: 2 | train_loss: 0.00420, val_loss: 0.00269, lr: 7.75E-05, _patience: 10
Epoch: 3 | train_loss: 0.00358, val_loss: 0.00255, lr: 7.75E-05, _patience: 10
Epoch: 4 | train_loss: 0.00330, val_loss: 0.00247, lr: 7.75E-05, _patience: 10
Epoch: 5 | train_loss: 0.00306, val_loss: 0.00240, lr: 7.75E-05, _patience: 10
Epoch: 6 | train_loss: 0.00287, val_loss: 0.00230, lr: 7.75E-05, _patience: 10
Epoch: 7 | train_loss: 0.00272, val_loss: 0.00220, lr: 7.75E-05, _patience: 10
Epoch: 8 | train_loss: 0.00257, val_loss: 0.00212, lr: 7.75E-05, _patience: 10
Epoch: 9 | train_loss: 0.00246, val_loss: 0.00208, lr: 7.75E-05, _patience: 10
Epoch: 10 | train_loss: 0.00230, val_loss: 0.00199, lr: 7.75E-05, _patience: 10
Epoch: 11 | train_loss: 0.00219, val_loss: 0.00191, lr: 7.75E-05, _patience: 10
Epoch: 12 | train_loss: 0.00202, val_loss: 0.00188, lr: 7.75E-05, 


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:26:16,005][0m Trial 1 finished with value: 0.6700979721048231 and parameters: {'embedding_dim': 489, 'num_filters': 454, 'hidden_dim': 347, 'dropout_p': 0.7225605841843792, 'lr': 7.750844913180659e-05}. Best is trial 1 with value: 0.6700979721048231.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-

Epoch: 1 | train_loss: 0.00471, val_loss: 0.00338, lr: 6.66E-05, _patience: 10
Epoch: 2 | train_loss: 0.00356, val_loss: 0.00278, lr: 6.66E-05, _patience: 10
Epoch: 3 | train_loss: 0.00306, val_loss: 0.00260, lr: 6.66E-05, _patience: 10
Epoch: 4 | train_loss: 0.00282, val_loss: 0.00252, lr: 6.66E-05, _patience: 10
Epoch: 5 | train_loss: 0.00273, val_loss: 0.00242, lr: 6.66E-05, _patience: 10
Epoch: 6 | train_loss: 0.00255, val_loss: 0.00234, lr: 6.66E-05, _patience: 10
Epoch: 7 | train_loss: 0.00243, val_loss: 0.00224, lr: 6.66E-05, _patience: 10
Epoch: 8 | train_loss: 0.00228, val_loss: 0.00215, lr: 6.66E-05, _patience: 10
Epoch: 9 | train_loss: 0.00216, val_loss: 0.00208, lr: 6.66E-05, _patience: 10
Epoch: 10 | train_loss: 0.00205, val_loss: 0.00198, lr: 6.66E-05, _patience: 10
Epoch: 11 | train_loss: 0.00193, val_loss: 0.00192, lr: 6.66E-05, _patience: 10
Epoch: 12 | train_loss: 0.00181, val_loss: 0.00185, lr: 6.66E-05, _patience: 10
Epoch: 13 | train_loss: 0.00173, val_loss: 0.0017


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:28:51,920][0m Trial 2 finished with value: 0.6690563213214246 and parameters: {'embedding_dim': 433, 'num_filters': 315, 'hidden_dim': 480, 'dropout_p': 0.563389016090152, 'lr': 6.663875446941576e-05}. Best is trial 1 with value: 0.6700979721048231.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-t

Epoch: 1 | train_loss: 0.00497, val_loss: 0.00274, lr: 4.93E-04, _patience: 10
Epoch: 2 | train_loss: 0.00296, val_loss: 0.00236, lr: 4.93E-04, _patience: 10
Epoch: 3 | train_loss: 0.00224, val_loss: 0.00194, lr: 4.93E-04, _patience: 10
Epoch: 4 | train_loss: 0.00176, val_loss: 0.00176, lr: 4.93E-04, _patience: 10
Epoch: 5 | train_loss: 0.00142, val_loss: 0.00164, lr: 4.93E-04, _patience: 10
Epoch: 6 | train_loss: 0.00110, val_loss: 0.00157, lr: 4.93E-04, _patience: 10
Epoch: 7 | train_loss: 0.00085, val_loss: 0.00158, lr: 4.93E-04, _patience: 9
Epoch: 8 | train_loss: 0.00070, val_loss: 0.00163, lr: 4.93E-04, _patience: 8
Epoch: 9 | train_loss: 0.00055, val_loss: 0.00164, lr: 4.93E-04, _patience: 7
Epoch: 10 | train_loss: 0.00042, val_loss: 0.00160, lr: 4.93E-04, _patience: 6
Epoch: 11 | train_loss: 0.00036, val_loss: 0.00171, lr: 4.93E-04, _patience: 5
Epoch: 12 | train_loss: 0.00031, val_loss: 0.00170, lr: 4.93E-05, _patience: 4
Epoch: 13 | train_loss: 0.00026, val_loss: 0.00157, lr:


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:29:24,304][0m Trial 3 finished with value: 0.7006070644714198 and parameters: {'embedding_dim': 492, 'num_filters': 198, 'hidden_dim': 385, 'dropout_p': 0.3326167065482074, 'lr': 0.0004932086554846984}. Best is trial 3 with value: 0.7006070644714198.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-

Epoch: 1 | train_loss: 0.00537, val_loss: 0.00364, lr: 2.12E-04, _patience: 10
Epoch: 2 | train_loss: 0.00404, val_loss: 0.00271, lr: 2.12E-04, _patience: 10
Epoch: 3 | train_loss: 0.00349, val_loss: 0.00264, lr: 2.12E-04, _patience: 10
Epoch: 4 | train_loss: 0.00322, val_loss: 0.00257, lr: 2.12E-04, _patience: 10
Epoch: 5 | train_loss: 0.00305, val_loss: 0.00250, lr: 2.12E-04, _patience: 10
Epoch: 6 | train_loss: 0.00291, val_loss: 0.00243, lr: 2.12E-04, _patience: 10
Epoch: 7 | train_loss: 0.00274, val_loss: 0.00231, lr: 2.12E-04, _patience: 10
Epoch: 8 | train_loss: 0.00253, val_loss: 0.00223, lr: 2.12E-04, _patience: 10
Epoch: 9 | train_loss: 0.00240, val_loss: 0.00212, lr: 2.12E-04, _patience: 10
Epoch: 10 | train_loss: 0.00227, val_loss: 0.00204, lr: 2.12E-04, _patience: 10
Epoch: 11 | train_loss: 0.00215, val_loss: 0.00198, lr: 2.12E-04, _patience: 10
Epoch: 12 | train_loss: 0.00200, val_loss: 0.00191, lr: 2.12E-04, _patience: 10
Epoch: 13 | train_loss: 0.00193, val_loss: 0.0018


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:30:04,007][0m Trial 4 finished with value: 0.6604618456572443 and parameters: {'embedding_dim': 160, 'num_filters': 209, 'hidden_dim': 293, 'dropout_p': 0.7061580868550322, 'lr': 0.00021193970336840957}. Best is trial 3 with value: 0.7006070644714198.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or

Epoch: 1 | train_loss: 0.00558, val_loss: 0.00278, lr: 3.48E-04, _patience: 10
Epoch: 2 | train_loss: 0.00330, val_loss: 0.00251, lr: 3.48E-04, _patience: 10
Epoch: 3 | train_loss: 0.00253, val_loss: 0.00205, lr: 3.48E-04, _patience: 10
Epoch: 4 | train_loss: 0.00200, val_loss: 0.00180, lr: 3.48E-04, _patience: 10
Epoch: 5 | train_loss: 0.00166, val_loss: 0.00168, lr: 3.48E-04, _patience: 10
Epoch: 6 | train_loss: 0.00136, val_loss: 0.00159, lr: 3.48E-04, _patience: 10
Epoch: 7 | train_loss: 0.00112, val_loss: 0.00153, lr: 3.48E-04, _patience: 10
Epoch: 8 | train_loss: 0.00093, val_loss: 0.00151, lr: 3.48E-04, _patience: 10
Epoch: 9 | train_loss: 0.00075, val_loss: 0.00161, lr: 3.48E-04, _patience: 9
Epoch: 10 | train_loss: 0.00060, val_loss: 0.00160, lr: 3.48E-04, _patience: 8
Epoch: 11 | train_loss: 0.00052, val_loss: 0.00162, lr: 3.48E-04, _patience: 7
Epoch: 12 | train_loss: 0.00045, val_loss: 0.00157, lr: 3.48E-04, _patience: 6
Epoch: 13 | train_loss: 0.00037, val_loss: 0.00165, l


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:31:02,512][0m Trial 5 finished with value: 0.6762533306367564 and parameters: {'embedding_dim': 510, 'num_filters': 332, 'hidden_dim': 452, 'dropout_p': 0.493073445251808, 'lr': 0.0003482384669259982}. Best is trial 3 with value: 0.7006070644714198.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-t

Epoch: 1 | train_loss: 0.00523, val_loss: 0.00290, lr: 5.64E-05, _patience: 10
Epoch: 2 | train_loss: 0.00345, val_loss: 0.00287, lr: 5.64E-05, _patience: 10
Epoch: 3 | train_loss: 0.00321, val_loss: 0.00263, lr: 5.64E-05, _patience: 10
Epoch: 4 | train_loss: 0.00303, val_loss: 0.00255, lr: 5.64E-05, _patience: 10
Epoch: 5 | train_loss: 0.00289, val_loss: 0.00249, lr: 5.64E-05, _patience: 10
Epoch: 6 | train_loss: 0.00278, val_loss: 0.00242, lr: 5.64E-05, _patience: 10
Epoch: 7 | train_loss: 0.00266, val_loss: 0.00235, lr: 5.64E-05, _patience: 10
Epoch: 8 | train_loss: 0.00255, val_loss: 0.00228, lr: 5.64E-05, _patience: 10
Epoch: 9 | train_loss: 0.00242, val_loss: 0.00222, lr: 5.64E-05, _patience: 10
Epoch: 10 | train_loss: 0.00236, val_loss: 0.00216, lr: 5.64E-05, _patience: 10
Epoch: 11 | train_loss: 0.00226, val_loss: 0.00208, lr: 5.64E-05, _patience: 10
Epoch: 12 | train_loss: 0.00214, val_loss: 0.00203, lr: 5.64E-05, _patience: 10
Epoch: 13 | train_loss: 0.00205, val_loss: 0.0019


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:33:48,429][0m Trial 6 finished with value: 0.64163636445738 and parameters: {'embedding_dim': 364, 'num_filters': 350, 'hidden_dim': 131, 'dropout_p': 0.35375492179098506, 'lr': 5.643457276835367e-05}. Best is trial 3 with value: 0.7006070644714198.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-t

Epoch: 1 | train_loss: 0.00565, val_loss: 0.00349, lr: 4.02E-04, _patience: 10
Epoch: 2 | train_loss: 0.00395, val_loss: 0.00273, lr: 4.02E-04, _patience: 10
Epoch: 3 | train_loss: 0.00328, val_loss: 0.00266, lr: 4.02E-04, _patience: 10
Epoch: 4 | train_loss: 0.00307, val_loss: 0.00256, lr: 4.02E-04, _patience: 10
Epoch: 5 | train_loss: 0.00286, val_loss: 0.00244, lr: 4.02E-04, _patience: 10
Epoch: 6 | train_loss: 0.00269, val_loss: 0.00232, lr: 4.02E-04, _patience: 10
Epoch: 7 | train_loss: 0.00248, val_loss: 0.00216, lr: 4.02E-04, _patience: 10
Epoch: 8 | train_loss: 0.00224, val_loss: 0.00204, lr: 4.02E-04, _patience: 10
Epoch: 9 | train_loss: 0.00204, val_loss: 0.00192, lr: 4.02E-04, _patience: 10
Epoch: 10 | train_loss: 0.00191, val_loss: 0.00182, lr: 4.02E-04, _patience: 10
Epoch: 11 | train_loss: 0.00178, val_loss: 0.00175, lr: 4.02E-04, _patience: 10
Epoch: 12 | train_loss: 0.00166, val_loss: 0.00169, lr: 4.02E-04, _patience: 10
Epoch: 13 | train_loss: 0.00152, val_loss: 0.0016


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:34:10,730][0m Trial 7 finished with value: 0.6656754517992355 and parameters: {'embedding_dim': 135, 'num_filters': 152, 'hidden_dim': 468, 'dropout_p': 0.776901143629122, 'lr': 0.0004017664913919758}. Best is trial 3 with value: 0.7006070644714198.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-t

Epoch: 1 | train_loss: 0.00798, val_loss: 0.00338, lr: 2.61E-04, _patience: 10
Epoch: 2 | train_loss: 0.00472, val_loss: 0.00267, lr: 2.61E-04, _patience: 10
Epoch: 3 | train_loss: 0.00386, val_loss: 0.00255, lr: 2.61E-04, _patience: 10
Epoch: 4 | train_loss: 0.00342, val_loss: 0.00241, lr: 2.61E-04, _patience: 10
Epoch: 5 | train_loss: 0.00306, val_loss: 0.00229, lr: 2.61E-04, _patience: 10
Epoch: 6 | train_loss: 0.00289, val_loss: 0.00217, lr: 2.61E-04, _patience: 10
Epoch: 7 | train_loss: 0.00265, val_loss: 0.00204, lr: 2.61E-04, _patience: 10
Epoch: 8 | train_loss: 0.00248, val_loss: 0.00201, lr: 2.61E-04, _patience: 10
Epoch: 9 | train_loss: 0.00234, val_loss: 0.00189, lr: 2.61E-04, _patience: 10
Epoch: 10 | train_loss: 0.00215, val_loss: 0.00183, lr: 2.61E-04, _patience: 10
Epoch: 11 | train_loss: 0.00198, val_loss: 0.00183, lr: 2.61E-04, _patience: 9
Epoch: 12 | train_loss: 0.00184, val_loss: 0.00170, lr: 2.61E-04, _patience: 10
Epoch: 13 | train_loss: 0.00176, val_loss: 0.00169


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:35:38,746][0m Trial 8 finished with value: 0.6645311189425309 and parameters: {'embedding_dim': 258, 'num_filters': 502, 'hidden_dim': 273, 'dropout_p': 0.7895894577967083, 'lr': 0.0002610158435942419}. Best is trial 3 with value: 0.7006070644714198.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-

Epoch: 1 | train_loss: 0.00627, val_loss: 0.00331, lr: 3.07E-04, _patience: 10
Epoch: 2 | train_loss: 0.00399, val_loss: 0.00256, lr: 3.07E-04, _patience: 10
Epoch: 3 | train_loss: 0.00325, val_loss: 0.00235, lr: 3.07E-04, _patience: 10
Epoch: 4 | train_loss: 0.00278, val_loss: 0.00220, lr: 3.07E-04, _patience: 10
Epoch: 5 | train_loss: 0.00246, val_loss: 0.00197, lr: 3.07E-04, _patience: 10
Epoch: 6 | train_loss: 0.00219, val_loss: 0.00193, lr: 3.07E-04, _patience: 10
Epoch: 7 | train_loss: 0.00196, val_loss: 0.00177, lr: 3.07E-04, _patience: 10
Epoch: 8 | train_loss: 0.00178, val_loss: 0.00179, lr: 3.07E-04, _patience: 9
Epoch: 9 | train_loss: 0.00160, val_loss: 0.00169, lr: 3.07E-04, _patience: 10
Epoch: 10 | train_loss: 0.00149, val_loss: 0.00165, lr: 3.07E-04, _patience: 10
Epoch: 11 | train_loss: 0.00133, val_loss: 0.00171, lr: 3.07E-04, _patience: 9
Epoch: 12 | train_loss: 0.00125, val_loss: 0.00169, lr: 3.07E-04, _patience: 8
Epoch: 13 | train_loss: 0.00110, val_loss: 0.00159, 


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:36:47,677][0m Trial 9 finished with value: 0.66978151495889 and parameters: {'embedding_dim': 454, 'num_filters': 280, 'hidden_dim': 201, 'dropout_p': 0.6571828583575179, 'lr': 0.00030717147778186286}. Best is trial 3 with value: 0.7006070644714198.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-t

Epoch: 1 | train_loss: 0.00451, val_loss: 0.00327, lr: 1.35E-04, _patience: 10
Epoch: 2 | train_loss: 0.00318, val_loss: 0.00265, lr: 1.35E-04, _patience: 10
Epoch: 3 | train_loss: 0.00276, val_loss: 0.00254, lr: 1.35E-04, _patience: 10
Epoch: 4 | train_loss: 0.00260, val_loss: 0.00241, lr: 1.35E-04, _patience: 10
Epoch: 5 | train_loss: 0.00241, val_loss: 0.00226, lr: 1.35E-04, _patience: 10
Epoch: 6 | train_loss: 0.00220, val_loss: 0.00212, lr: 1.35E-04, _patience: 10
Epoch: 7 | train_loss: 0.00203, val_loss: 0.00200, lr: 1.35E-04, _patience: 10
Epoch: 8 | train_loss: 0.00184, val_loss: 0.00190, lr: 1.35E-04, _patience: 10
Epoch: 9 | train_loss: 0.00170, val_loss: 0.00180, lr: 1.35E-04, _patience: 10
Epoch: 10 | train_loss: 0.00157, val_loss: 0.00172, lr: 1.35E-04, _patience: 10
Epoch: 11 | train_loss: 0.00144, val_loss: 0.00168, lr: 1.35E-04, _patience: 10
Epoch: 12 | train_loss: 0.00136, val_loss: 0.00163, lr: 1.35E-04, _patience: 10
Epoch: 13 | train_loss: 0.00126, val_loss: 0.0015


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:37:36,799][0m Trial 10 finished with value: 0.667306498245895 and parameters: {'embedding_dim': 371, 'num_filters': 142, 'hidden_dim': 377, 'dropout_p': 0.3096948773038894, 'lr': 0.0001345063000806952}. Best is trial 3 with value: 0.7006070644714198.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-

Epoch: 1 | train_loss: 0.00629, val_loss: 0.00278, lr: 4.80E-04, _patience: 10
Epoch: 2 | train_loss: 0.00300, val_loss: 0.00233, lr: 4.80E-04, _patience: 10
Epoch: 3 | train_loss: 0.00227, val_loss: 0.00193, lr: 4.80E-04, _patience: 10
Epoch: 4 | train_loss: 0.00180, val_loss: 0.00181, lr: 4.80E-04, _patience: 10
Epoch: 5 | train_loss: 0.00141, val_loss: 0.00174, lr: 4.80E-04, _patience: 10
Epoch: 6 | train_loss: 0.00109, val_loss: 0.00164, lr: 4.80E-04, _patience: 10
Epoch: 7 | train_loss: 0.00084, val_loss: 0.00174, lr: 4.80E-04, _patience: 9
Epoch: 8 | train_loss: 0.00067, val_loss: 0.00164, lr: 4.80E-04, _patience: 8
Epoch: 9 | train_loss: 0.00050, val_loss: 0.00173, lr: 4.80E-04, _patience: 7
Epoch: 10 | train_loss: 0.00041, val_loss: 0.00164, lr: 4.80E-04, _patience: 10
Epoch: 11 | train_loss: 0.00038, val_loss: 0.00172, lr: 4.80E-04, _patience: 9
Epoch: 12 | train_loss: 0.00031, val_loss: 0.00183, lr: 4.80E-04, _patience: 8
Epoch: 13 | train_loss: 0.00027, val_loss: 0.00178, lr


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:38:49,496][0m Trial 11 finished with value: 0.7051560266298316 and parameters: {'embedding_dim': 492, 'num_filters': 397, 'hidden_dim': 420, 'dropout_p': 0.415083093686176, 'lr': 0.000479937827664147}. Best is trial 11 with value: 0.7051560266298316.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-

Epoch: 1 | train_loss: 0.00685, val_loss: 0.00295, lr: 4.77E-04, _patience: 10
Epoch: 2 | train_loss: 0.00322, val_loss: 0.00244, lr: 4.77E-04, _patience: 10
Epoch: 3 | train_loss: 0.00238, val_loss: 0.00198, lr: 4.77E-04, _patience: 10
Epoch: 4 | train_loss: 0.00187, val_loss: 0.00176, lr: 4.77E-04, _patience: 10
Epoch: 5 | train_loss: 0.00148, val_loss: 0.00162, lr: 4.77E-04, _patience: 10
Epoch: 6 | train_loss: 0.00115, val_loss: 0.00155, lr: 4.77E-04, _patience: 10
Epoch: 7 | train_loss: 0.00087, val_loss: 0.00156, lr: 4.77E-04, _patience: 9
Epoch: 8 | train_loss: 0.00069, val_loss: 0.00161, lr: 4.77E-04, _patience: 8
Epoch: 9 | train_loss: 0.00055, val_loss: 0.00161, lr: 4.77E-04, _patience: 7
Epoch: 10 | train_loss: 0.00043, val_loss: 0.00167, lr: 4.77E-04, _patience: 6
Epoch: 11 | train_loss: 0.00036, val_loss: 0.00187, lr: 4.77E-04, _patience: 5
Epoch: 12 | train_loss: 0.00032, val_loss: 0.00183, lr: 4.77E-05, _patience: 4
Epoch: 13 | train_loss: 0.00026, val_loss: 0.00160, lr:


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:39:51,556][0m Trial 12 finished with value: 0.6977403242123549 and parameters: {'embedding_dim': 511, 'num_filters': 414, 'hidden_dim': 411, 'dropout_p': 0.3991779992616886, 'lr': 0.0004773332201413353}. Best is trial 11 with value: 0.7051560266298316.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00514, val_loss: 0.00280, lr: 4.78E-04, _patience: 10
Epoch: 2 | train_loss: 0.00316, val_loss: 0.00245, lr: 4.78E-04, _patience: 10
Epoch: 3 | train_loss: 0.00242, val_loss: 0.00204, lr: 4.78E-04, _patience: 10
Epoch: 4 | train_loss: 0.00191, val_loss: 0.00182, lr: 4.78E-04, _patience: 10
Epoch: 5 | train_loss: 0.00155, val_loss: 0.00173, lr: 4.78E-04, _patience: 10
Epoch: 6 | train_loss: 0.00129, val_loss: 0.00163, lr: 4.78E-04, _patience: 10
Epoch: 7 | train_loss: 0.00104, val_loss: 0.00157, lr: 4.78E-04, _patience: 10
Epoch: 8 | train_loss: 0.00083, val_loss: 0.00154, lr: 4.78E-04, _patience: 10
Epoch: 9 | train_loss: 0.00067, val_loss: 0.00159, lr: 4.78E-04, _patience: 9
Epoch: 10 | train_loss: 0.00057, val_loss: 0.00168, lr: 4.78E-04, _patience: 8
Epoch: 11 | train_loss: 0.00047, val_loss: 0.00182, lr: 4.78E-04, _patience: 7
Epoch: 12 | train_loss: 0.00043, val_loss: 0.00171, lr: 4.78E-04, _patience: 6
Epoch: 13 | train_loss: 0.00034, val_loss: 0.00172, l


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:40:24,613][0m Trial 13 finished with value: 0.6791010151428308 and parameters: {'embedding_dim': 415, 'num_filters': 221, 'hidden_dim': 400, 'dropout_p': 0.41386450844400835, 'lr': 0.00047823584487243756}. Best is trial 11 with value: 0.7051560266298316.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists

Epoch: 1 | train_loss: 0.00487, val_loss: 0.00325, lr: 2.15E-04, _patience: 10
Epoch: 2 | train_loss: 0.00321, val_loss: 0.00255, lr: 2.15E-04, _patience: 10
Epoch: 3 | train_loss: 0.00259, val_loss: 0.00232, lr: 2.15E-04, _patience: 10
Epoch: 4 | train_loss: 0.00227, val_loss: 0.00205, lr: 2.15E-04, _patience: 10
Epoch: 5 | train_loss: 0.00193, val_loss: 0.00189, lr: 2.15E-04, _patience: 10
Epoch: 6 | train_loss: 0.00168, val_loss: 0.00182, lr: 2.15E-04, _patience: 10
Epoch: 7 | train_loss: 0.00147, val_loss: 0.00170, lr: 2.15E-04, _patience: 10
Epoch: 8 | train_loss: 0.00130, val_loss: 0.00166, lr: 2.15E-04, _patience: 10
Epoch: 9 | train_loss: 0.00113, val_loss: 0.00168, lr: 2.15E-04, _patience: 9
Epoch: 10 | train_loss: 0.00101, val_loss: 0.00165, lr: 2.15E-04, _patience: 10
Epoch: 11 | train_loss: 0.00090, val_loss: 0.00164, lr: 2.15E-04, _patience: 10
Epoch: 12 | train_loss: 0.00080, val_loss: 0.00161, lr: 2.15E-04, _patience: 10
Epoch: 13 | train_loss: 0.00072, val_loss: 0.00158


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:41:22,568][0m Trial 14 finished with value: 0.674090385172307 and parameters: {'embedding_dim': 296, 'num_filters': 395, 'hidden_dim': 338, 'dropout_p': 0.30679127370480475, 'lr': 0.0002150334075930382}. Best is trial 11 with value: 0.7051560266298316.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00738, val_loss: 0.00292, lr: 4.86E-04, _patience: 10
Epoch: 2 | train_loss: 0.00320, val_loss: 0.00244, lr: 4.86E-04, _patience: 10
Epoch: 3 | train_loss: 0.00243, val_loss: 0.00194, lr: 4.86E-04, _patience: 10
Epoch: 4 | train_loss: 0.00189, val_loss: 0.00173, lr: 4.86E-04, _patience: 10
Epoch: 5 | train_loss: 0.00149, val_loss: 0.00163, lr: 4.86E-04, _patience: 10
Epoch: 6 | train_loss: 0.00118, val_loss: 0.00155, lr: 4.86E-04, _patience: 10
Epoch: 7 | train_loss: 0.00089, val_loss: 0.00154, lr: 4.86E-04, _patience: 10
Epoch: 8 | train_loss: 0.00068, val_loss: 0.00151, lr: 4.86E-04, _patience: 10
Epoch: 9 | train_loss: 0.00051, val_loss: 0.00158, lr: 4.86E-04, _patience: 9
Epoch: 10 | train_loss: 0.00039, val_loss: 0.00158, lr: 4.86E-04, _patience: 8
Epoch: 11 | train_loss: 0.00033, val_loss: 0.00157, lr: 4.86E-04, _patience: 7
Epoch: 12 | train_loss: 0.00029, val_loss: 0.00168, lr: 4.86E-04, _patience: 6
Epoch: 13 | train_loss: 0.00025, val_loss: 0.00201, l


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:42:38,023][0m Trial 15 finished with value: 0.7081352359515858 and parameters: {'embedding_dim': 473, 'num_filters': 512, 'hidden_dim': 505, 'dropout_p': 0.4337075433076099, 'lr': 0.00048576973840205764}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-

Epoch: 1 | train_loss: 0.00456, val_loss: 0.00343, lr: 1.04E-04, _patience: 10
Epoch: 2 | train_loss: 0.00326, val_loss: 0.00262, lr: 1.04E-04, _patience: 10
Epoch: 3 | train_loss: 0.00278, val_loss: 0.00249, lr: 1.04E-04, _patience: 10
Epoch: 4 | train_loss: 0.00250, val_loss: 0.00227, lr: 1.04E-04, _patience: 10
Epoch: 5 | train_loss: 0.00224, val_loss: 0.00214, lr: 1.04E-04, _patience: 10
Epoch: 6 | train_loss: 0.00202, val_loss: 0.00201, lr: 1.04E-04, _patience: 10
Epoch: 7 | train_loss: 0.00182, val_loss: 0.00190, lr: 1.04E-04, _patience: 10
Epoch: 8 | train_loss: 0.00170, val_loss: 0.00181, lr: 1.04E-04, _patience: 10
Epoch: 9 | train_loss: 0.00155, val_loss: 0.00173, lr: 1.04E-04, _patience: 10
Epoch: 10 | train_loss: 0.00145, val_loss: 0.00170, lr: 1.04E-04, _patience: 10
Epoch: 11 | train_loss: 0.00133, val_loss: 0.00163, lr: 1.04E-04, _patience: 10
Epoch: 12 | train_loss: 0.00121, val_loss: 0.00162, lr: 1.04E-04, _patience: 10
Epoch: 13 | train_loss: 0.00110, val_loss: 0.0016


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:44:26,782][0m Trial 16 finished with value: 0.684089996546423 and parameters: {'embedding_dim': 366, 'num_filters': 509, 'hidden_dim': 509, 'dropout_p': 0.44575261184512505, 'lr': 0.0001039515268754127}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00539, val_loss: 0.00338, lr: 1.92E-04, _patience: 10
Epoch: 2 | train_loss: 0.00344, val_loss: 0.00249, lr: 1.92E-04, _patience: 10
Epoch: 3 | train_loss: 0.00274, val_loss: 0.00226, lr: 1.92E-04, _patience: 10
Epoch: 4 | train_loss: 0.00240, val_loss: 0.00202, lr: 1.92E-04, _patience: 10
Epoch: 5 | train_loss: 0.00207, val_loss: 0.00187, lr: 1.92E-04, _patience: 10
Epoch: 6 | train_loss: 0.00182, val_loss: 0.00179, lr: 1.92E-04, _patience: 10
Epoch: 7 | train_loss: 0.00160, val_loss: 0.00171, lr: 1.92E-04, _patience: 10
Epoch: 8 | train_loss: 0.00142, val_loss: 0.00161, lr: 1.92E-04, _patience: 10
Epoch: 9 | train_loss: 0.00122, val_loss: 0.00160, lr: 1.92E-04, _patience: 10
Epoch: 10 | train_loss: 0.00111, val_loss: 0.00156, lr: 1.92E-04, _patience: 10
Epoch: 11 | train_loss: 0.00099, val_loss: 0.00156, lr: 1.92E-04, _patience: 10
Epoch: 12 | train_loss: 0.00087, val_loss: 0.00153, lr: 1.92E-04, _patience: 10
Epoch: 13 | train_loss: 0.00076, val_loss: 0.0015


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:46:13,191][0m Trial 17 finished with value: 0.6799434219138696 and parameters: {'embedding_dim': 447, 'num_filters': 429, 'hidden_dim': 501, 'dropout_p': 0.5747323985837085, 'lr': 0.00019153228046257846}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-

Epoch: 1 | train_loss: 0.00672, val_loss: 0.00302, lr: 3.71E-04, _patience: 10
Epoch: 2 | train_loss: 0.00340, val_loss: 0.00254, lr: 3.71E-04, _patience: 10
Epoch: 3 | train_loss: 0.00259, val_loss: 0.00209, lr: 3.71E-04, _patience: 10
Epoch: 4 | train_loss: 0.00204, val_loss: 0.00182, lr: 3.71E-04, _patience: 10
Epoch: 5 | train_loss: 0.00170, val_loss: 0.00172, lr: 3.71E-04, _patience: 10
Epoch: 6 | train_loss: 0.00143, val_loss: 0.00169, lr: 3.71E-04, _patience: 10
Epoch: 7 | train_loss: 0.00118, val_loss: 0.00158, lr: 3.71E-04, _patience: 10
Epoch: 8 | train_loss: 0.00094, val_loss: 0.00156, lr: 3.71E-04, _patience: 10
Epoch: 9 | train_loss: 0.00075, val_loss: 0.00157, lr: 3.71E-04, _patience: 9
Epoch: 10 | train_loss: 0.00060, val_loss: 0.00166, lr: 3.71E-04, _patience: 8
Epoch: 11 | train_loss: 0.00051, val_loss: 0.00169, lr: 3.71E-04, _patience: 7
Epoch: 12 | train_loss: 0.00043, val_loss: 0.00168, lr: 3.71E-04, _patience: 6
Epoch: 13 | train_loss: 0.00037, val_loss: 0.00178, l


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:47:15,871][0m Trial 18 finished with value: 0.696878853681489 and parameters: {'embedding_dim': 395, 'num_filters': 471, 'hidden_dim': 442, 'dropout_p': 0.46056925992011705, 'lr': 0.0003706221648172483}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00524, val_loss: 0.00314, lr: 2.80E-04, _patience: 10
Epoch: 2 | train_loss: 0.00327, val_loss: 0.00252, lr: 2.80E-04, _patience: 10
Epoch: 3 | train_loss: 0.00259, val_loss: 0.00217, lr: 2.80E-04, _patience: 10
Epoch: 4 | train_loss: 0.00213, val_loss: 0.00189, lr: 2.80E-04, _patience: 10
Epoch: 5 | train_loss: 0.00176, val_loss: 0.00174, lr: 2.80E-04, _patience: 10
Epoch: 6 | train_loss: 0.00149, val_loss: 0.00167, lr: 2.80E-04, _patience: 10
Epoch: 7 | train_loss: 0.00127, val_loss: 0.00161, lr: 2.80E-04, _patience: 10
Epoch: 8 | train_loss: 0.00111, val_loss: 0.00157, lr: 2.80E-04, _patience: 10
Epoch: 9 | train_loss: 0.00093, val_loss: 0.00151, lr: 2.80E-04, _patience: 10
Epoch: 10 | train_loss: 0.00079, val_loss: 0.00158, lr: 2.80E-04, _patience: 9
Epoch: 11 | train_loss: 0.00067, val_loss: 0.00155, lr: 2.80E-04, _patience: 8
Epoch: 12 | train_loss: 0.00056, val_loss: 0.00160, lr: 2.80E-04, _patience: 7
Epoch: 13 | train_loss: 0.00051, val_loss: 0.00163, 


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:48:19,022][0m Trial 19 finished with value: 0.663958896663722 and parameters: {'embedding_dim': 320, 'num_filters': 376, 'hidden_dim': 433, 'dropout_p': 0.3699893550575423, 'lr': 0.00027960910907676303}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00797, val_loss: 0.00299, lr: 4.98E-04, _patience: 10
Epoch: 2 | train_loss: 0.00351, val_loss: 0.00259, lr: 4.98E-04, _patience: 10
Epoch: 3 | train_loss: 0.00268, val_loss: 0.00212, lr: 4.98E-04, _patience: 10
Epoch: 4 | train_loss: 0.00212, val_loss: 0.00182, lr: 4.98E-04, _patience: 10
Epoch: 5 | train_loss: 0.00170, val_loss: 0.00173, lr: 4.98E-04, _patience: 10
Epoch: 6 | train_loss: 0.00142, val_loss: 0.00169, lr: 4.98E-04, _patience: 10
Epoch: 7 | train_loss: 0.00115, val_loss: 0.00160, lr: 4.98E-04, _patience: 10
Epoch: 8 | train_loss: 0.00090, val_loss: 0.00161, lr: 4.98E-04, _patience: 9
Epoch: 9 | train_loss: 0.00077, val_loss: 0.00171, lr: 4.98E-04, _patience: 8
Epoch: 10 | train_loss: 0.00062, val_loss: 0.00154, lr: 4.98E-04, _patience: 10
Epoch: 11 | train_loss: 0.00053, val_loss: 0.00168, lr: 4.98E-04, _patience: 9
Epoch: 12 | train_loss: 0.00048, val_loss: 0.00219, lr: 4.98E-04, _patience: 8
Epoch: 13 | train_loss: 0.00048, val_loss: 0.00180, l


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:49:40,927][0m Trial 20 finished with value: 0.690346389021688 and parameters: {'embedding_dim': 484, 'num_filters': 479, 'hidden_dim': 504, 'dropout_p': 0.6064430758390457, 'lr': 0.0004982926131849806}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or

Epoch: 1 | train_loss: 0.00506, val_loss: 0.00274, lr: 4.38E-04, _patience: 10
Epoch: 2 | train_loss: 0.00302, val_loss: 0.00238, lr: 4.38E-04, _patience: 10
Epoch: 3 | train_loss: 0.00229, val_loss: 0.00192, lr: 4.38E-04, _patience: 10
Epoch: 4 | train_loss: 0.00182, val_loss: 0.00170, lr: 4.38E-04, _patience: 10
Epoch: 5 | train_loss: 0.00146, val_loss: 0.00157, lr: 4.38E-04, _patience: 10
Epoch: 6 | train_loss: 0.00117, val_loss: 0.00158, lr: 4.38E-04, _patience: 9
Epoch: 7 | train_loss: 0.00095, val_loss: 0.00158, lr: 4.38E-04, _patience: 8
Epoch: 8 | train_loss: 0.00074, val_loss: 0.00155, lr: 4.38E-04, _patience: 10
Epoch: 9 | train_loss: 0.00059, val_loss: 0.00172, lr: 4.38E-04, _patience: 9
Epoch: 10 | train_loss: 0.00053, val_loss: 0.00172, lr: 4.38E-04, _patience: 8
Epoch: 11 | train_loss: 0.00044, val_loss: 0.00167, lr: 4.38E-04, _patience: 7
Epoch: 12 | train_loss: 0.00037, val_loss: 0.00177, lr: 4.38E-04, _patience: 6
Epoch: 13 | train_loss: 0.00029, val_loss: 0.00176, lr:


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:50:20,167][0m Trial 21 finished with value: 0.6943628551999735 and parameters: {'embedding_dim': 465, 'num_filters': 243, 'hidden_dim': 369, 'dropout_p': 0.351063897948802, 'lr': 0.00043796072414598145}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00621, val_loss: 0.00306, lr: 4.87E-04, _patience: 10
Epoch: 2 | train_loss: 0.00295, val_loss: 0.00236, lr: 4.87E-04, _patience: 10
Epoch: 3 | train_loss: 0.00223, val_loss: 0.00191, lr: 4.87E-04, _patience: 10
Epoch: 4 | train_loss: 0.00169, val_loss: 0.00169, lr: 4.87E-04, _patience: 10
Epoch: 5 | train_loss: 0.00131, val_loss: 0.00162, lr: 4.87E-04, _patience: 10
Epoch: 6 | train_loss: 0.00100, val_loss: 0.00158, lr: 4.87E-04, _patience: 10
Epoch: 7 | train_loss: 0.00077, val_loss: 0.00158, lr: 4.87E-04, _patience: 9
Epoch: 8 | train_loss: 0.00060, val_loss: 0.00158, lr: 4.87E-04, _patience: 10
Epoch: 9 | train_loss: 0.00049, val_loss: 0.00157, lr: 4.87E-04, _patience: 10
Epoch: 10 | train_loss: 0.00041, val_loss: 0.00163, lr: 4.87E-04, _patience: 9
Epoch: 11 | train_loss: 0.00039, val_loss: 0.00188, lr: 4.87E-04, _patience: 8
Epoch: 12 | train_loss: 0.00035, val_loss: 0.00210, lr: 4.87E-04, _patience: 7
Epoch: 13 | train_loss: 0.00035, val_loss: 0.00212, l


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:51:26,594][0m Trial 22 finished with value: 0.704333855407378 and parameters: {'embedding_dim': 507, 'num_filters': 373, 'hidden_dim': 402, 'dropout_p': 0.3098677295654228, 'lr': 0.00048684100168162913}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00577, val_loss: 0.00295, lr: 3.38E-04, _patience: 10
Epoch: 2 | train_loss: 0.00325, val_loss: 0.00243, lr: 3.38E-04, _patience: 10
Epoch: 3 | train_loss: 0.00249, val_loss: 0.00201, lr: 3.38E-04, _patience: 10
Epoch: 4 | train_loss: 0.00189, val_loss: 0.00175, lr: 3.38E-04, _patience: 10
Epoch: 5 | train_loss: 0.00154, val_loss: 0.00169, lr: 3.38E-04, _patience: 10
Epoch: 6 | train_loss: 0.00129, val_loss: 0.00158, lr: 3.38E-04, _patience: 10
Epoch: 7 | train_loss: 0.00106, val_loss: 0.00156, lr: 3.38E-04, _patience: 10
Epoch: 8 | train_loss: 0.00084, val_loss: 0.00154, lr: 3.38E-04, _patience: 10
Epoch: 9 | train_loss: 0.00070, val_loss: 0.00147, lr: 3.38E-04, _patience: 10
Epoch: 10 | train_loss: 0.00058, val_loss: 0.00148, lr: 3.38E-04, _patience: 9
Epoch: 11 | train_loss: 0.00050, val_loss: 0.00153, lr: 3.38E-04, _patience: 8
Epoch: 12 | train_loss: 0.00043, val_loss: 0.00174, lr: 3.38E-04, _patience: 7
Epoch: 13 | train_loss: 0.00036, val_loss: 0.00172, 


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:52:28,703][0m Trial 23 finished with value: 0.6888445275804926 and parameters: {'embedding_dim': 406, 'num_filters': 432, 'hidden_dim': 417, 'dropout_p': 0.402070930063632, 'lr': 0.0003382212186764488}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or

Epoch: 1 | train_loss: 0.00589, val_loss: 0.00287, lr: 4.18E-04, _patience: 10
Epoch: 2 | train_loss: 0.00316, val_loss: 0.00241, lr: 4.18E-04, _patience: 10
Epoch: 3 | train_loss: 0.00237, val_loss: 0.00196, lr: 4.18E-04, _patience: 10
Epoch: 4 | train_loss: 0.00185, val_loss: 0.00175, lr: 4.18E-04, _patience: 10
Epoch: 5 | train_loss: 0.00152, val_loss: 0.00163, lr: 4.18E-04, _patience: 10
Epoch: 6 | train_loss: 0.00115, val_loss: 0.00159, lr: 4.18E-04, _patience: 10
Epoch: 7 | train_loss: 0.00092, val_loss: 0.00157, lr: 4.18E-04, _patience: 10
Epoch: 8 | train_loss: 0.00075, val_loss: 0.00156, lr: 4.18E-04, _patience: 10
Epoch: 9 | train_loss: 0.00058, val_loss: 0.00158, lr: 4.18E-04, _patience: 9
Epoch: 10 | train_loss: 0.00050, val_loss: 0.00168, lr: 4.18E-04, _patience: 8
Epoch: 11 | train_loss: 0.00047, val_loss: 0.00164, lr: 4.18E-04, _patience: 7
Epoch: 12 | train_loss: 0.00039, val_loss: 0.00175, lr: 4.18E-04, _patience: 6
Epoch: 13 | train_loss: 0.00037, val_loss: 0.00159, l


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:53:28,380][0m Trial 24 finished with value: 0.689974614714511 and parameters: {'embedding_dim': 475, 'num_filters': 371, 'hidden_dim': 471, 'dropout_p': 0.4546831115643709, 'lr': 0.00041817003950477845}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00542, val_loss: 0.00297, lr: 2.61E-04, _patience: 10
Epoch: 2 | train_loss: 0.00342, val_loss: 0.00237, lr: 2.61E-04, _patience: 10
Epoch: 3 | train_loss: 0.00265, val_loss: 0.00204, lr: 2.61E-04, _patience: 10
Epoch: 4 | train_loss: 0.00215, val_loss: 0.00187, lr: 2.61E-04, _patience: 10
Epoch: 5 | train_loss: 0.00184, val_loss: 0.00174, lr: 2.61E-04, _patience: 10
Epoch: 6 | train_loss: 0.00159, val_loss: 0.00170, lr: 2.61E-04, _patience: 10
Epoch: 7 | train_loss: 0.00139, val_loss: 0.00172, lr: 2.61E-04, _patience: 9
Epoch: 8 | train_loss: 0.00120, val_loss: 0.00175, lr: 2.61E-04, _patience: 8
Epoch: 9 | train_loss: 0.00106, val_loss: 0.00166, lr: 2.61E-04, _patience: 10
Epoch: 10 | train_loss: 0.00088, val_loss: 0.00169, lr: 2.61E-04, _patience: 9
Epoch: 11 | train_loss: 0.00077, val_loss: 0.00158, lr: 2.61E-04, _patience: 10
Epoch: 12 | train_loss: 0.00067, val_loss: 0.00165, lr: 2.61E-04, _patience: 9
Epoch: 13 | train_loss: 0.00057, val_loss: 0.00170, l


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:55:27,807][0m Trial 25 finished with value: 0.6959524489590037 and parameters: {'embedding_dim': 510, 'num_filters': 394, 'hidden_dim': 304, 'dropout_p': 0.5192548045270228, 'lr': 0.0002606722598502823}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00594, val_loss: 0.00279, lr: 5.00E-04, _patience: 10
Epoch: 2 | train_loss: 0.00312, val_loss: 0.00229, lr: 5.00E-04, _patience: 10
Epoch: 3 | train_loss: 0.00231, val_loss: 0.00190, lr: 5.00E-04, _patience: 10
Epoch: 4 | train_loss: 0.00182, val_loss: 0.00173, lr: 5.00E-04, _patience: 10
Epoch: 5 | train_loss: 0.00143, val_loss: 0.00160, lr: 5.00E-04, _patience: 10
Epoch: 6 | train_loss: 0.00116, val_loss: 0.00155, lr: 5.00E-04, _patience: 10
Epoch: 7 | train_loss: 0.00090, val_loss: 0.00149, lr: 5.00E-04, _patience: 10
Epoch: 8 | train_loss: 0.00068, val_loss: 0.00161, lr: 5.00E-04, _patience: 9
Epoch: 9 | train_loss: 0.00054, val_loss: 0.00157, lr: 5.00E-04, _patience: 8
Epoch: 10 | train_loss: 0.00047, val_loss: 0.00173, lr: 5.00E-04, _patience: 7
Epoch: 11 | train_loss: 0.00040, val_loss: 0.00191, lr: 5.00E-04, _patience: 6
Epoch: 12 | train_loss: 0.00037, val_loss: 0.00174, lr: 5.00E-04, _patience: 5
Epoch: 13 | train_loss: 0.00034, val_loss: 0.00182, lr


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:56:18,820][0m Trial 26 finished with value: 0.7056624935481121 and parameters: {'embedding_dim': 440, 'num_filters': 361, 'hidden_dim': 351, 'dropout_p': 0.389737006706403, 'lr': 0.0004999261484134036}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or

Epoch: 1 | train_loss: 0.00523, val_loss: 0.00271, lr: 3.82E-04, _patience: 10
Epoch: 2 | train_loss: 0.00311, val_loss: 0.00245, lr: 3.82E-04, _patience: 10
Epoch: 3 | train_loss: 0.00244, val_loss: 0.00198, lr: 3.82E-04, _patience: 10
Epoch: 4 | train_loss: 0.00194, val_loss: 0.00178, lr: 3.82E-04, _patience: 10
Epoch: 5 | train_loss: 0.00157, val_loss: 0.00172, lr: 3.82E-04, _patience: 10
Epoch: 6 | train_loss: 0.00129, val_loss: 0.00163, lr: 3.82E-04, _patience: 10
Epoch: 7 | train_loss: 0.00108, val_loss: 0.00163, lr: 3.82E-04, _patience: 10
Epoch: 8 | train_loss: 0.00088, val_loss: 0.00159, lr: 3.82E-04, _patience: 10
Epoch: 9 | train_loss: 0.00074, val_loss: 0.00157, lr: 3.82E-04, _patience: 10
Epoch: 10 | train_loss: 0.00065, val_loss: 0.00158, lr: 3.82E-04, _patience: 9
Epoch: 11 | train_loss: 0.00053, val_loss: 0.00160, lr: 3.82E-04, _patience: 8
Epoch: 12 | train_loss: 0.00046, val_loss: 0.00166, lr: 3.82E-04, _patience: 7
Epoch: 13 | train_loss: 0.00039, val_loss: 0.00168, 


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:57:10,783][0m Trial 27 finished with value: 0.6836333663440631 and parameters: {'embedding_dim': 438, 'num_filters': 310, 'hidden_dim': 347, 'dropout_p': 0.42458187561352917, 'lr': 0.00038216070381515664}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists

Epoch: 1 | train_loss: 0.00543, val_loss: 0.00316, lr: 3.15E-04, _patience: 10
Epoch: 2 | train_loss: 0.00348, val_loss: 0.00250, lr: 3.15E-04, _patience: 10
Epoch: 3 | train_loss: 0.00277, val_loss: 0.00224, lr: 3.15E-04, _patience: 10
Epoch: 4 | train_loss: 0.00231, val_loss: 0.00199, lr: 3.15E-04, _patience: 10
Epoch: 5 | train_loss: 0.00195, val_loss: 0.00182, lr: 3.15E-04, _patience: 10
Epoch: 6 | train_loss: 0.00168, val_loss: 0.00173, lr: 3.15E-04, _patience: 10
Epoch: 7 | train_loss: 0.00148, val_loss: 0.00162, lr: 3.15E-04, _patience: 10
Epoch: 8 | train_loss: 0.00128, val_loss: 0.00161, lr: 3.15E-04, _patience: 10
Epoch: 9 | train_loss: 0.00108, val_loss: 0.00161, lr: 3.15E-04, _patience: 10
Epoch: 10 | train_loss: 0.00095, val_loss: 0.00153, lr: 3.15E-04, _patience: 10
Epoch: 11 | train_loss: 0.00082, val_loss: 0.00149, lr: 3.15E-04, _patience: 10
Epoch: 12 | train_loss: 0.00069, val_loss: 0.00159, lr: 3.15E-04, _patience: 9
Epoch: 13 | train_loss: 0.00058, val_loss: 0.00151


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:58:06,657][0m Trial 28 finished with value: 0.6917969982407566 and parameters: {'embedding_dim': 328, 'num_filters': 341, 'hidden_dim': 322, 'dropout_p': 0.4853861295937314, 'lr': 0.0003152052777023375}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00517, val_loss: 0.00351, lr: 1.61E-04, _patience: 10
Epoch: 2 | train_loss: 0.00376, val_loss: 0.00261, lr: 1.61E-04, _patience: 10
Epoch: 3 | train_loss: 0.00314, val_loss: 0.00248, lr: 1.61E-04, _patience: 10
Epoch: 4 | train_loss: 0.00281, val_loss: 0.00229, lr: 1.61E-04, _patience: 10
Epoch: 5 | train_loss: 0.00251, val_loss: 0.00215, lr: 1.61E-04, _patience: 10
Epoch: 6 | train_loss: 0.00231, val_loss: 0.00201, lr: 1.61E-04, _patience: 10
Epoch: 7 | train_loss: 0.00210, val_loss: 0.00189, lr: 1.61E-04, _patience: 10
Epoch: 8 | train_loss: 0.00189, val_loss: 0.00182, lr: 1.61E-04, _patience: 10
Epoch: 9 | train_loss: 0.00174, val_loss: 0.00178, lr: 1.61E-04, _patience: 10
Epoch: 10 | train_loss: 0.00161, val_loss: 0.00175, lr: 1.61E-04, _patience: 10
Epoch: 11 | train_loss: 0.00152, val_loss: 0.00170, lr: 1.61E-04, _patience: 10
Epoch: 12 | train_loss: 0.00138, val_loss: 0.00167, lr: 1.61E-04, _patience: 10
Epoch: 13 | train_loss: 0.00128, val_loss: 0.0016


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 21:59:28,637][0m Trial 29 finished with value: 0.6726796855907661 and parameters: {'embedding_dim': 420, 'num_filters': 283, 'hidden_dim': 233, 'dropout_p': 0.5417688582283965, 'lr': 0.0001607705487990761}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00645, val_loss: 0.00351, lr: 4.09E-04, _patience: 10
Epoch: 2 | train_loss: 0.00356, val_loss: 0.00256, lr: 4.09E-04, _patience: 10
Epoch: 3 | train_loss: 0.00273, val_loss: 0.00224, lr: 4.09E-04, _patience: 10
Epoch: 4 | train_loss: 0.00217, val_loss: 0.00193, lr: 4.09E-04, _patience: 10
Epoch: 5 | train_loss: 0.00177, val_loss: 0.00176, lr: 4.09E-04, _patience: 10
Epoch: 6 | train_loss: 0.00152, val_loss: 0.00167, lr: 4.09E-04, _patience: 10
Epoch: 7 | train_loss: 0.00125, val_loss: 0.00158, lr: 4.09E-04, _patience: 10
Epoch: 8 | train_loss: 0.00110, val_loss: 0.00159, lr: 4.09E-04, _patience: 9
Epoch: 9 | train_loss: 0.00091, val_loss: 0.00154, lr: 4.09E-04, _patience: 10
Epoch: 10 | train_loss: 0.00077, val_loss: 0.00156, lr: 4.09E-04, _patience: 9
Epoch: 11 | train_loss: 0.00061, val_loss: 0.00157, lr: 4.09E-04, _patience: 8
Epoch: 12 | train_loss: 0.00055, val_loss: 0.00171, lr: 4.09E-04, _patience: 7
Epoch: 13 | train_loss: 0.00047, val_loss: 0.00181, l


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:00:24,392][0m Trial 30 finished with value: 0.6811160369741512 and parameters: {'embedding_dim': 203, 'num_filters': 455, 'hidden_dim': 365, 'dropout_p': 0.38567725908305894, 'lr': 0.0004094186289105576}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-

Epoch: 1 | train_loss: 0.00608, val_loss: 0.00294, lr: 4.95E-04, _patience: 10
Epoch: 2 | train_loss: 0.00298, val_loss: 0.00234, lr: 4.95E-04, _patience: 10
Epoch: 3 | train_loss: 0.00218, val_loss: 0.00187, lr: 4.95E-04, _patience: 10
Epoch: 4 | train_loss: 0.00168, val_loss: 0.00174, lr: 4.95E-04, _patience: 10
Epoch: 5 | train_loss: 0.00133, val_loss: 0.00170, lr: 4.95E-04, _patience: 10
Epoch: 6 | train_loss: 0.00106, val_loss: 0.00165, lr: 4.95E-04, _patience: 10
Epoch: 7 | train_loss: 0.00083, val_loss: 0.00156, lr: 4.95E-04, _patience: 10
Epoch: 8 | train_loss: 0.00063, val_loss: 0.00163, lr: 4.95E-04, _patience: 9
Epoch: 9 | train_loss: 0.00047, val_loss: 0.00175, lr: 4.95E-04, _patience: 8
Epoch: 10 | train_loss: 0.00041, val_loss: 0.00183, lr: 4.95E-04, _patience: 7
Epoch: 11 | train_loss: 0.00034, val_loss: 0.00189, lr: 4.95E-04, _patience: 6
Epoch: 12 | train_loss: 0.00031, val_loss: 0.00191, lr: 4.95E-04, _patience: 5
Epoch: 13 | train_loss: 0.00031, val_loss: 0.00202, lr


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:01:21,693][0m Trial 31 finished with value: 0.6787386687018724 and parameters: {'embedding_dim': 512, 'num_filters': 364, 'hidden_dim': 427, 'dropout_p': 0.3256200732201236, 'lr': 0.0004946070893241252}. Best is trial 15 with value: 0.7081352359515858.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00628, val_loss: 0.00306, lr: 4.94E-04, _patience: 10
Epoch: 2 | train_loss: 0.00314, val_loss: 0.00243, lr: 4.94E-04, _patience: 10
Epoch: 3 | train_loss: 0.00231, val_loss: 0.00200, lr: 4.94E-04, _patience: 10
Epoch: 4 | train_loss: 0.00178, val_loss: 0.00173, lr: 4.94E-04, _patience: 10
Epoch: 5 | train_loss: 0.00139, val_loss: 0.00168, lr: 4.94E-04, _patience: 10
Epoch: 6 | train_loss: 0.00110, val_loss: 0.00158, lr: 4.94E-04, _patience: 10
Epoch: 7 | train_loss: 0.00084, val_loss: 0.00156, lr: 4.94E-04, _patience: 10
Epoch: 8 | train_loss: 0.00063, val_loss: 0.00161, lr: 4.94E-04, _patience: 9
Epoch: 9 | train_loss: 0.00052, val_loss: 0.00156, lr: 4.94E-04, _patience: 10
Epoch: 10 | train_loss: 0.00043, val_loss: 0.00175, lr: 4.94E-04, _patience: 9
Epoch: 11 | train_loss: 0.00036, val_loss: 0.00173, lr: 4.94E-04, _patience: 8
Epoch: 12 | train_loss: 0.00032, val_loss: 0.00189, lr: 4.94E-04, _patience: 7
Epoch: 13 | train_loss: 0.00027, val_loss: 0.00188, l


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:02:27,802][0m Trial 32 finished with value: 0.7102877118012281 and parameters: {'embedding_dim': 472, 'num_filters': 398, 'hidden_dim': 402, 'dropout_p': 0.37460230139713574, 'lr': 0.0004944035411985671}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-

Epoch: 1 | train_loss: 0.00452, val_loss: 0.00349, lr: 9.14E-05, _patience: 10
Epoch: 2 | train_loss: 0.00330, val_loss: 0.00256, lr: 9.14E-05, _patience: 10
Epoch: 3 | train_loss: 0.00280, val_loss: 0.00242, lr: 9.14E-05, _patience: 10
Epoch: 4 | train_loss: 0.00256, val_loss: 0.00228, lr: 9.14E-05, _patience: 10
Epoch: 5 | train_loss: 0.00231, val_loss: 0.00212, lr: 9.14E-05, _patience: 10
Epoch: 6 | train_loss: 0.00212, val_loss: 0.00202, lr: 9.14E-05, _patience: 10
Epoch: 7 | train_loss: 0.00191, val_loss: 0.00188, lr: 9.14E-05, _patience: 10
Epoch: 8 | train_loss: 0.00178, val_loss: 0.00181, lr: 9.14E-05, _patience: 10
Epoch: 9 | train_loss: 0.00165, val_loss: 0.00172, lr: 9.14E-05, _patience: 10
Epoch: 10 | train_loss: 0.00153, val_loss: 0.00167, lr: 9.14E-05, _patience: 10
Epoch: 11 | train_loss: 0.00139, val_loss: 0.00162, lr: 9.14E-05, _patience: 10
Epoch: 12 | train_loss: 0.00133, val_loss: 0.00159, lr: 9.14E-05, _patience: 10
Epoch: 13 | train_loss: 0.00120, val_loss: 0.0015


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:05:10,049][0m Trial 33 finished with value: 0.6790796462460503 and parameters: {'embedding_dim': 462, 'num_filters': 409, 'hidden_dim': 348, 'dropout_p': 0.4331617486149646, 'lr': 9.138542693941112e-05}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00564, val_loss: 0.00284, lr: 4.42E-04, _patience: 10
Epoch: 2 | train_loss: 0.00306, val_loss: 0.00241, lr: 4.42E-04, _patience: 10
Epoch: 3 | train_loss: 0.00235, val_loss: 0.00198, lr: 4.42E-04, _patience: 10
Epoch: 4 | train_loss: 0.00183, val_loss: 0.00175, lr: 4.42E-04, _patience: 10
Epoch: 5 | train_loss: 0.00146, val_loss: 0.00165, lr: 4.42E-04, _patience: 10
Epoch: 6 | train_loss: 0.00118, val_loss: 0.00162, lr: 4.42E-04, _patience: 10
Epoch: 7 | train_loss: 0.00093, val_loss: 0.00153, lr: 4.42E-04, _patience: 10
Epoch: 8 | train_loss: 0.00072, val_loss: 0.00159, lr: 4.42E-04, _patience: 9
Epoch: 9 | train_loss: 0.00058, val_loss: 0.00159, lr: 4.42E-04, _patience: 8
Epoch: 10 | train_loss: 0.00045, val_loss: 0.00168, lr: 4.42E-04, _patience: 7
Epoch: 11 | train_loss: 0.00041, val_loss: 0.00175, lr: 4.42E-04, _patience: 6
Epoch: 12 | train_loss: 0.00038, val_loss: 0.00177, lr: 4.42E-04, _patience: 5
Epoch: 13 | train_loss: 0.00037, val_loss: 0.00178, lr


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:06:01,557][0m Trial 34 finished with value: 0.6866179960249056 and parameters: {'embedding_dim': 481, 'num_filters': 309, 'hidden_dim': 456, 'dropout_p': 0.3715332291039401, 'lr': 0.00044201913228164184}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-

Epoch: 1 | train_loss: 0.00638, val_loss: 0.00298, lr: 3.67E-04, _patience: 10
Epoch: 2 | train_loss: 0.00350, val_loss: 0.00239, lr: 3.67E-04, _patience: 10
Epoch: 3 | train_loss: 0.00269, val_loss: 0.00207, lr: 3.67E-04, _patience: 10
Epoch: 4 | train_loss: 0.00214, val_loss: 0.00183, lr: 3.67E-04, _patience: 10
Epoch: 5 | train_loss: 0.00173, val_loss: 0.00168, lr: 3.67E-04, _patience: 10
Epoch: 6 | train_loss: 0.00150, val_loss: 0.00162, lr: 3.67E-04, _patience: 10
Epoch: 7 | train_loss: 0.00125, val_loss: 0.00164, lr: 3.67E-04, _patience: 9
Epoch: 8 | train_loss: 0.00109, val_loss: 0.00163, lr: 3.67E-04, _patience: 8
Epoch: 9 | train_loss: 0.00093, val_loss: 0.00156, lr: 3.67E-04, _patience: 10
Epoch: 10 | train_loss: 0.00077, val_loss: 0.00159, lr: 3.67E-04, _patience: 9
Epoch: 11 | train_loss: 0.00069, val_loss: 0.00162, lr: 3.67E-04, _patience: 8
Epoch: 12 | train_loss: 0.00058, val_loss: 0.00169, lr: 3.67E-04, _patience: 7
Epoch: 13 | train_loss: 0.00050, val_loss: 0.00171, lr


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:07:33,425][0m Trial 35 finished with value: 0.6895817063963423 and parameters: {'embedding_dim': 441, 'num_filters': 440, 'hidden_dim': 263, 'dropout_p': 0.48312288442939505, 'lr': 0.00036741064775232875}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists

Epoch: 1 | train_loss: 0.00697, val_loss: 0.00303, lr: 4.98E-04, _patience: 10
Epoch: 2 | train_loss: 0.00326, val_loss: 0.00248, lr: 4.98E-04, _patience: 10
Epoch: 3 | train_loss: 0.00240, val_loss: 0.00203, lr: 4.98E-04, _patience: 10
Epoch: 4 | train_loss: 0.00190, val_loss: 0.00179, lr: 4.98E-04, _patience: 10
Epoch: 5 | train_loss: 0.00149, val_loss: 0.00169, lr: 4.98E-04, _patience: 10
Epoch: 6 | train_loss: 0.00119, val_loss: 0.00157, lr: 4.98E-04, _patience: 10
Epoch: 7 | train_loss: 0.00093, val_loss: 0.00161, lr: 4.98E-04, _patience: 9
Epoch: 8 | train_loss: 0.00074, val_loss: 0.00160, lr: 4.98E-04, _patience: 8
Epoch: 9 | train_loss: 0.00058, val_loss: 0.00156, lr: 4.98E-04, _patience: 10
Epoch: 10 | train_loss: 0.00045, val_loss: 0.00167, lr: 4.98E-04, _patience: 9
Epoch: 11 | train_loss: 0.00041, val_loss: 0.00170, lr: 4.98E-04, _patience: 8
Epoch: 12 | train_loss: 0.00034, val_loss: 0.00183, lr: 4.98E-04, _patience: 7
Epoch: 13 | train_loss: 0.00030, val_loss: 0.00200, lr


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:08:36,516][0m Trial 36 finished with value: 0.6948986757829166 and parameters: {'embedding_dim': 383, 'num_filters': 471, 'hidden_dim': 388, 'dropout_p': 0.3516392586441111, 'lr': 0.000498392386803151}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or

Epoch: 1 | train_loss: 0.00577, val_loss: 0.00291, lr: 3.06E-04, _patience: 10
Epoch: 2 | train_loss: 0.00318, val_loss: 0.00247, lr: 3.06E-04, _patience: 10
Epoch: 3 | train_loss: 0.00254, val_loss: 0.00206, lr: 3.06E-04, _patience: 10
Epoch: 4 | train_loss: 0.00197, val_loss: 0.00181, lr: 3.06E-04, _patience: 10
Epoch: 5 | train_loss: 0.00160, val_loss: 0.00169, lr: 3.06E-04, _patience: 10
Epoch: 6 | train_loss: 0.00133, val_loss: 0.00159, lr: 3.06E-04, _patience: 10
Epoch: 7 | train_loss: 0.00110, val_loss: 0.00155, lr: 3.06E-04, _patience: 10
Epoch: 8 | train_loss: 0.00091, val_loss: 0.00154, lr: 3.06E-04, _patience: 10
Epoch: 9 | train_loss: 0.00076, val_loss: 0.00156, lr: 3.06E-04, _patience: 9
Epoch: 10 | train_loss: 0.00062, val_loss: 0.00147, lr: 3.06E-04, _patience: 10
Epoch: 11 | train_loss: 0.00050, val_loss: 0.00154, lr: 3.06E-04, _patience: 9
Epoch: 12 | train_loss: 0.00042, val_loss: 0.00159, lr: 3.06E-04, _patience: 8
Epoch: 13 | train_loss: 0.00036, val_loss: 0.00160, 


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:09:48,462][0m Trial 37 finished with value: 0.6989682101555443 and parameters: {'embedding_dim': 488, 'num_filters': 394, 'hidden_dim': 485, 'dropout_p': 0.4653716719987929, 'lr': 0.00030615883162829407}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-

Epoch: 1 | train_loss: 0.00624, val_loss: 0.00287, lr: 4.35E-04, _patience: 10
Epoch: 2 | train_loss: 0.00353, val_loss: 0.00252, lr: 4.35E-04, _patience: 10
Epoch: 3 | train_loss: 0.00269, val_loss: 0.00211, lr: 4.35E-04, _patience: 10
Epoch: 4 | train_loss: 0.00221, val_loss: 0.00185, lr: 4.35E-04, _patience: 10
Epoch: 5 | train_loss: 0.00180, val_loss: 0.00175, lr: 4.35E-04, _patience: 10
Epoch: 6 | train_loss: 0.00150, val_loss: 0.00167, lr: 4.35E-04, _patience: 10
Epoch: 7 | train_loss: 0.00127, val_loss: 0.00164, lr: 4.35E-04, _patience: 10
Epoch: 8 | train_loss: 0.00105, val_loss: 0.00164, lr: 4.35E-04, _patience: 10
Epoch: 9 | train_loss: 0.00086, val_loss: 0.00154, lr: 4.35E-04, _patience: 10
Epoch: 10 | train_loss: 0.00072, val_loss: 0.00168, lr: 4.35E-04, _patience: 9
Epoch: 11 | train_loss: 0.00061, val_loss: 0.00167, lr: 4.35E-04, _patience: 8
Epoch: 12 | train_loss: 0.00055, val_loss: 0.00159, lr: 4.35E-04, _patience: 7
Epoch: 13 | train_loss: 0.00048, val_loss: 0.00173, 


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:10:43,089][0m Trial 38 finished with value: 0.7073815720260342 and parameters: {'embedding_dim': 420, 'num_filters': 353, 'hidden_dim': 322, 'dropout_p': 0.5195856635041117, 'lr': 0.0004354849644707854}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00499, val_loss: 0.00326, lr: 2.40E-04, _patience: 10
Epoch: 2 | train_loss: 0.00335, val_loss: 0.00253, lr: 2.40E-04, _patience: 10
Epoch: 3 | train_loss: 0.00282, val_loss: 0.00231, lr: 2.40E-04, _patience: 10
Epoch: 4 | train_loss: 0.00241, val_loss: 0.00207, lr: 2.40E-04, _patience: 10
Epoch: 5 | train_loss: 0.00210, val_loss: 0.00190, lr: 2.40E-04, _patience: 10
Epoch: 6 | train_loss: 0.00183, val_loss: 0.00180, lr: 2.40E-04, _patience: 10
Epoch: 7 | train_loss: 0.00163, val_loss: 0.00173, lr: 2.40E-04, _patience: 10
Epoch: 8 | train_loss: 0.00145, val_loss: 0.00166, lr: 2.40E-04, _patience: 10
Epoch: 9 | train_loss: 0.00132, val_loss: 0.00164, lr: 2.40E-04, _patience: 10
Epoch: 10 | train_loss: 0.00116, val_loss: 0.00157, lr: 2.40E-04, _patience: 10
Epoch: 11 | train_loss: 0.00104, val_loss: 0.00162, lr: 2.40E-04, _patience: 9
Epoch: 12 | train_loss: 0.00092, val_loss: 0.00156, lr: 2.40E-04, _patience: 10
Epoch: 13 | train_loss: 0.00084, val_loss: 0.00161


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:11:35,714][0m Trial 39 finished with value: 0.6735362330036719 and parameters: {'embedding_dim': 431, 'num_filters': 256, 'hidden_dim': 307, 'dropout_p': 0.518570082923261, 'lr': 0.0002404127348161911}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or

Epoch: 1 | train_loss: 0.00582, val_loss: 0.00314, lr: 3.45E-04, _patience: 10
Epoch: 2 | train_loss: 0.00360, val_loss: 0.00254, lr: 3.45E-04, _patience: 10
Epoch: 3 | train_loss: 0.00290, val_loss: 0.00225, lr: 3.45E-04, _patience: 10
Epoch: 4 | train_loss: 0.00241, val_loss: 0.00203, lr: 3.45E-04, _patience: 10
Epoch: 5 | train_loss: 0.00207, val_loss: 0.00185, lr: 3.45E-04, _patience: 10
Epoch: 6 | train_loss: 0.00178, val_loss: 0.00170, lr: 3.45E-04, _patience: 10
Epoch: 7 | train_loss: 0.00155, val_loss: 0.00164, lr: 3.45E-04, _patience: 10
Epoch: 8 | train_loss: 0.00131, val_loss: 0.00158, lr: 3.45E-04, _patience: 10
Epoch: 9 | train_loss: 0.00120, val_loss: 0.00153, lr: 3.45E-04, _patience: 10
Epoch: 10 | train_loss: 0.00101, val_loss: 0.00159, lr: 3.45E-04, _patience: 9
Epoch: 11 | train_loss: 0.00089, val_loss: 0.00153, lr: 3.45E-04, _patience: 8
Epoch: 12 | train_loss: 0.00080, val_loss: 0.00158, lr: 3.45E-04, _patience: 7
Epoch: 13 | train_loss: 0.00070, val_loss: 0.00160, 


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:12:52,245][0m Trial 40 finished with value: 0.685355799566251 and parameters: {'embedding_dim': 341, 'num_filters': 337, 'hidden_dim': 325, 'dropout_p': 0.5839101733472284, 'lr': 0.00034518530245696276}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00606, val_loss: 0.00284, lr: 4.38E-04, _patience: 10
Epoch: 2 | train_loss: 0.00318, val_loss: 0.00248, lr: 4.38E-04, _patience: 10
Epoch: 3 | train_loss: 0.00248, val_loss: 0.00202, lr: 4.38E-04, _patience: 10
Epoch: 4 | train_loss: 0.00197, val_loss: 0.00176, lr: 4.38E-04, _patience: 10
Epoch: 5 | train_loss: 0.00155, val_loss: 0.00168, lr: 4.38E-04, _patience: 10
Epoch: 6 | train_loss: 0.00128, val_loss: 0.00161, lr: 4.38E-04, _patience: 10
Epoch: 7 | train_loss: 0.00103, val_loss: 0.00159, lr: 4.38E-04, _patience: 10
Epoch: 8 | train_loss: 0.00084, val_loss: 0.00154, lr: 4.38E-04, _patience: 10
Epoch: 9 | train_loss: 0.00064, val_loss: 0.00157, lr: 4.38E-04, _patience: 9
Epoch: 10 | train_loss: 0.00056, val_loss: 0.00161, lr: 4.38E-04, _patience: 8
Epoch: 11 | train_loss: 0.00046, val_loss: 0.00167, lr: 4.38E-04, _patience: 7
Epoch: 12 | train_loss: 0.00036, val_loss: 0.00188, lr: 4.38E-04, _patience: 6
Epoch: 13 | train_loss: 0.00033, val_loss: 0.00180, l


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:13:48,445][0m Trial 41 finished with value: 0.6887995256130629 and parameters: {'embedding_dim': 460, 'num_filters': 354, 'hidden_dim': 362, 'dropout_p': 0.4236308239363996, 'lr': 0.0004383276822243548}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00561, val_loss: 0.00285, lr: 4.39E-04, _patience: 10
Epoch: 2 | train_loss: 0.00314, val_loss: 0.00248, lr: 4.39E-04, _patience: 10
Epoch: 3 | train_loss: 0.00241, val_loss: 0.00200, lr: 4.39E-04, _patience: 10
Epoch: 4 | train_loss: 0.00191, val_loss: 0.00177, lr: 4.39E-04, _patience: 10
Epoch: 5 | train_loss: 0.00151, val_loss: 0.00171, lr: 4.39E-04, _patience: 10
Epoch: 6 | train_loss: 0.00125, val_loss: 0.00162, lr: 4.39E-04, _patience: 10
Epoch: 7 | train_loss: 0.00100, val_loss: 0.00163, lr: 4.39E-04, _patience: 9
Epoch: 8 | train_loss: 0.00080, val_loss: 0.00165, lr: 4.39E-04, _patience: 8
Epoch: 9 | train_loss: 0.00067, val_loss: 0.00168, lr: 4.39E-04, _patience: 7
Epoch: 10 | train_loss: 0.00053, val_loss: 0.00162, lr: 4.39E-04, _patience: 6
Epoch: 11 | train_loss: 0.00047, val_loss: 0.00168, lr: 4.39E-04, _patience: 5
Epoch: 12 | train_loss: 0.00044, val_loss: 0.00179, lr: 4.39E-05, _patience: 4
Epoch: 13 | train_loss: 0.00037, val_loss: 0.00164, lr:


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:15:07,422][0m Trial 42 finished with value: 0.6932799500543302 and parameters: {'embedding_dim': 493, 'num_filters': 322, 'hidden_dim': 289, 'dropout_p': 0.3939173782786757, 'lr': 0.0004393012452199026}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00714, val_loss: 0.00314, lr: 3.86E-04, _patience: 10
Epoch: 2 | train_loss: 0.00365, val_loss: 0.00257, lr: 3.86E-04, _patience: 10
Epoch: 3 | train_loss: 0.00288, val_loss: 0.00220, lr: 3.86E-04, _patience: 10
Epoch: 4 | train_loss: 0.00236, val_loss: 0.00191, lr: 3.86E-04, _patience: 10
Epoch: 5 | train_loss: 0.00199, val_loss: 0.00179, lr: 3.86E-04, _patience: 10
Epoch: 6 | train_loss: 0.00171, val_loss: 0.00167, lr: 3.86E-04, _patience: 10
Epoch: 7 | train_loss: 0.00141, val_loss: 0.00168, lr: 3.86E-04, _patience: 9
Epoch: 8 | train_loss: 0.00127, val_loss: 0.00160, lr: 3.86E-04, _patience: 10
Epoch: 9 | train_loss: 0.00104, val_loss: 0.00168, lr: 3.86E-04, _patience: 9
Epoch: 10 | train_loss: 0.00092, val_loss: 0.00158, lr: 3.86E-04, _patience: 10
Epoch: 11 | train_loss: 0.00076, val_loss: 0.00164, lr: 3.86E-04, _patience: 9
Epoch: 12 | train_loss: 0.00062, val_loss: 0.00166, lr: 3.86E-04, _patience: 8
Epoch: 13 | train_loss: 0.00059, val_loss: 0.00170, l


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:16:27,168][0m Trial 43 finished with value: 0.6945316004719305 and parameters: {'embedding_dim': 426, 'num_filters': 413, 'hidden_dim': 389, 'dropout_p': 0.628876765173201, 'lr': 0.00038592860137727785}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00591, val_loss: 0.00278, lr: 4.96E-04, _patience: 10
Epoch: 2 | train_loss: 0.00335, val_loss: 0.00247, lr: 4.96E-04, _patience: 10
Epoch: 3 | train_loss: 0.00257, val_loss: 0.00205, lr: 4.96E-04, _patience: 10
Epoch: 4 | train_loss: 0.00207, val_loss: 0.00183, lr: 4.96E-04, _patience: 10
Epoch: 5 | train_loss: 0.00169, val_loss: 0.00169, lr: 4.96E-04, _patience: 10
Epoch: 6 | train_loss: 0.00138, val_loss: 0.00160, lr: 4.96E-04, _patience: 10
Epoch: 7 | train_loss: 0.00115, val_loss: 0.00154, lr: 4.96E-04, _patience: 10
Epoch: 8 | train_loss: 0.00093, val_loss: 0.00157, lr: 4.96E-04, _patience: 9
Epoch: 9 | train_loss: 0.00079, val_loss: 0.00148, lr: 4.96E-04, _patience: 10
Epoch: 10 | train_loss: 0.00064, val_loss: 0.00165, lr: 4.96E-04, _patience: 9
Epoch: 11 | train_loss: 0.00054, val_loss: 0.00162, lr: 4.96E-04, _patience: 8
Epoch: 12 | train_loss: 0.00046, val_loss: 0.00167, lr: 4.96E-04, _patience: 7
Epoch: 13 | train_loss: 0.00040, val_loss: 0.00170, l


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:17:14,556][0m Trial 44 finished with value: 0.675960007342357 and parameters: {'embedding_dim': 394, 'num_filters': 301, 'hidden_dim': 334, 'dropout_p': 0.5041260011891522, 'lr': 0.000496400898771724}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-

Epoch: 1 | train_loss: 0.00587, val_loss: 0.00273, lr: 4.52E-04, _patience: 10
Epoch: 2 | train_loss: 0.00313, val_loss: 0.00230, lr: 4.52E-04, _patience: 10
Epoch: 3 | train_loss: 0.00231, val_loss: 0.00198, lr: 4.52E-04, _patience: 10
Epoch: 4 | train_loss: 0.00179, val_loss: 0.00180, lr: 4.52E-04, _patience: 10
Epoch: 5 | train_loss: 0.00139, val_loss: 0.00163, lr: 4.52E-04, _patience: 10
Epoch: 6 | train_loss: 0.00109, val_loss: 0.00158, lr: 4.52E-04, _patience: 10
Epoch: 7 | train_loss: 0.00084, val_loss: 0.00153, lr: 4.52E-04, _patience: 10
Epoch: 8 | train_loss: 0.00063, val_loss: 0.00156, lr: 4.52E-04, _patience: 9
Epoch: 9 | train_loss: 0.00053, val_loss: 0.00163, lr: 4.52E-04, _patience: 8
Epoch: 10 | train_loss: 0.00045, val_loss: 0.00175, lr: 4.52E-04, _patience: 7
Epoch: 11 | train_loss: 0.00038, val_loss: 0.00179, lr: 4.52E-04, _patience: 6
Epoch: 12 | train_loss: 0.00031, val_loss: 0.00196, lr: 4.52E-04, _patience: 5
Epoch: 13 | train_loss: 0.00033, val_loss: 0.00204, lr


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:18:16,998][0m Trial 45 finished with value: 0.6906438191943152 and parameters: {'embedding_dim': 498, 'num_filters': 388, 'hidden_dim': 353, 'dropout_p': 0.3708056474012058, 'lr': 0.0004523010115292895}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00569, val_loss: 0.00295, lr: 3.28E-04, _patience: 10
Epoch: 2 | train_loss: 0.00342, val_loss: 0.00245, lr: 3.28E-04, _patience: 10
Epoch: 3 | train_loss: 0.00280, val_loss: 0.00211, lr: 3.28E-04, _patience: 10
Epoch: 4 | train_loss: 0.00236, val_loss: 0.00191, lr: 3.28E-04, _patience: 10
Epoch: 5 | train_loss: 0.00198, val_loss: 0.00178, lr: 3.28E-04, _patience: 10
Epoch: 6 | train_loss: 0.00170, val_loss: 0.00172, lr: 3.28E-04, _patience: 10
Epoch: 7 | train_loss: 0.00150, val_loss: 0.00162, lr: 3.28E-04, _patience: 10
Epoch: 8 | train_loss: 0.00128, val_loss: 0.00155, lr: 3.28E-04, _patience: 10
Epoch: 9 | train_loss: 0.00110, val_loss: 0.00154, lr: 3.28E-04, _patience: 10
Epoch: 10 | train_loss: 0.00095, val_loss: 0.00158, lr: 3.28E-04, _patience: 9
Epoch: 11 | train_loss: 0.00081, val_loss: 0.00155, lr: 3.28E-04, _patience: 8
Epoch: 12 | train_loss: 0.00072, val_loss: 0.00163, lr: 3.28E-04, _patience: 7
Epoch: 13 | train_loss: 0.00066, val_loss: 0.00153, 


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:19:29,547][0m Trial 46 finished with value: 0.6839549509112915 and parameters: {'embedding_dim': 473, 'num_filters': 349, 'hidden_dim': 249, 'dropout_p': 0.5495169377706061, 'lr': 0.0003280497002570248}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00450, val_loss: 0.00336, lr: 6.27E-05, _patience: 10
Epoch: 2 | train_loss: 0.00334, val_loss: 0.00267, lr: 6.27E-05, _patience: 10
Epoch: 3 | train_loss: 0.00282, val_loss: 0.00251, lr: 6.27E-05, _patience: 10
Epoch: 4 | train_loss: 0.00260, val_loss: 0.00239, lr: 6.27E-05, _patience: 10
Epoch: 5 | train_loss: 0.00241, val_loss: 0.00226, lr: 6.27E-05, _patience: 10
Epoch: 6 | train_loss: 0.00228, val_loss: 0.00216, lr: 6.27E-05, _patience: 10
Epoch: 7 | train_loss: 0.00210, val_loss: 0.00205, lr: 6.27E-05, _patience: 10
Epoch: 8 | train_loss: 0.00197, val_loss: 0.00197, lr: 6.27E-05, _patience: 10
Epoch: 9 | train_loss: 0.00182, val_loss: 0.00189, lr: 6.27E-05, _patience: 10
Epoch: 10 | train_loss: 0.00172, val_loss: 0.00181, lr: 6.27E-05, _patience: 10
Epoch: 11 | train_loss: 0.00157, val_loss: 0.00175, lr: 6.27E-05, _patience: 10
Epoch: 12 | train_loss: 0.00149, val_loss: 0.00171, lr: 6.27E-05, _patience: 10
Epoch: 13 | train_loss: 0.00141, val_loss: 0.0016


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:22:17,755][0m Trial 47 finished with value: 0.6759391106545617 and parameters: {'embedding_dim': 449, 'num_filters': 494, 'hidden_dim': 283, 'dropout_p': 0.3247286479643003, 'lr': 6.269357965334842e-05}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-o

Epoch: 1 | train_loss: 0.00453, val_loss: 0.00320, lr: 5.06E-05, _patience: 10
Epoch: 2 | train_loss: 0.00350, val_loss: 0.00286, lr: 5.06E-05, _patience: 10
Epoch: 3 | train_loss: 0.00307, val_loss: 0.00264, lr: 5.06E-05, _patience: 10
Epoch: 4 | train_loss: 0.00290, val_loss: 0.00258, lr: 5.06E-05, _patience: 10
Epoch: 5 | train_loss: 0.00280, val_loss: 0.00253, lr: 5.06E-05, _patience: 10
Epoch: 6 | train_loss: 0.00270, val_loss: 0.00246, lr: 5.06E-05, _patience: 10
Epoch: 7 | train_loss: 0.00258, val_loss: 0.00241, lr: 5.06E-05, _patience: 10
Epoch: 8 | train_loss: 0.00249, val_loss: 0.00234, lr: 5.06E-05, _patience: 10
Epoch: 9 | train_loss: 0.00242, val_loss: 0.00227, lr: 5.06E-05, _patience: 10
Epoch: 10 | train_loss: 0.00231, val_loss: 0.00221, lr: 5.06E-05, _patience: 10
Epoch: 11 | train_loss: 0.00219, val_loss: 0.00214, lr: 5.06E-05, _patience: 10
Epoch: 12 | train_loss: 0.00212, val_loss: 0.00207, lr: 5.06E-05, _patience: 10
Epoch: 13 | train_loss: 0.00200, val_loss: 0.0020


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:24:46,158][0m Trial 48 finished with value: 0.641850578803056 and parameters: {'embedding_dim': 294, 'num_filters': 445, 'hidden_dim': 308, 'dropout_p': 0.4418280205121402, 'lr': 5.056615745741886e-05}. Best is trial 32 with value: 0.7102877118012281.[0m

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or

Epoch: 1 | train_loss: 0.00623, val_loss: 0.00314, lr: 3.99E-04, _patience: 10
Epoch: 2 | train_loss: 0.00337, val_loss: 0.00252, lr: 3.99E-04, _patience: 10
Epoch: 3 | train_loss: 0.00258, val_loss: 0.00206, lr: 3.99E-04, _patience: 10
Epoch: 4 | train_loss: 0.00204, val_loss: 0.00179, lr: 3.99E-04, _patience: 10
Epoch: 5 | train_loss: 0.00169, val_loss: 0.00171, lr: 3.99E-04, _patience: 10
Epoch: 6 | train_loss: 0.00140, val_loss: 0.00165, lr: 3.99E-04, _patience: 10
Epoch: 7 | train_loss: 0.00115, val_loss: 0.00157, lr: 3.99E-04, _patience: 10
Epoch: 8 | train_loss: 0.00090, val_loss: 0.00156, lr: 3.99E-04, _patience: 10
Epoch: 9 | train_loss: 0.00076, val_loss: 0.00153, lr: 3.99E-04, _patience: 10
Epoch: 10 | train_loss: 0.00064, val_loss: 0.00156, lr: 3.99E-04, _patience: 9
Epoch: 11 | train_loss: 0.00053, val_loss: 0.00162, lr: 3.99E-04, _patience: 8
Epoch: 12 | train_loss: 0.00044, val_loss: 0.00169, lr: 3.99E-04, _patience: 7
Epoch: 13 | train_loss: 0.00040, val_loss: 0.00179, 


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

[32m[I 2021-07-10 22:25:40,684][0m Trial 49 finished with value: 0.6863018900623078 and parameters: {'embedding_dim': 356, 'num_filters': 419, 'hidden_dim': 451, 'dropout_p': 0.4733659290881291, 'lr': 0.00039877990719003196}. Best is trial 32 with value: 0.7102877118012281.[0m


In [72]:
# MLFlow dashboard
get_ipython().system_raw("mlflow server -h 0.0.0.0 -p 5000 --backend-store-uri $PWD/experiments/ &")
ngrok.kill()
ngrok.set_auth_token("")
ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
print("MLflow Tracking UI:", ngrok_tunnel.public_url)

MLflow Tracking UI: https://c39c622847dc.ngrok.io


In [73]:
# All trials
trials_df = study.trials_dataframe()
trials_df = trials_df.sort_values(["value"], ascending=False)  # sort by metric
trials_df.head()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_dropout_p,params_embedding_dim,params_hidden_dim,params_lr,params_num_filters,user_attrs_f1,user_attrs_precision,user_attrs_recall,user_attrs_threshold,state
32,32,0.710288,2021-07-10 22:01:21.710400,2021-07-10 22:02:27.801826,0 days 00:01:06.091426,0.374602,472,402,0.000494,398,0.710288,0.849331,0.628755,0.332658,COMPLETE
15,15,0.708135,2021-07-10 21:41:22.584840,2021-07-10 21:42:38.022625,0 days 00:01:15.437785,0.433708,473,505,0.000486,512,0.708135,0.856881,0.618026,0.346464,COMPLETE
38,38,0.707382,2021-07-10 22:09:48.479380,2021-07-10 22:10:43.088947,0 days 00:00:54.609567,0.519586,420,322,0.000435,353,0.707382,0.834607,0.635193,0.290364,COMPLETE
26,26,0.705662,2021-07-10 21:55:27.826290,2021-07-10 21:56:18.820319,0 days 00:00:50.994029,0.389737,440,351,0.0005,361,0.705662,0.827111,0.635193,0.2904,COMPLETE
11,11,0.705156,2021-07-10 21:37:36.814178,2021-07-10 21:38:49.495839,0 days 00:01:12.681661,0.415083,492,420,0.00048,397,0.705156,0.840579,0.624464,0.296465,COMPLETE


In [74]:
# Best trial
print (f"Best value (f1): {study.best_trial.value}")
print (f"Best hyperparameters: {study.best_trial.params}")

Best value (f1): 0.7102877118012281
Best hyperparameters: {'embedding_dim': 472, 'num_filters': 398, 'hidden_dim': 402, 'dropout_p': 0.37460230139713574, 'lr': 0.0004944035411985671}


In [75]:
# Save best parameters
params = {**args.__dict__, **study.best_trial.params}
params["threshold"] = study.best_trial.user_attrs["threshold"]
print (json.dumps(params, indent=2, cls=NumpyEncoder))

{
  "char_level": true,
  "filter_sizes": [
    1,
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    9,
    10
  ],
  "batch_size": 64,
  "embedding_dim": 472,
  "num_filters": 398,
  "hidden_dim": 402,
  "dropout_p": 0.37460230139713574,
  "lr": 0.0004944035411985671,
  "num_epochs": 200,
  "patience": 10,
  "threshold": 0.3326583206653595
}


In [76]:
params

{'batch_size': 64,
 'char_level': True,
 'dropout_p': 0.37460230139713574,
 'embedding_dim': 472,
 'filter_sizes': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 'hidden_dim': 402,
 'lr': 0.0004944035411985671,
 'num_epochs': 200,
 'num_filters': 398,
 'patience': 10,
 'threshold': 0.33265832}

In [84]:
params['threshold']=str(params['threshold'])

In [78]:
tempfile.TemporaryDirectory()

<TemporaryDirectory '/tmp/tmp131fct1a'>

In [87]:
import os
os.makedirs("data_new", exist_ok=True)
with open("data_new/best_params.json", 'w', encoding='utf-8') as f:
    json.dump(params, f, indent=2)

In [88]:
plams = load_dict(filepath="data_new/best_params.json")

In [89]:
plams

{'batch_size': 64,
 'char_level': True,
 'dropout_p': 0.37460230139713574,
 'embedding_dim': 472,
 'filter_sizes': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 'hidden_dim': 402,
 'lr': 0.0004944035411985671,
 'num_epochs': 200,
 'num_filters': 398,
 'patience': 10,
 'threshold': '0.33265832'}

In [90]:
plams['threshold'] =float(plams['threshold'])

In [91]:
plams

{'batch_size': 64,
 'char_level': True,
 'dropout_p': 0.37460230139713574,
 'embedding_dim': 472,
 'filter_sizes': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 'hidden_dim': 402,
 'lr': 0.0004944035411985671,
 'num_epochs': 200,
 'num_filters': 398,
 'patience': 10,
 'threshold': 0.33265832}

In [93]:
if 0.312 > plams['threshold']:
  print ('ek')

In [96]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [97]:
with open("/content/drive/My Drive/data/best_params.json", 'w', encoding='utf-8') as f:
    json.dump(plams, f, indent=2)

In [100]:
#ceck = json.load("/content/drive/My Drive/data/best_params.json")

with open("/content/drive/My Drive/data/best_params.json") as jsonFile:
    jsonObject = json.load(jsonFile)
    jsonFile.close()

In [101]:
jsonObject

{'batch_size': 64,
 'char_level': True,
 'dropout_p': 0.37460230139713574,
 'embedding_dim': 472,
 'filter_sizes': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 'hidden_dim': 402,
 'lr': 0.0004944035411985671,
 'num_epochs': 200,
 'num_filters': 398,
 'patience': 10,
 'threshold': 0.33265832}

In [103]:
jsonObject['threshold']

0.33265832

In [107]:
if 0.387 > jsonObject['threshold']:
  print ('ek')

ek
