In [1]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from tqdm.autonotebook import tqdm
import neurallm_utils as nutils
import wandb
import os

  from tqdm.autonotebook import tqdm
[nltk_data] Downloading package punkt to
[nltk_data]     /home/mezallamosas.j/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /home/mezallamosas.j/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


## Functions

In [2]:
# -------------------------------
# Data processing functions
# -------------------------------

def encode_tokens(data: list[list[str]], embedder: torch.nn.Embedding) -> list[list[int]]:
    """
    Replaces each natural-language token with its embedder index.

    e.g. [["<s>", "once", "upon", "a", "time"],
          ["there", "was", "a", ]]
        ->
        [[0, 59, 203, 1, 126],
         [26, 15, 1]]
        (The indices are arbitrary, as they are dependent on your embedder)

    Params:
        data: The corpus, as a list of tokenized sentences.
        embedder: An embedder trained on the given data. It must expose a
            ``token_to_index`` mapping from token to integer index.

    Returns:
        A list with the same nesting as ``data`` in which every token has been
        replaced by ``embedder.token_to_index[token]``.

    Note:
        Tokens missing from ``token_to_index`` are not handled here; the lookup
        error raised by the mapping propagates to the caller.
    """
    # Look the mapping up once instead of on every token.
    token_to_index = embedder.token_to_index
    # The original loop variable was named `list`, which shadowed the builtin.
    return [[token_to_index[token] for token in sentence] for sentence in data]


def create_ngrams(tokens: list, n: int) -> list:
    """Slides a window of ``n + 1`` consecutive items over ``tokens``.

    Each window holds ``n`` context items followed by the item that comes
    right after them (the prediction target), so callers that want windows
    of total length ``k`` should pass ``n = k - 1``.

    Args:
      tokens (list): the sequence to slice (any sliceable sequence works)
      n (int): number of context items per window

    Returns:
      list: one slice of ``tokens`` per window, each of length ``n + 1``;
        empty when ``tokens`` has ``n`` items or fewer
    """
    window_size = n + 1
    # One window per start position that still leaves room for the target item.
    n_windows = len(tokens) - n
    return [tokens[start:start + window_size] for start in range(n_windows)]

def generate_ngram_training_samples(encoded: list[list[int]], ngram: int) -> list:
    """
    Takes the **encoded** data (list of lists of ints) and
    generates the training samples out of it.

    Every sentence is cut into overlapping windows of ``ngram`` indices; the
    windows of all sentences are concatenated into one flat list. Windows never
    span two sentences.

    e.g. with ngram=3 the sentence [1, 2, 3, 4] yields
        [1, 2, 3]   (x = [1, 2], y = 3)
        [2, 3, 4]   (x = [2, 3], y = 4)

    Params:
        encoded: The corpus as lists of token indices, one list per sentence.
        ngram: The N of the n-gram model (context length + 1).

    Returns:
        list of lists in the format [[x1, x2, ... , x(n-1), y], ...]
    """
    samples = []
    # The original loop variable was named `list`, which shadowed the builtin.
    for sentence in encoded:
        # create_ngrams(seq, k) returns windows of k + 1 items, so asking for
        # ngram - 1 context items gives windows of exactly `ngram` indices.
        samples.extend(create_ngrams(sentence, ngram - 1))
    return samples

def split_sequences(training_sample):
    """
    Separates each training window into its context and its target.

    Params:
        training_sample: An iterable of sequences in the format
            [x1, x2, ..., x(n-1), y].

    Returns:
        A tuple (x_sample, y_sample): x_sample holds every window without its
        last item, y_sample holds the last item of every window, in the same
        order.
    """
    # Single pass over the input so one-shot iterables are handled like lists.
    pairs = [(window[:-1], window[-1]) for window in training_sample]
    contexts = [context for context, _ in pairs]
    targets = [target for _, target in pairs]
    return contexts, targets

def create_dataloaders(X: list, y: list, num_sequences_per_batch: int, 
                       test_pct: float = 0.1, shuffle: bool = True) -> tuple[torch.utils.data.DataLoader, torch.utils.data.DataLoader]:
    """
    Convert our data into a PyTorch DataLoader.    
    A DataLoader is an object that splits the dataset into batches for training.
    PyTorch docs: 
        https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
        https://pytorch.org/docs/stable/data.html

    The samples are wrapped in a TensorDataset and split into train and test
    sets *without shuffling*: the first ``len - test_size`` samples go to the
    training set and the last ``test_size`` samples go to the test set, where
    ``test_size = int(len * test_pct)``.

    The shuffle parameter refers to shuffling the data *in the loader* (look at the docs),
    not whether or not to shuffle the data before splitting it into train and test sets.
    It is applied to both returned loaders.

    Params:
        X: A list of input sequences
        y: A list of labels
        num_sequences_per_batch: Batch size
        test_pct: The proportion of samples to use in the test set.
        shuffle: INSTRUCTORS ONLY

    Returns:
        One DataLoader for training, and one for testing.
    """
    dataset = TensorDataset(torch.tensor(X), torch.tensor(y))
    test_size = int(len(dataset) * test_pct)
    train_size = len(dataset) - test_size

    # random_split would permute the samples before splitting, which both
    # contradicts the "don't shuffle before splitting" contract and makes the
    # split non-reproducible. Contiguous index ranges keep the original order.
    train_data = torch.utils.data.Subset(dataset, list(range(train_size)))
    test_data = torch.utils.data.Subset(dataset, list(range(train_size, len(dataset))))

    dataloader_train = DataLoader(train_data, batch_size=num_sequences_per_batch, shuffle=shuffle)
    dataloader_test = DataLoader(test_data, batch_size=num_sequences_per_batch, shuffle=shuffle)
    return dataloader_train, dataloader_test

# -------------------------------
# FFNN Model and Training Functions
# -------------------------------

class FFNN(nn.Module):
    """
    Feed-forward n-gram language model: embeds the N-1 context tokens,
    concatenates their embeddings and maps them to one logit per vocabulary
    entry through a single ReLU hidden layer.
    """
    def __init__(self, vocab_size: int, ngram: int, embedding_layer: torch.nn.Embedding, hidden_units=128, device: str = "cpu"):
        """
        Build an untrained model around the given embedding layer.

        Params:
            vocab_size: Size of the output layer (one logit per vocabulary entry).
            ngram: The N of the n-gram model; the network reads N-1 tokens.
            embedding_layer: Embedding layer used to embed the input indices;
                its ``embedding_dim`` determines the input width.
            hidden_units: Width of the hidden layer.
            device: Device the whole module is moved to; also stored on the
                instance so training code can move batches to it.
        """
        super().__init__()

        # Hyper-parameters are kept on the instance for later inspection.
        self.vocab_size, self.ngram = vocab_size, ngram
        self.embedding_layer = embedding_layer
        self.hidden_units = hidden_units
        self.device = device

        # The N-1 context embeddings are concatenated into one feature vector.
        context_width = (ngram - 1) * embedding_layer.embedding_dim

        self.flatten = nn.Flatten()
        hidden_layer = nn.Linear(context_width, hidden_units)
        output_layer = nn.Linear(hidden_units, vocab_size)
        self.linear_relu_stack = nn.Sequential(hidden_layer, nn.ReLU(), output_layer)

        # Registered submodules (including the embedding layer) move with the model.
        self.to(device)

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """
        Compute next-token logits for a batch of contexts.

        Params:
            X: Tensor of input indices with shape (batch_size, ngram-1)

        Returns:
            Logits of shape (batch_size, vocab_size).
        """
        return self.linear_relu_stack(self.flatten(self.embedding_layer(X)))

def train_one_epoch(dataloader, model, optimizer, loss_fn):
    """
    Run one optimisation pass over every batch of ``dataloader``.

    Params:
        dataloader: Iterable yielding (inputs, labels) batches.
        model: Model to update; batches are moved to ``model.device``.
        optimizer: Optimizer holding the model's parameters.
        loss_fn: Callable computing the loss from (outputs, labels).

    Returns:
        The sum of the per-batch loss values (not the average).
    """
    running_total = 0
    for batch_inputs, batch_labels in dataloader:
        target_device = model.device
        batch_inputs = batch_inputs.to(target_device)
        batch_labels = batch_labels.to(target_device)

        # Gradients accumulate by default, so clear them before each batch.
        optimizer.zero_grad()
        loss = loss_fn(model(batch_inputs), batch_labels)
        loss.backward()
        optimizer.step()

        running_total += loss.item()
    return running_total

def train(dataloader, model, epochs: int = 1, lr: float = 0.001) -> float:
    """
    Train the model with Adam and cross-entropy loss, logging the average
    batch loss of every epoch to stdout and to wandb.

    Params:
        dataloader: Training data loader.
        model: The model to train.
        epochs: Number of epochs.
        lr: Learning rate.

    Returns:
        The average batch loss of the last epoch, or NaN when ``epochs`` is 0
        (no epoch was run, so there is no loss to report).

    Raises:
        ValueError: If the dataloader yields no batches.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()
    n_batches = len(dataloader)
    if n_batches == 0:
        # Fail early with a clear message instead of a ZeroDivisionError
        # when averaging the epoch loss below.
        raise ValueError("Cannot train on an empty dataloader (no batches).")

    model.train()  # Set the model to training mode.

    # Defined up front so the return below is valid even when epochs == 0.
    avg_epoch_loss = float("nan")

    for epoch in tqdm(range(epochs), desc="Training Epochs"):
        epoch_loss = train_one_epoch(dataloader, model, optimizer, loss_fn)
        avg_epoch_loss = epoch_loss / n_batches
        print(f"Epoch: {epoch+1}, Average Loss: {avg_epoch_loss:.4f}")
        # Log metrics to wandb
        wandb.log({"epoch": epoch+1, "avg_epoch_loss": avg_epoch_loss})
    return avg_epoch_loss

def full_pipeline(data, word_embeddings_filename: str, 
                  batch_size: int,
                  ngram: int,
                  hidden_units: int = 128,
                  epochs: int = 1,
                  lr: float = 0.001,
                  test_pct: float = 0.1, device: str = "cpu") -> tuple[FFNN, float]:
    """
    Run the full training pipeline from loading embeddings to model training.
    
    Params:
        data: Raw data as a list of lists of tokens. The tokens are looked up
            in the embedder's ``token_to_index`` mapping, so they must be the
            same tokens the embeddings were trained on (not integer indices).
        word_embeddings_filename: Filename for the pre-trained embeddings.
        batch_size: Batch size for training.
        ngram: N-gram size.
        hidden_units: Number of hidden units.
        epochs: Number of epochs.
        lr: Learning rate.
        test_pct: Proportion of samples held out as a test set. The held-out
            loader is created but not used here; only the training split is
            fed to the model.
        device: Device the model is created on and trained on.
    
    Returns:
        A tuple ``(model, final_loss)``: the trained FFNN model and the value
        returned by ``train`` (the average batch loss of the last epoch).
    """
    # Load embeddings and create an embedder.
    token_embeddings = nutils.load_word2vec(word_embeddings_filename)
    embedder = nutils.create_embedder(token_embeddings)
    
    # Preprocess data: tokens -> indices -> n-gram windows -> (context, target) pairs.
    encoded_tokens = encode_tokens(data, embedder)
    vocab_size = embedder.num_embeddings
    training_sample = generate_ngram_training_samples(encoded_tokens, ngram)
    x_sample, y_sample = split_sequences(training_sample)
    # The test loader is intentionally discarded; this pipeline only trains.
    dataloader_train, _ = create_dataloaders(x_sample, y_sample, batch_size, test_pct)
    
    # Initialize the model.
    model = FFNN(vocab_size=vocab_size, ngram=ngram, embedding_layer=embedder, hidden_units=hidden_units, device=device)

    # Train the model.
    final_loss = train(dataloader=dataloader_train, model=model, epochs=epochs, lr=lr)
    
    return model, final_loss

# -------------------------------
# Prediction and generation functions
# -------------------------------

# Create a function that predicts the next token in a sequence.
def predict(model, input_tokens) -> str:
    """
    Sample the model's next token for a given context.

    The logits are turned into a probability distribution with softmax and
    the next token is *sampled* from that distribution (torch.multinomial),
    so repeated calls with the same input may return different tokens.
    Assume that the input tokens do not contain any unknown tokens.

    Side effect: the model is switched to evaluation mode and left there.

    Params:
        model: Your trained model
        input_tokens: A list of natural-language tokens. Must be length N-1.

    Returns:
        The predicted token (not the predicted index!)
    """
    vocab = model.embedding_layer

    # Encode the context and wrap it in a batch of one: shape [1, ngram-1].
    context_ids = [vocab.token_to_index[tok] for tok in input_tokens]
    batch = torch.tensor([context_ids]).to(model.device)

    model.eval()

    # No gradients are needed for inference.
    with torch.no_grad():
        next_token_probs = torch.softmax(model(batch), dim=1)
        sampled = torch.multinomial(next_token_probs, num_samples=1)

    # Map the sampled index back to its natural-language token.
    return vocab.index_to_token[sampled.item()]

from typing import List
# Generate a sequence from the model until you get an end of sentence token.
def generate(model, seed: List[str], max_tokens: int = None) -> List[str]:
    """
    Use the trained model to generate a sentence.

    Starting from the seed, the last N-1 tokens of the sentence generated so
    far are fed to ``predict`` and the predicted token is appended, until the
    end-of-sentence token is predicted or ``max_tokens`` tokens were added.

    Params:
        model: Your trained model
        seed: [w_1, w_2, ..., w_(n-1)]. Not modified.
        max_tokens: The maximum number of tokens to generate. When None,
            generate until the end of sentence token is reached.

    Return:
        The seed followed by the generated tokens. The end-of-sentence token
        itself is not included.
    """
    # Was "<\s>", which never equals the "</s>" token and therefore never
    # stopped generation when max_tokens was None.
    end_token = "</s>"
    context_size = model.ngram - 1

    tokens = list(seed)  # Work on a copy so the caller's seed stays intact.
    n_generated = 0

    while max_tokens is None or n_generated < max_tokens:
        # Slide the context over the *growing* sentence; slicing the fixed seed
        # would feed the model the same context on every step.
        predicted_token = predict(model, tokens[-context_size:])
        if predicted_token == end_token:
            break
        tokens.append(predicted_token)
        n_generated += 1

    return tokens

def generate_sentences(model, seed: List[str],  n_sentences: int, max_tokens: int = None) -> List[List[str]]:
    """
    Generate several sentences from the same seed.

    Params:
        model: Your trained model
        seed: Seed tokens passed unchanged to every ``generate`` call.
        n_sentences: How many sentences to generate.
        max_tokens: Per-sentence token limit forwarded to ``generate``.

    Return:
        A list with ``n_sentences`` entries, each the token list returned by
        ``generate``.
    """
    return [generate(model, seed, max_tokens) for _ in range(n_sentences)]

# you might want to define some functions to help you format the text nicely
# and/or generate multiple sequences

def format_sentence(tokens_list: List[List[str]], by_char = False) -> str:
  """Formats generated token lists as readable text, one sentence per line.

  For every sentence, all leading <s> tokens and one trailing </s> token are
  dropped, the remaining tokens are joined (with spaces, or with nothing when
  by_char is True), the result is capitalized with str.capitalize and
  terminated with a period and a newline. Sentences that have no tokens left
  after stripping the markers are skipped. The input lists are not modified.

  Args:
    tokens_list (list(list)): the list of token lists to be formatted
    by_char (bool): True when tokens are single characters, so they are joined
      without a separator

  Returns:
    string: the formatted sentences concatenated into one string
  """
  separator = "" if by_char else " "
  lines = []
  for tokens in tokens_list:
    # Find the content boundaries by index instead of pop(): this leaves the
    # caller's lists untouched and cannot fail on empty or marker-only input.
    start = 0
    while start < len(tokens) and tokens[start] == '<s>':  # several <s> for ngram > 2 models
      start += 1
    end = len(tokens)
    if end > start and tokens[end - 1] == '</s>':  # at most one </s> is removed
      end -= 1
    words = tokens[start:end]
    if not words:
      continue
    lines.append(separator.join(words).capitalize() + ".\n")
  # Kept from the original: strips plain spaces (not newlines) at both ends.
  return "".join(lines).strip(" ")


## Main Pipeline

In [3]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [4]:
def main():
    """
    Train one model for the current wandb run and save its weights.

    Hyperparameters (embeddings_size, ngram, batch_size, hidden_units, epochs,
    lr, test_pct) are read from the run's config. Embeddings for the chosen
    text type are trained and saved only when their file does not exist yet.
    The trained model's state dict is written under ``models/`` and the final
    training loss is logged to the run.
    """
    TEXT_TYPE = "word"  # "word" or "char"
    TRAIN_FILE = 'spooky_author_train.csv' # The file to train your language model on

    if TEXT_TYPE not in ("word", "char"):
        # Previously an unknown type fell through both branches and failed
        # later with an unbound `data` variable.
        raise ValueError(f"Unsupported TEXT_TYPE: {TEXT_TYPE!r}")

    # Used as a context manager so the run is finished even when training
    # raises (the original only reached wandb.finish() on success).
    with wandb.init(
        entity="northeastern-university",
        project="neural-language-model",
    ) as run:
        config = run.config

        EMBEDDINGS_SIZE = config.embeddings_size
        NGRAM = config.ngram
        NUM_SEQUENCES_PER_BATCH = config.batch_size
        HIDDEN_UNITS = config.hidden_units
        EPOCHS = config.epochs
        LR = config.lr

        # Both file names are parameterized by the text type, which yields the
        # same paths the separate word/char constants used to spell out.
        embeddings_file = f"embeddings/spooky_embedding_{TEXT_TYPE}_{EMBEDDINGS_SIZE}.model"
        model_file = f'models/spooky_author_model_{TEXT_TYPE}_{EMBEDDINGS_SIZE}_{NUM_SEQUENCES_PER_BATCH}_{NGRAM}_{HIDDEN_UNITS}_{EPOCHS}_{LR}.pt'

        data = nutils.read_file_spooky(TRAIN_FILE, NGRAM, by_character=(TEXT_TYPE == "char"))

        # Train the embeddings only once per (text type, size) combination.
        if not os.path.exists(embeddings_file):
            os.makedirs(os.path.dirname(embeddings_file), exist_ok=True)
            trained_embeddings = nutils.train_word2vec(data, EMBEDDINGS_SIZE)
            nutils.save_word2vec(trained_embeddings, embeddings_file)

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        model, final_loss = full_pipeline(
            data=data,
            word_embeddings_filename=embeddings_file,
            batch_size=NUM_SEQUENCES_PER_BATCH,
            ngram=NGRAM,
            hidden_units=HIDDEN_UNITS,
            epochs=EPOCHS,
            lr=LR,
            test_pct=config.test_pct,
            device=device,
        )

        # torch.save does not create missing parent directories.
        os.makedirs(os.path.dirname(model_file), exist_ok=True)
        torch.save(model.state_dict(), model_file)

        run.log({"final_loss": final_loss})
	

In [5]:
# Sweep definition handed to wandb: an exhaustive grid over the listed
# hyperparameter values ("method" may also be "random" or "bayes"), with the
# per-epoch average training loss as the metric to minimize.
sweep_config = dict(
    name="word_corrected_hyperparameter_sweep",
    method="grid",
    metric=dict(name="avg_epoch_loss", goal="minimize"),
    parameters=dict(
        embeddings_size=dict(values=[50, 100, 200]),
        batch_size=dict(values=[128, 256, 512]),
        ngram=dict(values=[2, 3, 4, 5]),
        hidden_units=dict(values=[128, 256, 512]),
        epochs=dict(values=[5, 10, 25]),
        lr=dict(values=[0.001, 0.0001]),
        test_pct=dict(value=0.1),  # Fixed value, not swept.
    ),
)

In [6]:
# Create the sweep on the wandb server from sweep_config; the returned ID is
# what the agent needs to pull run configurations.
sweep_id = wandb.sweep(sweep=sweep_config, project="neural-language-model")
print(f"Sweep ID: {sweep_id}")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: rxaeqrhj
Sweep URL: https://wandb.ai/biofx/neural-language-model/sweeps/rxaeqrhj
Sweep ID: rxaeqrhj


In [None]:
# Start an agent that calls main() once per sweep run. `count` is the maximum
# number of runs this agent will execute (500 here); lower it for a quick trial.
wandb.agent(sweep_id=sweep_id, function=main, count=500)

[34m[1mwandb[0m: Agent Starting Run: 9ud5wkw1 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmezallamosasj[0m ([33mbiofx[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.9964
Epoch: 2, Average Loss: 5.5068
Epoch: 3, Average Loss: 5.3378
Epoch: 4, Average Loss: 5.2316
Epoch: 5, Average Loss: 5.1587


0,1
avg_epoch_loss,█▄▂▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,5.15869
epoch,5.0
final_loss,5.15869


[34m[1mwandb[0m: Agent Starting Run: ms5egt2q with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.7676
Epoch: 2, Average Loss: 5.2246
Epoch: 3, Average Loss: 4.9954
Epoch: 4, Average Loss: 4.8214
Epoch: 5, Average Loss: 4.6824


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,4.6824
epoch,5.0
final_loss,4.6824


[34m[1mwandb[0m: Agent Starting Run: f2cs0uoc with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.5945
Epoch: 2, Average Loss: 5.0502
Epoch: 3, Average Loss: 4.8033
Epoch: 4, Average Loss: 4.6079
Epoch: 5, Average Loss: 4.4447


0,1
avg_epoch_loss,█▅▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,4.4447
epoch,5.0
final_loss,4.4447


[34m[1mwandb[0m: Agent Starting Run: bfbt9spv with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.4431
Epoch: 2, Average Loss: 4.9079
Epoch: 3, Average Loss: 4.6695
Epoch: 4, Average Loss: 4.4807
Epoch: 5, Average Loss: 4.3229


0,1
avg_epoch_loss,█▅▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,4.32292
epoch,5.0
final_loss,4.32292


[34m[1mwandb[0m: Agent Starting Run: 9nls1whs with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.6274
Epoch: 2, Average Loss: 5.9533
Epoch: 3, Average Loss: 5.7955
Epoch: 4, Average Loss: 5.6889
Epoch: 5, Average Loss: 5.6083


0,1
avg_epoch_loss,█▃▂▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,5.60834
epoch,5.0
final_loss,5.60834


[34m[1mwandb[0m: Agent Starting Run: 49xv8i5c with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.3527
Epoch: 2, Average Loss: 5.7593
Epoch: 3, Average Loss: 5.5840
Epoch: 4, Average Loss: 5.4616
Epoch: 5, Average Loss: 5.3664


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,5.36642
epoch,5.0
final_loss,5.36642


[34m[1mwandb[0m: Agent Starting Run: 8dxekzc8 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.1300
Epoch: 2, Average Loss: 5.5727
Epoch: 3, Average Loss: 5.3980
Epoch: 4, Average Loss: 5.2746
Epoch: 5, Average Loss: 5.1773


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,5.1773
epoch,5.0
final_loss,5.1773


[34m[1mwandb[0m: Agent Starting Run: yxjr1qyh with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.9332
Epoch: 2, Average Loss: 5.4064
Epoch: 3, Average Loss: 5.2340
Epoch: 4, Average Loss: 5.1122
Epoch: 5, Average Loss: 5.0159


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,5.01587
epoch,5.0
final_loss,5.01587


[34m[1mwandb[0m: Agent Starting Run: 7m2scixo with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.9315
Epoch: 2, Average Loss: 5.4285
Epoch: 3, Average Loss: 5.2513
Epoch: 4, Average Loss: 5.1440
Epoch: 5, Average Loss: 5.0718


0,1
avg_epoch_loss,█▄▂▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,5.0718
epoch,5.0
final_loss,5.0718


[34m[1mwandb[0m: Agent Starting Run: wtxewj08 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.7184
Epoch: 2, Average Loss: 5.1400
Epoch: 3, Average Loss: 4.8652
Epoch: 4, Average Loss: 4.6569
Epoch: 5, Average Loss: 4.4968


0,1
avg_epoch_loss,█▅▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,4.49683
epoch,5.0
final_loss,4.49683


[34m[1mwandb[0m: Agent Starting Run: 96zwutfy with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.5461
Epoch: 2, Average Loss: 4.9700
Epoch: 3, Average Loss: 4.6707
Epoch: 4, Average Loss: 4.4272
Epoch: 5, Average Loss: 4.2269


0,1
avg_epoch_loss,█▅▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,4.22694
epoch,5.0
final_loss,4.22694


[34m[1mwandb[0m: Agent Starting Run: nnl2k4jl with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.3892
Epoch: 2, Average Loss: 4.8264
Epoch: 3, Average Loss: 4.5406
Epoch: 4, Average Loss: 4.3013
Epoch: 5, Average Loss: 4.0953


0,1
avg_epoch_loss,█▅▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,4.09528
epoch,5.0
final_loss,4.09528


[34m[1mwandb[0m: Agent Starting Run: 0nmk0x35 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.4253
Epoch: 2, Average Loss: 5.8171
Epoch: 3, Average Loss: 5.6569
Epoch: 4, Average Loss: 5.5523
Epoch: 5, Average Loss: 5.4758


0,1
avg_epoch_loss,█▄▂▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,5.47585
epoch,5.0
final_loss,5.47585


[34m[1mwandb[0m: Agent Starting Run: naewxrhr with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.1923
Epoch: 2, Average Loss: 5.6116
Epoch: 3, Average Loss: 5.4298
Epoch: 4, Average Loss: 5.3013
Epoch: 5, Average Loss: 5.1988


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,5.1988
epoch,5.0
final_loss,5.1988


[34m[1mwandb[0m: Agent Starting Run: tx9w36lz with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.9934
Epoch: 2, Average Loss: 5.4474
Epoch: 3, Average Loss: 5.2583
Epoch: 4, Average Loss: 5.1227
Epoch: 5, Average Loss: 5.0131


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,5.01312
epoch,5.0
final_loss,5.01312


[34m[1mwandb[0m: Agent Starting Run: kw7prbvc with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.8180
Epoch: 2, Average Loss: 5.2903
Epoch: 3, Average Loss: 5.1003
Epoch: 4, Average Loss: 4.9639
Epoch: 5, Average Loss: 4.8523


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,4.8523
epoch,5.0
final_loss,4.8523


[34m[1mwandb[0m: Agent Starting Run: 8zn0g6n2 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.8958
Epoch: 2, Average Loss: 5.3816
Epoch: 3, Average Loss: 5.2007
Epoch: 4, Average Loss: 5.0961
Epoch: 5, Average Loss: 5.0256


0,1
avg_epoch_loss,█▄▂▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,5.02563
epoch,5.0
final_loss,5.02563


[34m[1mwandb[0m: Agent Starting Run: ysjynhf7 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.6733
Epoch: 2, Average Loss: 5.0755
Epoch: 3, Average Loss: 4.7806
Epoch: 4, Average Loss: 4.5620
Epoch: 5, Average Loss: 4.3925


0,1
avg_epoch_loss,█▅▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,4.39254
epoch,5.0
final_loss,4.39254


[34m[1mwandb[0m: Agent Starting Run: enlcjfxb with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.5115
Epoch: 2, Average Loss: 4.9155
Epoch: 3, Average Loss: 4.5982
Epoch: 4, Average Loss: 4.3385
Epoch: 5, Average Loss: 4.1224


0,1
avg_epoch_loss,█▅▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,4.12238
epoch,5.0
final_loss,4.12238


[34m[1mwandb[0m: Agent Starting Run: 3vqb1a0b with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.3725
Epoch: 2, Average Loss: 4.7967
Epoch: 3, Average Loss: 4.4903
Epoch: 4, Average Loss: 4.2301
Epoch: 5, Average Loss: 4.0053


0,1
avg_epoch_loss,█▅▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,4.00533
epoch,5.0
final_loss,4.00533


[34m[1mwandb[0m: Agent Starting Run: h8zo3p8m with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.2526
Epoch: 2, Average Loss: 5.6734
Epoch: 3, Average Loss: 5.5163
Epoch: 4, Average Loss: 5.4168
Epoch: 5, Average Loss: 5.3449


0,1
avg_epoch_loss,█▄▂▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,5.3449
epoch,5.0
final_loss,5.3449


[34m[1mwandb[0m: Agent Starting Run: 8sda57lt with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.0534
Epoch: 2, Average Loss: 5.4815
Epoch: 3, Average Loss: 5.2814
Epoch: 4, Average Loss: 5.1370
Epoch: 5, Average Loss: 5.0193


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,5.01932
epoch,5.0
final_loss,5.01932


[34m[1mwandb[0m: Agent Starting Run: g1ixcftk with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.8778
Epoch: 2, Average Loss: 5.3173
Epoch: 3, Average Loss: 5.1024
Epoch: 4, Average Loss: 4.9434
Epoch: 5, Average Loss: 4.8081


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,4.80815
epoch,5.0
final_loss,4.80815


[34m[1mwandb[0m: Agent Starting Run: arrjxxn8 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.7107
Epoch: 2, Average Loss: 5.1651
Epoch: 3, Average Loss: 4.9496
Epoch: 4, Average Loss: 4.7876
Epoch: 5, Average Loss: 4.6464


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,4.64643
epoch,5.0
final_loss,4.64643


[34m[1mwandb[0m: Agent Starting Run: fvpy4atn with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.9916
Epoch: 2, Average Loss: 5.5016
Epoch: 3, Average Loss: 5.3367
Epoch: 4, Average Loss: 5.2324
Epoch: 5, Average Loss: 5.1599
Epoch: 6, Average Loss: 5.1070
Epoch: 7, Average Loss: 5.0673
Epoch: 8, Average Loss: 5.0358
Epoch: 9, Average Loss: 5.0105
Epoch: 10, Average Loss: 4.9896


0,1
avg_epoch_loss,█▅▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.98958
epoch,10.0
final_loss,4.98958


[34m[1mwandb[0m: Agent Starting Run: 13foh9du with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.7718
Epoch: 2, Average Loss: 5.2263
Epoch: 3, Average Loss: 4.9955
Epoch: 4, Average Loss: 4.8240
Epoch: 5, Average Loss: 4.6883
Epoch: 6, Average Loss: 4.5829
Epoch: 7, Average Loss: 4.5023
Epoch: 8, Average Loss: 4.4414
Epoch: 9, Average Loss: 4.3951
Epoch: 10, Average Loss: 4.3582


0,1
avg_epoch_loss,█▅▄▃▃▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.35817
epoch,10.0
final_loss,4.35817


[34m[1mwandb[0m: Agent Starting Run: 1dnyqad1 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.5956
Epoch: 2, Average Loss: 5.0470
Epoch: 3, Average Loss: 4.8042
Epoch: 4, Average Loss: 4.6165
Epoch: 5, Average Loss: 4.4638
Epoch: 6, Average Loss: 4.3387
Epoch: 7, Average Loss: 4.2418
Epoch: 8, Average Loss: 4.1678
Epoch: 9, Average Loss: 4.1126
Epoch: 10, Average Loss: 4.0680


0,1
avg_epoch_loss,█▅▄▄▃▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.068
epoch,10.0
final_loss,4.068


[34m[1mwandb[0m: Agent Starting Run: qowz62nn with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.4374
Epoch: 2, Average Loss: 4.9042
Epoch: 3, Average Loss: 4.6550
Epoch: 4, Average Loss: 4.4602
Epoch: 5, Average Loss: 4.2993
Epoch: 6, Average Loss: 4.1672
Epoch: 7, Average Loss: 4.0629
Epoch: 8, Average Loss: 3.9867
Epoch: 9, Average Loss: 3.9294
Epoch: 10, Average Loss: 3.8862


0,1
avg_epoch_loss,█▆▄▄▃▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,3.88618
epoch,10.0
final_loss,3.88618


[34m[1mwandb[0m: Agent Starting Run: 42f5dkky with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.6169
Epoch: 2, Average Loss: 5.9525
Epoch: 3, Average Loss: 5.7919
Epoch: 4, Average Loss: 5.6852
Epoch: 5, Average Loss: 5.6052
Epoch: 6, Average Loss: 5.5423
Epoch: 7, Average Loss: 5.4914
Epoch: 8, Average Loss: 5.4489
Epoch: 9, Average Loss: 5.4129
Epoch: 10, Average Loss: 5.3819


0,1
avg_epoch_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,5.38188
epoch,10.0
final_loss,5.38188


[34m[1mwandb[0m: Agent Starting Run: gi0hz42c with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.3510
Epoch: 2, Average Loss: 5.7536
Epoch: 3, Average Loss: 5.5808
Epoch: 4, Average Loss: 5.4617
Epoch: 5, Average Loss: 5.3684
Epoch: 6, Average Loss: 5.2914
Epoch: 7, Average Loss: 5.2254
Epoch: 8, Average Loss: 5.1676
Epoch: 9, Average Loss: 5.1155
Epoch: 10, Average Loss: 5.0683


0,1
avg_epoch_loss,█▅▄▃▃▂▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,5.06832
epoch,10.0
final_loss,5.06832


[34m[1mwandb[0m: Agent Starting Run: yd5iai5d with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.1312
Epoch: 2, Average Loss: 5.5729
Epoch: 3, Average Loss: 5.3981
Epoch: 4, Average Loss: 5.2761
Epoch: 5, Average Loss: 5.1792
Epoch: 6, Average Loss: 5.0999
Epoch: 7, Average Loss: 5.0311
Epoch: 8, Average Loss: 4.9693
Epoch: 9, Average Loss: 4.9132
Epoch: 10, Average Loss: 4.8604


0,1
avg_epoch_loss,█▅▄▃▃▂▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.86043
epoch,10.0
final_loss,4.86043


[34m[1mwandb[0m: Agent Starting Run: kglwgikv with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.9334
Epoch: 2, Average Loss: 5.3989
Epoch: 3, Average Loss: 5.2274
Epoch: 4, Average Loss: 5.1072
Epoch: 5, Average Loss: 5.0116
Epoch: 6, Average Loss: 4.9313
Epoch: 7, Average Loss: 4.8611
Epoch: 8, Average Loss: 4.7982
Epoch: 9, Average Loss: 4.7406
Epoch: 10, Average Loss: 4.6868


0,1
avg_epoch_loss,█▅▄▃▃▂▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.68684
epoch,10.0
final_loss,4.68684


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: antusjyn with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.9341
Epoch: 2, Average Loss: 5.4341
Epoch: 3, Average Loss: 5.2522
Epoch: 4, Average Loss: 5.1435
Epoch: 5, Average Loss: 5.0706
Epoch: 6, Average Loss: 5.0185
Epoch: 7, Average Loss: 4.9784
Epoch: 8, Average Loss: 4.9469
Epoch: 9, Average Loss: 4.9214
Epoch: 10, Average Loss: 4.8995


0,1
avg_epoch_loss,█▅▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.8995
epoch,10.0
final_loss,4.8995


[34m[1mwandb[0m: Agent Starting Run: hcwlwgtt with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.7110
Epoch: 2, Average Loss: 5.1322
Epoch: 3, Average Loss: 4.8558
Epoch: 4, Average Loss: 4.6477
Epoch: 5, Average Loss: 4.4872
Epoch: 6, Average Loss: 4.3670
Epoch: 7, Average Loss: 4.2771
Epoch: 8, Average Loss: 4.2083
Epoch: 9, Average Loss: 4.1552
Epoch: 10, Average Loss: 4.1117


0,1
avg_epoch_loss,█▅▄▃▃▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.11174
epoch,10.0
final_loss,4.11174


[34m[1mwandb[0m: Agent Starting Run: 3a7miow0 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.5405
Epoch: 2, Average Loss: 4.9616
Epoch: 3, Average Loss: 4.6638
Epoch: 4, Average Loss: 4.4174
Epoch: 5, Average Loss: 4.2143
Epoch: 6, Average Loss: 4.0537
Epoch: 7, Average Loss: 3.9356
Epoch: 8, Average Loss: 3.8497
Epoch: 9, Average Loss: 3.7834
Epoch: 10, Average Loss: 3.7313


0,1
avg_epoch_loss,█▆▅▄▃▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,3.73125
epoch,10.0
final_loss,3.73125


[34m[1mwandb[0m: Agent Starting Run: njjryaw2 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.3949
Epoch: 2, Average Loss: 4.8338
Epoch: 3, Average Loss: 4.5515
Epoch: 4, Average Loss: 4.3145
Epoch: 5, Average Loss: 4.1108
Epoch: 6, Average Loss: 3.9401
Epoch: 7, Average Loss: 3.8072
Epoch: 8, Average Loss: 3.7095
Epoch: 9, Average Loss: 3.6386
Epoch: 10, Average Loss: 3.5829


0,1
avg_epoch_loss,█▆▅▄▃▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,3.58294
epoch,10.0
final_loss,3.58294


[34m[1mwandb[0m: Agent Starting Run: 5w5a8kmf with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.4154
Epoch: 2, Average Loss: 5.8167
Epoch: 3, Average Loss: 5.6582
Epoch: 4, Average Loss: 5.5535
Epoch: 5, Average Loss: 5.4772
Epoch: 6, Average Loss: 5.4181
Epoch: 7, Average Loss: 5.3703
Epoch: 8, Average Loss: 5.3304
Epoch: 9, Average Loss: 5.2963
Epoch: 10, Average Loss: 5.2669


0,1
avg_epoch_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,5.26694
epoch,10.0
final_loss,5.26694


[34m[1mwandb[0m: Agent Starting Run: ax16519w with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.1961
Epoch: 2, Average Loss: 5.6172
Epoch: 3, Average Loss: 5.4317
Epoch: 4, Average Loss: 5.3015
Epoch: 5, Average Loss: 5.1985
Epoch: 6, Average Loss: 5.1120
Epoch: 7, Average Loss: 5.0362
Epoch: 8, Average Loss: 4.9688
Epoch: 9, Average Loss: 4.9080
Epoch: 10, Average Loss: 4.8531


0,1
avg_epoch_loss,█▅▄▃▃▂▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.85309
epoch,10.0
final_loss,4.85309


[34m[1mwandb[0m: Agent Starting Run: tl9zooae with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.0047
Epoch: 2, Average Loss: 5.4506
Epoch: 3, Average Loss: 5.2590
Epoch: 4, Average Loss: 5.1223
Epoch: 5, Average Loss: 5.0128
Epoch: 6, Average Loss: 4.9180
Epoch: 7, Average Loss: 4.8327
Epoch: 8, Average Loss: 4.7548
Epoch: 9, Average Loss: 4.6802
Epoch: 10, Average Loss: 4.6111


0,1
avg_epoch_loss,█▅▄▄▃▃▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.61113
epoch,10.0
final_loss,4.61113


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pxl3lwv7 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.8187
Epoch: 2, Average Loss: 5.2935
Epoch: 3, Average Loss: 5.0996
Epoch: 4, Average Loss: 4.9611
Epoch: 5, Average Loss: 4.8483
Epoch: 6, Average Loss: 4.7500
Epoch: 7, Average Loss: 4.6607
Epoch: 8, Average Loss: 4.5771
Epoch: 9, Average Loss: 4.4971
Epoch: 10, Average Loss: 4.4209


0,1
avg_epoch_loss,█▅▄▄▃▃▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.42092
epoch,10.0
final_loss,4.42092


[34m[1mwandb[0m: Agent Starting Run: 8kvntlis with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.8956
Epoch: 2, Average Loss: 5.3827
Epoch: 3, Average Loss: 5.2011
Epoch: 4, Average Loss: 5.0950
Epoch: 5, Average Loss: 5.0242
Epoch: 6, Average Loss: 4.9722
Epoch: 7, Average Loss: 4.9323
Epoch: 8, Average Loss: 4.8999
Epoch: 9, Average Loss: 4.8733
Epoch: 10, Average Loss: 4.8516


0,1
avg_epoch_loss,█▅▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.85163
epoch,10.0
final_loss,4.85163


[34m[1mwandb[0m: Agent Starting Run: u19hapd7 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.6741
Epoch: 2, Average Loss: 5.0798
Epoch: 3, Average Loss: 4.7847
Epoch: 4, Average Loss: 4.5618
Epoch: 5, Average Loss: 4.3900
Epoch: 6, Average Loss: 4.2579
Epoch: 7, Average Loss: 4.1576
Epoch: 8, Average Loss: 4.0804
Epoch: 9, Average Loss: 4.0199
Epoch: 10, Average Loss: 3.9702


0,1
avg_epoch_loss,█▆▄▃▃▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,3.97021
epoch,10.0
final_loss,3.97021


[34m[1mwandb[0m: Agent Starting Run: dqofp4ou with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.5107
Epoch: 2, Average Loss: 4.9146
Epoch: 3, Average Loss: 4.6009
Epoch: 4, Average Loss: 4.3418
Epoch: 5, Average Loss: 4.1265
Epoch: 6, Average Loss: 3.9521
Epoch: 7, Average Loss: 3.8166
Epoch: 8, Average Loss: 3.7147
Epoch: 9, Average Loss: 3.6357
Epoch: 10, Average Loss: 3.5715


0,1
avg_epoch_loss,█▆▅▄▃▂▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,3.57153
epoch,10.0
final_loss,3.57153


[34m[1mwandb[0m: Agent Starting Run: 8ey0dl66 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.3671
Epoch: 2, Average Loss: 4.7962
Epoch: 3, Average Loss: 4.4976
Epoch: 4, Average Loss: 4.2424
Epoch: 5, Average Loss: 4.0242
Epoch: 6, Average Loss: 3.8390
Epoch: 7, Average Loss: 3.6882
Epoch: 8, Average Loss: 3.5723
Epoch: 9, Average Loss: 3.4829
Epoch: 10, Average Loss: 3.4131


0,1
avg_epoch_loss,█▆▅▄▃▃▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,3.41311
epoch,10.0
final_loss,3.41311


[34m[1mwandb[0m: Agent Starting Run: kry1pgxi with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.2577
Epoch: 2, Average Loss: 5.6819
Epoch: 3, Average Loss: 5.5243
Epoch: 4, Average Loss: 5.4240
Epoch: 5, Average Loss: 5.3517
Epoch: 6, Average Loss: 5.2956
Epoch: 7, Average Loss: 5.2505
Epoch: 8, Average Loss: 5.2131
Epoch: 9, Average Loss: 5.1817
Epoch: 10, Average Loss: 5.1547


0,1
avg_epoch_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,5.15467
epoch,10.0
final_loss,5.15467


[34m[1mwandb[0m: Agent Starting Run: 41f4uxiv with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.0554
Epoch: 2, Average Loss: 5.4854
Epoch: 3, Average Loss: 5.2843
Epoch: 4, Average Loss: 5.1395
Epoch: 5, Average Loss: 5.0221
Epoch: 6, Average Loss: 4.9207
Epoch: 7, Average Loss: 4.8321
Epoch: 8, Average Loss: 4.7549
Epoch: 9, Average Loss: 4.6888
Epoch: 10, Average Loss: 4.6328


0,1
avg_epoch_loss,█▅▄▃▃▂▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.63277
epoch,10.0
final_loss,4.63277


[34m[1mwandb[0m: Agent Starting Run: wxr0ue6l with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.8755
Epoch: 2, Average Loss: 5.3196
Epoch: 3, Average Loss: 5.1066
Epoch: 4, Average Loss: 4.9491
Epoch: 5, Average Loss: 4.8149
Epoch: 6, Average Loss: 4.6927
Epoch: 7, Average Loss: 4.5803
Epoch: 8, Average Loss: 4.4762
Epoch: 9, Average Loss: 4.3842
Epoch: 10, Average Loss: 4.3063


0,1
avg_epoch_loss,█▆▅▄▃▃▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.30632
epoch,10.0
final_loss,4.30632


[34m[1mwandb[0m: Agent Starting Run: zhv78hta with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.7057
Epoch: 2, Average Loss: 5.1625
Epoch: 3, Average Loss: 4.9458
Epoch: 4, Average Loss: 4.7835
Epoch: 5, Average Loss: 4.6415
Epoch: 6, Average Loss: 4.5080
Epoch: 7, Average Loss: 4.3792
Epoch: 8, Average Loss: 4.2574
Epoch: 9, Average Loss: 4.1470
Epoch: 10, Average Loss: 4.0534


0,1
avg_epoch_loss,█▆▅▄▃▃▂▂▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,4.05343
epoch,10.0
final_loss,4.05343


[34m[1mwandb[0m: Agent Starting Run: 4pshrxh5 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.9894
Epoch: 2, Average Loss: 5.4957
Epoch: 3, Average Loss: 5.3287
Epoch: 4, Average Loss: 5.2243
Epoch: 5, Average Loss: 5.1535
Epoch: 6, Average Loss: 5.1022
Epoch: 7, Average Loss: 5.0633
Epoch: 8, Average Loss: 5.0332
Epoch: 9, Average Loss: 5.0084
Epoch: 10, Average Loss: 4.9881
Epoch: 11, Average Loss: 4.9704
Epoch: 12, Average Loss: 4.9554
Epoch: 13, Average Loss: 4.9417
Epoch: 14, Average Loss: 4.9302
Epoch: 15, Average Loss: 4.9197
Epoch: 16, Average Loss: 4.9098
Epoch: 17, Average Loss: 4.9009
Epoch: 18, Average Loss: 4.8925
Epoch: 19, Average Loss: 4.8850
Epoch: 20, Average Loss: 4.8779
Epoch: 21, Average Loss: 4.8710
Epoch: 22, Average Loss: 4.8648
Epoch: 23, Average Loss: 4.8591
Epoch: 24, Average Loss: 4.8541
Epoch: 25, Average Loss: 4.8491


0,1
avg_epoch_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,4.8491
epoch,25.0
final_loss,4.8491


[34m[1mwandb[0m: Agent Starting Run: rtnbscfv with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.7689
Epoch: 2, Average Loss: 5.2263
Epoch: 3, Average Loss: 4.9915
Epoch: 4, Average Loss: 4.8173
Epoch: 5, Average Loss: 4.6801
Epoch: 6, Average Loss: 4.5731
Epoch: 7, Average Loss: 4.4916
Epoch: 8, Average Loss: 4.4308
Epoch: 9, Average Loss: 4.3834
Epoch: 10, Average Loss: 4.3456
Epoch: 11, Average Loss: 4.3146
Epoch: 12, Average Loss: 4.2888
Epoch: 13, Average Loss: 4.2660
Epoch: 14, Average Loss: 4.2464
Epoch: 15, Average Loss: 4.2287
Epoch: 16, Average Loss: 4.2130
Epoch: 17, Average Loss: 4.1989
Epoch: 18, Average Loss: 4.1858
Epoch: 19, Average Loss: 4.1742
Epoch: 20, Average Loss: 4.1632
Epoch: 21, Average Loss: 4.1533
Epoch: 22, Average Loss: 4.1442
Epoch: 23, Average Loss: 4.1360
Epoch: 24, Average Loss: 4.1279
Epoch: 25, Average Loss: 4.1206


0,1
avg_epoch_loss,█▆▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,4.1206
epoch,25.0
final_loss,4.1206


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: x7mnhjco with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.5969
Epoch: 2, Average Loss: 5.0535
Epoch: 3, Average Loss: 4.8046
Epoch: 4, Average Loss: 4.6075
Epoch: 5, Average Loss: 4.4449
Epoch: 6, Average Loss: 4.3136
Epoch: 7, Average Loss: 4.2132
Epoch: 8, Average Loss: 4.1378
Epoch: 9, Average Loss: 4.0811
Epoch: 10, Average Loss: 4.0376
Epoch: 11, Average Loss: 4.0022
Epoch: 12, Average Loss: 3.9735
Epoch: 13, Average Loss: 3.9485
Epoch: 14, Average Loss: 3.9275
Epoch: 15, Average Loss: 3.9077
Epoch: 16, Average Loss: 3.8906
Epoch: 17, Average Loss: 3.8748
Epoch: 18, Average Loss: 3.8619
Epoch: 19, Average Loss: 3.8499
Epoch: 20, Average Loss: 3.8389
Epoch: 21, Average Loss: 3.8289
Epoch: 22, Average Loss: 3.8194
Epoch: 23, Average Loss: 3.8113
Epoch: 24, Average Loss: 3.8035
Epoch: 25, Average Loss: 3.7961


0,1
avg_epoch_loss,█▆▅▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,3.79607
epoch,25.0
final_loss,3.79607


[34m[1mwandb[0m: Agent Starting Run: uwv7r599 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.4352
Epoch: 2, Average Loss: 4.9013
Epoch: 3, Average Loss: 4.6581
Epoch: 4, Average Loss: 4.4684
Epoch: 5, Average Loss: 4.3132
Epoch: 6, Average Loss: 4.1860
Epoch: 7, Average Loss: 4.0851
Epoch: 8, Average Loss: 4.0078
Epoch: 9, Average Loss: 3.9505
Epoch: 10, Average Loss: 3.9066
Epoch: 11, Average Loss: 3.8717
Epoch: 12, Average Loss: 3.8426
Epoch: 13, Average Loss: 3.8187
Epoch: 14, Average Loss: 3.7975
Epoch: 15, Average Loss: 3.7797
Epoch: 16, Average Loss: 3.7634
Epoch: 17, Average Loss: 3.7499
Epoch: 18, Average Loss: 3.7374
Epoch: 19, Average Loss: 3.7257
Epoch: 20, Average Loss: 3.7163
Epoch: 21, Average Loss: 3.7065
Epoch: 22, Average Loss: 3.6981
Epoch: 23, Average Loss: 3.6904
Epoch: 24, Average Loss: 3.6833
Epoch: 25, Average Loss: 3.6775


0,1
avg_epoch_loss,█▆▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,3.67752
epoch,25.0
final_loss,3.67752


[34m[1mwandb[0m: Agent Starting Run: 0ag8ie2f with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.6164
Epoch: 2, Average Loss: 5.9486
Epoch: 3, Average Loss: 5.7905
Epoch: 4, Average Loss: 5.6853
Epoch: 5, Average Loss: 5.6064
Epoch: 6, Average Loss: 5.5444
Epoch: 7, Average Loss: 5.4944
Epoch: 8, Average Loss: 5.4526
Epoch: 9, Average Loss: 5.4170
Epoch: 10, Average Loss: 5.3860
Epoch: 11, Average Loss: 5.3586
Epoch: 12, Average Loss: 5.3342
Epoch: 13, Average Loss: 5.3122
Epoch: 14, Average Loss: 5.2922
Epoch: 15, Average Loss: 5.2740
Epoch: 16, Average Loss: 5.2575
Epoch: 17, Average Loss: 5.2423
Epoch: 18, Average Loss: 5.2281
Epoch: 19, Average Loss: 5.2151
Epoch: 20, Average Loss: 5.2031
Epoch: 21, Average Loss: 5.1918
Epoch: 22, Average Loss: 5.1814
Epoch: 23, Average Loss: 5.1714
Epoch: 24, Average Loss: 5.1622
Epoch: 25, Average Loss: 5.1535


0,1
avg_epoch_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,5.15352
epoch,25.0
final_loss,5.15352


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5tfwo1la with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.3476
Epoch: 2, Average Loss: 5.7498
Epoch: 3, Average Loss: 5.5788
Epoch: 4, Average Loss: 5.4585
Epoch: 5, Average Loss: 5.3650
Epoch: 6, Average Loss: 5.2881
Epoch: 7, Average Loss: 5.2223
Epoch: 8, Average Loss: 5.1645
Epoch: 9, Average Loss: 5.1128
Epoch: 10, Average Loss: 5.0660
Epoch: 11, Average Loss: 5.0225
Epoch: 12, Average Loss: 4.9830
Epoch: 13, Average Loss: 4.9458
Epoch: 14, Average Loss: 4.9117
Epoch: 15, Average Loss: 4.8797
Epoch: 16, Average Loss: 4.8500
Epoch: 17, Average Loss: 4.8222
Epoch: 18, Average Loss: 4.7967
Epoch: 19, Average Loss: 4.7730
Epoch: 20, Average Loss: 4.7512
Epoch: 21, Average Loss: 4.7307
Epoch: 22, Average Loss: 4.7117
Epoch: 23, Average Loss: 4.6942
Epoch: 24, Average Loss: 4.6776
Epoch: 25, Average Loss: 4.6621


0,1
avg_epoch_loss,█▆▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,4.66206
epoch,25.0
final_loss,4.66206


[34m[1mwandb[0m: Agent Starting Run: g3fqynd3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.1267
Epoch: 2, Average Loss: 5.5549
Epoch: 3, Average Loss: 5.3832
Epoch: 4, Average Loss: 5.2636
Epoch: 5, Average Loss: 5.1696
Epoch: 6, Average Loss: 5.0922
Epoch: 7, Average Loss: 5.0244
Epoch: 8, Average Loss: 4.9640
Epoch: 9, Average Loss: 4.9083
Epoch: 10, Average Loss: 4.8576
Epoch: 11, Average Loss: 4.8096
Epoch: 12, Average Loss: 4.7642
Epoch: 13, Average Loss: 4.7212
Epoch: 14, Average Loss: 4.6804
Epoch: 15, Average Loss: 4.6418
Epoch: 16, Average Loss: 4.6053
Epoch: 17, Average Loss: 4.5710
Epoch: 18, Average Loss: 4.5391
Epoch: 19, Average Loss: 4.5099
Epoch: 20, Average Loss: 4.4819
Epoch: 21, Average Loss: 4.4563
Epoch: 22, Average Loss: 4.4330
Epoch: 23, Average Loss: 4.4117
Epoch: 24, Average Loss: 4.3918
Epoch: 25, Average Loss: 4.3730


0,1
avg_epoch_loss,█▆▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,4.37296
epoch,25.0
final_loss,4.37296


[34m[1mwandb[0m: Agent Starting Run: itknw7xp with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.9319
Epoch: 2, Average Loss: 5.4075
Epoch: 3, Average Loss: 5.2335
Epoch: 4, Average Loss: 5.1109
Epoch: 5, Average Loss: 5.0138
Epoch: 6, Average Loss: 4.9327
Epoch: 7, Average Loss: 4.8627
Epoch: 8, Average Loss: 4.7999
Epoch: 9, Average Loss: 4.7425
Epoch: 10, Average Loss: 4.6889
Epoch: 11, Average Loss: 4.6384
Epoch: 12, Average Loss: 4.5904
Epoch: 13, Average Loss: 4.5445
Epoch: 14, Average Loss: 4.5005
Epoch: 15, Average Loss: 4.4587
Epoch: 16, Average Loss: 4.4188
Epoch: 17, Average Loss: 4.3815
Epoch: 18, Average Loss: 4.3461
Epoch: 19, Average Loss: 4.3132
Epoch: 20, Average Loss: 4.2828
Epoch: 21, Average Loss: 4.2550
Epoch: 22, Average Loss: 4.2293
Epoch: 23, Average Loss: 4.2055
Epoch: 24, Average Loss: 4.1839
Epoch: 25, Average Loss: 4.1637


0,1
avg_epoch_loss,█▆▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,4.16367
epoch,25.0
final_loss,4.16367


[34m[1mwandb[0m: Agent Starting Run: j5tbn52b with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.9323
Epoch: 2, Average Loss: 5.4291
Epoch: 3, Average Loss: 5.2535
Epoch: 4, Average Loss: 5.1477
Epoch: 5, Average Loss: 5.0761
Epoch: 6, Average Loss: 5.0241
Epoch: 7, Average Loss: 4.9847
Epoch: 8, Average Loss: 4.9529
Epoch: 9, Average Loss: 4.9274
Epoch: 10, Average Loss: 4.9060
Epoch: 11, Average Loss: 4.8871
Epoch: 12, Average Loss: 4.8711
Epoch: 13, Average Loss: 4.8561
Epoch: 14, Average Loss: 4.8438
Epoch: 15, Average Loss: 4.8317
Epoch: 16, Average Loss: 4.8208
Epoch: 17, Average Loss: 4.8103
Epoch: 18, Average Loss: 4.8011
Epoch: 19, Average Loss: 4.7925
Epoch: 20, Average Loss: 4.7844
Epoch: 21, Average Loss: 4.7768
Epoch: 22, Average Loss: 4.7698
Epoch: 23, Average Loss: 4.7625
Epoch: 24, Average Loss: 4.7566
Epoch: 25, Average Loss: 4.7507


0,1
avg_epoch_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,4.75075
epoch,25.0
final_loss,4.75075


[34m[1mwandb[0m: Agent Starting Run: 7sj2brso with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.7115
Epoch: 2, Average Loss: 5.1380
Epoch: 3, Average Loss: 4.8642
Epoch: 4, Average Loss: 4.6569
Epoch: 5, Average Loss: 4.4961
Epoch: 6, Average Loss: 4.3737
Epoch: 7, Average Loss: 4.2830
Epoch: 8, Average Loss: 4.2149
Epoch: 9, Average Loss: 4.1609
Epoch: 10, Average Loss: 4.1173
Epoch: 11, Average Loss: 4.0811
Epoch: 12, Average Loss: 4.0504
Epoch: 13, Average Loss: 4.0234
Epoch: 14, Average Loss: 3.9996
Epoch: 15, Average Loss: 3.9786
Epoch: 16, Average Loss: 3.9595
Epoch: 17, Average Loss: 3.9427
Epoch: 18, Average Loss: 3.9275
Epoch: 19, Average Loss: 3.9131
Epoch: 20, Average Loss: 3.9003
Epoch: 21, Average Loss: 3.8888
Epoch: 22, Average Loss: 3.8775
Epoch: 23, Average Loss: 3.8672
Epoch: 24, Average Loss: 3.8579
Epoch: 25, Average Loss: 3.8489


0,1
avg_epoch_loss,█▆▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,3.84891
epoch,25.0
final_loss,3.84891


[34m[1mwandb[0m: Agent Starting Run: tr8ug4qz with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.5421
Epoch: 2, Average Loss: 4.9600
Epoch: 3, Average Loss: 4.6697
Epoch: 4, Average Loss: 4.4308
Epoch: 5, Average Loss: 4.2324
Epoch: 6, Average Loss: 4.0739
Epoch: 7, Average Loss: 3.9559
Epoch: 8, Average Loss: 3.8691
Epoch: 9, Average Loss: 3.8030
Epoch: 10, Average Loss: 3.7498
Epoch: 11, Average Loss: 3.7065
Epoch: 12, Average Loss: 3.6699
Epoch: 13, Average Loss: 3.6383
Epoch: 14, Average Loss: 3.6110
Epoch: 15, Average Loss: 3.5868
Epoch: 16, Average Loss: 3.5654
Epoch: 17, Average Loss: 3.5460
Epoch: 18, Average Loss: 3.5291
Epoch: 19, Average Loss: 3.5131
Epoch: 20, Average Loss: 3.4990
Epoch: 21, Average Loss: 3.4857
Epoch: 22, Average Loss: 3.4742
Epoch: 23, Average Loss: 3.4624
Epoch: 24, Average Loss: 3.4526
Epoch: 25, Average Loss: 3.4431


0,1
avg_epoch_loss,█▆▅▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,3.44315
epoch,25.0
final_loss,3.44315


[34m[1mwandb[0m: Agent Starting Run: 0nn0rihz with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.3885
Epoch: 2, Average Loss: 4.8289
Epoch: 3, Average Loss: 4.5470
Epoch: 4, Average Loss: 4.3113
Epoch: 5, Average Loss: 4.1087
Epoch: 6, Average Loss: 3.9415
Epoch: 7, Average Loss: 3.8131
Epoch: 8, Average Loss: 3.7190
Epoch: 9, Average Loss: 3.6486
Epoch: 10, Average Loss: 3.5935
Epoch: 11, Average Loss: 3.5490
Epoch: 12, Average Loss: 3.5118
Epoch: 13, Average Loss: 3.4798
Epoch: 14, Average Loss: 3.4532
Epoch: 15, Average Loss: 3.4283
Epoch: 16, Average Loss: 3.4058
Epoch: 17, Average Loss: 3.3875
Epoch: 18, Average Loss: 3.3703
Epoch: 19, Average Loss: 3.3548
Epoch: 20, Average Loss: 3.3400
Epoch: 21, Average Loss: 3.3271
Epoch: 22, Average Loss: 3.3146
Epoch: 23, Average Loss: 3.3042
Epoch: 24, Average Loss: 3.2936
Epoch: 25, Average Loss: 3.2847


0,1
avg_epoch_loss,█▆▅▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,3.28474
epoch,25.0
final_loss,3.28474


[34m[1mwandb[0m: Agent Starting Run: 8j0ymcch with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.4210
Epoch: 2, Average Loss: 5.8019
Epoch: 3, Average Loss: 5.6461
Epoch: 4, Average Loss: 5.5434
Epoch: 5, Average Loss: 5.4687
Epoch: 6, Average Loss: 5.4113
Epoch: 7, Average Loss: 5.3647
Epoch: 8, Average Loss: 5.3254
Epoch: 9, Average Loss: 5.2919
Epoch: 10, Average Loss: 5.2627
Epoch: 11, Average Loss: 5.2371
Epoch: 12, Average Loss: 5.2144
Epoch: 13, Average Loss: 5.1939
Epoch: 14, Average Loss: 5.1757
Epoch: 15, Average Loss: 5.1594
Epoch: 16, Average Loss: 5.1443
Epoch: 17, Average Loss: 5.1304
Epoch: 18, Average Loss: 5.1177
Epoch: 19, Average Loss: 5.1059
Epoch: 20, Average Loss: 5.0949
Epoch: 21, Average Loss: 5.0846
Epoch: 22, Average Loss: 5.0752
Epoch: 23, Average Loss: 5.0660
Epoch: 24, Average Loss: 5.0575
Epoch: 25, Average Loss: 5.0494


0,1
avg_epoch_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,5.04936
epoch,25.0
final_loss,5.04936


[34m[1mwandb[0m: Agent Starting Run: wa9yo3nu with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 6.1922
Epoch: 2, Average Loss: 5.6186
Epoch: 3, Average Loss: 5.4342
Epoch: 4, Average Loss: 5.3040
Epoch: 5, Average Loss: 5.2007
Epoch: 6, Average Loss: 5.1138
Epoch: 7, Average Loss: 5.0380
Epoch: 8, Average Loss: 4.9704
Epoch: 9, Average Loss: 4.9094
Epoch: 10, Average Loss: 4.8540
Epoch: 11, Average Loss: 4.8044
Epoch: 12, Average Loss: 4.7600
Epoch: 13, Average Loss: 4.7200
Epoch: 14, Average Loss: 4.6842
Epoch: 15, Average Loss: 4.6529
Epoch: 16, Average Loss: 4.6240
Epoch: 17, Average Loss: 4.5984
Epoch: 18, Average Loss: 4.5749


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Epoch: 21, Average Loss: 4.5147
Epoch: 22, Average Loss: 4.4973
Epoch: 23, Average Loss: 4.4806
Epoch: 24, Average Loss: 4.4649
Epoch: 25, Average Loss: 4.4501


0,1
avg_epoch_loss,█▆▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,4.45012
epoch,25.0
final_loss,4.45012


[34m[1mwandb[0m: Agent Starting Run: rxpmdh6h with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.9943
Epoch: 2, Average Loss: 5.4412
Epoch: 3, Average Loss: 5.2535
Epoch: 4, Average Loss: 5.1203
Epoch: 5, Average Loss: 5.0115
Epoch: 6, Average Loss: 4.9177
Epoch: 7, Average Loss: 4.8328
Epoch: 8, Average Loss: 4.7548
Epoch: 9, Average Loss: 4.6813
Epoch: 10, Average Loss: 4.6120
Epoch: 11, Average Loss: 4.5481
Epoch: 12, Average Loss: 4.4893
Epoch: 13, Average Loss: 4.4359
Epoch: 14, Average Loss: 4.3878
Epoch: 15, Average Loss: 4.3460
Epoch: 16, Average Loss: 4.3087
Epoch: 17, Average Loss: 4.2755
Epoch: 18, Average Loss: 4.2457
Epoch: 19, Average Loss: 4.2187
Epoch: 20, Average Loss: 4.1934
Epoch: 21, Average Loss: 4.1706
Epoch: 22, Average Loss: 4.1487
Epoch: 23, Average Loss: 4.1271
Epoch: 24, Average Loss: 4.1076
Epoch: 25, Average Loss: 4.0886


0,1
avg_epoch_loss,█▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,4.08861
epoch,25.0
final_loss,4.08861


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: odsew8ph with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.8149
Epoch: 2, Average Loss: 5.2893
Epoch: 3, Average Loss: 5.0969
Epoch: 4, Average Loss: 4.9599
Epoch: 5, Average Loss: 4.8488
Epoch: 6, Average Loss: 4.7518
Epoch: 7, Average Loss: 4.6633
Epoch: 8, Average Loss: 4.5797
Epoch: 9, Average Loss: 4.5000
Epoch: 10, Average Loss: 4.4238
Epoch: 11, Average Loss: 4.3514
Epoch: 12, Average Loss: 4.2841
Epoch: 13, Average Loss: 4.2230
Epoch: 14, Average Loss: 4.1685
Epoch: 15, Average Loss: 4.1209
Epoch: 16, Average Loss: 4.0796
Epoch: 17, Average Loss: 4.0430
Epoch: 18, Average Loss: 4.0105
Epoch: 19, Average Loss: 3.9806
Epoch: 20, Average Loss: 3.9530
Epoch: 21, Average Loss: 3.9269
Epoch: 22, Average Loss: 3.9022
Epoch: 23, Average Loss: 3.8791
Epoch: 24, Average Loss: 3.8567
Epoch: 25, Average Loss: 3.8353


0,1
avg_epoch_loss,█▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
final_loss,▁

0,1
avg_epoch_loss,3.83533
epoch,25.0
final_loss,3.83533


[34m[1mwandb[0m: Agent Starting Run: z64ai1nz with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 5.8947
Epoch: 2, Average Loss: 5.3807
Epoch: 3, Average Loss: 5.1989
Epoch: 4, Average Loss: 5.0933
Epoch: 5, Average Loss: 5.0228
Epoch: 6, Average Loss: 4.9724
Epoch: 7, Average Loss: 4.9329
Epoch: 8, Average Loss: 4.9016
Epoch: 9, Average Loss: 4.8754
Epoch: 10, Average Loss: 4.8531
Epoch: 11, Average Loss: 4.8337
Epoch: 12, Average Loss: 4.8170
Epoch: 13, Average Loss: 4.8020
Epoch: 14, Average Loss: 4.7886
Epoch: 15, Average Loss: 4.7765
Epoch: 16, Average Loss: 4.7653
Epoch: 17, Average Loss: 4.7550
Epoch: 18, Average Loss: 4.7455
Epoch: 19, Average Loss: 4.7367
Epoch: 20, Average Loss: 4.7283
Epoch: 21, Average Loss: 4.7208
