In [1]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from tqdm.autonotebook import tqdm
import neurallm_utils as nutils
import wandb
import os

  from tqdm.autonotebook import tqdm
[nltk_data] Downloading package punkt to
[nltk_data]     /home/mezallamosas.j/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /home/mezallamosas.j/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


## Functions

In [2]:
# -------------------------------
# Data processing functions
# -------------------------------

def encode_tokens(data: list[list[str]], embedder: torch.nn.Embedding) -> list[list[int]]:
    """
    Map every token of the corpus to its integer index in the embedder.

    e.g. [["<s>", "once", "upon", "a", "time"],
          ["there", "was", "a"]]
        ->
        [[0, 59, 203, 1, 126],
         [26, 15, 1]]
        (The actual indices depend on the embedder's vocabulary.)

    Params:
        data: The corpus, one list of tokens per sentence.
        embedder: Embedding layer exposing a `token_to_index` mapping
            from token to index.

    Returns:
        A list with the same nesting as `data`, holding indices instead of tokens.

    Raises:
        KeyError: If a token is missing from `embedder.token_to_index`.
    """
    return [
        [embedder.token_to_index[token] for token in sentence]
        for sentence in data
    ]


def create_ngrams(tokens: list, n: int) -> list:
    """Slides a window over the token sequence and collects training windows.

    Each window holds n context tokens followed by the token that comes
    right after them, so every window has n + 1 elements.

    Args:
      tokens (list): the token sequence to slide over
      n (int): the number of context tokens in each window

    Returns:
      list: one slice of `tokens` per window, in order of appearance; empty
        when the sequence has n tokens or fewer
    """
    window_size = n + 1  # n context tokens plus the token to predict
    return [tokens[start:start + window_size] for start in range(len(tokens) - n)]

def generate_ngram_training_samples(encoded: list[list[int]], ngram: int) -> list:
    """
    Build the flat list of training samples from the **encoded** corpus.

    Every sentence is cut into windows of `ngram` consecutive indices; the
    windows of all sentences are concatenated in corpus order.

    e.g. with ngram=3 the sentence [1, 2, 3, 4] yields
        [1, 2, y=3]
        [2, 3, y=4]

    Params:
        encoded: The corpus as a list of lists of token indices.
        ngram: The N value; each sample has N-1 inputs followed by the label.

    Returns:
        list of lists in the format [[x1, x2, ... , x(n-1), y], ...]
    """
    return [
        window
        for sentence in encoded
        for window in create_ngrams(sentence, ngram - 1)
    ]

def split_sequences(training_sample):
    """
    Separate each training sample into its inputs and its label.

    Params:
        training_sample: Samples in the format [x1, ..., x(n-1), y].

    Returns:
        A tuple (x_sample, y_sample): the list of input sequences (everything
        but the last element) and the list of labels (the last element).
    """
    pairs = [(sample[0:-1], sample[-1]) for sample in training_sample]
    x_sample = [inputs for inputs, _ in pairs]
    y_sample = [label for _, label in pairs]
    return x_sample, y_sample

def create_dataloaders(X: list, y: list, num_sequences_per_batch: int, 
                       test_pct: float = 0.1, shuffle: bool = True) -> tuple[torch.utils.data.DataLoader, torch.utils.data.DataLoader]:
    """
    Convert our data into a pair of PyTorch DataLoaders (train and test).
    A DataLoader is an object that splits the dataset into batches for training.
    PyTorch docs: 
        https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
        https://pytorch.org/docs/stable/data.html

    The samples are wrapped in a TensorDataset and split *in order*: the
    last `int(len(X) * test_pct)` samples form the test set and everything
    before them forms the training set. The data is NOT shuffled before the
    split, so the split is reproducible from run to run.

    The shuffle parameter refers to shuffling the data *in the loader*
    (the order in which batches are drawn), not to the train/test split.

    Params:
        X: A list of input sequences
        y: A list of labels
        num_sequences_per_batch: Batch size
        test_pct: The proportion of samples to use in the test set.
        shuffle: Whether each loader reshuffles its own samples when iterated.

    Returns:
        One DataLoader for training, and one for testing.
    """
    dataset = TensorDataset(torch.tensor(X), torch.tensor(y))
    n_samples = len(dataset)
    test_size = int(n_samples * test_pct)
    train_size = n_samples - test_size

    # Sequential split: random_split would permute the samples before
    # splitting, which the contract above forbids.
    train_data = torch.utils.data.Subset(dataset, range(train_size))
    test_data = torch.utils.data.Subset(dataset, range(train_size, n_samples))

    dataloader_train = DataLoader(train_data, batch_size=num_sequences_per_batch, shuffle=shuffle)
    dataloader_test = DataLoader(test_data, batch_size=num_sequences_per_batch, shuffle=shuffle)
    return dataloader_train, dataloader_test

# -------------------------------
# FFNN Model and Training Functions
# -------------------------------

class FFNN(nn.Module):
    """
    Feed-forward neural language model: embeds the N-1 context tokens,
    concatenates their embeddings and scores every vocabulary entry as the
    next token.
    """
    def __init__(self, vocab_size: int, ngram: int, embedding_layer: torch.nn.Embedding, hidden_units=128, device: str = "cpu"):
        """
        Build a new untrained model on top of an existing embedding layer.
        
        Params:
            vocab_size: Number of words in the vocabulary (size of the output layer).
            ngram: The N value (window size) for training; the model reads N-1 tokens.
            embedding_layer: Pre-trained embedding layer.
            hidden_units: Number of hidden units in the hidden layer.
            device: Device the module is moved to; also stored as `self.device`.
        """
        super().__init__()

        # Keep the hyperparameters around so callers can inspect them later.
        self.vocab_size = vocab_size
        self.ngram = ngram
        self.embedding_layer = embedding_layer
        self.hidden_units = hidden_units
        self.device = device
        
        # Width of the flattened input: one embedding per context token.
        context_width = (ngram - 1) * embedding_layer.embedding_dim
        
        self.flatten = nn.Flatten()
        hidden_layer = nn.Linear(context_width, hidden_units)
        output_layer = nn.Linear(hidden_units, vocab_size)
        self.linear_relu_stack = nn.Sequential(hidden_layer, nn.ReLU(), output_layer)
        
        # Place the whole module on the requested device.
        self.to(device)
        
    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """
        Compute next-token scores for a batch of contexts.
        
        Params:
            X: Tensor of input indices with shape (batch_size, ngram-1)
        
        Returns:
            Logits of shape (batch_size, vocab_size).
        """
        context_vectors = self.flatten(self.embedding_layer(X))
        return self.linear_relu_stack(context_vectors)

def train_one_epoch(dataloader, model, optimizer, loss_fn):
    """
    Run one optimization pass over every batch of the dataloader.

    Params:
        dataloader: Iterable of (inputs, labels) batches.
        model: The model to update; batches are moved to `model.device`.
        optimizer: Optimizer that holds the model's parameters.
        loss_fn: Callable computing the loss from (outputs, labels).

    Returns:
        The sum of the per-batch losses (not the average).
    """
    running_loss = 0
    for batch_inputs, batch_labels in dataloader:
        batch_inputs = batch_inputs.to(model.device)
        batch_labels = batch_labels.to(model.device)

        # Gradients accumulate across backward passes, so reset them first.
        optimizer.zero_grad()
        loss = loss_fn(model(batch_inputs), batch_labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    return running_loss

def train(dataloader, model, epochs: int = 1, lr: float = 0.001) -> float:
    """
    Train the model with Adam and cross-entropy loss, logging to wandb.
    
    Params:
        dataloader: Training data loader.
        model: The model to train.
        epochs: Number of epochs.
        lr: Learning rate.

    Returns:
        The average per-batch loss of the last epoch, or NaN when `epochs`
        is 0 and no epoch was run.

    Raises:
        ZeroDivisionError: If the dataloader yields no batches.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()
    n_batches = len(dataloader)
    
    model.train()  # Set the model to training mode.
    
    # Defined up front so the return below is valid even when epochs == 0.
    avg_epoch_loss = float("nan")
    for epoch in tqdm(range(epochs), desc="Training Epochs"):
        epoch_loss = train_one_epoch(dataloader, model, optimizer, loss_fn)
        # train_one_epoch returns the summed batch losses; normalize per batch.
        avg_epoch_loss = epoch_loss / n_batches
        print(f"Epoch: {epoch+1}, Average Loss: {avg_epoch_loss:.4f}")
        # Log metrics to wandb
        wandb.log({"epoch": epoch+1, "avg_epoch_loss": avg_epoch_loss})
    return avg_epoch_loss

def full_pipeline(data, word_embeddings_filename: str, 
                  batch_size: int,
                  ngram: int,
                  hidden_units: int = 128,
                  epochs: int = 1,
                  lr: float = 0.001,
                  test_pct: float = 0.1, device: str = "cpu") -> tuple[FFNN, float]:
    """
    Run the full training pipeline from loading embeddings to model training.
    
    Params:
        data: Tokenized corpus as a list of lists of tokens; each token is
            looked up in the embedder's `token_to_index` mapping.
        word_embeddings_filename: Filename for the pre-trained embeddings.
        batch_size: Batch size for training.
        ngram: N-gram size.
        hidden_units: Number of hidden units.
        epochs: Number of epochs.
        lr: Learning rate.
        test_pct: Proportion of samples held out as a test set; the held-out
            loader is built but not used here.
        device: Device the model is created on and trained on.
    
    Returns:
        A tuple (model, final_loss): the trained FFNN model and the average
        loss of the last training epoch as returned by `train`.
    """
    # Load embeddings and create an embedder.
    token_embeddings = nutils.load_word2vec(word_embeddings_filename)
    embedder = nutils.create_embedder(token_embeddings)
    
    # Preprocess data: tokens -> indices -> n-gram windows -> (inputs, labels).
    encoded_tokens = encode_tokens(data, embedder)
    vocab_size = embedder.num_embeddings
    training_sample = generate_ngram_training_samples(encoded_tokens, ngram)
    x_sample, y_sample = split_sequences(training_sample)
    # Only the training loader is needed; the test loader is discarded.
    dataloader_train, _ = create_dataloaders(x_sample, y_sample, batch_size, test_pct)
    
    # Initialize the model.
    model = FFNN(vocab_size=vocab_size, ngram=ngram, embedding_layer=embedder, hidden_units=hidden_units, device=device)

    # Train the model.
    final_loss = train(dataloader=dataloader_train, model=model, epochs=epochs, lr=lr)
    
    return model, final_loss

# -------------------------------
# Prediction and generation functions
# -------------------------------

# Create a function that predicts the next token in a sequence.
def predict(model, input_tokens) -> str:
    """
    Sample the model's next token for the given context.

    The logits are turned into a probability distribution with softmax and
    one token is drawn from that distribution, so repeated calls with the
    same input may return different tokens.
    Assume that the input tokens do not contain any unknown tokens.

    Params:
        model: Your trained model
        input_tokens: A list of natural-language tokens. Must be length N-1.

    Returns:
        The predicted token (not the predicted index!)
    """
    vocabulary = model.embedding_layer

    # Encode the context and wrap it in a batch of one: shape [1, ngram-1].
    context_ids = [vocabulary.token_to_index[token] for token in input_tokens]
    context = torch.tensor([context_ids]).to(model.device)

    # Evaluation mode; note that the model is left in this mode afterwards.
    model.eval()

    # No gradients are needed for inference.
    with torch.no_grad():
        next_token_probs = torch.softmax(model(context), dim=1)
        sampled_index = torch.multinomial(next_token_probs, num_samples=1).item()

    # Translate the sampled index back to its natural-language token.
    return vocabulary.index_to_token[sampled_index]

from typing import List
# Generate a sequence from the model until you get an end of sentence token.
def generate(model, seed: List[str], max_tokens: int = None) -> List[str]:
    """
    Use the trained model to generate a sentence.

    Starting from the seed, the last N-1 tokens generated so far are fed to
    `predict` to obtain the next token, until the end-of-sentence token
    "</s>" is predicted or `max_tokens` tokens have been generated.

    Params:
        model: Your trained model
        seed: [w_1, w_2, ..., w_(n-1)].
        max_tokens: The maximum number of tokens to generate. When None,
            generation continues until the end of sentence token is reached.

    Return:
        A list of tokens: the seed followed by the generated tokens. The
        end-of-sentence token itself is not included. The seed is not modified.
    """ 
    end_token = "</s>"
    context_size = model.ngram - 1
    tokens = list(seed)  # Work on a copy so the caller's seed stays intact.
    n_generated = 0

    while max_tokens is None or n_generated < max_tokens:
        # Condition on the most recent tokens (seed + generated so far), so
        # the context window slides forward as the sentence grows.
        context = tokens[-context_size:]
        predicted_token = predict(model, context)
        if predicted_token == end_token:
            break
        tokens.append(predicted_token)
        n_generated += 1

    return tokens

def generate_sentences(model, seed: List[str],  n_sentences: int, max_tokens: int = None) -> List[List[str]]:
    """
    Generate several sentences from the same seed.

    Params:
        model: Your trained model
        seed: The starting tokens passed to every `generate` call.
        n_sentences: How many sentences to generate.
        max_tokens: Forwarded to `generate` as the per-sentence token limit.

    Return:
        A list with one list of tokens per generated sentence.
    """
    return [generate(model, seed, max_tokens) for _ in range(n_sentences)]

# you might want to define some functions to help you format the text nicely
# and/or generate multiple sequences

def format_sentence(tokens_list: List[List[str]], by_char = False) -> str:
    """Formats generated token lists as readable text, one sentence per line.

    For each sentence, every leading <s> and a single trailing </s> are
    dropped, the remaining tokens are joined and the result is capitalized
    (first character upper-cased, the rest lower-cased) and terminated with
    a period and a newline. The input lists are not modified.

    Args:
      tokens_list (list(list)): the token lists to be formatted, one per sentence
      by_char (bool): when True the tokens are characters and are joined
        without a separator; otherwise they are joined with single spaces

    Returns:
      string: the formatted sentences; a sentence with no tokens left after
        removing the markers contributes a line containing only "."
    """
    separator = "" if by_char else " "
    lines = []
    for tokens in tokens_list:
        # Skip every <s> at the beginning (there are several for ngram > 2 models).
        start = 0
        while start < len(tokens) and tokens[start] == '<s>':
            start += 1
        # Drop at most one </s> at the end of the sentence.
        end = len(tokens)
        if end > start and tokens[end - 1] == '</s>':
            end -= 1
        # Slice instead of popping so the caller's lists stay untouched.
        sentence = separator.join(tokens[start:end]).capitalize()
        lines.append(sentence + ".\n")
    # Only surrounding spaces are trimmed; the final newline is kept.
    return "".join(lines).strip(" ")


## Main Pipeline

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [4]:
def main():
    """
    Run a single wandb sweep trial.

    Reads the hyperparameters from `wandb.config`, makes sure the embeddings
    for the chosen text type exist on disk (training and saving them when the
    file is missing), trains the language model through `full_pipeline`,
    saves the model weights and logs the final loss.

    Raises:
        ValueError: If TEXT_TYPE is neither "word" nor "char".
    """
    # Initialize a wandb run (hyperparameters come from wandb.config).
    wandb.init(
        entity = "northeastern-university",
        project = "neural-language-model"
        )

    config = wandb.config

    EMBEDDINGS_SIZE = config.embeddings_size
    NGRAM = config.ngram
    NUM_SEQUENCES_PER_BATCH = config.batch_size
    HIDDEN_UNITS = config.hidden_units
    EPOCHS = config.epochs
    LR = config.lr
    TEST_PCT = config.test_pct
    TEXT_TYPE = "char"  # Either "word" or "char".

    if TEXT_TYPE not in ("word", "char"):
        raise ValueError(f"TEXT_TYPE must be 'word' or 'char', got {TEXT_TYPE!r}")

    TRAIN_FILE = 'spooky_author_train.csv' # The file to train your language model on

    # The file the embeddings for this text type are saved to / loaded from.
    EMBEDDING_SAVE_FILE = f"embeddings/spooky_embedding_{TEXT_TYPE}_{EMBEDDINGS_SIZE}.model"
    # The file the trained neural LM is saved to; the name encodes the hyperparameters.
    MODEL_FILE = f'models/spooky_author_model_{TEXT_TYPE}_{EMBEDDINGS_SIZE}_{NUM_SEQUENCES_PER_BATCH}_{NGRAM}_{HIDDEN_UNITS}_{EPOCHS}_{LR}.pt'

    # Saving fails when the parent directory is missing, so create both up front.
    for output_path in (EMBEDDING_SAVE_FILE, MODEL_FILE):
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

    data = nutils.read_file_spooky(TRAIN_FILE, NGRAM, by_character=(TEXT_TYPE == "char"))

    # Train the embeddings only once per (text type, embedding size); later
    # trials with the same settings reuse the saved file.
    if not os.path.exists(EMBEDDING_SAVE_FILE):
        trained_embeddings = nutils.train_word2vec(data, EMBEDDINGS_SIZE)
        nutils.save_word2vec(trained_embeddings, EMBEDDING_SAVE_FILE)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model, final_loss = full_pipeline(
        data=data,
        word_embeddings_filename=EMBEDDING_SAVE_FILE,
        batch_size=NUM_SEQUENCES_PER_BATCH,
        ngram=NGRAM,
        hidden_units=HIDDEN_UNITS,
        epochs=EPOCHS,
        lr=LR,
        test_pct=TEST_PCT,
        device=device
    )

    torch.save(model.state_dict(), MODEL_FILE)

    wandb.log({"final_loss": final_loss})
    wandb.finish()
	

In [5]:
# Define the sweep configuration
sweep_config = {
    "name": "char_corrected_hyperparameter_sweep",
    # Search strategy. Options: "grid", "random", "bayes"
    "method": "grid",
    # The metric the sweep optimizes: the training loss, to be minimized.
    "metric": {"name": "avg_epoch_loss", "goal": "minimize"},
    "parameters": {
        "embeddings_size": {"values": [50, 100, 200]},
        "batch_size": {"values": [128, 256, 512]},
        "ngram": {"values": [2, 3, 4, 5]},
        "hidden_units": {"values": [128, 256, 512]},
        "epochs": {"values": [5, 10, 25]},
        "lr": {"values": [0.001, 0.0001]},
        # Fixed value, not searched over.
        "test_pct": {"value": 0.1},
    },
}

In [6]:
# Register the sweep with wandb.
# The returned sweep ID is what the agent cell below passes to wandb.agent.
sweep_id = wandb.sweep(sweep_config,
                       project="neural-language-model")
print("Sweep ID:", sweep_id)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: g0yl702x
Sweep URL: https://wandb.ai/biofx/neural-language-model/sweeps/g0yl702x
Sweep ID: g0yl702x


In [None]:
# Set count to the number of runs you wish this agent to execute (500 here).
# The grid in sweep_config has 3 * 3 * 4 * 3 * 3 * 2 = 648 combinations, so
# 500 runs do not cover the whole grid.
wandb.agent(sweep_id, function=main, count=500)

[34m[1mwandb[0m: Agent Starting Run: zapkdhya with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmezallamosasj[0m ([33mbiofx[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3884
Epoch: 2, Average Loss: 2.3659
Epoch: 3, Average Loss: 2.3645
Epoch: 4, Average Loss: 2.3637
Epoch: 5, Average Loss: 2.3632


0,1
avg_epoch_loss,█▂▁▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,2.36317
epoch,5.0
final_loss,2.36317


[34m[1mwandb[0m: Agent Starting Run: wdsezvwc with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.0981
Epoch: 2, Average Loss: 1.9883
Epoch: 3, Average Loss: 1.9714
Epoch: 4, Average Loss: 1.9641
Epoch: 5, Average Loss: 1.9598


0,1
avg_epoch_loss,█▂▂▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.95977
epoch,5.0
final_loss,1.95977


[34m[1mwandb[0m: Agent Starting Run: 0hryghn6 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 1.9852
Epoch: 2, Average Loss: 1.8198
Epoch: 3, Average Loss: 1.7823
Epoch: 4, Average Loss: 1.7622
Epoch: 5, Average Loss: 1.7499


0,1
avg_epoch_loss,█▃▂▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.74987
epoch,5.0
final_loss,1.74987


[34m[1mwandb[0m: Agent Starting Run: skl8ddjy with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 1.9519
Epoch: 2, Average Loss: 1.7647
Epoch: 3, Average Loss: 1.7229
Epoch: 4, Average Loss: 1.7019
Epoch: 5, Average Loss: 1.6883


0,1
avg_epoch_loss,█▃▂▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.68834
epoch,5.0
final_loss,1.68834


[34m[1mwandb[0m: Agent Starting Run: brxnjqcz with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.5050
Epoch: 2, Average Loss: 2.3895
Epoch: 3, Average Loss: 2.3784
Epoch: 4, Average Loss: 2.3727
Epoch: 5, Average Loss: 2.3692


0,1
avg_epoch_loss,█▂▁▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,2.36919
epoch,5.0
final_loss,2.36919


[34m[1mwandb[0m: Agent Starting Run: nt42q43z with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3757
Epoch: 2, Average Loss: 2.1945
Epoch: 3, Average Loss: 2.1372
Epoch: 4, Average Loss: 2.1029
Epoch: 5, Average Loss: 2.0784


0,1
avg_epoch_loss,█▄▂▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,2.07844
epoch,5.0
final_loss,2.07844


[34m[1mwandb[0m: Agent Starting Run: uzzkq0ct with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3323
Epoch: 2, Average Loss: 2.1268
Epoch: 3, Average Loss: 2.0556
Epoch: 4, Average Loss: 2.0098
Epoch: 5, Average Loss: 1.9767


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.97673
epoch,5.0
final_loss,1.97673


[34m[1mwandb[0m: Agent Starting Run: oyny2z5c with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3061
Epoch: 2, Average Loss: 2.0897
Epoch: 3, Average Loss: 2.0101
Epoch: 4, Average Loss: 1.9568
Epoch: 5, Average Loss: 1.9198


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.91983
epoch,5.0
final_loss,1.91983


[34m[1mwandb[0m: Agent Starting Run: tax6p3it with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3824
Epoch: 2, Average Loss: 2.3652
Epoch: 3, Average Loss: 2.3637
Epoch: 4, Average Loss: 2.3629
Epoch: 5, Average Loss: 2.3625


0,1
avg_epoch_loss,█▂▁▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,2.36247
epoch,5.0
final_loss,2.36247


[34m[1mwandb[0m: Agent Starting Run: a74o8b5z with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.0492
Epoch: 2, Average Loss: 1.9673
Epoch: 3, Average Loss: 1.9564
Epoch: 4, Average Loss: 1.9515
Epoch: 5, Average Loss: 1.9487


0,1
avg_epoch_loss,█▂▂▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.94868
epoch,5.0
final_loss,1.94868


[34m[1mwandb[0m: Agent Starting Run: nopkfw8o with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 1.9070
Epoch: 2, Average Loss: 1.7449
Epoch: 3, Average Loss: 1.7091
Epoch: 4, Average Loss: 1.6914
Epoch: 5, Average Loss: 1.6806


0,1
avg_epoch_loss,█▃▂▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.68058
epoch,5.0
final_loss,1.68058


[34m[1mwandb[0m: Agent Starting Run: 807u35ar with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 1.8671
Epoch: 2, Average Loss: 1.6769
Epoch: 3, Average Loss: 1.6319
Epoch: 4, Average Loss: 1.6089
Epoch: 5, Average Loss: 1.5942


0,1
avg_epoch_loss,█▃▂▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.59417
epoch,5.0
final_loss,1.59417


[34m[1mwandb[0m: Agent Starting Run: e9q6e8j5 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.4691
Epoch: 2, Average Loss: 2.3802
Epoch: 3, Average Loss: 2.3713
Epoch: 4, Average Loss: 2.3671
Epoch: 5, Average Loss: 2.3649


0,1
avg_epoch_loss,█▂▁▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,2.36486
epoch,5.0
final_loss,2.36486


[34m[1mwandb[0m: Agent Starting Run: a5n91kyj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3126
Epoch: 2, Average Loss: 2.1340
Epoch: 3, Average Loss: 2.0814
Epoch: 4, Average Loss: 2.0500
Epoch: 5, Average Loss: 2.0297


0,1
avg_epoch_loss,█▄▂▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,2.02969
epoch,5.0
final_loss,2.02969


[34m[1mwandb[0m: Agent Starting Run: c06ow38s with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.2722
Epoch: 2, Average Loss: 2.0659
Epoch: 3, Average Loss: 1.9837
Epoch: 4, Average Loss: 1.9324
Epoch: 5, Average Loss: 1.8970


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.89702
epoch,5.0
final_loss,1.89702


[34m[1mwandb[0m: Agent Starting Run: e88s2zk6 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.2422
Epoch: 2, Average Loss: 2.0271
Epoch: 3, Average Loss: 1.9441
Epoch: 4, Average Loss: 1.8869
Epoch: 5, Average Loss: 1.8448


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.84484
epoch,5.0
final_loss,1.84484


[34m[1mwandb[0m: Agent Starting Run: 355bzrzy with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3779
Epoch: 2, Average Loss: 2.3647
Epoch: 3, Average Loss: 2.3633
Epoch: 4, Average Loss: 2.3626
Epoch: 5, Average Loss: 2.3624


0,1
avg_epoch_loss,█▂▁▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,2.36236
epoch,5.0
final_loss,2.36236


[34m[1mwandb[0m: Agent Starting Run: 3uorhs58 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.0234
Epoch: 2, Average Loss: 1.9584
Epoch: 3, Average Loss: 1.9501
Epoch: 4, Average Loss: 1.9459
Epoch: 5, Average Loss: 1.9432


0,1
avg_epoch_loss,█▂▂▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.94315
epoch,5.0
final_loss,1.94315


[34m[1mwandb[0m: Agent Starting Run: ynwdogqk with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 1.8470
Epoch: 2, Average Loss: 1.6985
Epoch: 3, Average Loss: 1.6682
Epoch: 4, Average Loss: 1.6531
Epoch: 5, Average Loss: 1.6438


0,1
avg_epoch_loss,█▃▂▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.64375
epoch,5.0
final_loss,1.64375


[34m[1mwandb[0m: Agent Starting Run: dvsrf1h9 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 1.7867
Epoch: 2, Average Loss: 1.5991
Epoch: 3, Average Loss: 1.5539
Epoch: 4, Average Loss: 1.5299
Epoch: 5, Average Loss: 1.5146


0,1
avg_epoch_loss,█▃▂▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.51461
epoch,5.0
final_loss,1.51461


[34m[1mwandb[0m: Agent Starting Run: xea1yfop with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.4381
Epoch: 2, Average Loss: 2.3723
Epoch: 3, Average Loss: 2.3663
Epoch: 4, Average Loss: 2.3642
Epoch: 5, Average Loss: 2.3632


0,1
avg_epoch_loss,█▂▁▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,2.3632
epoch,5.0
final_loss,2.3632


[34m[1mwandb[0m: Agent Starting Run: sh2va8r9 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.2592
Epoch: 2, Average Loss: 2.0909
Epoch: 3, Average Loss: 2.0393
Epoch: 4, Average Loss: 2.0123
Epoch: 5, Average Loss: 1.9959


0,1
avg_epoch_loss,█▄▂▁▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.99591
epoch,5.0
final_loss,1.99591


[34m[1mwandb[0m: Agent Starting Run: s4btj45o with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.2075
Epoch: 2, Average Loss: 1.9907
Epoch: 3, Average Loss: 1.9080
Epoch: 4, Average Loss: 1.8581
Epoch: 5, Average Loss: 1.8249


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.82486
epoch,5.0
final_loss,1.82486


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: me3iys2i with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.1861
Epoch: 2, Average Loss: 1.9631
Epoch: 3, Average Loss: 1.8684
Epoch: 4, Average Loss: 1.8075
Epoch: 5, Average Loss: 1.7648


0,1
avg_epoch_loss,█▄▃▂▁
epoch,▁▃▅▆█
final_loss,▁

0,1
avg_epoch_loss,1.76482
epoch,5.0
final_loss,1.76482


[34m[1mwandb[0m: Agent Starting Run: sqf6skh3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3886
Epoch: 2, Average Loss: 2.3655
Epoch: 3, Average Loss: 2.3640
Epoch: 4, Average Loss: 2.3632
Epoch: 5, Average Loss: 2.3627
Epoch: 6, Average Loss: 2.3622
Epoch: 7, Average Loss: 2.3620
Epoch: 8, Average Loss: 2.3618
Epoch: 9, Average Loss: 2.3616
Epoch: 10, Average Loss: 2.3615


0,1
avg_epoch_loss,█▂▂▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,2.36151
epoch,10.0
final_loss,2.36151


[34m[1mwandb[0m: Agent Starting Run: c4y9na8q with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.0925
Epoch: 2, Average Loss: 1.9891
Epoch: 3, Average Loss: 1.9723
Epoch: 4, Average Loss: 1.9650
Epoch: 5, Average Loss: 1.9605
Epoch: 6, Average Loss: 1.9575
Epoch: 7, Average Loss: 1.9553
Epoch: 8, Average Loss: 1.9536
Epoch: 9, Average Loss: 1.9522
Epoch: 10, Average Loss: 1.9513


0,1
avg_epoch_loss,█▃▂▂▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.95126
epoch,10.0
final_loss,1.95126


[34m[1mwandb[0m: Agent Starting Run: fseh41lq with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 1.9791
Epoch: 2, Average Loss: 1.8140
Epoch: 3, Average Loss: 1.7763
Epoch: 4, Average Loss: 1.7567
Epoch: 5, Average Loss: 1.7443
Epoch: 6, Average Loss: 1.7359
Epoch: 7, Average Loss: 1.7293
Epoch: 8, Average Loss: 1.7241
Epoch: 9, Average Loss: 1.7199
Epoch: 10, Average Loss: 1.7166


0,1
avg_epoch_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.71662
epoch,10.0
final_loss,1.71662


[34m[1mwandb[0m: Agent Starting Run: qk7nlqsk with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 1.9532
Epoch: 2, Average Loss: 1.7583
Epoch: 3, Average Loss: 1.7150
Epoch: 4, Average Loss: 1.6929
Epoch: 5, Average Loss: 1.6790
Epoch: 6, Average Loss: 1.6695
Epoch: 7, Average Loss: 1.6623
Epoch: 8, Average Loss: 1.6567
Epoch: 9, Average Loss: 1.6525
Epoch: 10, Average Loss: 1.6483


0,1
avg_epoch_loss,█▄▃▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.64832
epoch,10.0
final_loss,1.64832


[34m[1mwandb[0m: Agent Starting Run: vn4gkm6b with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.5078
Epoch: 2, Average Loss: 2.3915
Epoch: 3, Average Loss: 2.3795
Epoch: 4, Average Loss: 2.3733
Epoch: 5, Average Loss: 2.3695
Epoch: 6, Average Loss: 2.3670
Epoch: 7, Average Loss: 2.3653
Epoch: 8, Average Loss: 2.3642
Epoch: 9, Average Loss: 2.3634
Epoch: 10, Average Loss: 2.3629


0,1
avg_epoch_loss,█▂▂▂▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,2.36289
epoch,10.0
final_loss,2.36289


[34m[1mwandb[0m: Agent Starting Run: xc8oy0y5 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3734
Epoch: 2, Average Loss: 2.1905
Epoch: 3, Average Loss: 2.1368
Epoch: 4, Average Loss: 2.1040
Epoch: 5, Average Loss: 2.0799
Epoch: 6, Average Loss: 2.0615
Epoch: 7, Average Loss: 2.0473
Epoch: 8, Average Loss: 2.0359
Epoch: 9, Average Loss: 2.0267
Epoch: 10, Average Loss: 2.0190


0,1
avg_epoch_loss,█▄▃▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,2.01898
epoch,10.0
final_loss,2.01898


[34m[1mwandb[0m: Agent Starting Run: ztf1vxm3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3303
Epoch: 2, Average Loss: 2.1203
Epoch: 3, Average Loss: 2.0452
Epoch: 4, Average Loss: 1.9988
Epoch: 5, Average Loss: 1.9646
Epoch: 6, Average Loss: 1.9384
Epoch: 7, Average Loss: 1.9172
Epoch: 8, Average Loss: 1.9001
Epoch: 9, Average Loss: 1.8860
Epoch: 10, Average Loss: 1.8739


0,1
avg_epoch_loss,█▅▄▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.8739
epoch,10.0
final_loss,1.8739


[34m[1mwandb[0m: Agent Starting Run: 0wtw8di0 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 128
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3107
Epoch: 2, Average Loss: 2.1023
Epoch: 3, Average Loss: 2.0225
Epoch: 4, Average Loss: 1.9720
Epoch: 5, Average Loss: 1.9349
Epoch: 6, Average Loss: 1.9054
Epoch: 7, Average Loss: 1.8810
Epoch: 8, Average Loss: 1.8607
Epoch: 9, Average Loss: 1.8439
Epoch: 10, Average Loss: 1.8296


0,1
avg_epoch_loss,█▅▄▃▃▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.82956
epoch,10.0
final_loss,1.82956


[34m[1mwandb[0m: Agent Starting Run: mwdlpuxy with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3816
Epoch: 2, Average Loss: 2.3645
Epoch: 3, Average Loss: 2.3630
Epoch: 4, Average Loss: 2.3623
Epoch: 5, Average Loss: 2.3619
Epoch: 6, Average Loss: 2.3615
Epoch: 7, Average Loss: 2.3614
Epoch: 8, Average Loss: 2.3612
Epoch: 9, Average Loss: 2.3611
Epoch: 10, Average Loss: 2.3611


0,1
avg_epoch_loss,█▂▂▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,2.36108
epoch,10.0
final_loss,2.36108


[34m[1mwandb[0m: Agent Starting Run: wie4v512 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.0527
Epoch: 2, Average Loss: 1.9678
Epoch: 3, Average Loss: 1.9572
Epoch: 4, Average Loss: 1.9524
Epoch: 5, Average Loss: 1.9494
Epoch: 6, Average Loss: 1.9474
Epoch: 7, Average Loss: 1.9457
Epoch: 8, Average Loss: 1.9445
Epoch: 9, Average Loss: 1.9435
Epoch: 10, Average Loss: 1.9427


0,1
avg_epoch_loss,█▃▂▂▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.94266
epoch,10.0
final_loss,1.94266


[34m[1mwandb[0m: Agent Starting Run: 23sghhmr with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 1.9096
Epoch: 2, Average Loss: 1.7437
Epoch: 3, Average Loss: 1.7082
Epoch: 4, Average Loss: 1.6898
Epoch: 5, Average Loss: 1.6789
Epoch: 6, Average Loss: 1.6714
Epoch: 7, Average Loss: 1.6659
Epoch: 8, Average Loss: 1.6614
Epoch: 9, Average Loss: 1.6576
Epoch: 10, Average Loss: 1.6545


0,1
avg_epoch_loss,█▃▂▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.65448
epoch,10.0
final_loss,1.65448


[34m[1mwandb[0m: Agent Starting Run: 6hr4wflr with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 1.8600
Epoch: 2, Average Loss: 1.6699
Epoch: 3, Average Loss: 1.6252
Epoch: 4, Average Loss: 1.6024
Epoch: 5, Average Loss: 1.5880
Epoch: 6, Average Loss: 1.5774
Epoch: 7, Average Loss: 1.5695
Epoch: 8, Average Loss: 1.5635
Epoch: 9, Average Loss: 1.5581
Epoch: 10, Average Loss: 1.5536


0,1
avg_epoch_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.55355
epoch,10.0
final_loss,1.55355


[34m[1mwandb[0m: Agent Starting Run: 0hui392p with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.4663
Epoch: 2, Average Loss: 2.3788
Epoch: 3, Average Loss: 2.3701
Epoch: 4, Average Loss: 2.3662
Epoch: 5, Average Loss: 2.3643
Epoch: 6, Average Loss: 2.3632
Epoch: 7, Average Loss: 2.3625
Epoch: 8, Average Loss: 2.3621
Epoch: 9, Average Loss: 2.3618
Epoch: 10, Average Loss: 2.3615


0,1
avg_epoch_loss,█▂▂▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,2.36149
epoch,10.0
final_loss,2.36149


[34m[1mwandb[0m: Agent Starting Run: akgc9urk with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3208
Epoch: 2, Average Loss: 2.1462
Epoch: 3, Average Loss: 2.0881
Epoch: 4, Average Loss: 2.0548
Epoch: 5, Average Loss: 2.0333
Epoch: 6, Average Loss: 2.0180
Epoch: 7, Average Loss: 2.0067
Epoch: 8, Average Loss: 1.9979
Epoch: 9, Average Loss: 1.9908
Epoch: 10, Average Loss: 1.9849


0,1
avg_epoch_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.98493
epoch,10.0
final_loss,1.98493


[34m[1mwandb[0m: Agent Starting Run: zl62tmeb with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.2673
Epoch: 2, Average Loss: 2.0612
Epoch: 3, Average Loss: 1.9842
Epoch: 4, Average Loss: 1.9337
Epoch: 5, Average Loss: 1.8968
Epoch: 6, Average Loss: 1.8689
Epoch: 7, Average Loss: 1.8466
Epoch: 8, Average Loss: 1.8287
Epoch: 9, Average Loss: 1.8138
Epoch: 10, Average Loss: 1.8010


0,1
avg_epoch_loss,█▅▄▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.80102
epoch,10.0
final_loss,1.80102


[34m[1mwandb[0m: Agent Starting Run: iit5zadp with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.2494
Epoch: 2, Average Loss: 2.0344
Epoch: 3, Average Loss: 1.9452
Epoch: 4, Average Loss: 1.8864
Epoch: 5, Average Loss: 1.8458
Epoch: 6, Average Loss: 1.8154
Epoch: 7, Average Loss: 1.7909
Epoch: 8, Average Loss: 1.7704
Epoch: 9, Average Loss: 1.7529
Epoch: 10, Average Loss: 1.7379


0,1
avg_epoch_loss,█▅▄▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.73789
epoch,10.0
final_loss,1.73789


[34m[1mwandb[0m: Agent Starting Run: fumqaor5 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.3783
Epoch: 2, Average Loss: 2.3648
Epoch: 3, Average Loss: 2.3634
Epoch: 4, Average Loss: 2.3627
Epoch: 5, Average Loss: 2.3624
Epoch: 6, Average Loss: 2.3621
Epoch: 7, Average Loss: 2.3619
Epoch: 8, Average Loss: 2.3618
Epoch: 9, Average Loss: 2.3617
Epoch: 10, Average Loss: 2.3616


0,1
avg_epoch_loss,█▂▂▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,2.36159
epoch,10.0
final_loss,2.36159


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kc36tysq with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.0246
Epoch: 2, Average Loss: 1.9589
Epoch: 3, Average Loss: 1.9508
Epoch: 4, Average Loss: 1.9465
Epoch: 5, Average Loss: 1.9437
Epoch: 6, Average Loss: 1.9417
Epoch: 7, Average Loss: 1.9403
Epoch: 8, Average Loss: 1.9392
Epoch: 9, Average Loss: 1.9381
Epoch: 10, Average Loss: 1.9374


0,1
avg_epoch_loss,█▃▂▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.93735
epoch,10.0
final_loss,1.93735


[34m[1mwandb[0m: Agent Starting Run: dfmiw9ik with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 1.8473
Epoch: 2, Average Loss: 1.6984
Epoch: 3, Average Loss: 1.6684
Epoch: 4, Average Loss: 1.6534
Epoch: 5, Average Loss: 1.6441
Epoch: 6, Average Loss: 1.6374
Epoch: 7, Average Loss: 1.6325
Epoch: 8, Average Loss: 1.6285
Epoch: 9, Average Loss: 1.6253
Epoch: 10, Average Loss: 1.6227


0,1
avg_epoch_loss,█▃▂▂▂▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.62267
epoch,10.0
final_loss,1.62267


[34m[1mwandb[0m: Agent Starting Run: z2lngydj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 1.7860
Epoch: 2, Average Loss: 1.5988
Epoch: 3, Average Loss: 1.5534
Epoch: 4, Average Loss: 1.5293
Epoch: 5, Average Loss: 1.5140
Epoch: 6, Average Loss: 1.5032
Epoch: 7, Average Loss: 1.4950
Epoch: 8, Average Loss: 1.4884
Epoch: 9, Average Loss: 1.4829
Epoch: 10, Average Loss: 1.4784


0,1
avg_epoch_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.4784
epoch,10.0
final_loss,1.4784


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g9ua8dx2 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 2
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.4378
Epoch: 2, Average Loss: 2.3718
Epoch: 3, Average Loss: 2.3659
Epoch: 4, Average Loss: 2.3637
Epoch: 5, Average Loss: 2.3627
Epoch: 6, Average Loss: 2.3622
Epoch: 7, Average Loss: 2.3617
Epoch: 8, Average Loss: 2.3615
Epoch: 9, Average Loss: 2.3613
Epoch: 10, Average Loss: 2.3611


0,1
avg_epoch_loss,█▂▁▁▁▁▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,2.36113
epoch,10.0
final_loss,2.36113


[34m[1mwandb[0m: Agent Starting Run: hi4m2u6n with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 3
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.2565
Epoch: 2, Average Loss: 2.0855
Epoch: 3, Average Loss: 2.0361
Epoch: 4, Average Loss: 2.0102
Epoch: 5, Average Loss: 1.9944
Epoch: 6, Average Loss: 1.9836
Epoch: 7, Average Loss: 1.9758
Epoch: 8, Average Loss: 1.9698
Epoch: 9, Average Loss: 1.9652
Epoch: 10, Average Loss: 1.9615


0,1
avg_epoch_loss,█▄▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.96151
epoch,10.0
final_loss,1.96151


[34m[1mwandb[0m: Agent Starting Run: 3emvjo0a with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 4
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.2041
Epoch: 2, Average Loss: 1.9883
Epoch: 3, Average Loss: 1.9096
Epoch: 4, Average Loss: 1.8616
Epoch: 5, Average Loss: 1.8274
Epoch: 6, Average Loss: 1.8016
Epoch: 7, Average Loss: 1.7815
Epoch: 8, Average Loss: 1.7652
Epoch: 9, Average Loss: 1.7516
Epoch: 10, Average Loss: 1.7400


0,1
avg_epoch_loss,█▅▄▃▂▂▂▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
final_loss,▁

0,1
avg_epoch_loss,1.74
epoch,10.0
final_loss,1.74


[34m[1mwandb[0m: Agent Starting Run: sz9gkwcy with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	embeddings_size: 50
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_units: 512
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	ngram: 5
[34m[1mwandb[0m: 	test_pct: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Training Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1, Average Loss: 2.1858
Epoch: 2, Average Loss: 1.9601
Epoch: 3, Average Loss: 1.8657
