### Credit: https://www.youtube.com/watch?v=kCc8FmEb1nY&t=6s
### This is an LLM that will be trained to generate poetry, hopefully haikus. This is a fun experiment. Haikus were created with OpenAI GPT-4

In [90]:
import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader

In [91]:
file_path = './haikus.txt'
with open(file_path, 'r') as file:
    contents = file.read()

contents

"Gentle breeze flows through,\\n\nWhispering leaves tell secrets,\\n\nNature's song, so soft.\\n\\n,\n\nMoonlight bathes the night,\\n\nSilver glow on tranquil sea,\\n\nPeace reigns in darkness.\\n\\n,\n\nCity wakes slowly,\\n\nSunrise over concrete peaks,\\n\nDay's first light glimmers.\\n\\n,\n\nLeaves turn gold and red,\\n\nAutumn's crisp breath in the air,\\n\nSeasons shift their dance.\\n\\n,\n\nPeaks touch the sky's edge,\\n\nAlone in vast wilderness,\\n\nNature's grandeur reigns.\\n\\n,\n\nStars twinkle above,\\n\nIn the vast, endless night sky,\\n\nDreams sail on moonbeams.\\n\\n,\n\nRaindrops kiss the earth,\\n\nA soft, rhythmic melody,\\n\nNature's lullaby.\\n\\n,\n\nSnow blankets the land,\\n\nWhite silence, winter's embrace,\\n\nNature sleeps in peace.\\n\\n,\n\nBlossoms greet the sun,\\n\nSpring whispers through awakening buds,\\n\nNew life in bloom.\\n\\n,\n\nSands stretch to the sky,\\n\nHeat shimmers like ocean waves,\\n\nDesert's mirage dance.\\n\\n,\n\nDeep in ocean's

In [92]:
unique_chars = sorted(list(set(contents)))
unique_chars_len = len(unique_chars)
print(''.join(unique_chars))
print(unique_chars_len)


 ',.ABCDGHILMNOPRSTUW\abcdefghiklmnopqrstuvwyz
47


In [93]:
#Lets try One Hot Encoding and pring out vocab dictionary

def one_hot_encode(data_input, char_set):
    # Create a dictionary mapping each character to its index
    char_to_index = {ch: i for i, ch in enumerate(char_set)}
    vector_set = []

    for i in list(data_input):
    # Initialize a vector of zeros with the length of the character set
        one_hot_vector = [0] * len(char_set)

        # Set the position corresponding to the character to 1
        if i in char_to_index:
            one_hot_vector[char_to_index[i]] = 1
            vector_set.append(one_hot_vector)
        else:
            raise ValueError(f"Character '{i}' not in character set")

    return vector_set


In [94]:
#Decoder
def one_hot_decode(encoded_data, char_set):
    # Create a dictionary mapping each index to its character
    index_to_char = {i: ch for i, ch in enumerate(char_set)}

    decoded_string = ""

    for vector in encoded_data:
        # Find the index of the 1 in the vector
        index = vector.index(1)

        # Append the corresponding character to the decoded string
        decoded_string += index_to_char[index]

    return decoded_string

# Example usage
char_set = ',.ABCDGHILMNOPRSTUWabcdefghiklmnopqrstuvwyz'
data_input = "Hello"
encoded_data = one_hot_encode(data_input, char_set)


In [95]:
#test the encoding
data = ',.ABCDGHILMNOPRSTUWabcdefghiklmnopqrstuvwyz'
encoding = one_hot_encode(data, unique_chars)
print(encoding)

[[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [96]:
#Test the decoding
decoded_string = one_hot_decode(encoding, unique_chars)
print(decoded_string)

,.ABCDGHILMNOPRSTUWabcdefghiklmnopqrstuvwyz


In [97]:
#Let's Encode our Data
prepped_data = contents.replace(" ", "")
prepped_data
encoded_data = one_hot_encode(prepped_data, unique_chars)
print(encoded_data)

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [98]:
#Transform encoded data into Tensor
tensor_data = torch.tensor(encoded_data, dtype=torch.float32)
print(tensor_data.shape, tensor_data.dtype)
print(tensor_data)

torch.Size([1195, 47]) torch.float32
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]])


In [99]:
#Train and Validation Sets
n = int(0.8*len(tensor_data))
train = tensor_data[:n]
val = tensor_data[n:]
print(train)
print(val)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]])


In [100]:
block_size = 8
train[:block_size + 1]

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

In [101]:
x = train[:block_size]
y = train[1:block_size+1]
for t in range(block_size):
    context = x[:t+1]
    target = y[t]
    print(f"When the input is {context}, the target is {target}")

When the input is tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), the target is tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
When the input is tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), the target is tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [102]:
print(train)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [103]:
#Introducing the batch dimension
torch.manual_seed(1337)
batch_size = 4
block_size = 8


def get_batch(split):
    data = train if split == 'train' else val
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix]) 
    return x, y  

xb, yb = get_batch('train')
print("inputs:")
print(xb.shape)
print(xb)
print('targets')
print(yb.shape)
print(yb)

print("-----")

for b in range(batch_size):
    for t in range(block_size):
        context = xb[b, :t+1]
        target = yb[b,t]
        print(f"when the input is {context.tolist()}, the target is: {target}")

inputs:
torch.Size([4, 8, 47])
tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 1., 0.]],

        [[1., 0., 0.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
     

In [104]:
x = train[:block_size]
y = train[1:block_size+1]
for t in range(block_size):
    context = x[:t+1]
    target = y[t]
    print(f"When context is {context}, the target is {target}")

When context is tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), the target is tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
When context is tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), the target is tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [105]:
#RNN can handle One-Hot vectors well
class SimpleRNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleRNNModel, self).__init__()
        self.hidden_size = hidden_size

        # RNN layer
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)

        # Output layer
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(1, x.size(0), self.hidden_size)

        # Passing in the input and hidden state into the model and obtaining outputs
        out, _ = self.rnn(x, h0)

        # Reshape the output to (batch_size*sequence_length, hidden_size)
        out = out.contiguous().view(-1, self.hidden_size)

        # Get the final output
        out = self.linear(out)

        return out
model = SimpleRNNModel(unique_chars_len, 128, unique_chars_len)

In [106]:
# Loss function
loss_function = nn.CrossEntropyLoss()

# Optimizer (example: using Adam)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [107]:
class CustomDataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]


In [108]:
# Assuming train_data and train_targets are your data tensors
train_dataset = CustomDataset(xb, yb)

# Create the DataLoader
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)


### Test Run

In [109]:
num_epochs = 500
previous_loss = float('inf') #somthing ridiculous to start out with

for epoch in range(num_epochs):
    for batch in train_loader:  # Assuming you have a DataLoader
        # Split batch data
        x_batch, y_batch = batch  # x_batch is input, y_batch is target labels
        y_batch = y_batch.view(-1, unique_chars_len) #Need to reshape in order to meet the expected shape of the models output
        
        # Forward pass: Compute predicted y by passing x to the model
        y_pred = model(x_batch)
        
        # Compute and print loss
        current_loss = loss_function(y_pred, y_batch)

        #Save the model if the loss is not improving
        if current_loss < previous_loss:
            previous_loss = current_loss
            torch.save(model.state_dict(), 'best_model.pth')

        
        print(f"Epoch {epoch}, Loss: {current_loss.item()}")

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        current_loss.backward()
        optimizer.step()


Epoch 0, Loss: 3.850008964538574
Epoch 1, Loss: 3.8131065368652344
Epoch 2, Loss: 3.77542781829834
Epoch 3, Loss: 3.735625743865967
Epoch 4, Loss: 3.6921780109405518
Epoch 5, Loss: 3.643368721008301
Epoch 6, Loss: 3.587339162826538
Epoch 7, Loss: 3.5223238468170166
Epoch 8, Loss: 3.4472644329071045
Epoch 9, Loss: 3.362854242324829
Epoch 10, Loss: 3.272573471069336
Epoch 11, Loss: 3.182471752166748
Epoch 12, Loss: 3.098926067352295
Epoch 13, Loss: 3.025941848754883
Epoch 14, Loss: 2.9641733169555664
Epoch 15, Loss: 2.91213321685791
Epoch 16, Loss: 2.8682312965393066
Epoch 17, Loss: 2.831655502319336
Epoch 18, Loss: 2.8018293380737305
Epoch 19, Loss: 2.7777366638183594
Epoch 20, Loss: 2.757831573486328
Epoch 21, Loss: 2.74037766456604
Epoch 22, Loss: 2.7238330841064453
Epoch 23, Loss: 2.707092761993408
Epoch 24, Loss: 2.6895456314086914
Epoch 25, Loss: 2.6709678173065186
Epoch 26, Loss: 2.651360511779785
Epoch 27, Loss: 2.6308350563049316
Epoch 28, Loss: 2.6095688343048096
Epoch 29, Loss

### Put it in a function

In [110]:
def train_function(epochs, train_data_loader, model, b, l):
  previous_loss = float('inf')
  loss_function = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
  for epoch in range(epochs):
      for batch in train_data_loader:  # Assuming you have a DataLoader
        # Split batch data
        x_batch, y_batch = batch  # x_batch is input, y_batch is target labels
        y_batch = y_batch.view(-1, unique_chars_len) #Need to reshape in order to meet the expected shape of the models output

        # Forward pass: Compute predicted y by passing x to the model
        y_pred = model(x_batch)

        # Compute and print loss
        current_loss = loss_function(y_pred, y_batch)

        if current_loss < previous_loss:
           previous_loss = current_loss
           torch.save(model.state_dict(), "best_model_batch_{}_layers_{}_epochs_{}".format(b, l, epochs))

        print(f"Epoch {epoch}, Loss: {current_loss.item()}")

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        current_loss.backward()
        optimizer.step()
  

### Train using different hyperparameters

In [111]:
batch_sizes = [8, 16, 32, 64]
num_epochs = [500, 1000]
layers = [128, 256, 512]


for batch in batch_sizes:
    train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
    for layer in layers:
        model = SimpleRNNModel(unique_chars_len, layer, unique_chars_len)
        for epochs in num_epochs:
            print("Size (hidden state): {} Epoch: {} Batch Size: {}".format(layer, epochs, batch))
            train_function(epochs, train_loader, model, batch, layer)

Size (hidden state): 128 Epoch: 500 Batch Size: 8
Epoch 0, Loss: 3.8568038940429688
Epoch 1, Loss: 3.820084571838379
Epoch 2, Loss: 3.7827038764953613
Epoch 3, Loss: 3.743277072906494
Epoch 4, Loss: 3.7002668380737305
Epoch 5, Loss: 3.6519293785095215
Epoch 6, Loss: 3.596435070037842
Epoch 7, Loss: 3.532181978225708
Epoch 8, Loss: 3.458425521850586
Epoch 9, Loss: 3.376145362854004
Epoch 10, Loss: 3.288628339767456
Epoch 11, Loss: 3.201028823852539
Epoch 12, Loss: 3.118856906890869
Epoch 13, Loss: 3.046215295791626
Epoch 14, Loss: 2.9848575592041016
Epoch 15, Loss: 2.9343180656433105
Epoch 16, Loss: 2.8926844596862793
Epoch 17, Loss: 2.857586622238159
Epoch 18, Loss: 2.8270275592803955
Epoch 19, Loss: 2.799720048904419
Epoch 20, Loss: 2.7749409675598145
Epoch 21, Loss: 2.7522263526916504
Epoch 22, Loss: 2.731170892715454
Epoch 23, Loss: 2.7113728523254395
Epoch 24, Loss: 2.692458391189575
Epoch 25, Loss: 2.6740872859954834
Epoch 26, Loss: 2.6559135913848877
Epoch 27, Loss: 2.63756060600