In [48]:
import data_processing
import numpy as np
import torch

In [49]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# To force CPU execution (e.g. while debugging), enable the line below instead:
#device = torch.device("cpu")

In [50]:
# Report which device was selected.
print("Device: ", device)

Device:  cuda


In [51]:
# Make `device` the default for tensors created from here on, so later cells do
# not have to pass device= to every factory call.
torch.set_default_device(device)

In [52]:
# Load the corpus.
# NOTE(review): presumably a single str; it is only iterated character by
# character below — confirm against data_processing.read_data.
text = data_processing.read_data()

# Build the character vocabulary. The set is sorted so that the char <-> id
# mapping is deterministic: the iteration order of a set of strings depends on
# per-process hash randomization, so an unsorted vocabulary would assign
# different ids after every kernel restart.
chars = tuple(sorted(set(text)))
int2char = dict(enumerate(chars))
char2int = {ch: idx for idx, ch in enumerate(chars)}

In [53]:
# Encode the corpus as an array of integer ids, one entry per element of `text`.
encoded = np.array([char2int[ch] for ch in text])


In [54]:
# Sanity check: the array is 1-D with one id per corpus character.
encoded.shape

(1115394,)

In [55]:
def window_data_test_train(data,train_split=0.8,window_size=5):
    """Split a sequence in order and build shifted sliding-window pairs.

    The leading ``train_split`` fraction of ``data`` forms the training
    partition and the remainder forms the test partition. Each partition is
    expanded into overlapping windows of ``window_size`` consecutive elements.
    The target paired with a window is the window that starts one element
    later, so every target position holds the element following the matching
    input position.

    Args:
        data: NumPy array to split along axis 0.
        train_split: Fraction of ``data`` placed in the training partition.
        window_size: Number of consecutive elements in each window.

    Returns:
        ``(X_train, X_test, Y_train, Y_test)`` as tensors. For 1-D ``data``
        each has shape ``(partition_length - window_size, window_size)``.
    """
    cut = int(len(data) * train_split)
    make_windows = np.lib.stride_tricks.sliding_window_view

    # Window both partitions first (train, then test), as views over `data`.
    windowed = [
        make_windows(part, window_shape=window_size, axis=0)
        for part in (data[:cut], data[cut:])
    ]

    # Inputs drop the last window, targets drop the first: a shift by one.
    inputs = [torch.tensor(w[:-1, :]) for w in windowed]
    targets = [torch.tensor(w[1:, :]) for w in windowed]

    return inputs[0], inputs[1], targets[0], targets[1]

In [56]:
# Build the windowed tensors with the defaults: 80/20 split, window size 5.
x_train,x_test,y_train,y_test = window_data_test_train(encoded)

In [57]:
# Row-count check. A split of length n gives n - window_size + 1 windows, and
# the one-step X/Y shift drops one more, so with window_size = 5 each split
# loses 4 + 1 rows; over the two splits that is 2*4 + 2.
x_train.shape[0] + x_test.shape[0] == encoded.shape[0] - (2*4 + 2)

True

In [58]:
from torch.nn import functional as F
from torch import nn

# Scratch check of nn.Linear: a layer with 10 input and 20 output features.
test = nn.Linear(10,20)

# Apply it to 5 random rows of 10 features and look at the output shape.
test(torch.randn(5,10)).shape

torch.Size([5, 20])

In [59]:
# The layer's weight is stored as (out_features, in_features).
test.weight.shape

torch.Size([20, 10])

In [60]:
# Group the training windows into batches.
# NOTE(review): this rebinds x_train/y_train, so running the cell a second time
# would feed already-batched tensors back into batch_data — rebuild the windows
# first when re-running.
# NOTE(review): x_test/y_test are not batched here.
x_train,y_train = data_processing.batch_data(x_train,y_train)

In [61]:
# Inspect the batched layout: the full tensor, then a single batch.
x_train.shape,x_train[0].shape

(torch.Size([27884, 32, 5]), torch.Size([32, 5]))

In [62]:
from self_attention import Self_Attention

# Number of distinct characters in the corpus.
vocab_size = len(set(text))

# NOTE(review): the meaning of the four positional arguments is defined by
# self_attention.Self_Attention — confirm which are embedding/input/output sizes.
self_att = Self_Attention(10, vocab_size, 10, vocab_size)

# Smoke test: run one training batch through the untrained model with masking
# enabled and look at the output shape.
self_att(x_train[0], mask=True).shape

torch.Size([32, 5, 65])

In [63]:
model = self_att

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 1
num_batches = x_train.shape[0]

# Validation runs over the second half of the test windows only. Keeping the
# index range in one place lets the mean below divide by the number of windows
# that were actually evaluated.
val_indices = range(x_test.shape[0]//2, x_test.shape[0])

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for i in range(num_batches):
        inputs = x_train[i].to(device)
        targets = y_train[i].to(device)

        # Forward pass
        # NOTE(review): the smoke-test cell calls the model with mask=True, but
        # this call relies on the default — confirm that the default applies
        # the mask intended for next-character training.
        outputs = model(inputs)
        # cross_entropy expects (N, C) logits and (N,) class ids, so collapse
        # every leading dimension into N.
        outputs = outputs.reshape(-1, outputs.shape[-1])
        targets = targets.reshape(-1)
        loss = F.cross_entropy(outputs, targets)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        if i % 1000 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{x_train.shape[0]}], Loss: {loss.item():.4f}')

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss/len(x_train):.4f}')
    # Validation loop
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for i in val_indices:
            inputs = x_test[i].to(device)
            targets = y_test[i].to(device)

            # Forward pass, flattened the same way as in training so both
            # losses are computed over identical (N, C) / (N,) layouts.
            outputs = model(inputs)
            outputs = outputs.reshape(-1, outputs.shape[-1])
            targets = targets.reshape(-1)
            loss = F.cross_entropy(outputs, targets)

            val_loss += loss.item()

    # Average over the evaluated windows; dividing by len(x_test) would halve
    # the reported loss because only half of x_test is visited. max(..., 1)
    # avoids a ZeroDivisionError when x_test is empty.
    print(f'Validation Loss: {val_loss/max(len(val_indices), 1):.4f}')

Epoch [1/1], Step [1/27884], Loss: 4.1813
Epoch [1/1], Step [1001/27884], Loss: 2.6611
Epoch [1/1], Step [2001/27884], Loss: 2.5095
Epoch [1/1], Step [3001/27884], Loss: 2.1648
Epoch [1/1], Step [4001/27884], Loss: 1.8017
Epoch [1/1], Step [5001/27884], Loss: 2.1877
Epoch [1/1], Step [6001/27884], Loss: 2.0524
Epoch [1/1], Step [7001/27884], Loss: 2.3179
Epoch [1/1], Step [8001/27884], Loss: 1.7227
Epoch [1/1], Step [9001/27884], Loss: 1.6664
Epoch [1/1], Step [10001/27884], Loss: 1.9115
Epoch [1/1], Step [11001/27884], Loss: 1.8766
Epoch [1/1], Step [12001/27884], Loss: 2.1099
Epoch [1/1], Step [13001/27884], Loss: 1.9156
Epoch [1/1], Step [14001/27884], Loss: 1.8559
Epoch [1/1], Step [15001/27884], Loss: 1.7728
Epoch [1/1], Step [16001/27884], Loss: 1.7328
Epoch [1/1], Step [17001/27884], Loss: 1.7262
Epoch [1/1], Step [18001/27884], Loss: 1.9363
Epoch [1/1], Step [19001/27884], Loss: 1.5691
Epoch [1/1], Step [20001/27884], Loss: 1.9759
Epoch [1/1], Step [21001/27884], Loss: 1.9401
E

In [65]:
# Generate from the trained model, seeded with the first test target window
# (sliced as 0:1 so the leading dimension is kept).
# NOTE(review): presumably 100 is the number of new tokens to produce — confirm
# against Self_Attention.generate.
# NOTE(review): this rebinds `test`, which earlier held the scratch nn.Linear.
test = model.generate(y_test[0:1],100)

In [66]:
# Take the first row of the generated result, detach it from the autograd
# graph, move it to the CPU and convert it to a plain Python list (despite the
# `_np` suffix, the final value is a list, not an ndarray).
test_np = test[0].detach().cpu().numpy().tolist()

In [67]:
# Map every generated id back to its character, then print them as one string.
list_char = list(map(int2char.__getitem__, test_np))
print(''.join(list_char))

ou ar ar ar ar d arhi ar ar  ar ar a araror ar umamrary w arar a k a y ar arar es arar alwarar , airarmwy
