# Character Embeddings

In [1]:
import torch 
from torch import tensor 
import torch.nn.functional as F 
import torch.nn as nn 

## Get the data

In [2]:
import json 
data = open(json.__file__).read() 
data_tensor = torch.tensor([ord(c) for c in data])

In [3]:
num_classes = 126

## Define neural net layers

In [4]:
layer_1 = nn.Linear(in_features=num_classes, out_features=1, bias=False)
layer_2 = nn.Linear(in_features=1, out_features=num_classes, bias=False)

print(f"Layer 1 has {layer_1.in_features} in features and {layer_1.out_features} out features")
print(f"Layer 2 has {layer_2.in_features} in features and {layer_2.out_features} out features")

Layer 1 has 126 in features and 1 out features
Layer 2 has 1 in features and 126 out features


In [16]:
data_tensor.shape

torch.Size([14019])

## Get the input batch

In [5]:
input_batch = data_tensor[0:5]
input_batch

tensor([114,  34,  34,  34,  74])

In [6]:
one_hot_input = F.one_hot(input_batch, num_classes).float()
one_hot_input.shape

torch.Size([5, 126])

## A forward pass

Pass the input to the first layer

In [7]:
act_1 = layer_1(one_hot_input)
act_1

tensor([[-0.0432],
        [-0.0091],
        [-0.0091],
        [-0.0091],
        [ 0.0618]], grad_fn=<MmBackward0>)

Pass the input to the second layer

In [8]:
act_2 = layer_2(act_1)
act_2.shape

torch.Size([5, 126])

In [9]:
chr(act_2[4].argmax().item())

'['

* 5: Amount of data we're working with 
* 126: Number of characters in the dictionary

## Backpropagation and SGD

In [10]:
LEARNING_RATE = .1
target_ids = F.one_hot(data_tensor[1:6], num_classes).float()
EPOCH = 30

In [11]:
loss = F.cross_entropy(act_2, target_ids)
print(f"Epoch 0: the loss is {loss.item()}")

Epoch 0: the loss is 4.8406291007995605


In [12]:
loss.backward()
layer_1.weight.data -= LEARNING_RATE * layer_1.weight.grad
layer_2.weight.data -= LEARNING_RATE * layer_2.weight.grad

In [13]:
for i in range(EPOCH):
    act_1 = layer_1(one_hot_input)
    act_2 = layer_2(act_1)
    
    loss = F.cross_entropy(act_2, target_ids)
    print(f"Epoch {i+1}: the loss is {loss.item()}")
    loss.backward()
    
    layer_1.weight.data -= LEARNING_RATE * layer_1.weight.grad
    layer_2.weight.data -= LEARNING_RATE * layer_2.weight.grad

Epoch 1: the loss is 4.839855670928955
Epoch 2: the loss is 4.838334083557129
Epoch 3: the loss is 4.836106300354004
Epoch 4: the loss is 4.833216667175293
Epoch 5: the loss is 4.829684734344482
Epoch 6: the loss is 4.825475215911865
Epoch 7: the loss is 4.820461273193359
Epoch 8: the loss is 4.814371109008789
Epoch 9: the loss is 4.806732177734375
Epoch 10: the loss is 4.796785831451416
Epoch 11: the loss is 4.783381462097168
Epoch 12: the loss is 4.764822483062744
Epoch 13: the loss is 4.738641262054443
Epoch 14: the loss is 4.7012810707092285
Epoch 15: the loss is 4.64762020111084
Epoch 16: the loss is 4.570286273956299
Epoch 17: the loss is 4.458667755126953
Epoch 18: the loss is 4.297555446624756
Epoch 19: the loss is 4.065454483032227
Epoch 20: the loss is 3.733349323272705
Epoch 21: the loss is 3.2681548595428467
Epoch 22: the loss is 2.6619107723236084
Epoch 23: the loss is 2.0558314323425293
Epoch 24: the loss is 1.789001226425171
Epoch 25: the loss is 1.8268064260482788
Epoch

In [14]:
act_2[4].argmax()

tensor(83)

In [15]:
chr(act_2[4].argmax().item())

'S'

The model correctly predicted the next character. The character after `74` (`J`) was predicted to be `84`, which is equal to `S`