In [16]:
import torch
import matplotlib.pyplot as plt 

torch.__version__

g = torch.Generator().manual_seed(123456)

In [17]:
import random
with open("names.txt", "r+") as f:
	words = f.read().splitlines()
	words = [word.strip() for word in words] # get rid of any trailing spaces
	names = [w for w in words if w] # get rid of any empty strings
	random.shuffle(names)

min_chars = min(len(v) for v in names)
max_chars = max(len(v) for v in names)
chars = sorted(list(set("".join(names))))

# in replacement of the start and end token. Every name should end with a period. and there should be no start token to begin a sequence
chars = ['.'] + chars
chars_count = len(chars)
print("names: ", names[:5])
print("number of names: ", len(names))
print("(list of chars, count): ", ("".join(chars), chars_count))
print("(max word length, min word length): ", (max_chars, min_chars))

atoi = {ch:i for i,ch in enumerate(chars)}
itoa = {i:ch for i,ch in enumerate(chars)}

# adding end token to each name
names = [list(name) + ['.'] for name in names]

names:  ['macie', 'brix', 'gabbanelli', 'rylon', 'yash']
number of names:  32033
(list of chars, count):  ('.abcdefghijklmnopqrstuvwxyz', 27)
(max word length, min word length):  (15, 2)


In [18]:
block_size = 5

def build_dset(dset):
    X, Y = [], []
    for name in dset:
        ctx = [0] * block_size
        for ch in name:
            ix = atoi[ch]
            X.append(ctx)
            Y.append(ix)
            ctx = ctx[1:] + [ix]
    
    X = torch.tensor(X)
    Y = torch.tensor(Y)
    return X, Y

n1 = int(0.8*len(names))
n2 = int(0.9*len(names))

X_train, Y_train = build_dset(names[:n1])
X_val, Y_val = build_dset(names[n1:n2])
X_test, Y_test = build_dset(names[n2:])

for c, d in zip(X_train[:len(names[1])], Y_train[:len(names[1])]):
    print(''.join(itoa[i.item()] for i in c), "=>", itoa[d.item()])
    
# build_dset(names[:3])

..... => m
....m => a
...ma => c
..mac => i
.maci => e


In [37]:
class Emmbedding:
    def __init__(self, num_embedding, embedding_dim):
        self.weights = torch.randn((num_embedding, embedding_dim), generator=g)
    def __call__(self, IX):
        self.out = self.weights[IX]
        return self.out
    def parameters(self):
        return [self.weights]
        
# ------------------------

class Linear:
    def __init__(self, fan_in, fan_out, bias=True, dtype=None):
        self.weights = torch.randn((fan_in, fan_out), generator=g, dtype=dtype) / fan_in**0.5 # note: kaiming init
        self.bias = torch.randn((fan_out), generator=g, dtype=dtype) if bias else None
    
    def __call__(self, X):
        self.out = X @ self.weights
        if self.bias is not None:
            self.out = self.out + self.bias
        return self.out
    
    def parameters(self):
        return [self.weights] if self.bias is None else [self.weights, self.bias]

# ------------------------

class Tanh:
    def __init__(self):
        self.tanh = torch.tanh
    def __call__(self,x):
        self.out = self.tanh(x)
        return self.out
    def parameters(self):
        return []
        
# ------------------------

class BatchNorm1d:
    def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None):
        self.beta = torch.zeros((num_features))
        self.gamma = torch.ones((num_features))
        self.running = track_running_stats
        self.mean = None
        self.var = None
        self.eps = eps
        self.momentum = 0.1
        self.running_mean = torch.zeros(num_features)
        self.running_var = torch.ones(num_features)
    def __call__(self, X):
        if self.running:
            mean = self.running_mean
            var = self.running_var
        else:
            mean = torch.mean(X, keepdim=True)
            var = torch.var(X, unbiased=False)
        
        xi = (X - mean) / (var + self.eps) ** 0.5
        self.out = self.gamma * xi + self.beta
        if not self.running:
            with torch.no_grad():
                self.running_mean = self.running_mean * (1-momentum) + momentum * self.mean
                self.running_var = self.running_var * (1-momentum) + momentum * self.var            
                
        return self.out

    def parameters(self):
        return [self.beta, self.gamma]
        
# ------------------------

class Sequential:
    def __init__(self, layers):
        self.layers = layers
    def __call__(self, X):
        for layer in self.layers:
            X = layer(X)
        self.out = X
        return self.out
    def parameters(self):
        return [param for param in layer.parameters() for layer in self.layers]

Sequential([
        Linear(5, 30), BatchNorm1d(30), Tanh(),
        Linear(30, 30), BatchNorm1d(30), Tanh(),
        Linear(30, 30), BatchNorm1d(30), Tanh(),
        Linear(30, 30), BatchNorm1d(30), Tanh(),
        Linear(30, 5),        
    ]
)(torch.randn((1,5)))

tensor([[ 1.6954, -1.0087,  0.4551, -1.1384, -1.7112]])