In [192]:
# Load the name corpus, one entry per line. The context manager closes the file
# handle deterministically instead of leaving it to the garbage collector, and
# the explicit encoding keeps decoding independent of the platform default.
# NOTE(review): assumes the corpus file is UTF-8 encoded -- confirm.
with open('Chinese_Names_Corpus（120W）.txt', 'r', encoding='utf-8') as corpus_file:
    words = corpus_file.read().splitlines()

In [193]:
# Peek at the first eight entries to sanity-check the load.
words[:8]

['阿安', '阿彬', '阿斌', '阿滨', '阿冰', '阿冰冰', '阿兵', '阿婵']

In [194]:
from torch import nn
import torch
import torch.nn.functional as F

In [195]:
class Linear:
    """Fully connected layer computing ``x @ weight`` plus an optional bias.

    Args:
        input_size: size of the last dimension of the input.
        output_size: size of the last dimension of the output.
        bias: when False, no bias vector is created or added.

    Attributes:
        weight: ``(input_size, output_size)`` tensor drawn from a standard normal.
        bias: ``(output_size,)`` tensor drawn from a standard normal, or None.
        out: result of the most recent forward call.
    """

    def __init__(self,input_size,output_size,bias=True):
        self.weight = torch.randn((input_size,output_size))
        self.bias = torch.randn(output_size) if bias else None

    def __call__(self,x):
        """Apply the affine map to ``x`` and cache the result in ``self.out``."""
        self.out = x @ self.weight
        if self.bias is not None:
            self.out += self.bias
        return self.out

    def parameters(self):
        """Return the trainable tensors: the weight, followed by the bias if any.

        The previous one-liner parsed as ``([weight] + [bias]) if bias is not
        None else []`` and therefore returned an empty list (dropping the
        weight) for bias-free layers.
        """
        if self.bias is None:
            return [self.weight]
        return [self.weight, self.bias]

class BatchNorm:
    """Batch normalisation over dimension 0 with a learnable scale and shift.

    While ``training`` is True the layer normalises with the statistics of the
    current batch and folds them into exponential running averages (momentum
    0.001). While it is False the stored running averages are used instead.

    Args:
        dim: number of features (size of the last dimension of the input).

    Attributes:
        runing_mean, runing_var: running statistics (attribute spelling kept
            as-is so existing references keep working).
        gamma, beta: learnable scale and shift.
        eps: constant added to the variance before the square root.
        training: selects batch statistics (True) or running statistics (False).
        out: result of the most recent forward call.
    """

    def __init__(self,dim):
        self.runing_mean = torch.zeros(dim)
        self.runing_var = torch.ones(dim)
        self.eps = 1e-5
        self.training = True
        # NOTE(review): gamma/beta are drawn from a standard normal; the common
        # choice is ones/zeros. Left unchanged to preserve existing behavior.
        self.gamma = torch.randn(dim)
        self.beta = torch.randn(dim)

    def __call__(self,x):
        """Normalise ``x`` along dimension 0 and cache the result in ``self.out``."""
        if self.training:
            x_mean = x.mean(dim=0,keepdims=True)
            x_var = x.var(dim=0,keepdims=True)
        else:
            x_mean = self.runing_mean
            x_var = self.runing_var
        self.out = self.gamma*(x-x_mean)/torch.sqrt(x_var+self.eps)+self.beta
        if self.training:
            # The running statistics are buffers, not part of the loss. Updating
            # them outside autograd stops every step from chaining the buffers
            # to the previous step's graph, which would otherwise grow without
            # bound over a long training run.
            with torch.no_grad():
                self.runing_mean = 0.999 * self.runing_mean + 0.001*x_mean
                self.runing_var = 0.999 * self.runing_var + 0.001*x_var
        return self.out

    def parameters(self):
        """Return the trainable tensors ``[gamma, beta]``."""
        return [self.gamma,self.beta]

    
class Flatten:
    """Collapse every dimension after the first into a single dimension."""

    def parameters(self):
        """This layer has nothing to train."""
        return []

    def __call__(self,x):
        """Reshape ``x`` to ``(len(x), -1)`` and cache the view in ``self.out``."""
        rows = len(x)
        flat = x.view(rows,-1)
        self.out = flat
        return flat
        

class Tanh:
    """Element-wise hyperbolic tangent activation."""

    def parameters(self):
        """This layer has nothing to train."""
        return []

    def __call__(self,x):
        """Apply tanh to ``x`` and cache the result in ``self.out``."""
        activated = x.tanh()
        self.out = activated
        return activated

class Embedding:
    """Lookup table whose rows are indexed by the values of the input.

    Args:
        vocab_size: number of rows in the table.
        embed_size: number of columns (length of each embedding vector).
    """

    def __init__(self,vocab_size,embed_size):
        table_shape = (vocab_size,embed_size)
        self.embed = torch.randn(*table_shape)
        self.vocab_size = vocab_size

    def parameters(self):
        """Return the single trainable tensor: the embedding table."""
        return [self.embed]

    def __call__(self,x):
        """Index the table with ``x`` and return the selected rows."""
        rows = self.embed[x]
        return rows

class Sequential:
    """Container that applies a list of layers one after another.

    Args:
        layers: list of callables that each also expose ``parameters()``.
    """

    def __init__(self,layers):
        self.layers = layers

    def __call__(self,x):
        """Feed ``x`` through every layer in order and return the final output."""
        activation = x
        for module in self.layers:
            activation = module(activation)
        return activation

    def parameters(self):
        """Return the parameters of all layers as one flat list, in layer order."""
        return [tensor for module in self.layers for tensor in module.parameters()]

In [196]:
# Character vocabulary: index 0 is reserved for the '.' token and the distinct
# characters of the corpus take indices 1..N in sorted order.
stoi = {'.': 0}
itos = {0: '.'}
chars = sorted(set(''.join(words)))
for idx, ch in enumerate(chars, start=1):
    stoi[ch] = idx
    itos[idx] = ch

In [197]:
# Vocabulary size, counting the '.' token at index 0.
len(stoi)

2271

In [198]:
block_size = 5  # number of preceding tokens fed to the model per prediction


def get_data():
    """Build (context, next-token) training pairs from the global ``words``.

    Every word is terminated with '.', and for each of its characters one
    example is emitted: the ``block_size`` previous token indices (left-padded
    with 0) as input and the character's own index as target.

    Returns:
        A pair ``(X, Y)`` of tensors built from the collected contexts and
        targets, one row / entry per example.
    """
    inputs, targets = [], []
    for name in words:
        window = [0]*block_size
        for ch in name + '.':
            target = stoi[ch]
            inputs.append(window)
            targets.append(target)
            # Slide the window: drop the oldest token, append the newest.
            window = window[1:]+[target]
    return torch.tensor(inputs), torch.tensor(targets)

In [199]:
# Materialise the whole corpus as context (X) and next-token target (Y) tensors.
X,Y = get_data()

In [200]:
# Model hyperparameters.
vocab_size = len(stoi)  # number of distinct tokens, including '.'
embed_size = 100  # length of each token's embedding vector

In [201]:
# Stand-alone embedding table.
# NOTE(review): no visible cell uses this object -- the model below builds its
# own Embedding inside `layers`. Confirm it is unused before removing.
embedding = Embedding(vocab_size,embed_size)

In [202]:
# MLP over the concatenated context embeddings: three hidden
# Linear -> BatchNorm -> Tanh stages followed by a batch-normalised projection
# onto the vocabulary.
hidden_size = 100  # width of every hidden layer
layers = [
    Embedding(vocab_size, embed_size),
    Flatten(),
    Linear(embed_size * block_size, hidden_size), BatchNorm(hidden_size), Tanh(),
    Linear(hidden_size, hidden_size), BatchNorm(hidden_size), Tanh(),
    Linear(hidden_size, hidden_size), BatchNorm(hidden_size), Tanh(),
    Linear(hidden_size, vocab_size), BatchNorm(vocab_size),
]

In [203]:
# Wrap the layer list so the whole model can be called like a function.
sequential = Sequential(layers)

In [204]:
# Number of examples drawn per optimisation step.
batch_size = 32

In [205]:
# Flat list of every trainable tensor in the model, in layer order.
params = sequential.parameters()

In [206]:
# Turn on gradient tracking for every trainable tensor.
for tensor in params:
    tensor.requires_grad_(True)

In [222]:
# Plain minibatch SGD with a single step-down of the learning rate.
for i in range(400000):

    # Draw a random minibatch (indices sampled with replacement).
    idxs = torch.randint(0,len(X),(batch_size,))
    Xtr = X[idxs] # (batch_size, block_size) context token indices
    Ytr = Y[idxs] # (batch_size,) target token indices
    logits = sequential(Xtr)
    loss = F.cross_entropy(logits,Ytr)
    # Clear stale gradients before backpropagating this batch.
    for p in params:
        p.grad = None
    loss.backward()
    lr =  0.01 if i < 10000 else 0.001
    # Update under no_grad rather than through `.data`: the step stays out of
    # the autograd graph while in-place changes remain visible to autograd's
    # version tracking.
    with torch.no_grad():
        for p in params:
            p += -lr*p.grad
    # NOTE: this is the loss of one minibatch, so the printed values are noisy.
    if i % 10000 == 0:
        print(loss.item())

5.130544662475586
4.906082630157471
4.103755950927734
4.410313129425049
4.025326251983643
4.489019393920898
4.305686950683594
4.775646686553955
5.2324676513671875
4.384654521942139
4.132236480712891
3.981794834136963
4.626075744628906
3.5692386627197266
4.354118347167969
4.857334136962891
4.904262542724609
4.51820707321167
4.5569167137146
4.9520649909973145


[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument


KeyboardInterrupt: 

In [223]:
# Switch the model to inference mode. Among the layer classes defined in this
# notebook only BatchNorm reads `training` (it then normalises with its running
# statistics); on the other layers this merely sets an attribute they ignore.
for layer in layers:
    layer.training = False

In [242]:
# Token index of the character '胡', which seeds the sampling below.
stoi['胡']

1659

In [243]:
# Sample 50 names that start with '胡' and print those not present in the corpus.
# The model is called with a single example at a time, which relies on the
# BatchNorm layers having `training = False` (batch statistics of one row are
# not usable).
start_ix = stoi['胡']      # look the start token up instead of hard-coding its index
known_names = set(words)   # constant-time membership test instead of scanning the list
with torch.no_grad():      # inference only, so no autograd graph is needed
    for _ in range(50):
        ix = start_ix
        context = [0]*(block_size-1)+[ix]
        w = []
        while True:
            w.append(itos[ix])
            x = torch.tensor(context).view(1,-1)
            logits = sequential(x)
            probs = F.softmax(logits,dim=1)
            ix = torch.multinomial(probs,num_samples=1).item()
            context = context[1:]+[ix]
            # Index 0 is the '.' end-of-name token.
            if ix == 0:
                break
        name = ''.join(w)
        if name not in known_names:
            print(name)

胡景荻
胡乐暖
胡亚晓
胡舜店
胡年居
胡夔
胡坤志
胡民芬
胡绯文
胡滕万
胡正卓
胡波超
胡超红
胡共忠
胡黄安
胡康辉
胡霞林
胡建婴
胡僖斌
胡帝
胡磬观
胡牙冕
胡日
胡庠
胡玉媛
胡良诚
胡家岱
胡秀潮
胡浑烟
胡宏清
胡冶生
胡黎妤


In [210]:
# Token index of the character '刘'.
stoi['刘']

214