游戏的规则如下：

- 当数字是3的倍数（但不是5的倍数）时，打印FIZZ。
- 当数字是5的倍数（但不是3的倍数）时，打印BUZZ。
- 当数字既是3的倍数，又是5的倍数时，打印FIZZBUZZ。
- 其他情况下，打印数字本身。


# 使用numpy实现

In [None]:
import numpy as np

# Hyper-parameters for the NumPy implementation.
lr = 0.01        # learning rate applied to every weight/bias update
batches = 64     # number of samples in one mini-batch
epochs = 1000    # number of passes over the training set
input_size = 10  # length of the zero-padded binary encoding of a number

def sig(val):
    """Return the logistic sigmoid ``1 / (1 + exp(-val))``, element-wise for arrays."""
    exp_neg = np.exp(-val)
    return 1 / (1 + exp_neg)

def sig_d(val):
    """Return the derivative of the sigmoid evaluated at ``val``.

    Uses the identity ``sig'(v) = sig(v) * (1 - sig(v))``.
    """
    activation = sig(val)
    return activation * (1 - activation)

def binary_enc(num):
    """Encode an integer as a list of binary digits, left-padded with zeros.

    The digits are padded up to the module-level ``input_size``. A number whose
    binary form is already longer than ``input_size`` is returned unpadded.
    """
    bits = [int(ch) for ch in format(num, 'b')]
    padding = [0] * (input_size - len(bits))
    return padding + bits

def binary_dec(array):
    """Decode a sequence of binary digits (most significant first) into an integer.

    Each element is converted with ``int()``; an empty sequence decodes to 0.
    """
    value = 0
    for bit in array:
        # Shift the accumulated value one binary place and append the new digit.
        value = (value << 1) + int(bit)
    return value

def training_test_gen(x, y):
    """Shuffle paired samples and split them into training and test sets.

    The first 90% of a random permutation becomes the training set and the
    rest the test set. ``x`` and ``y`` must have equal length and support
    indexing with an integer index array.

    Returns:
        Tuple ``(trX, trY, teX, teY)``.
    """
    assert len(x) == len(y)
    order = np.random.permutation(range(len(x)))
    cut = int(0.9 * len(order))
    train_idx, test_idx = order[:cut], order[cut:]
    return x[train_idx], y[train_idx], x[test_idx], y[test_idx]

def x_y_gen():
    """Build the FizzBuzz dataset for 0..999 and split it into train/test sets.

    Inputs are the binary encodings from ``binary_enc``. Labels are one-hot
    rows in the order (multiple of 15, multiple of 5, multiple of 3, other).

    Returns:
        The ``(trX, trY, teX, teY)`` tuple produced by ``training_test_gen``.
    """
    numbers = range(1000)
    inputs = [binary_enc(n) for n in numbers]
    labels = []
    for n in numbers:
        # Index of the single 1 in the one-hot row; the most specific rule wins.
        hot = 0 if n % 15 == 0 else 1 if n % 5 == 0 else 2 if n % 3 == 0 else 3
        labels.append([int(k == hot) for k in range(4)])
    return training_test_gen(np.array(inputs), np.array(labels))


def check_fizbuz(i):
    """Return the expected label for ``i``: 'fizbuz', 'buz', 'fiz' or 'number'."""
    # Guard clauses, most specific divisor first.
    if i % 15 == 0:
        return 'fizbuz'
    if i % 5 == 0:
        return 'buz'
    if i % 3 == 0:
        return 'fiz'
    return 'number'

# Build the shuffled train/test split.
# With the 1000 samples from x_y_gen and the 90/10 split the shapes are:
#   trX: 900 x 10, trY: 900 x 4, teX: 100 x 10, teY: 100 x 4
trX, trY, teX, teY = x_y_gen()

# Layer widths. The input width follows input_size so it always matches the
# length of the vectors produced by binary_enc instead of repeating the literal.
hidden_size = 100
output_size = 4

# Network parameters: standard-normal weights and zero biases.
# The two randn calls stay in this order so a seeded run draws the same values.
w1 = np.random.randn(input_size, hidden_size)
w2 = np.random.randn(hidden_size, output_size)
b1 = np.zeros((1, hidden_size))
b2 = np.zeros((1, output_size))

# Number of full mini-batches per epoch (a trailing partial batch is dropped).
no_of_batches = int(len(trX) / batches)


# Mini-batch gradient descent for a two-layer sigmoid network trained with a
# squared-error loss. Constant factors of the loss derivative are left out of
# the gradients; they only rescale the effective learning rate.
for epoch in range(epochs):
    for batch in range(no_of_batches):
        # Slice out the current mini-batch.
        start = batch * batches
        end = start + batches
        x = trX[start:end]
        y = trY[start:end]

        # Forward pass.
        z1 = x.dot(w1) + b1   # hidden pre-activation
        a1 = sig(z1)          # hidden activation
        z2 = a1.dot(w2) + b2  # output pre-activation
        a2 = sig(z2)          # output activation
        y_ = a2               # network prediction

        # Mean squared error of this batch (the last batch's value is printed per epoch).
        error = y_ - y
        loss = (error ** 2).mean()

        # Backward pass, output layer: delta = dL/dz2.
        outgrad = error * sig_d(z2)
        delta_w2 = a1.T.dot(outgrad)
        # Summing over the batch axis works for any batch length.
        delta_b2 = outgrad.sum(axis=0, keepdims=True)

        # Backward pass, hidden layer: the output *delta* (error already scaled by
        # the output sigmoid derivative) is what flows back through w2, not the raw error.
        hidden_error = outgrad.dot(w2.T)
        hidden_grad = hidden_error * sig_d(z1)
        delta_w1 = x.T.dot(hidden_grad)
        delta_b1 = hidden_grad.sum(axis=0, keepdims=True)

        # Gradient-descent update: param <- param - lr * gradient.
        w1 -= delta_w1 * lr
        b1 -= delta_b1 * lr
        w2 -= delta_w2 * lr
        b2 -= delta_b2 * lr
    print(epoch, loss)

# Evaluate the trained network on the held-out test set.
z1 = teX.dot(w1) + b1
a1 = sig(z1)
z2 = a1.dot(w2) + b2
y_ = sig(z2)

# Class names in the same order as the one-hot label columns.
outli = ['fizbuz', 'buz', 'fiz', 'number']
predicted = y_.argmax(axis=1)
for row, pred in zip(teX, predicted):
    num = binary_dec(row)
    print('Number: {} -- Actual: {} -- Prediction: {}'.format(num, check_fizbuz(num), outli[pred]))

# Test metrics. The loss is the mean *squared* error: a plain mean of the signed
# differences lets positive and negative errors cancel and says nothing about fit.
print('Test loss: ', np.mean((teY - y_) ** 2))
# Fraction of test samples whose arg-max class matches the one-hot label.
print('Test accuracy: ', np.mean(predicted == teY.argmax(axis=1)))

# 使用numpy和pytorch混合实现

In [None]:
import numpy as np
import torch as th
from torch.autograd import Variable

# Hyper-parameters for the mixed NumPy/PyTorch implementation.
input_size = 10              # length of the zero-padded binary encoding
epochs, batches = 1000, 64   # training passes, samples per mini-batch
lr = 0.01                    # learning rate for the manual SGD update


def binary_enc(num):
    """Return the binary digits of ``num`` as a list, zero-padded on the left.

    Padding brings the list up to the module-level ``input_size``; longer
    encodings are returned as they are.
    """
    digits = list(map(int, '{:b}'.format(num)))
    missing = input_size - len(digits)
    return [0] * missing + digits


def binary_dec(array):
    """Convert a most-significant-first sequence of binary digits to an integer.

    Elements are passed through ``int()``; an empty sequence yields 0.
    """
    total = 0
    for digit in array:
        total *= 2
        total += int(digit)
    return total


def training_test_gen(x, y):
    """Randomly partition paired samples into a 90% training and 10% test set.

    Both arguments must have the same length and accept an integer index
    array. Returns ``(trX, trY, teX, teY)``.
    """
    n_samples = len(x)
    assert n_samples == len(y)
    shuffled = np.random.permutation(range(n_samples))
    n_train = int(0.9 * len(shuffled))
    train_part = shuffled[:n_train]
    test_part = shuffled[n_train:]
    return x[train_part], y[train_part], x[test_part], y[test_part]


def x_y_gen():
    """Generate binary-encoded inputs and one-hot FizzBuzz labels for 0..999.

    Label columns are ordered (divisible by 15, by 5, by 3, none of these).
    The data is handed to ``training_test_gen`` and its split is returned.
    """
    # (divisor, one-hot column) pairs, checked from most to least specific.
    divisor_slots = ((15, 0), (5, 1), (3, 2))
    features = []
    targets = []
    for n in range(1000):
        features.append(binary_enc(n))
        slot = next((col for div, col in divisor_slots if n % div == 0), 3)
        row = [0, 0, 0, 0]
        row[slot] = 1
        targets.append(row)
    return training_test_gen(np.array(features), np.array(targets))


def check_fizbuz(i):
    """Return the ground-truth label of ``i``: 'fizbuz', 'buz', 'fiz' or 'number'."""
    # Ordered from the most specific divisor to the least specific one.
    for divisor, label in ((15, 'fizbuz'), (5, 'buz'), (3, 'fiz')):
        if i % divisor == 0:
            return label
    return 'number'


# Build the shuffled train/test split as NumPy arrays.
trX, trY, teX, teY = x_y_gen()

# Float tensor type to cast to: the CUDA variant when a GPU is available.
if th.cuda.is_available():
    dtype = th.cuda.FloatTensor
else:
    dtype = th.FloatTensor

# Data tensors. Plain tensors do not track gradients by default, so the
# deprecated Variable(..., requires_grad=False) wrapper is not needed.
trX_tensor = th.from_numpy(trX).type(dtype)
trY_tensor = th.from_numpy(trY).type(dtype)
teX_tensor = th.from_numpy(teX).type(dtype)
teY_tensor = th.from_numpy(teY).type(dtype)

# Trainable parameters: normal weights and zero biases, marked as autograd leaves.
# The input width follows input_size so it always matches binary_enc's output;
# the two randn calls stay in this order so a seeded run draws the same values.
w1 = th.randn(input_size, 100).type(dtype).requires_grad_()
w2 = th.randn(100, 4).type(dtype).requires_grad_()

b1 = th.zeros(1, 100).type(dtype).requires_grad_()
b2 = th.zeros(1, 4).type(dtype).requires_grad_()

# Number of full mini-batches per epoch (a trailing partial batch is dropped).
no_of_batches = int(len(trX) / batches)

# Train for `epochs` passes over the training set with manual SGD on autograd gradients.
for epoch in range(epochs):
    for batch in range(no_of_batches):
        # Slice out the current mini-batch of `batches` samples.
        start = batch * batches
        end = start + batches
        trX_tensor_batch = trX_tensor[start:end]
        trY_tensor_batch = trY_tensor[start:end]

        # Forward pass: two affine layers, each followed by a sigmoid.
        z1 = trX_tensor_batch.matmul(w1).add(b1)
        a1 = z1.sigmoid()
        z2 = a1.matmul(w2).add(b2)
        y_ = z2.sigmoid()

        # Sum-of-squares loss; backward() accumulates into each parameter's .grad.
        error = trY_tensor_batch - y_
        loss = error.pow(2).sum()
        loss.backward()

        # Update outside of autograd, then clear the gradient of EVERY parameter.
        # Leaving b1/b2 uncleared would make their gradients accumulate across steps.
        with th.no_grad():
            for param in (w1, w2, b1, b2):
                param -= lr * param.grad
                param.grad.zero_()
    # Report the loss of the last batch; a mean of signed errors can cancel to ~0.
    print(epoch, loss.item())
    
# Evaluate on the held-out test set; no gradients are needed for inference.
with th.no_grad():
    z1 = teX_tensor.matmul(w1).add(b1)
    a1 = z1.sigmoid()
    z2 = a1.matmul(w2).add(b2)
    y_predict = z2.sigmoid()

# Class names in the same order as the one-hot label columns.
outli = ['fizbuz', 'buz', 'fiz', 'number']
# Predicted class per sample (the undefined name `hyp` previously used here raised NameError).
predicted = y_predict.argmax(dim=1)
for row, pred in zip(teX_tensor, predicted.tolist()):
    num = binary_dec(row)
    print('Number: {} -- Actual: {} -- Prediction: {}'.format(num, check_fizbuz(num), outli[pred]))

# Test metrics. The loss is the mean *squared* error, because signed errors cancel.
print('Test loss: ', (teY_tensor - y_predict).pow(2).mean().item())
# Fraction of test samples whose arg-max class matches the one-hot label.
print('Test accuracy: ', (predicted == teY_tensor.argmax(dim=1)).float().mean().item())

# 使用pytorch的高阶API

In [None]:
import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

# Network dimensions: input width, hidden-layer width, number of output classes.
input_size, hidden_size, output_size = 10, 100, 4
# Training schedule: number of epochs and samples per mini-batch.
epochs, batches = 1000, 64
# Learning rate handed to the optimizer.
lr = 0.01

def binary_enc(num):
    """Turn ``num`` into a list of binary digits, left-padded with zeros.

    The list is padded to the module-level ``input_size``; an encoding that is
    already longer is returned without padding.
    """
    text = '{0:b}'.format(num)
    result = [0] * (input_size - len(text))
    result.extend(int(ch) for ch in text)
    return result

def binary_dec(array):
    """Interpret ``array`` as binary digits, most significant first, and return the integer.

    Every element goes through ``int()``; an empty input gives 0.
    """
    number = 0
    for bit in array:
        number = int(bit) + 2 * number
    return number


def training_test_gen(x, y):
    """Split paired samples into shuffled training (90%) and test (10%) subsets.

    ``x`` and ``y`` must be equally long and indexable by an integer index
    array. Returns ``(trX, trY, teX, teY)``.
    """
    assert len(x) == len(y)
    indices = np.random.permutation(range(len(x)))
    boundary = int(0.9 * len(indices))
    # Slices selecting the training head and the test tail of the permutation.
    head, tail = slice(None, boundary), slice(boundary, None)
    trX, teX = x[indices[head]], x[indices[tail]]
    trY, teY = y[indices[head]], y[indices[tail]]
    return trX, trY, teX, teY

def x_y_gen():
    """Create the training and test data for the numbers 0..999.

    Each input is ``binary_enc(i)``; each label is a one-hot row ordered as
    (divisible by 15, by 5, by 3, none). The arrays are split by
    ``training_test_gen`` and that split is returned.
    """
    def one_hot(n):
        # The most specific divisor decides the class.
        if n % 15 == 0:
            return [1, 0, 0, 0]
        if n % 5 == 0:
            return [0, 1, 0, 0]
        if n % 3 == 0:
            return [0, 0, 1, 0]
        return [0, 0, 0, 1]

    samples = []
    labels = []
    for value in range(1000):
        samples.append(binary_enc(value))
        labels.append(one_hot(value))
    return training_test_gen(np.array(samples), np.array(labels))

def check_fizbuz(i):
    """Return the reference label for ``i``.

    'fizbuz' when divisible by 15, 'buz' when divisible by 5, 'fiz' when
    divisible by 3, otherwise 'number'.
    """
    return (
        'fizbuz' if i % 15 == 0
        else 'buz' if i % 5 == 0
        else 'fiz' if i % 3 == 0
        else 'number'
    )

# Build the shuffled train/test split as NumPy arrays.
trX, trY, teX, teY = x_y_gen()

# Float tensor type to cast to: the CUDA variant when a GPU is available.
if th.cuda.is_available():
    dtype = th.cuda.FloatTensor
else:
    dtype = th.FloatTensor

# Data tensors. Plain tensors do not track gradients by default, so the
# deprecated Variable(..., requires_grad=False) wrapper is not needed.
trX_tensor = th.from_numpy(trX).type(dtype)
trY_tensor = th.from_numpy(trY).type(dtype)
teX_tensor = th.from_numpy(teX).type(dtype)
teY_tensor = th.from_numpy(teY).type(dtype)


# Network definition.
class FizBuzNet(nn.Module):
    """Fully connected network with one sigmoid hidden layer for FizzBuzz classification.

    Layer widths come from the module-level ``input_size``, ``hidden_size``
    and ``output_size``. The output layer is linear: ``forward`` returns raw
    scores with no sigmoid applied to them.
    """

    def __init__(self):
        super(FizBuzNet, self).__init__()
        self.hidden = nn.Linear(input_size, hidden_size)  # input -> hidden layer
        self.out = nn.Linear(hidden_size, output_size)    # hidden -> output layer
        # Move the parameters to the GPU only when one exists; an unconditional
        # .cuda() raises on CPU-only machines.
        if th.cuda.is_available():
            self.cuda()

    def forward(self, input_batch):
        """Return the output scores for ``input_batch``.

        Computes ``z1 = hidden(x)``, ``a1 = sigmoid(z1)`` and returns ``out(a1)``.
        """
        z1 = self.hidden(input_batch)
        # `th` is the alias under which torch is imported in this file.
        a1 = th.sigmoid(z1)
        return self.out(a1)

# Instantiate the network and print its layer summary.
net = FizBuzNet()
print(net)

# Number of full mini-batches per epoch (a trailing partial batch is dropped).
no_of_batches = int(len(trX) / batches)

# Plain SGD over all network parameters. torch is imported under the alias `th`,
# so the bare name `torch` is not defined here.
optimizer = th.optim.SGD(net.parameters(), lr=lr)

# Training.
# Mean-squared-error loss, built once outside the loop. The default reduction is
# the mean, which is what the deprecated reduce=True/size_average=True selected.
loss_fn = nn.MSELoss()
for epoch in range(epochs):
    for batch in range(no_of_batches):
        # Slice out the current mini-batch. `end` must lie `batches` samples
        # after `start`; with end == start every batch would be empty.
        start = batch * batches
        end = start + batches
        trX_tensor_batch = trX_tensor[start:end]
        trY_tensor_batch = trY_tensor[start:end]

        y_ = net(trX_tensor_batch)

        # Loss between the prediction (input) and the one-hot target.
        loss = loss_fn(y_, trY_tensor_batch)

        # Clear old gradients, back-propagate, then apply one SGD step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Evaluate on the held-out test set; no gradients are needed for inference.
with th.no_grad():
    y_predict = net(teX_tensor)

# Class names in the same order as the one-hot label columns.
outli = ['fizbuz', 'buz', 'fiz', 'number']
# Predicted class per sample (the undefined name `hyp` previously used here raised NameError).
predicted = y_predict.argmax(dim=1)
for row, pred in zip(teX_tensor, predicted.tolist()):
    num = binary_dec(row)
    print('Number: {} -- Actual: {} -- Prediction: {}'.format(num, check_fizbuz(num), outli[pred]))

# Test metrics. The loss is the mean *squared* error, because signed errors cancel.
print('Test loss: ', (teY_tensor - y_predict).pow(2).mean().item())
# Fraction of test samples whose arg-max class matches the one-hot label.
print('Test accuracy: ', (predicted == teY_tensor.argmax(dim=1)).float().mean().item())