In [87]:
#CNN (d-M-1)

In [2]:
import numpy as np 

In [3]:
#get data
train_data=np.loadtxt('hw4_nnet_train.txt')
test_data=np.loadtxt('hw4_nnet_test.txt')

train_X=train_data[:,0:-1]
train_y=train_data[:,-1]
test_X=test_data[:,0:-1]
test_y=test_data[:,-1]

In [4]:
class CNN(object):
    def __init__(self,M,r,eta,T):  # M是隐藏层神经元数目，r是theta随机化的范围，eta是学习率,T表示总共训练T个数据
        self.M=M
        self.r=r
        self.eta=eta
        self.T=T
        
    # tanh的导数函数
    def dertanh(self,s):
        return 1-np.tanh(s)**2
    
    # 初始化theta函数
    def inittheta(self,d):
        theta1 = np.random.uniform(-self.r, self.r, (d, self.M))
        theta2 = np.random.uniform(-self.r, self.r, (self.M+1, 1))
        return theta1,theta2
    
    # 神经网络函数---BP更新参数 （前向+反向）
    def updateTheta(self,train_X, train_y):
        row, col = train_X.shape           #col这里为2，表示2个特征值
        theta1,theta2=self.inittheta(col)
        for i in range(self.T):
            # 前向传播
            randpos = np.random.randint(0, row)
            xone = train_X[randpos: randpos+1, :]               #一行两列，随机取出一个数据训练
            yone = train_y[randpos]
            s1 = xone.dot(theta1)
            x1 = np.tanh(s1)
            x1 = np.c_[np.ones((1, 1)), x1]      # np.c_是行连接两个矩阵,np.r_是列连接两个矩阵
            s2 = x1.dot(theta2)
            x2 = np.tanh(s2)[0][0]
            delta2 = -2*(yone-x2)
            delta1 = delta2*theta2[1:, :].T*self.dertanh(s1)
            theta2 -= self.eta*x1.T*delta2
            theta1 -= self.eta*xone.T.dot(delta1)
        return theta1,theta2
    
    # 误差衡量函数
    def errfun(self,test_X, test_y,theta):
        row, col = test_X.shape
        l = len(theta)          #这里l=2
        x = test_X
        for i in range(l-1):
            x = np.c_[np.ones((row, 1)), np.tanh(x.dot(theta[i]))]
        x2 = np.tanh(x.dot(theta[l-1]))
        Yhat = x2
        Yhat[Yhat>=0] = 1
        Yhat[Yhat<0] = -1
        Yhat=Yhat.reshape(Yhat.shape[0],)             #把dim从原来的（M，1）变成 （M，）
        return np.sum(Yhat != test_y)/row

In [5]:
# Q11，比较中间隐藏层不同神经元个数对结果的影响
M = [1, 6, 11, 16, 21]
eout = np.zeros((len(M),))
for i in range(3):
    if (i+1)%5 == 0:
        print("completion rate: ",i+1,"/ 3")
    for j in range(len(M)):
        network=CNN(M[j],0.1,0.1,50000)
        theta1,theta2=network.updateTheta(train_X,train_y)
        theta=[theta1,theta2]
        eout[j] += network.errfun(test_X, test_y,theta)
print(eout/3)

completion rate:  0 / 3
completion rate:  5 / 3
completion rate:  10 / 3
completion rate:  15 / 3
completion rate:  20 / 3
completion rate:  25 / 3
[4.796      4.9        4.764      4.61333333 4.756     ]


In [98]:
# Q12，比较theta随机化的范围对结果的影响
r = [0, 0.1, 10, 100, 1000]
eout = np.zeros((len(r),))
for i in range(50):
    if (i+1)%5 == 0:
        print("completion rate: ",i+1,"/ 50")
    for j in range(len(r)):
        network=CNN(3,r[j],0.1,50000)
        theta=network.updateTheta(train_X,train_y)
        eout[j] += network.errfun(test_X, test_y, theta)
print(eout / 50)

completion rate:  5 / 20
completion rate:  10 / 20
completion rate:  15 / 20
completion rate:  20 / 20
[0.4888 0.5008 0.4418 0.4234 0.4526]


In [111]:
# Q13 比较学习率对结果的影响
eta = [0.001, 0.01, 0.1, 1, 10]
eout = np.zeros((len(eta),))
for i in range(50):
    if (i+1)%5 == 0:
        print("completion rate: ",i+1,"/ 50")
    for j in range(len(eta)):
        network=CNN(3,0.1,eta[j],50000)
        theta=network.updateTheta(train_X,train_y)
        eout[j] += network.errfun(test_X, test_y, theta)
print(eout / 50)

completion rate:  5 / 50
completion rate:  10 / 50
completion rate:  15 / 50
completion rate:  20 / 50
completion rate:  25 / 50
completion rate:  30 / 50
completion rate:  35 / 50
completion rate:  40 / 50
completion rate:  45 / 50
completion rate:  50 / 50
[0.62256 0.49632 0.49024 0.45712 0.45424]


In [None]:
#扩展网络，将其变为d−8−3−1型的神经网络，其他与之前网络均类似
# 多层神经网络
def nnetwork2hidden(X, Y, d1, d2, T):
    row, col = X.shape
    theta1 = np.random.uniform(-0.1, 0.1, (col, d1))
    theta2 = np.random.uniform(-0.1, 0.1, (d1+1, d2))
    theta3 = np.random.uniform(-0.1, 0.1, (d2+1, 1))
    for i in range(T):
        # 前向传播
        randpos = np.random.randint(0, row)
        xone = X[randpos: randpos+1, :]
        yone = Y[randpos]
        s1 = xone.dot(theta1)
        x1 = np.tanh(s1)
        x1 = np.c_[np.ones((1, 1)), x1]
        s2 = x1.dot(theta2)
        x2 = np.tanh(s2)
        x2 = np.c_[np.ones((1, 1)), x2]
        s3 = x2.dot(theta3)
        x3 = np.tanh(s3)[0][0]
        delta3 = -2*(yone-x3)
        delta2 = delta3*theta3[1:, :].T*dertanh(s2)
        delta1 = delta2.dot(theta2[1:, :].T)*dertanh(s1)
        theta3 -= 0.01*x2.T*delta3
        theta2 -= 0.01*x1.T*delta2
        theta1 -= 0.01*xone.T.dot(delta1)
    return theta1, theta2, theta3

In [None]:
# Q14,设置r=0.1,η=0.01，看看该多层神经网络怎么样
eout = 0
for i in ran，看看该多层神经网络怎么样ge(50):
    theta1, theta2, theta3 = nnetwork2hidden(X, Y, 8, 3, 50000)
    theta = [theta1, theta2, theta3]
    eout += errfun(Xtest, Ytest, theta)
print(eout/50)