In [1]:
import torch
from torch import nn

In [3]:
net=nn.Sequential(
    nn.Conv2d(1,6,kernel_size=5,padding=2),nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2,stride=2),
    nn.Conv2d(6,16,kernel_size=5),nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2,stride=2),
    nn.Flatten(),
    nn.Linear(16*5*5,120),nn.Sigmoid(),
    nn.Linear(120,84),nn.Sigmoid(),
    nn.Linear(84,10)
)

In [5]:
X=torch.rand(size=(1,1,28,28),dtype=torch.float32)
for layer in net:
    X=layer(X)
    print(layer.__class__.__name__,'output shape : \t',X.shape)

Conv2d output shape : 	 torch.Size([1, 6, 28, 28])
Sigmoid output shape : 	 torch.Size([1, 6, 28, 28])
AvgPool2d output shape : 	 torch.Size([1, 6, 14, 14])
Conv2d output shape : 	 torch.Size([1, 16, 10, 10])
Sigmoid output shape : 	 torch.Size([1, 16, 10, 10])
AvgPool2d output shape : 	 torch.Size([1, 16, 5, 5])
Flatten output shape : 	 torch.Size([1, 400])
Linear output shape : 	 torch.Size([1, 120])
Sigmoid output shape : 	 torch.Size([1, 120])
Linear output shape : 	 torch.Size([1, 84])
Sigmoid output shape : 	 torch.Size([1, 84])
Linear output shape : 	 torch.Size([1, 10])


In [9]:
batch_size=256
train_iter,test_iter=

('0.weight', Parameter containing:
tensor([[[[ 1.1942e-01,  1.7167e-01,  4.8540e-02, -2.6645e-02, -1.4289e-01],
          [ 3.4561e-02, -1.3729e-01, -8.7327e-02,  5.4697e-02,  1.0098e-02],
          [ 3.2080e-03,  1.3585e-01,  6.2651e-02,  9.8863e-02, -3.3620e-02],
          [-7.5267e-02, -9.6052e-02, -1.2625e-02,  1.9905e-01, -8.4561e-02],
          [ 3.1591e-02,  1.7671e-01, -1.7678e-01, -3.5893e-03,  1.9120e-01]]],


        [[[-7.9199e-02,  9.1871e-02, -8.1361e-02, -1.1113e-01, -1.6436e-01],
          [ 1.9934e-01,  4.9690e-02, -1.8053e-01,  1.4280e-01, -1.0225e-01],
          [-6.8607e-03, -7.3043e-02,  1.7045e-01, -3.4399e-03, -4.4045e-02],
          [-1.7724e-01,  8.8789e-02, -1.2791e-04,  1.6151e-01, -1.5816e-01],
          [ 3.1847e-02, -1.8677e-01, -9.3110e-02,  4.6811e-02,  1.6406e-01]]],


        [[[ 7.9657e-02, -1.8574e-01, -1.0006e-01,  1.7286e-01,  1.0487e-01],
          [-1.2398e-01, -1.3908e-01, -1.8012e-01,  4.4638e-02,  8.5963e-02],
          [-1.8000e-01,  8.8959e-

In [13]:
import torchvision
from torchvision import transforms

In [14]:
batch_size=256
mnist_train=torchvision.datasets.FashionMNIST(
    root='F:\study\ml\DataSet\FashionMNIST',train=True,
    download=True,transform=transforms.ToTensor())
mnist_test=torchvision.datasets.FashionMNIST(
    root='F:\study\ml\DataSet\FashionMNIST',train=False,
    download=True,transform=transforms.ToTensor())

In [17]:
train_iter=torch.utils.data.DataLoader(mnist_train,batch_size,shuffle=True)
test_iter=torch.utils.data.DataLoader(mnist_test,batch_size,shuffle=False)

In [64]:
def evaluate_accuracy_gpu(net,data_iter,device=None):
    if isinstance(net,nn.Module):
        net.eval()
        if not device:
            device=next(iter(net.parameters())).device
    l_sum=[]
    for X,y in data_iter:
        y_hat=net(X)
        l_sum.append( (sum(torch.argmax(y_hat,dim=1).reshape(y.shape) == y)/y.shape[0]).item())
    return sum(l_sum)/len(l_sum)

In [65]:
def train_ch6(net,train_iter,test_iter,num_epochs,lr,device):
    def init_weight(m):
        if type(m)==nn.Linear or type(m)==nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weight)
    print('training on',device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss=nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        tr_l=[]
        tr_acc=[]
        net.train()
        for X,y in train_iter:
            optimizer.zero_grad()
            X,y=X.to(device),y.to(device)
            y_hat=net(X)
#             print(y_hat)
            l=loss(y_hat,y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                tr_l.append(l.item())
#                 print(y.shape)
#                 print(y_hat.shape)
#                 print(y_hat)
#                 print(y)
#                 print(torch.argmax(y_hat,dim=1))
                tr_acc.append( (sum(torch.argmax(y_hat,dim=1).reshape(y.shape) == y)/y.shape[0]).item())
        test_acc=evaluate_accuracy_gpu(net,test_iter)
        print('epoch : ',epoch ,' train loss : ',sum(tr_l)/len(tr_l),' train acc : ', sum(tr_acc)/len(tr_acc),'test acc : ',test_acc)

In [66]:
lr, num_epochs = 0.9, 10
train_ch6(net, train_iter, test_iter, num_epochs, lr, 'cpu')

training on cpu
epoch :  0  train loss :  2.310947549089472  train acc :  0.10733599288666502 test acc :  0.2197265625
epoch :  1  train loss :  1.2609370756656566  train acc :  0.49905806729134095 test acc :  0.54873046875
epoch :  2  train loss :  0.8356376358803282  train acc :  0.6674977837724888 test acc :  0.7189453125
epoch :  3  train loss :  0.6915651800784659  train acc :  0.7290835550490846 test acc :  0.74228515625
epoch :  4  train loss :  0.6217201315342111  train acc :  0.7574357268658091 test acc :  0.7541015625
epoch :  5  train loss :  0.5742680389830407  train acc :  0.7780640513338941 test acc :  0.75986328125
epoch :  6  train loss :  0.5341895173204706  train acc :  0.7926640071767441 test acc :  0.7923828125
epoch :  7  train loss :  0.5033415785495271  train acc :  0.8083167109083622 test acc :  0.7853515625
epoch :  8  train loss :  0.4785179874998458  train acc :  0.8184563385679366 test acc :  0.7890625
epoch :  9  train loss :  0.45818831261168136  train acc

In [63]:
torch.Tensor([[1]]).item()

1.0