In [1]:
import torch
from torch import nn,optim
from torch.nn import functional as F

In [29]:
class Residual(nn.Module):
    def __init__(self,input_channels,num_channels,use_1x1conv=False,strides=1):
        super().__init__()
        self.conv1=nn.Conv2d(input_channels,num_channels,kernel_size=3,padding=1,stride=strides)
        self.conv2=nn.Conv2d(num_channels,num_channels,kernel_size=3,padding=1)
        
        if use_1x1conv:
            self.conv3=nn.Conv2d(input_channels,num_channels,kernel_size=1,stride=strides)
        else:
            self.conv3=None
        self.bn1=nn.BatchNorm2d(num_channels)
        self.bn2=nn.BatchNorm2d(num_channels)
    
    def forward(self,X):
        print(X.shape)
        Y=F.relu(self.bn1(self.conv1(X)))
        print(Y.shape)
        Y=self.bn2(self.conv2(Y))
        print(Y.shape)
        if self.conv3:
            X=self.conv3(X)
            print(X.shape)
        Y+=X
        print(Y.shape)
        return F.relu(Y)

In [25]:
blk = Residual(3,3)
X = torch.rand(4, 3, 6, 6)
Y = blk(X)
Y.shape

torch.Size([4, 3, 6, 6])
torch.Size([4, 3, 6, 6])
torch.Size([4, 3, 6, 6])


torch.Size([4, 3, 6, 6])

In [26]:
blk = Residual(3,6, use_1x1conv=True, strides=2)
blk(X).shape

torch.Size([4, 6, 3, 3])
torch.Size([4, 6, 3, 3])
torch.Size([4, 6, 3, 3])
torch.Size([4, 6, 3, 3])


torch.Size([4, 6, 3, 3])

In [None]:
(3-3+2+2)/2  = 4,6,2,2
4,6,(2-3+2+2)/2=4,6,1,1
4,6,3,3 \ 1 * 1 = 4,6,3,3

In [14]:
def corr2d_multi_in_out_1x1(X, K):
    c_i, h, w = X.shape
    c_o = K.shape[0]
    X = X.reshape((c_i, h * w))
    K = K.reshape((c_o, c_i))
    Y = torch.matmul(K, X)
    return Y.reshape((c_o, h, w))

In [15]:
X = torch.normal(0, 1, (3, 3, 3))
K = torch.normal(0, 1, (2, 3, 1, 1))

In [17]:
 corr2d_multi_in_out_1x1(X, K).shape

torch.Size([2, 3, 3])

In [18]:
X1 = torch.normal(0, 1, (4,6, 3, 3))
X2 = torch.normal(0, 1, (4,6, 1, 1))
(X1+X2).shape

torch.Size([4, 6, 3, 3])

In [28]:
class Residual2(nn.Module):
    def __init__(self,input_channels,num_channels,use_1x1conv=False,strides=1):
        super().__init__()
        self.conv1=nn.Conv2d(input_channels,num_channels,kernel_size=3,padding=1,stride=strides)
        self.conv2=nn.Conv2d(num_channels,num_channels,kernel_size=3,padding=1)
        
        if use_1x1conv:
            self.conv3=nn.Conv2d(input_channels,num_channels,kernel_size=1,stride=strides)
        else:
            self.conv3=None
        self.bn1=nn.BatchNorm2d(num_channels)
        self.bn2=nn.BatchNorm2d(num_channels)
    
    def forward(self,X):
        Y=F.relu(self.bn1(self.conv1(X)))
        Y=self.bn2(self.conv2(Y))
        if self.conv3:
            X=self.conv3(X)
#         Y+=X
        return F.relu(Y)

In [21]:
blk = Residual2(3,3)
X = torch.rand(4, 3, 6, 6)
Y = blk(X)
Y.shape

torch.Size([4, 3, 6, 6])

In [22]:
blk = Residual2(3,6, use_1x1conv=True, strides=2)
blk(X).shape

torch.Size([4, 6, 3, 3])

In [30]:
blk = Residual(3,6, use_1x1conv=True, strides=2)
blk(X).shape

torch.Size([4, 3, 6, 6])
torch.Size([4, 6, 3, 3])
torch.Size([4, 6, 3, 3])
torch.Size([4, 6, 3, 3])
torch.Size([4, 6, 3, 3])


torch.Size([4, 6, 3, 3])

4,3,6,6 =>nn.Conv2d(3,6,3,1,2)=>4,6,(6-3+2+2)/2=3,3
4,6,3,3=>nn.Conv2d(6,6,3,1)=>4,6,(3-3+2+1)/1=3=>4,6,3,3
4,3,6,6=>nn.Conv2d(3,6,1,2)=>4,6,(6-1+0+2)/2=3=>4,6,3,3

In [36]:
class Residual(nn.Module):
    def __init__(self,input_channels,num_channels,use_1x1conv=False,strides=1):
        super().__init__()
        self.conv1=nn.Conv2d(input_channels,num_channels,kernel_size=3,padding=1,stride=strides)
        self.conv2=nn.Conv2d(num_channels,num_channels,kernel_size=3,padding=1)
        
        if use_1x1conv:
            self.conv3=nn.Conv2d(input_channels,num_channels,kernel_size=1,stride=strides)
        else:
            self.conv3=None
        self.bn1=nn.BatchNorm2d(num_channels)
        self.bn2=nn.BatchNorm2d(num_channels)
    
    def forward(self,X):
#         print(X.shape)
        Y=F.relu(self.bn1(self.conv1(X)))
#         print(Y.shape)
        Y=self.bn2(self.conv2(Y))
#         print(Y.shape)
        if self.conv3:
            X=self.conv3(X)
#             print(X.shape)
        Y+=X
#         print(Y.shape)
        return F.relu(Y)

In [37]:
b1=nn.Sequential(nn.Conv2d(1,64,kernel_size=7,stride=2,padding=3),
                nn.BatchNorm2d(64),nn.ReLU(),
                nn.MaxPool2d(kernel_size=3,stride=2,padding=1))

In [38]:
def resnet_block(input_channles,num_channels,num_residuals,first_block=False):
    blk=[]
    for i in range(num_residuals):
        if i==0 and not first_block:
            blk.append(Residual(input_channles,num_channels,use_1x1conv=True,strides=2))
        else:
            blk.append(Residual(num_channels,num_channels))
    return blk

In [39]:
b2=nn.Sequential(*resnet_block(64,64,2,first_block=True))
b3=nn.Sequential(*resnet_block(64,128,2))
b4=nn.Sequential(*resnet_block(128,256,2))
b5=nn.Sequential(*resnet_block(256,512,2))

In [40]:
net=nn.Sequential(b1,b2,b3,b4,b5,nn.AdaptiveAvgPool2d((1,1)),nn.Flatten(),nn.Linear(512,10))

In [41]:
X=torch.rand((1,1,224,224))
for layer in net:
    X=layer(X)
    print(layer.__class__.__name__,'output shape : \t',X.shape)

Sequential output shape : 	 torch.Size([1, 64, 56, 56])
Sequential output shape : 	 torch.Size([1, 64, 56, 56])
Sequential output shape : 	 torch.Size([1, 128, 28, 28])
Sequential output shape : 	 torch.Size([1, 256, 14, 14])
Sequential output shape : 	 torch.Size([1, 512, 7, 7])
AdaptiveAvgPool2d output shape : 	 torch.Size([1, 512, 1, 1])
Flatten output shape : 	 torch.Size([1, 512])
Linear output shape : 	 torch.Size([1, 10])


In [45]:
def evaluate_acc_gpu(net,data_iter,device=None):
    if isinstance(net,nn.Module):
        device='cuda' if torch.cuda.is_available() else 'cpu'
        net.to(device)
        net.eval()
        l_sum=[]
        for X,y in data_iter:
            X,y=X.to(device),y.to(device)
            y_hat=net(X)
            l_sum.append((sum(torch.argmax(y_hat,dim=1).reshape(y.shape)==y) / y.shape[0]).item())
    return sum(l_sum)/len(l_sum)

In [53]:
for l in net:
    print(type(l))

<class 'torch.nn.modules.container.Sequential'>
<class 'torch.nn.modules.container.Sequential'>
<class 'torch.nn.modules.container.Sequential'>
<class 'torch.nn.modules.container.Sequential'>
<class 'torch.nn.modules.container.Sequential'>
<class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>
<class 'torch.nn.modules.flatten.Flatten'>
<class 'torch.nn.modules.linear.Linear'>


In [52]:
for l in net:
    print(l)

Sequential(
  (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
)
Sequential(
  (0): Residual(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): Residual(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 

In [54]:
def train_ch6(net,train_iter,test_iter,num_epochs,lr,device):
    def init_weight(m):
        if type(m) ==nn.Linear or type(m) ==nn.Conv2d:
            torch.nn.init.xavier_uniform_(m.weight)
    net.apply(init_weight)
    print('training on : ',device)
    net.to(device)
    loss=nn.CrossEntropyLoss()
    optimizer=optim.SGD(net.parameters(),lr=lr)
    for epoch in range(num_epochs):
        tr_l=[]
        tr_acc=[]
        net.train()
        for X,y in train_iter:
            X,y=X.to(device),y.to(device)
            y_hat=net(X)
            l=loss(y_hat,y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            
            with torch.no_grad():
                tr_l.append(l.item())
                tr_acc.append((sum(torch.argmax(y_hat,dim=1).reshape(y.shape) ==y ) / y.shape[0] ).item() )
        test_acc=evaluate_acc_gpu(net,test_iter,device)
        print('epoch : ',epoch ,' train loss : ',sum(tr_l)/len(tr_l),' train acc : ', sum(tr_acc)/len(tr_acc),'test acc : ',test_acc)

In [55]:
import torchvision
from torchvision import transforms

In [56]:
trans=[transforms.ToTensor()]
trans.insert(0,transforms.Resize(96))
trans=transforms.Compose(trans)
mnist_train=torchvision.datasets.FashionMNIST(
    root='F:\study\ml\DataSet\FashionMNIST',train=True,
    download=True,transform=trans)
mnist_test=torchvision.datasets.FashionMNIST(
    root='F:\study\ml\DataSet\FashionMNIST',train=False,
    download=True,transform=trans)

In [58]:
train_iter=torch.utils.data.DataLoader(mnist_train,batch_size,shuffle=True)
test_iter=torch.utils.data.DataLoader(mnist_test,batch_size,shuffle=False)

In [59]:
lr, num_epochs, batch_size = 0.05, 10, 256
train_ch6(net, train_iter, test_iter, num_epochs, lr, 'cpu')

training on :  cpu
epoch :  0  train loss :  0.48153917890913944  train acc :  0.8324634308510638 test acc :  0.70205078125


KeyboardInterrupt: 