In [1]:
import argparse
import os
import random
import shutil
import time
import warnings

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

In [10]:
? nn.BatchNorm2d

[0;31mInit signature:[0m
 [0mnn[0m[0;34m.[0m[0mBatchNorm2d[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mnum_features[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0meps[0m[0;34m=[0m[0;36m1e-05[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmomentum[0m[0;34m=[0m[0;36m0.1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maffine[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtrack_running_stats[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Applies Batch Normalization over a 4D input (a mini-batch of 2D inputs
with additional channel dimension) as described in the paper
`Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift`_ .

.. math::

    y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

The mean and standard-deviation are calculated per-dimension over
the mini-batches and :math:`\gamma` and :mat

In [38]:
class smallNet(nn.Module):

    def __init__(self):
        super(smallNet, self).__init__()
        # 1 input image channel, 6 output channels, 3x3 square convolution
        # kernel
        self.conv = nn.Conv2d(3, 64, 3)
        # batch         
        self.batch = nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        # relu
        self.relu = nn.ReLU()
        # max pooling        
        self.maxpool = nn.MaxPool2d(2)
#         # full connected         
#         self.fc = nn.Linear(14400, 1)

    def forward(self, x):
        print(x.size())
        x = self.conv(x)
        print(x.size())
        x = self.batch(x)
        print(x.size())
        x = self.relu(x)
        print(x.size())
        x = self.maxpool(x)
        print(x.size())
        x = x.view(-1, self.num_flat_features(x))
        print(x.size())
        return nn.Linear(x.size()[1], 1)(x)
#         # Max pooling over a (2, 2) window
#         x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
#         # If the size is a square you can only specify a single number
#         x = F.max_pool2d(F.relu(self.conv2(x)), 2)
#         x = x.view(-1, self.num_flat_features(x))
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
#         x = self.fc3(x)
#         return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = smallNet()
print(net)

smallNet(
  (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
  (batch): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)


In [39]:
params = list(net.parameters())
for param in params:
    print(param.size())

torch.Size([64, 3, 3, 3])
torch.Size([64])
torch.Size([64])
torch.Size([64])


In [43]:
input = torch.randn(3, 3, 32, 32)
out = net(input)
print(out)

torch.Size([3, 3, 32, 32])
torch.Size([3, 64, 30, 30])
torch.Size([3, 64, 30, 30])
torch.Size([3, 64, 30, 30])
torch.Size([3, 64, 15, 15])
torch.Size([3, 14400])
tensor([[0.0154],
        [0.0214],
        [0.2525]], grad_fn=<AddmmBackward>)


In [5]:
?nn.Dropout

[0;31mInit signature:[0m [0mnn[0m[0;34m.[0m[0mDropout[0m[0;34m([0m[0mp[0m[0;34m=[0m[0;36m0.5[0m[0;34m,[0m [0minplace[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
During training, randomly zeroes some of the elements of the input
tensor with probability :attr:`p` using samples from a Bernoulli
distribution. Each channel will be zeroed out independently on every forward
call.

This has proven to be an effective technique for regularization and
preventing the co-adaptation of neurons as described in the paper
`Improving neural networks by preventing co-adaptation of feature
detectors`_ .

Furthermore, the outputs are scaled by a factor of :math:`\frac{1}{1-p}` during
training. This means that during evaluation the module simply computes an
identity function.

Args:
    p: probability of an element to be zeroed. Default: 0.5
    inplace: If set to ``True``, will do this operation in-place. Default: ``False``

Shape:
   

In [1]:
from datetime import datetime,timezone,timedelta
dt = datetime.utcnow()
print(dt)
dt = dt.replace(tzinfo=timezone.utc)
print(dt)
tzutc_8 = timezone(timedelta(hours=8))
local_dt = dt.astimezone(tzutc_8)
print(local_dt)

2020-02-18 10:13:31.992813
2020-02-18 10:13:31.992813+00:00
2020-02-18 18:13:31.992813+08:00


In [2]:
type(local_dt)

datetime.datetime

In [4]:
print(str(local_dt))

2020-02-18 18:13:31.992813+08:00


In [4]:
checkpoint = torch.load('checkpoints/checkpoints_pure/checkpoint-2020-02-20 13:38:29.968086+08:00/312.pth.tar', map_location='cpu')

In [None]:
checkpoint

In [None]:
print(checkpoint)