In [1]:
import torch
import torch.nn as nn
## Goal: answer the following question:
## does BatchNorm2d need weight, bias, running_var, and running_mean for inference?


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Create a simple dataset
# The batch X holds 100 samples; each sample is a random (1, 4, 4) tensor
# (1 channel, 4x4 pixels) and each label is randomly 0 or 1.
# Seed the global RNG first so the random data below is identical on every
# run; later draws from the same global generator become repeatable as well.
SEED = 0
torch.manual_seed(SEED)
X = torch.randn(100, 1, 4, 4)  # Input data, shape (100, 1, 4, 4)
y = torch.randint(0, 2, (100,))  # Labels: 0 or 1 for 2 classes

# Create a DataLoader for the training data
# 100 samples with batch_size=8 gives 13 batches per epoch (the last one has 4 samples).
dataset = TensorDataset(X, y)
train_loader = DataLoader(dataset, batch_size=8, shuffle=True)

# Define a simple neural network
class SimpleNet(nn.Module):
    """Small conv -> batch-norm -> two-layer MLP classifier.

    The 2x2 convolution (no padding) maps a (N, 1, 4, 4) input to
    (N, 5, 3, 3); ``fc1`` is sized for exactly that flattened shape
    (5 * 3 * 3 features). The network returns (N, 2) logits.

    Args:
        verbose: When True (the default, which keeps the original behaviour)
            ``forward`` prints the input, the convolution output and the
            batch-norm output. Pass False, or set ``model.verbose = False``,
            to silence the dumps, e.g. while training.
    """

    def __init__(self, verbose=True):
        super().__init__()
        # Creation order (conv1, fc1, fc2, bn) is kept on purpose: it fixes
        # both the key order of state_dict() and the order in which the
        # random initialisers consume the RNG.
        self.conv1 = nn.Conv2d(1, 5, kernel_size=2, padding=0)
        self.fc1 = nn.Linear(5 * 3 * 3, 32)
        self.fc2 = nn.Linear(32, 2)
        self.bn = nn.BatchNorm2d(5)
        # Plain Python attribute: it is not a parameter or buffer, so it does
        # not appear in state_dict().
        self.verbose = verbose

    def forward(self, x):
        """Return the (N, 2) logits for input ``x``, optionally printing intermediates."""
        if self.verbose:
            print('input ', x)
        x = self.conv1(x)
        if self.verbose:
            print('after conv ', x)
        x = self.bn(x)
        if self.verbose:
            print('after bn ', x)
        x = x.view(x.size(0), -1)  # Flatten everything except the batch dimension
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Build the network together with its loss function and optimiser
model = SimpleNet()
model.train()  # put the module in training mode before optimising
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Optimise for a fixed number of passes over the training data
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0  # sum of the per-batch losses seen in this epoch
    for inputs, labels in train_loader:
        # Forward pass: logits for the batch and the resulting loss
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Clear the gradients of the previous step, backpropagate, update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the scalar loss for the epoch report
        running_loss += loss.item()

    # Report the mean of the batch losses for this epoch
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}')

print('Training completed.')


input  tensor([[[[ 1.6043,  0.5144,  0.9251,  0.6100],
          [ 0.8714, -0.9061,  0.0516, -0.1753],
          [ 0.1364,  1.0469,  0.5141,  0.0853],
          [ 0.4005,  0.5466,  0.7327, -2.0647]]],


        [[[ 0.4069, -1.1692, -1.3030,  0.2557],
          [-0.3764,  0.6163,  0.3434,  0.7476],
          [-1.6031, -1.0823, -0.4912,  2.2541],
          [-1.6909, -0.2388,  0.6077, -1.7233]]],


        [[[-0.7837,  1.5876,  0.4674, -1.2671],
          [ 1.0338, -1.1834,  0.6811, -1.3463],
          [-0.4633,  1.8787,  1.8904,  0.7148],
          [ 1.2089,  0.6662, -0.9508,  0.3406]]],


        [[[-0.3101,  0.6381,  0.8811, -1.3285],
          [ 0.3927,  1.0629,  1.8784, -0.2823],
          [-1.0077, -1.1627,  0.6188,  1.5428],
          [-0.8968, -0.9647,  2.1878, -0.6009]]],


        [[[ 1.2556,  1.4985, -1.4034,  0.0220],
          [-0.7165,  0.8852,  0.6385,  1.0377],
          [ 0.0127,  0.8899,  1.2042, -0.3971],
          [-0.1541,  0.1557,  0.7384,  0.2908]]],


        [[[ 0

In [5]:
# Snapshot the parameters and buffers right after training and print them.
# state_dict() returns tensors that share storage with the live model, so each
# entry is cloned; the snapshot then stays fixed even if the model changes later.
# state_dict() is also called only once instead of once per printed entry.
param_train = model.state_dict()
for param_tensor in list(param_train):
    param_train[param_tensor] = param_train[param_tensor].clone()
    print(param_tensor, "\t", param_train[param_tensor])

conv1.weight 	 tensor([[[[-0.0592, -0.4612],
          [ 0.3614, -0.3047]]],


        [[[-0.3991,  0.3419],
          [ 0.0810, -0.0641]]],


        [[[-0.0697,  0.0165],
          [ 0.2275,  0.4673]]],


        [[[-0.4836,  0.3153],
          [-0.0036,  0.4338]]],


        [[[ 0.2416, -0.3558],
          [-0.2597, -0.1830]]]])
conv1.bias 	 tensor([-0.4969, -0.2724,  0.1204,  0.0274, -0.1125])
fc1.weight 	 tensor([[ 0.1076,  0.0179,  0.1383,  ..., -0.0846, -0.0677,  0.1551],
        [-0.0678,  0.0708, -0.1192,  ..., -0.0134,  0.1634, -0.0557],
        [-0.1221,  0.0036,  0.0444,  ...,  0.0932, -0.0379, -0.0925],
        ...,
        [-0.0686, -0.0528,  0.0043,  ...,  0.0115,  0.0654, -0.0476],
        [-0.0620,  0.1181, -0.0341,  ..., -0.0054, -0.1274, -0.0915],
        [ 0.0034,  0.1550, -0.1360,  ...,  0.0467, -0.0595, -0.0329]])
fc1.bias 	 tensor([ 0.0978,  0.0640, -0.0524, -0.0297, -0.0683, -0.1069,  0.1167, -0.0784,
        -0.0906,  0.1101, -0.0015, -0.1270, -0.1234,  0.2053,

In [6]:
# Display the mode flag; model.train() was called before the training loop,
# so this shows True.
model.training

True

In [7]:
# Switch the model to evaluation mode, then display the flag to confirm it
# now reads False.
model.eval()
model.training

False

In [8]:

# dummy data: a single random sample with the same layout as the training inputs
x = torch.randn(1, 1, 4, 4)
print(x)
# Inference only, so run the forward pass without recording an autograd graph.
# NOTE: `y` rebinds the name used for the label tensor in the dataset cell;
# `dataset` keeps its own reference to the labels, so training data is unaffected.
with torch.no_grad():
    y = model(x)
print(y)

# parameters exported
# state_dict() returns tensors that share storage with the live model, so each
# entry is cloned to obtain an independent snapshot; state_dict() is also
# called only once instead of once per printed entry.
params_test = model.state_dict()

for param_tensor in list(params_test):
    params_test[param_tensor] = params_test[param_tensor].clone()
    print(param_tensor, "\t", params_test[param_tensor])

tensor([[[[-0.0151, -0.2458,  0.4091,  2.0912],
          [ 0.9502, -0.6538,  1.3181, -0.9059],
          [-2.3296,  1.1681,  0.8635, -0.7372],
          [ 0.3078, -1.9847, -1.6476,  0.2364]]]])
input  tensor([[[[-0.0151, -0.2458,  0.4091,  2.0912],
          [ 0.9502, -0.6538,  1.3181, -0.9059],
          [-2.3296,  1.1681,  0.8635, -0.7372],
          [ 0.3078, -1.9847, -1.6476,  0.2364]]]])
after conv  tensor([[[[ 0.1600, -1.3090, -0.7333],
          [-1.4495, -0.9071,  0.3796],
          [-0.1818, -1.1796, -0.8755]],

         [[-0.2316, -0.1718,  0.4440],
          [-1.1386,  0.4784, -0.9911],
          [ 1.2088, -0.4986, -1.0176]],

         [[ 0.0280,  0.6115,  0.0030],
          [ 0.0593,  0.8569, -0.1344],
          [-0.5555, -1.1681, -0.2163]],

         [[-0.3297,  0.8494,  0.0913],
          [-0.1232,  1.1296, -1.2185],
          [ 0.6603, -0.9730, -0.5142]],

         [[-0.1559, -0.3889, -0.9343],
          [ 0.7409, -1.2009,  0.4389],
          [-0.8076,  0.6795,  0.7430]