In [2]:
import torch

class Matmul(torch.nn.Module):
    def __init__(self, shape):
        super().__init__()
        self.w = torch.nn.Parameter(torch.randn(shape, requires_grad=True))
        self.b = torch.nn.Parameter(torch.randn(shape, requires_grad=True))

    def __call__(self, x):
        z = torch.matmul(x, self.w) + self.b
        #z = x @ self.w + self.b
        return z

x = torch.ones((5, 5))
y = torch.zeros((5, 5))

model = Matmul((5, 5))

lr = 1

for i in range(10):
    pred_y = model(x)

    loss = torch.nn.functional.binary_cross_entropy_with_logits(pred_y, y)
    loss.backward()

    model.w.data = model.w.data - lr * model.b.grad
    model.b.data = model.b.data - lr * model.b.grad

    #直替换w, b的值，会导致w，b脱离计算图，无法进行反向传播
    #改变其值只需要改变其data的值即可

    model.w.grad.zero_()
    model.b.grad.zero_()

    print(loss)

tensor(1.4089, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(1.2938, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(1.1877, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(1.0903, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(1.0012, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.9201, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.8464, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.7796, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.7192, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.6647, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)


In [16]:
import torch
import random

class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv = torch.nn.Conv2d(1, 1, (5, 5))

    def forward(self, x):
        return self.conv(x)

x1 =  torch.ones((1, 5, 5)).to(torch.float32)
x2 = torch.zeros((1, 5, 5)).to(torch.float32)

y1 =  torch.ones((1, 1, 1)).to(torch.float32)
y2 = torch.zeros((1, 1, 1)).to(torch.float32)

learning_rate = 0.01

model = Model()
criterion = torch.nn.MSELoss(reduction='sum')

model.train()
for i in range(50):
    model.zero_grad()
    x, y = (x1, y1) if random.randint(0,1) else (x2, y2)
    pred_y = model(x)
    loss = criterion(pred_y, y)
    loss.backward()
    
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
    if i % 5 == 0:
        print(f"%.4f %.1f %.2f" % (loss.item(), y.mean(), pred_y.mean()))


0.0097 0.0 -0.10
0.0072 0.0 -0.08
0.0176 1.0 0.87
0.0009 1.0 0.97
0.0000 1.0 0.99
0.0050 0.0 -0.07
0.0047 0.0 -0.07
0.0045 0.0 -0.07
0.0041 0.0 -0.06
0.0000 1.0 1.01
0.0037 0.0 -0.06
0.0000 1.0 1.01
0.0034 0.0 -0.06
0.0000 1.0 1.00
0.0000 1.0 1.00
0.0000 1.0 1.00
0.0032 0.0 -0.06
0.0030 0.0 -0.06
0.0028 0.0 -0.05
0.0027 0.0 -0.05
0.0026 0.0 -0.05
0.0000 1.0 1.00
0.0025 0.0 -0.05
0.0000 1.0 1.00
0.0023 0.0 -0.05


In [47]:
#Need to optimization

import torch
import random

cuda0 = torch.device('cuda:0')
cuda1 = torch.device('cuda:1')  

class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.root1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 1, (5, 5)),
        )
        self.root2 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 1, (5, 5)),
        )
        
    def forward(self, x):
        root1 = self.root1(x).to(cuda0)
        root2 = self.root2(x).to(cuda1)
        fusion = root1 + root2.to(cuda0)

        # root1  --> cuda0
        # root2  --> cuda1
        # fusion --> cuda0

        return [root1, root2, fusion]

learning_rate = 0.01

model = Model()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
criterion = torch.nn.MSELoss(reduction='sum')

model.train()
for i in range(500):
    model.zero_grad()

    if random.randint(0, 10) % 2 == 0:
        x  = torch.ones((1, 5, 5)).to(torch.float32)
        y1 = torch.ones((1, 1, 1)).to(torch.float32).to(cuda0)
        y2 =torch.zeros((1, 1, 1)).to(torch.float32).to(cuda1)
    else:
        x =  torch.zeros((1, 5, 5)).to(torch.float32)
        y1 = torch.zeros((1, 1, 1)).to(torch.float32).to(cuda0)
        y2 =  torch.ones((1, 1, 1)).to(torch.float32).to(cuda1)

    r1, r2, r12 = model(x)
    loss1 = criterion(r1, y1)
    loss2 = criterion(r2, y2).to(cuda0)
    loss =  loss1 + loss2

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if i % 50 == 0:
        print(f"%.4f %.4f [%.2f %.2f] [%.2f %.2f]" % (loss1.item(), loss2.item(), y1.mean(), r1.mean(), y2.mean(), r2.mean()))


0.0101 0.8066 [0.00 0.10] [1.00 0.10]
0.0077 0.3236 [0.00 0.09] [1.00 0.43]
0.0000 0.0002 [1.00 1.00] [0.00 0.02]
0.0010 0.0414 [0.00 0.03] [1.00 0.80]
0.0000 0.0000 [1.00 1.00] [0.00 0.00]
0.0002 0.0088 [0.00 0.01] [1.00 0.91]
0.0001 0.0037 [0.00 0.01] [1.00 0.94]
0.0000 0.0000 [1.00 1.00] [0.00 0.00]
0.0000 0.0006 [0.00 0.00] [1.00 0.97]
0.0000 0.0000 [1.00 1.00] [0.00 0.00]
