In [1]:
import torch

In [2]:
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Randomly initialize weights
w1 = torch.randn(D_in, H, device=device, dtype=dtype)
w2 = torch.randn(H, D_out, device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum().item()
    print(t, loss)

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # Update weights using gradient descent
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2


0 23752898.0
1 16838880.0
2 13124202.0
3 10682584.0
4 8768808.0
5 7127164.5
6 5682423.0
7 4449930.5
8 3434524.25
9 2631951.25
10 2012700.875
11 1546348.5
12 1198327.5
13 940111.8125
14 747551.3125
15 603231.375
16 493741.6875
17 409893.625
18 344634.125
19 293174.25
20 251877.984375
21 218282.75
22 190577.84375
23 167504.09375
24 148049.5
25 131484.359375
26 117279.03125
27 105005.7109375
28 94353.125
29 85021.5078125
30 76812.7734375
31 69559.53125
32 63131.92578125
33 57414.0703125
34 52315.484375
35 47750.08203125
36 43651.59375
37 39963.390625
38 36640.71875
39 33638.87109375
40 30923.888671875
41 28462.931640625
42 26228.046875
43 24201.375
44 22356.931640625
45 20675.953125
46 19138.091796875
47 17730.728515625
48 16441.802734375
49 15260.6689453125
50 14175.939453125
51 13178.365234375
52 12261.521484375
53 11416.15625
54 10635.3544921875
55 9914.1982421875
56 9247.66796875
57 8631.095703125
58 8059.904296875
59 7530.8173828125
60 7039.208984375
61 6583.15478515625
62 6159.65673

In [3]:
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs.
# Setting requires_grad=False indicates that we do not need to compute gradients
# with respect to these Tensors during the backward pass.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Create random Tensors for weights.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y using operations on Tensors; these
    # are exactly the same operations we used to compute the forward pass using
    # Tensors, but we do not need to keep references to intermediate values since
    # we are not implementing the backward pass by hand.
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Compute and print loss using operations on Tensors.
    # Now loss is a Tensor of shape (1,)
    # loss.item() gets the a scalar value held in the loss.
    loss = (y_pred - y).pow(2).sum()
    print(t, loss.item())

    # Use autograd to compute the backward pass. This call will compute the
    # gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call w1.grad and w2.grad will be Tensors holding the gradient
    # of the loss with respect to w1 and w2 respectively.
    loss.backward()

    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because weights have requires_grad=True, but we don't need to track this
    # in autograd.
    # An alternative way is to operate on weight.data and weight.grad.data.
    # Recall that tensor.data gives a tensor that shares the storage with
    # tensor, but doesn't track history.
    # You can also use torch.optim.SGD to achieve this.
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # Manually zero the gradients after updating weights
        w1.grad.zero_()
        w2.grad.zero_()

0 31206054.0
1 25461074.0
2 24892312.0
3 25100538.0
4 23465350.0
5 19036628.0
6 13305037.0
7 8241092.5
8 4828962.5
9 2865948.75
10 1814080.875
11 1249799.25
12 932117.0
13 738559.6875
14 609588.0
15 516414.09375
16 444632.53125
17 387013.15625
18 339524.75
19 299705.75
20 265867.125
21 236829.84375
22 211719.921875
23 189898.15625
24 170840.84375
25 154245.484375
26 139632.8125
27 126704.734375
28 115238.4921875
29 105024.640625
30 95900.0546875
31 87734.90625
32 80393.140625
33 73782.234375
34 67814.8203125
35 62415.41015625
36 57527.17578125
37 53094.9921875
38 49059.7421875
39 45387.0546875
40 42033.0078125
41 38966.62890625
42 36162.69140625
43 33593.296875
44 31234.595703125
45 29066.802734375
46 27069.75
47 25227.17578125
48 23526.171875
49 21953.875
50 20498.923828125
51 19152.78125
52 17904.01171875
53 16746.921875
54 15672.5966796875
55 14674.455078125
56 13746.029296875
57 12882.326171875
58 12078.787109375
59 11330.224609375
60 10636.4453125
61 9988.5634765625
62 9383.759765

498 0.00020983633294235915
499 0.00020643448806367815


In [4]:
from colour_dict import colour_dict
from evaluation import name_to_rgb
import numpy as np
import random
non_blue = []
for colour, values in colour_dict.items():
    if colour == 'blue':
        blue = np.array(list((map(name_to_rgb, values))))
    else:
        non_blue.extend(list((map(name_to_rgb, values))))
non_blue = np.array(non_blue)
labels = [1]*len(blue) + [0]*len(non_blue)
data = np.concatenate([blue, non_blue], axis=0)
c = list(zip(data, labels))
random.shuffle(c)
data, labels = zip(*c)


In [5]:
labels = [[l] for l in labels]

In [6]:
N, D_in, H, D_out = 1, 3, 10, 1

In [7]:
dtype = torch.float
x = torch.tensor(data, dtype=dtype)
y = torch.tensor(labels, dtype=dtype)

In [8]:
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

In [9]:
learning_rate = 1e-5
for t in range(500):
    # Forward pass: compute predicted y using operations on Tensors; these
    # are exactly the same operations we used to compute the forward pass using
    # Tensors, but we do not need to keep references to intermediate values since
    # we are not implementing the backward pass by hand.
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Compute and print loss using operations on Tensors.
    # Now loss is a Tensor of shape (1,)
    # loss.item() gets the a scalar value held in the loss.
    loss = (y_pred - y).pow(2).sum()
    print(t, loss.item())

    # Use autograd to compute the backward pass. This call will compute the
    # gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call w1.grad and w2.grad will be Tensors holding the gradient
    # of the loss with respect to w1 and w2 respectively.
    loss.backward()

    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because weights have requires_grad=True, but we don't need to track this
    # in autograd.
    # An alternative way is to operate on weight.data and weight.grad.data.
    # Recall that tensor.data gives a tensor that shares the storage with
    # tensor, but doesn't track history.
    # You can also use torch.optim.SGD to achieve this.
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # Manually zero the gradients after updating weights
        w1.grad.zero_()
        w2.grad.zero_()

0 16.51317024230957
1 16.430273056030273
2 16.34917640686035
3 16.269832611083984
4 16.192203521728516
5 16.116254806518555
6 16.041940689086914
7 15.969230651855469
8 15.898088455200195
9 15.82846736907959
10 15.76034164428711
11 15.693679809570312
12 15.628440856933594
13 15.564589500427246
14 15.502103805541992
15 15.44095230102539
16 15.381096839904785
17 15.322514533996582
18 15.265174865722656
19 15.209038734436035
20 15.154094696044922
21 15.100306510925293
22 15.047647476196289
23 14.996094703674316
24 14.945624351501465
25 14.89620590209961
26 14.847823143005371
27 14.800446510314941
28 14.754055976867676
29 14.708620071411133
30 14.664132118225098
31 14.620556831359863
32 14.577885627746582
33 14.536087036132812
34 14.495145797729492
35 14.455046653747559
36 14.415766716003418
37 14.377280235290527
38 14.339578628540039
39 14.302642822265625
40 14.266448974609375
41 14.230989456176758
42 14.196239471435547
43 14.162189483642578
44 14.128816604614258
45 14.096111297607422
46 1

439 11.334063529968262
440 11.330682754516602
441 11.32730484008789
442 11.323927879333496
443 11.320552825927734
444 11.317181587219238
445 11.313814163208008
446 11.310450553894043
447 11.307084083557129
448 11.30372428894043
449 11.300366401672363
450 11.297008514404297
451 11.293659210205078
452 11.290308952331543
453 11.286958694458008
454 11.283615112304688
455 11.28027057647705
456 11.276930809020996
457 11.273590087890625
458 11.270255088806152
459 11.266923904418945
460 11.263593673706055
461 11.260265350341797
462 11.256938934326172
463 11.25361442565918
464 11.250293731689453
465 11.246975898742676
466 11.243661880493164
467 11.240348815917969
468 11.237037658691406
469 11.233728408813477
470 11.230422019958496
471 11.227119445800781
472 11.223816871643066
473 11.2205171585083
474 11.217220306396484
475 11.2139253616333
476 11.2106351852417
477 11.207345962524414
478 11.204057693481445
479 11.200774192810059
480 11.197494506835938
481 11.194214820861816
482 11.19093513488769

In [10]:
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-4
for t in range(500):
    # Forward pass: compute predicted y by passing x to the model. Module objects
    # override the __call__ operator so you can call them like functions. When
    # doing so you pass a Tensor of input data to the Module and it produces
    # a Tensor of output data.
    y_pred = model(x)
    print(y_pred.shape)
    print(y.shape)
    # Compute and print loss. We pass Tensors containing the predicted and true
    # values of y, and the loss function returns a Tensor containing the
    # loss.
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # Zero the gradients before running the backward pass.
    model.zero_grad()

    # Backward pass: compute gradient of the loss with respect to all the learnable
    # parameters of the model. Internally, the parameters of each Module are stored
    # in Tensors with requires_grad=True, so this call will compute gradients for
    # all learnable parameters in the model.
    loss.backward()

    # Update the weights using gradient descent. Each parameter is a Tensor, so
    # we can access its gradients like we did before.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad


torch.Size([42, 1])
torch.Size([42, 1])
0 7.628746509552002
torch.Size([42, 1])
torch.Size([42, 1])
1 7.618115425109863
torch.Size([42, 1])
torch.Size([42, 1])
2 7.607580661773682
torch.Size([42, 1])
torch.Size([42, 1])
3 7.597138404846191
torch.Size([42, 1])
torch.Size([42, 1])
4 7.58678674697876
torch.Size([42, 1])
torch.Size([42, 1])
5 7.576523303985596
torch.Size([42, 1])
torch.Size([42, 1])
6 7.56634521484375
torch.Size([42, 1])
torch.Size([42, 1])
7 7.55625057220459
torch.Size([42, 1])
torch.Size([42, 1])
8 7.546236991882324
torch.Size([42, 1])
torch.Size([42, 1])
9 7.53630256652832
torch.Size([42, 1])
torch.Size([42, 1])
10 7.526444435119629
torch.Size([42, 1])
torch.Size([42, 1])
11 7.516661643981934
torch.Size([42, 1])
torch.Size([42, 1])
12 7.5069355964660645
torch.Size([42, 1])
torch.Size([42, 1])
13 7.497050762176514
torch.Size([42, 1])
torch.Size([42, 1])
14 7.487235069274902
torch.Size([42, 1])
torch.Size([42, 1])
15 7.477486610412598
torch.Size([42, 1])
torch.Size([42, 1

torch.Size([42, 1])
torch.Size([42, 1])
164 6.351039409637451
torch.Size([42, 1])
torch.Size([42, 1])
165 6.34438419342041
torch.Size([42, 1])
torch.Size([42, 1])
166 6.337742328643799
torch.Size([42, 1])
torch.Size([42, 1])
167 6.331112384796143
torch.Size([42, 1])
torch.Size([42, 1])
168 6.324495315551758
torch.Size([42, 1])
torch.Size([42, 1])
169 6.317890167236328
torch.Size([42, 1])
torch.Size([42, 1])
170 6.311221599578857
torch.Size([42, 1])
torch.Size([42, 1])
171 6.304564952850342
torch.Size([42, 1])
torch.Size([42, 1])
172 6.2979207038879395
torch.Size([42, 1])
torch.Size([42, 1])
173 6.29128885269165
torch.Size([42, 1])
torch.Size([42, 1])
174 6.284668922424316
torch.Size([42, 1])
torch.Size([42, 1])
175 6.278061389923096
torch.Size([42, 1])
torch.Size([42, 1])
176 6.271462440490723
torch.Size([42, 1])
torch.Size([42, 1])
177 6.264867305755615
torch.Size([42, 1])
torch.Size([42, 1])
178 6.258284091949463
torch.Size([42, 1])
torch.Size([42, 1])
179 6.251712799072266
torch.Siz

torch.Size([42, 1])
torch.Size([42, 1])
306 5.4923624992370605
torch.Size([42, 1])
torch.Size([42, 1])
307 5.48708963394165
torch.Size([42, 1])
torch.Size([42, 1])
308 5.481823444366455
torch.Size([42, 1])
torch.Size([42, 1])
309 5.476564884185791
torch.Size([42, 1])
torch.Size([42, 1])
310 5.4713134765625
torch.Size([42, 1])
torch.Size([42, 1])
311 5.46606969833374
torch.Size([42, 1])
torch.Size([42, 1])
312 5.460832595825195
torch.Size([42, 1])
torch.Size([42, 1])
313 5.455602169036865
torch.Size([42, 1])
torch.Size([42, 1])
314 5.450379371643066
torch.Size([42, 1])
torch.Size([42, 1])
315 5.445163726806641
torch.Size([42, 1])
torch.Size([42, 1])
316 5.43995475769043
torch.Size([42, 1])
torch.Size([42, 1])
317 5.434752941131592
torch.Size([42, 1])
torch.Size([42, 1])
318 5.429558753967285
torch.Size([42, 1])
torch.Size([42, 1])
319 5.424370765686035
torch.Size([42, 1])
torch.Size([42, 1])
320 5.419190406799316
torch.Size([42, 1])
torch.Size([42, 1])
321 5.4140167236328125
torch.Size(

In [11]:
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.Tanh(),
    torch.nn.Linear(H, D_out),
    torch.nn.Sigmoid(),
)
loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for t in range(500):
    # Forward pass: compute predicted y by passing x to the model.
    y_pred = model(x)

    # Compute and print loss.
    loss = loss_fn(y_pred, y)
    print(t, loss.item())

    # Before the backward pass, use the optimizer object to zero all of the
    # gradients for the variables it will update (which are the learnable
    # weights of the model). This is because by default, gradients are
    # accumulated in buffers( i.e, not overwritten) whenever .backward()
    # is called. Checkout docs of torch.autograd.backward for more details.
    optimizer.zero_grad()

    # Backward pass: compute gradient of the loss with respect to model
    # parameters
    loss.backward()

    # Calling the step function on an Optimizer makes an update to its
    # parameters
    optimizer.step()


0 8.937456130981445
1 8.67393970489502
2 8.438857078552246
3 8.22853946685791
4 8.040599822998047
5 7.872669219970703
6 7.722585201263428
7 7.58842134475708
8 7.468462944030762
9 7.361173152923584
10 7.265145301818848
11 7.179040431976318
12 7.101541996002197
13 7.031330585479736
14 6.967087268829346
15 6.907540798187256
16 6.851527690887451
17 6.798018455505371
18 6.746119976043701
19 6.695060729980469
20 6.644177436828613
21 6.592900276184082
22 6.540735244750977
23 6.4872517585754395
24 6.432066917419434
25 6.374842643737793
26 6.3152852058410645
27 6.253146171569824
28 6.188220500946045
29 6.120344161987305
30 6.0493903160095215
31 5.975268840789795
32 5.897928237915039
33 5.817358016967773
34 5.733591079711914
35 5.646707057952881
36 5.556832313537598
37 5.464132785797119
38 5.368805885314941
39 5.271071434020996
40 5.171156883239746
41 5.069292068481445
42 4.965702056884766
43 4.8606085777282715
44 4.754233360290527
45 4.6468024253845215
46 4.53855037689209
47 4.4297261238098145


In [12]:
model(x[1])

tensor([6.8366e-06], grad_fn=<SigmoidBackward>)

In [13]:
for pred, true in zip(y_pred, y):
    print(pred, true)

tensor([8.4496e-06], grad_fn=<SelectBackward>) tensor([0.])
tensor([6.9120e-06], grad_fn=<SelectBackward>) tensor([0.])
tensor([0.0004], grad_fn=<SelectBackward>) tensor([0.])
tensor([0.0000], grad_fn=<SelectBackward>) tensor([0.])
tensor([3.8694e-06], grad_fn=<SelectBackward>) tensor([0.])
tensor([3.3834e-07], grad_fn=<SelectBackward>) tensor([0.])
tensor([0.9110], grad_fn=<SelectBackward>) tensor([1.])
tensor([0.0055], grad_fn=<SelectBackward>) tensor([0.])
tensor([3.2951e-07], grad_fn=<SelectBackward>) tensor([0.])
tensor([0.0960], grad_fn=<SelectBackward>) tensor([0.])
tensor([0.0890], grad_fn=<SelectBackward>) tensor([0.])
tensor([0.9950], grad_fn=<SelectBackward>) tensor([1.])
tensor([4.7561e-06], grad_fn=<SelectBackward>) tensor([0.])
tensor([1.6478e-06], grad_fn=<SelectBackward>) tensor([0.])
tensor([9.7925e-07], grad_fn=<SelectBackward>) tensor([0.])
tensor([1.0000], grad_fn=<SelectBackward>) tensor([1.])
tensor([0.9999], grad_fn=<SelectBackward>) tensor([1.])
tensor([0.9999],

In [14]:
from prob_model import NeuralColourModel
from evaluation import test_colour_model


In [15]:
all_data = list(zip(data,labels))

In [16]:
def draw_data():
    return random.choice(all_data)

In [17]:
fx, w = draw_data()


In [18]:
cm = NeuralColourModel('blue')

In [19]:
for i in range(500):
    fx, w = draw_data()
    cm.update(fx, w)
    if i % 10 == 0:
        results = test_colour_model(cm)
        print(results)

tensor(0.1924, grad_fn=<MseLossBackward>)
{'tp': 9, 'fp': 33, 'fn': 0, 'tn': 0}
tensor(0.3808, grad_fn=<MseLossBackward>)
tensor(0.3962, grad_fn=<MseLossBackward>)
tensor(0.3338, grad_fn=<MseLossBackward>)
tensor(0.2341, grad_fn=<MseLossBackward>)
tensor(0.3317, grad_fn=<MseLossBackward>)
tensor(0.3282, grad_fn=<MseLossBackward>)
tensor(0.3927, grad_fn=<MseLossBackward>)
tensor(0.3316, grad_fn=<MseLossBackward>)
tensor(0.3160, grad_fn=<MseLossBackward>)
tensor(0.3179, grad_fn=<MseLossBackward>)
{'tp': 9, 'fp': 33, 'fn': 0, 'tn': 0}
tensor(0.3540, grad_fn=<MseLossBackward>)
tensor(0.3510, grad_fn=<MseLossBackward>)
tensor(0.3544, grad_fn=<MseLossBackward>)
tensor(0.3352, grad_fn=<MseLossBackward>)
tensor(0.4098, grad_fn=<MseLossBackward>)
tensor(0.3488, grad_fn=<MseLossBackward>)
tensor(0.3543, grad_fn=<MseLossBackward>)
tensor(0.3158, grad_fn=<MseLossBackward>)
tensor(0.2457, grad_fn=<MseLossBackward>)
tensor(0.2142, grad_fn=<MseLossBackward>)
{'tp': 9, 'fp': 33, 'fn': 0, 'tn': 0}
tens

{'tp': 9, 'fp': 33, 'fn': 0, 'tn': 0}
tensor(0.3376, grad_fn=<MseLossBackward>)
tensor(0.2472, grad_fn=<MseLossBackward>)
tensor(0.3159, grad_fn=<MseLossBackward>)
tensor(0.3906, grad_fn=<MseLossBackward>)
tensor(0.3376, grad_fn=<MseLossBackward>)
tensor(0.3490, grad_fn=<MseLossBackward>)
tensor(0.2154, grad_fn=<MseLossBackward>)
tensor(0.3159, grad_fn=<MseLossBackward>)
tensor(0.3420, grad_fn=<MseLossBackward>)
tensor(0.3874, grad_fn=<MseLossBackward>)
{'tp': 9, 'fp': 33, 'fn': 0, 'tn': 0}
tensor(0.1940, grad_fn=<MseLossBackward>)
tensor(0.3483, grad_fn=<MseLossBackward>)
tensor(0.1967, grad_fn=<MseLossBackward>)
tensor(0.1877, grad_fn=<MseLossBackward>)
tensor(0.3520, grad_fn=<MseLossBackward>)
tensor(0.2158, grad_fn=<MseLossBackward>)
tensor(0.3217, grad_fn=<MseLossBackward>)
tensor(0.3420, grad_fn=<MseLossBackward>)
tensor(0.1967, grad_fn=<MseLossBackward>)
tensor(0.3491, grad_fn=<MseLossBackward>)
{'tp': 9, 'fp': 33, 'fn': 0, 'tn': 0}
tensor(0.1877, grad_fn=<MseLossBackward>)
tens

{'tp': 9, 'fp': 33, 'fn': 0, 'tn': 0}
tensor(0.2374, grad_fn=<MseLossBackward>)
tensor(0.2173, grad_fn=<MseLossBackward>)
tensor(0.3325, grad_fn=<MseLossBackward>)
tensor(0.1953, grad_fn=<MseLossBackward>)
tensor(0.3501, grad_fn=<MseLossBackward>)
tensor(0.3763, grad_fn=<MseLossBackward>)
tensor(0.3494, grad_fn=<MseLossBackward>)
tensor(0.3500, grad_fn=<MseLossBackward>)
tensor(0.3471, grad_fn=<MseLossBackward>)
tensor(0.2170, grad_fn=<MseLossBackward>)
{'tp': 9, 'fp': 33, 'fn': 0, 'tn': 0}
tensor(0.3297, grad_fn=<MseLossBackward>)
tensor(0.3352, grad_fn=<MseLossBackward>)
tensor(0.3164, grad_fn=<MseLossBackward>)
tensor(0.3884, grad_fn=<MseLossBackward>)
tensor(0.3313, grad_fn=<MseLossBackward>)
tensor(0.2174, grad_fn=<MseLossBackward>)
tensor(0.3469, grad_fn=<MseLossBackward>)
tensor(0.1954, grad_fn=<MseLossBackward>)
tensor(0.3884, grad_fn=<MseLossBackward>)
tensor(0.3884, grad_fn=<MseLossBackward>)
{'tp': 9, 'fp': 33, 'fn': 0, 'tn': 0}
tensor(0.3461, grad_fn=<MseLossBackward>)
tens