In [1]:
import torch
import numpy as np

In [2]:
# Create a tensor directly from in-memory data (a nested Python list).
data = [
    [1, 2],
    [3, 4],
]
x_data = torch.tensor(data)
print(x_data)

tensor([[1, 2],
        [3, 4]])


In [4]:
# Create a tensor from a NumPy array.
# torch.from_numpy keeps the array's dtype and shares memory with it instead of
# copying, so later in-place changes to np_array are visible through x_np.
np_array = np.array(data)
x_np = torch.from_numpy(np_array)
print(x_np)

tensor([[1, 2],
        [3, 4]], dtype=torch.int32)


In [None]:
# Create tensors from another tensor.
# The *_like factories keep the properties (shape, datatype) of the argument
# tensor unless a property is explicitly overridden.
x_ones = torch.ones_like(x_data)  # inherits shape and dtype from x_data
x_rand = torch.rand_like(x_data, dtype=torch.float)  # dtype overridden to float

print(f"Ones Tensor: \n {x_ones} \n")
print(f"Random Tensor: \n {x_rand} \n")

In [14]:
# Create tensors from a shape, given as a tuple of dimension sizes.
shape = (2, 3, 4)

# Apply each factory to the same shape, in the order rand -> ones -> zeros.
rand_tensor, ones_tensor, zeros_tensor = (
    factory(shape) for factory in (torch.rand, torch.ones, torch.zeros)
)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

Random Tensor: 
 tensor([[[0.1064, 0.1743, 0.8710, 0.9019],
         [0.9750, 0.8121, 0.2988, 0.0415],
         [0.0418, 0.0978, 0.2077, 0.6544]],

        [[0.6603, 0.6116, 0.8551, 0.6024],
         [0.6580, 0.3880, 0.3977, 0.2277],
         [0.3908, 0.4841, 0.1865, 0.6562]]]) 

Ones Tensor: 
 tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]]) 

Zeros Tensor: 
 tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])


In [17]:
# Tensor attributes: shape, datatype and the device the data lives on.
tensor = torch.rand(3, 4)

# One print call; sep="\n" puts each attribute on its own line.
print(
    f"Shape of tensor: {tensor.shape}",
    f"Datatype of tensor: {tensor.dtype}",
    f"Device tensor is stored on: {tensor.device}",
    sep="\n",
)

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


In [30]:
# Tensor operations.

# Move the tensor to the GPU, but only when CUDA is actually available.
if torch.cuda.is_available():
    tensor = tensor.to('cuda')
print(f"Device tensor is stored on: {tensor.device}")

# Indexing and slicing: overwrite the whole second row with zeros.
tensor2 = torch.ones(4, 4)
tensor2[1] = 0
print(tensor2)

# Joining: cat concatenates along an existing dimension (here dim=1),
# while stack places the inputs along a new leading dimension.
t1 = torch.cat([tensor2] * 3, dim=1)
print(t1)
t2 = torch.stack([tensor2] * 3)
print(t2)

Device tensor is stored on: cuda:0
tensor([[1., 1., 1., 1.],
        [0., 0., 0., 0.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])
tensor([[[1., 1., 1., 1.],
         [0., 0., 0., 0.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [0., 0., 0., 0.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [0., 0., 0., 0.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])


In [38]:
# Multiplying tensors.
tensor = torch.ones(3, 4)
# In-place add: yields the same values as `tensor = tensor.add(1)`, but
# modifies the existing tensor instead of creating a new one.
tensor.add_(1)

# Element-wise product, written as a method call and as an operator.
product_via_method = tensor.mul(tensor)
product_via_operator = tensor * tensor
print(f"tensor.mul(tensor) \n {product_via_method} \n")
print(f"tensor * tensor \n {product_via_operator}")

# Matrix multiplication, again as a method call and as an operator.
matmul_via_method = tensor.matmul(tensor.T)
matmul_via_operator = tensor @ tensor.T
print(f"tensor.matmul(tensor.T) \n {matmul_via_method} \n")
print(f"tensor @ tensor.T \n {matmul_via_operator}")

tensor.mul(tensor) 
 tensor([[4., 4., 4., 4.],
        [4., 4., 4., 4.],
        [4., 4., 4., 4.]]) 

tensor * tensor 
 tensor([[4., 4., 4., 4.],
        [4., 4., 4., 4.],
        [4., 4., 4., 4.]])
tensor.matmul(tensor.T) 
 tensor([[16., 16., 16.],
        [16., 16., 16.],
        [16., 16., 16.]]) 

tensor @ tensor.T 
 tensor([[16., 16., 16.],
        [16., 16., 16.],
        [16., 16., 16.]])


**In-place operations.** Operations that have a `_` suffix are in-place. For example, `x.copy_(y)` and `x.t_()` will change `x`.

``` python
print(tensor, "\n")
tensor.add_(5)
print(tensor)
```
In-place operations save some memory, but can be problematic when computing derivatives because of an immediate loss of history. Hence, their use is **discouraged**.

In [43]:
# Simulate the learning process with plain NumPy:
# fit a third-order polynomial to sin(x) by gradient descent.
import numpy as np
import math

# Inputs are an evenly spaced grid on [-pi, pi]; targets are their sine values.
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# The powers of x never change during training, so compute them once instead
# of re-evaluating x ** 2 and x ** 3 several times in every iteration.
x_squared = x ** 2
x_cubed = x ** 3

# Randomly initialize weights
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

learning_rate = 1e-6
for t in range(3000):
    # Forward pass: compute predicted y
    # y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x_squared + d * x_cubed

    # Compute the sum-of-squares loss and report it every 100 iterations
    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)

    # Backprop by hand: gradient of the loss with respect to a, b, c, d.
    # d(loss)/d(y_pred) = 2 * (y_pred - y); the chain rule then multiplies by
    # the matching power of x before summing over all samples.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_squared).sum()
    grad_d = (grad_y_pred * x_cubed).sum()

    # Update weights with a plain gradient-descent step
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

99 780.3353715419314
199 551.3968662605379
299 390.5363077941849
399 277.4613555084525
499 197.94507586710486
599 142.00678350438346
699 102.64117522383577
799 74.92898723177007
899 55.41425445079917
999 41.667993961099356
1099 31.982332416210035
1199 25.155964142828108
1299 20.343592723935906
1399 16.950222896747366
1499 14.556908039225975
1599 12.868570252649317
1699 11.6773165012622
1799 10.8366388002139
1899 10.243262454996058
1999 9.824370709217721
2099 9.528610379274836
2199 9.31975748113959
2299 9.172254870450834
2399 9.068067782558664
2499 8.994467468342075
2599 8.942468610978167
2699 8.905727416484439
2799 8.879764394673913
2899 8.861416044191742
2999 8.848447950874926
Result: y = 0.005877409399576431 + 0.8560882048823306 x + -0.0010139509219414923 x^2 + -0.09323755694108793 x^3


In [44]:
# Same polynomial fit, now computed with PyTorch tensors
# (gradients are still derived by hand).

import torch
import math


dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# Inputs are an evenly spaced grid on [-pi, pi]; targets are their sine values.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change during training, so compute them once instead
# of re-evaluating x ** 2 and x ** 3 several times in every iteration.
x_squared = x ** 2
x_cubed = x ** 3

# Randomly initialize weights as 0-dim (scalar) tensors
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(3000):
    # Forward pass: compute predicted y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x_squared + d * x_cubed

    # Compute the loss as a Python float (.item()) and report it periodically
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backprop by hand: gradient of the loss with respect to a, b, c, d
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_squared).sum()
    grad_d = (grad_y_pred * x_cubed).sum()

    # Update weights using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 3471.304443359375
199 2302.429443359375
299 1528.328857421875
399 1015.61865234375
499 675.9971313476562
599 451.0034484863281
699 301.9302978515625
799 203.14588928222656
899 137.67608642578125
999 94.27933502197266
1099 65.5089111328125
1199 46.43180847167969
1299 33.77994918823242
1399 25.387434005737305
1499 19.819406509399414
1599 16.124330520629883
1699 13.671597480773926
1799 12.043139457702637
1899 10.961652755737305
1999 10.243196487426758
2099 9.765792846679688
2199 9.44846248626709
2299 9.237438201904297
2399 9.097077369689941
2499 9.003673553466797
2599 8.941495895385742
2699 8.900087356567383
2799 8.872504234313965
2899 8.854113578796387
2999 8.841851234436035
Result: y = 0.0020751410629600286 + 0.8523032069206238 x + -0.0003579975164029747 x^2 + -0.09269917756319046 x^3


In [48]:
# Same polynomial fit, with gradients computed by autograd.
import torch
import math

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment this to run on GPU

# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change during training and x does not require
# gradients, so compute them once outside the loop.
x_squared = x ** 2
x_cubed = x ** 3

# Create random Tensors for weights. For a third order polynomial, we need
# 4 weights: y = a + b x + c x^2 + d x^3
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y using operations on Tensors.
    y_pred = a + b * x + c * x_squared + d * x_cubed

    # Compute and print loss using operations on Tensors.
    # loss is a 0-dim (scalar) Tensor, i.e. its shape is ();
    # loss.item() gets the Python number held in it.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass. This call will compute the
    # gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call a.grad, b.grad, c.grad and d.grad will be Tensors holding
    # the gradient of the loss with respect to a, b, c, d respectively.
    loss.backward()

    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because weights have requires_grad=True, but we don't need to track this
    # in autograd.
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # Reset the gradients after updating weights; otherwise the next
        # backward() call would accumulate into the old values.
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 1885.426513671875
199 1286.76904296875
299 880.0772094726562
399 603.4884643554688
499 415.1712646484375
599 286.8084716796875
699 199.21282958984375
799 139.36834716796875
899 98.43614196777344
999 70.40735626220703
1099 51.19224166870117
1199 38.00436019897461
1299 28.94273567199707
1399 22.709415435791016
1499 18.41684913635254
1599 15.457529067993164
1699 13.415144920349121
1799 12.004087448120117
1899 11.028197288513184
1999 10.352581977844238
Result: y = -0.035404909402132034 + 0.8368995189666748 x + 0.006107935216277838 x^2 + -0.09050813317298889 x^3


In [53]:
# Same polynomial fit, expressed with the torch.nn package.
import torch
import math


# Input and target tensors.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# The output y is a linear function of (x, x^2, x^3), so the model can be a
# single linear layer fed with those three features.
# x.unsqueeze(-1) has shape (2000, 1) and p has shape (3,); broadcasting
# makes the result of pow a tensor of shape (2000, 3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# nn.Sequential is a Module that holds other Modules and applies them in
# order. Linear computes its output with a linear function and owns the weight
# and bias Tensors. Flatten then flattens the output of the linear layer to a
# 1D tensor so that it matches the shape of `y`.
model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1),
)

# Mean Squared Error from the nn package; reduction='sum' sums the squared
# errors instead of averaging them.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6
for t in range(2000):
    # Clear the gradients left over from the previous iteration before this
    # iteration's backward pass runs.
    model.zero_grad()

    # Forward pass: Modules override __call__, so the model is called like a
    # function on the input Tensor and returns the output Tensor.
    y_pred = model(xx)

    # The loss function takes predicted and true values and returns a Tensor
    # containing the loss; report it every 100 iterations.
    loss = loss_fn(y_pred, y)
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Backward pass: the parameters of each Module are Tensors with
    # requires_grad=True, so this computes gradients for all of them.
    loss.backward()

    # Gradient-descent step on every parameter, outside of autograd tracking.
    with torch.no_grad():
        for param in model.parameters():
            param.sub_(learning_rate * param.grad)

# The first layer of `model` is accessed like the first item of a list.
linear_layer = model[0]

# A linear layer stores its parameters as `weight` (shape (1, 3) here) and `bias`.
bias_term = linear_layer.bias.item()
coef_x, coef_x2, coef_x3 = linear_layer.weight[0].tolist()
print(f'Result: y = {bias_term} + {coef_x} x + {coef_x2} x^2 + {coef_x3} x^3')

99 1161.61376953125
199 772.2823486328125
299 514.4922485351562
399 343.7847900390625
499 230.73208618164062
599 155.8538055419922
699 106.25416564941406
799 73.3952865600586
899 51.62413787841797
999 37.19732666015625
1099 27.635988235473633
1199 21.29822540283203
1299 17.096500396728516
1399 14.310474395751953
1499 12.462748527526855
1599 11.237119674682617
1699 10.423955917358398
1799 9.884320259094238
1899 9.526121139526367
1999 9.288300514221191
Result: y = 0.006312811281532049 + 0.8364532589912415 x + -0.0010890649864450097 x^2 + -0.0904446542263031 x^3


In [54]:
# Same polynomial fit, with the weight updates delegated to an optimizer.
import torch
import math


# Input and target tensors.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Input features (x, x^2, x^3), one column per power.
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# Model and loss function from the nn package.
model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1),
)
loss_fn = torch.nn.MSELoss(reduction='sum')

# The optim package provides Optimizers that update the model weights for us.
# RMSprop is used here; the package contains many other algorithms. The first
# constructor argument tells the optimizer which Tensors it should update.
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
for t in range(2000):
    # Gradients are accumulated (not overwritten) whenever .backward() is
    # called, so clear the ones from the previous iteration first. See the
    # docs of torch.autograd.backward for more details.
    optimizer.zero_grad()

    # Forward pass: compute predicted y by passing the features to the model.
    y_pred = model(xx)

    # Compute the loss and report it every 100 iterations.
    loss = loss_fn(y_pred, y)
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Backward pass: gradient of the loss with respect to model parameters.
    loss.backward()

    # step() makes the optimizer apply one update to its parameters.
    optimizer.step()


linear_layer = model[0]
bias_term = linear_layer.bias.item()
coef_x, coef_x2, coef_x3 = linear_layer.weight[0].tolist()
print(f'Result: y = {bias_term} + {coef_x} x + {coef_x2} x^2 + {coef_x3} x^3')

99 16626.0546875
199 6515.5634765625
299 2559.4296875
399 1367.1279296875
499 1103.1568603515625
599 963.4591064453125
699 813.9025268554688
799 663.8603515625
899 524.9379272460938
999 402.435302734375
1099 298.0370178222656
1199 211.72714233398438
1299 142.79522705078125
1399 90.22856140136719
1499 52.73708724975586
1599 29.484756469726562
1699 15.626887321472168
1799 10.28799057006836
1899 8.992877960205078
1999 8.91797924041748
Result: y = -0.0005292326677590609 + 0.8545901775360107 x + -0.0005292874993756413 x^2 + -0.09359170496463776 x^3
