In [1]:
import torch

In [3]:
A = torch.arange(20,dtype=torch.float32).reshape(5,4)

In [4]:
A

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]])

In [5]:
x = torch.arange(4,dtype=torch.float32)

In [6]:
torch.mv(A,x)


tensor([ 14.,  38.,  62.,  86., 110.])

In [7]:
B = torch.ones(4,3,dtype=torch.float32)
B

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [8]:
torch.mm(A,B)

tensor([[ 6.,  6.,  6.],
        [22., 22., 22.],
        [38., 38., 38.],
        [54., 54., 54.],
        [70., 70., 70.]])

In [9]:
C = torch.arange(36).reshape(3,2,6)
sum_c = C.sum(axis=2,keepdims = True)
sum_c.shape

torch.Size([3, 2, 1])

In [10]:
torch.cuda.is_available()

True

In [11]:
import torch
x = torch.arange(4,dtype=torch.float32)
x

tensor([0., 1., 2., 3.])

In [12]:
x.requires_grad_(True)
x.grad

In [13]:
y = 2 * torch.dot(x,x)
y

tensor(28., grad_fn=<MulBackward0>)

In [14]:
y.backward()
x.grad

tensor([ 0.,  4.,  8., 12.])

In [15]:
x.grad == 4*x

tensor([True, True, True, True])

In [16]:
x.grad.zero_()
# 新的函数
y = x.sum()
y.backward()
x.grad

tensor([1., 1., 1., 1.])

In [17]:
x.grad.zero_()
# 新的函数
y = x*x
# 一般对标量求导，而不是向量

In [18]:
def f(a):
    b = a * 2
    while b.norm() < 1000:
        b = b * 2
    if b.sum() > 0:
        c = b
    else:
        c = 100 * b
    return c

a = torch.randn(size=(), requires_grad=True)
d = f(a)
d.backward()

In [1]:
%matplotlib inline
import random
import torch
from d2l import torch as d2l

In [7]:
def synthetic_data(w,b,num_eamples):
    X = torch.normal(0,1,(num_eamples,len(w)))
    y = torch.matmul(X,w)+b
    y+=+torch.normal(0,0.01,y.shape)
    return X,y.reshape((-1,1))
true_w = torch.tensor([2,-3.4])
b= 4.2
features,labels = synthetic_data(true_w,b,1000)
labels.shape

torch.Size([1000, 1])

In [12]:
def foo():
    print("starting...")
    b = 10
    while True:
        res = yield 4
        print(b)
        print("res:",res)
g = foo()
print(next(g))
print("*"*20)
print(g.send(9))

starting...
4
********************
10
res: 9
4


In [13]:
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

true_w = torch.tensor([2,-3.4])
true_b = 4.2
features, labels = d2l.synthetic_data(true_w,true_b,1000)

In [15]:
def load_array(data_arrays,batch_size,is_train=True):
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset,batch_size,shuffle=is_train)

batch_size = 10
data_iter = load_array((features,labels),batch_size)
next(iter(data_iter))

[tensor([[ 0.0720, -1.3155],
         [-1.9039, -0.4144],
         [ 0.9070,  0.6294],
         [ 0.1427,  0.0871],
         [-1.0577, -1.2468],
         [ 0.3422, -0.6229],
         [-0.9361,  1.1780],
         [ 0.3678,  0.4176],
         [ 0.4885,  1.6352],
         [ 1.6941, -0.2616]]),
 tensor([[ 8.8317],
         [ 1.8173],
         [ 3.8821],
         [ 4.1859],
         [ 6.3284],
         [ 7.0006],
         [-1.6603],
         [ 3.5202],
         [-0.3771],
         [ 8.4730]])]

In [17]:
from torch import nn
net = nn.Sequential(nn.Linear(2,1))
net[0].weight.data.normal_(0,0.01)
net[0].bias.data.fill_(0)

tensor([0.])

In [18]:
loss= nn.MSELoss()
trainer = torch.optim.SGD(net.parameters(),lr=0.03)

In [21]:
num_epoches = 3
for epoch in range(num_epoches):
    for X,y in data_iter:
        l = loss(net(X),y)
        trainer.zero_grad()
        l.backward()
        trainer.step()
    l = loss(net(features),labels)
    print(f'epoch {epoch+1}, loss {l}')


epoch 1, loss 0.00010378904698882252
epoch 2, loss 0.00010456161544425413
epoch 3, loss 0.00010491220018593594


In [1]:
# 权重衰退

%matplotlib inline
import torch
from torch import nn
from d2l import torch as d2l

In [3]:
n_train,n_test,num_inputs,batch_size = 20,100,200,5
true_w,true_b = torch.ones((num_inputs,1))*0.01,0.05
train_data = d2l.synthetic_data(true_w,true_b,n_train)
test_data = d2l.synthetic_data(true_w,true_b,n_test)
train_iter = d2l.load_array(train_data,batch_size)
test_iter = d2l.load_array(test_data,batch_size)

In [9]:
def init_params():
    w = torch.normal(0,1,size=(num_inputs,1),requires_grad=True)
    b = torch.zeros(1,requires_grad=True)
    return[w,b]

def l2_penalty(w):
    return torch.sum(w.pow(2))/2

In [10]:
def train(lambd):
    w,b = init_params()
    net,loss = lambda X:d2l.linreg(X,w,b),d2l.squared_loss
    num_epoches,lr = 100,0.003
    for epoch in range(num_epoches):
        for X,y in train_iter:
            l = loss(net(X),y)+l2_penalty(w)*lambd
            l.sum().backward()
            d2l.sgd([w,b],lr,batch_size)
        if (epoch+1)%5==0:
            print(f'loss={d2l.evaluate_loss(net,train_iter,loss)}')

In [11]:
train(lambd=1)

loss=25.284801864624022
loss=6.71133394241333
loss=2.1310164451599123
loss=0.7619744896888733
loss=0.292746365070343
loss=0.1174873948097229
loss=0.04825361520051956
loss=0.020214653015136717
loss=0.008619414642453194
loss=0.0037782991304993628
loss=0.001734130026306957
loss=0.000873245601542294
loss=0.0005058526585344225
loss=0.0003504311462165788
loss=0.0002866126946173608
loss=0.0002578057232312858
loss=0.0002455235677189194
loss=0.00024189773976104333
loss=0.00023866779665695503
loss=0.00023353329161182045


In [7]:
w = torch.tensor([1,2])

In [8]:
torch.dot(w,w)

tensor(5)

In [1]:
import torch 
X = torch.rand(2,20)

In [3]:
X.shape

torch.Size([2, 20])

In [4]:
!nvidia-smi

Sat Nov 19 17:47:56 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 522.06       Driver Version: 522.06       CUDA Version: 11.8     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   50C    P8     4W /  N/A |    225MiB /  6144MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [1]:
import torch
from torch import nn
torch.device('cpu'),torch.cuda.device('cuda'),torch.cuda.device('cuda:0')

(device(type='cpu'),
 <torch.cuda.device at 0x1e7c518c4f0>,
 <torch.cuda.device at 0x1e7c518c520>)

In [6]:
torch.cuda.device_count()

1

In [3]:
X = torch.ones(2,3,device='cuda:0')
X

tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')

In [5]:
Y = torch.ones(2,3,device = 'cpu')

In [4]:
net = nn.Sequential(nn.Linear(3,1))
net.to(device='cuda:0')
net(X)

tensor([[0.8020],
        [0.8020]], device='cuda:0', grad_fn=<AddmmBackward0>)