In [3]:
import torch

In [None]:
x = torch.arange(12)
X = x.reshape(3, 4)

x, x.shape, x.numel(), X

In [None]:
torch.zeros((2, 3, 4))

In [None]:
torch.ones((2, 3, 4))

In [None]:
x = torch.tensor([1, 2, 4, 8])
y = torch.tensor([2, 2, 2, 2])

x+y, x-y, x*y, x/y, x**y, torch.exp(x)

(tensor([ 3,  4,  6, 10]),
 tensor([-1,  0,  2,  6]),
 tensor([ 2,  4,  8, 16]),
 tensor([0.5000, 1.0000, 2.0000, 4.0000]),
 tensor([ 1,  4, 16, 64]),
 tensor([2.7183e+00, 7.3891e+00, 5.4598e+01, 2.9810e+03]))

In [None]:
x = torch.arange(12).reshape((3, 4))
y = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
# 在不同维度上的 concatenate
torch.cat((x, y), dim=0), torch.cat((x, y), dim=1), x==y, x.sum()

(tensor([[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11],
         [ 1,  2,  3,  4],
         [ 5,  6,  7,  8],
         [ 9, 10, 11, 12]]),
 tensor([[ 0,  1,  2,  3,  1,  2,  3,  4],
         [ 4,  5,  6,  7,  5,  6,  7,  8],
         [ 8,  9, 10, 11,  9, 10, 11, 12]]),
 tensor([[False, False, False, False],
         [False, False, False, False],
         [False, False, False, False]]),
 tensor(66))

In [None]:
a = torch.arange(3).reshape((3, 1))
b = torch.arange(2).reshape((1, 2))
# broadcast：如果两个矩阵的shape不一样的时候会发生，最容易出错的地方，所以检查矩阵的shape很重要
a, b, a + b

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]),
 tensor([[0, 1],
         [1, 2],
         [2, 3]]))

In [None]:
x[-1], x[1:3], x[1, 2]

(tensor([ 8,  9, 10, 11]),
 tensor([[ 4,  5,  6,  7],
         [ 8,  9, 10, 11]]),
 tensor(6))

In [None]:
x[0:2, :] = 12
x

tensor([[12, 12, 12, 12],
        [12, 12, 12, 12],
        [ 8,  9, 10, 11]])

In [None]:
# in-place assignment

x[:] = x + y
# 等价于 x += y

In [None]:
a = torch.tensor([3.5])
a, a.item(), float(a), int(a)

(tensor([3.5000]), 3.5, 3.5, 3)

In [None]:
from google.colab import drive
import csv

drive.mount('/content/drive')

with open('/content/sample_data/mnist_test.csv', 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        print(row)

In [None]:
import os

os.makedirs(os.path.join('..', 'data'), exist_ok=True)
data_file = os.path.join('..', 'data', 'house_tiny.csv')
with open(data_file, 'w') as f:
    f.write('NumRooms,Alley,Price\n')  # 列名
    f.write('NA,Pave,127500\n')  # 每行表示一个数据样本
    f.write('2,NA,106000\n')
    f.write('4,NA,178100\n')
    f.write('NA,NA,140000\n')

In [None]:
import pandas as pd

data = pd.read_csv(data_file)
print(data)

   NumRooms Alley   Price
0       NaN  Pave  127500
1       2.0   NaN  106000
2       4.0   NaN  178100
3       NaN   NaN  140000


In [None]:
inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]
inputs.iloc[:, 0:1] = inputs.iloc[:, 0:1].fillna(inputs.iloc[:, 0:1].mean())
print(inputs)

   NumRooms Alley
0       3.0  Pave
1       2.0   NaN
2       4.0   NaN
3       3.0   NaN


In [None]:
inputs = pd.get_dummies(inputs, dtype=int, drop_first=True)
print(inputs)

   NumRooms  Alley_Pave  Alley_nan
0       3.0        True      False
1       2.0       False       True
2       4.0       False       True
3       3.0       False       True


In [None]:
# element-wise prod: Hadamard product
import numpy as np
A = np.arange(20).reshape(5, 4)
B = A.copy()
A * B

array([[  0,   1,   4,   9],
       [ 16,  25,  36,  49],
       [ 64,  81, 100, 121],
       [144, 169, 196, 225],
       [256, 289, 324, 361]])

In [None]:
A = np.arange(20).reshape(5, 4)
B = A.T
np.dot(A, B)

array([[  14,   38,   62,   86,  110],
       [  38,  126,  214,  302,  390],
       [  62,  214,  366,  518,  670],
       [  86,  302,  518,  734,  950],
       [ 110,  390,  670,  950, 1230]])

In [None]:
# 矩阵的降维，降某一维就是将那个轴拍扁到其他轴上
A.mean(axis=0), A.sum(axis=0)

array([40, 45, 50, 55])

In [None]:
# 保留那一维
A.sum(axis=1, keepdims=True), A.cumsum(axis=0)

(array([[ 6],
        [22],
        [38],
        [54],
        [70]]),
 array([[ 0,  1,  2,  3],
        [ 4,  6,  8, 10],
        [12, 15, 18, 21],
        [24, 28, 32, 36],
        [40, 45, 50, 55]]))

In [None]:
# L2 norm
u = np.array([3, -4])
np.linalg.norm(u)

5.0

In [None]:
# L1 norm
np.abs(u).sum()

7

$\frac{\partial uv}{\partial x} = \frac{\partial u}{\partial x}v + \frac{\partial v}{\partial x}u$

$<u, v>$ 是指这两个向量做内积

$\frac{\partial <u, v>}{\partial x} = u^T \frac{\partial v}{\partial x} + v^T \frac{\partial u}{\partial x}$

In [18]:
# 自动求导

import torch
x = torch.arange(4.0)
x.requires_grad_(True)
y = 2 * torch.dot(x, x)
y.backward()
x.grad

tensor([ 0.,  4.,  8., 12.])

In [None]:
# 默认 Pytorch 会累积梯度，可以这样清理
x.grad.zero_()
y = x * x
u = y.detach() # 这里把 u 移动到记录的计算图之外（适用于我们想要 freeze 参数的时候）
z = u * x

z.sum().backward() # 在获取梯度前都需要说 backward()，因为计算梯度是 weighted 的，pytorch 是拖延到实际需要的时候再执行
x.grad == u # 计算机不知道 u = x * x 了

loss 为什么通常是一个标量？因为如果loss是向量的话，反向传播的时候矩阵就会太大，算不出来了