In [3]:
import torch
import matplotlib.pyplot as plt

# 手工实现线性回归

In [None]:
# --- hyper-parameters ---
date_size = 500      # number of synthetic samples
iter_n = 50          # number of gradient-descent steps
sample_rate = 0.1    # probability that a sample joins a step's mini-batch
lr = 0.001           # learning rate

# Synthetic data: y = 3x + 0.8 + standard-normal noise, x uniform in [0, 10).
x = 10 * torch.rand(date_size, 1)
y_true = 3 * x + 0.8 + torch.randn(date_size, 1)

# Trainable scalars, drawn uniformly from [0, 1); autograd tracks both.
w = torch.rand(1, requires_grad=True)
b = torch.rand(1, requires_grad=True)

loss_v = []  # one loss value per step
for step in range(iter_n):
  # Random mini-batch: every row is kept independently with prob. sample_rate.
  mask = torch.rand(date_size) < sample_rate
  batch_x, batch_y = x[mask], y_true[mask]

  # Forward pass and mean-squared error.
  y_pre = w * batch_x + b
  loss = torch.mean((y_pre - batch_y) ** 2)
  loss_v.append(loss.item())
  loss.backward()

  # Parameter update, performed in place inside no_grad so that the step
  # itself is not recorded by autograd.
  #   * An equivalent alternative is to assign through `.data`
  #     (w.data = w.data - lr * w.grad) and zero `w.grad.data`.
  #   * Do NOT write `w = w - lr * w.grad` here: that rebinds `w` to a new
  #     tensor whose requires_grad is False.
  with torch.no_grad():
    w.sub_(lr * w.grad)
    b.sub_(lr * b.grad)

  # Gradients accumulate across backward() calls, so clear them every step.
  for param in (w, b):
    param.grad.zero_()

print(w.item(), b.item(), loss.item())

# Predictions of the fitted line over the whole data set.
y_pre = w * x + b

# Left panel: fitted line (red) drawn over the noisy samples.
fit_ax = plt.subplot(1, 2, 1)
fit_ax.plot(x.numpy(), y_pre.detach().numpy(), color='r')
fit_ax.scatter(x.numpy(), y_true.numpy(), s=2)

# Right panel: loss recorded at every training step.
loss_ax = plt.subplot(1, 2, 2)
loss_ax.plot(range(iter_n), loss_v)

plt.show()

# 用nn.Module实现线性回归

In [None]:
from torch import nn
from torch import optim

# --- hyper-parameters for the nn.Module experiment ---
date_size = 1000    # number of synthetic samples
iter_n = 3000       # number of optimisation steps
sample_rate = 0.3   # probability that a sample joins a step's mini-batch
lr = 0.01           # learning rate

# Synthetic data: y = sin(x) + 0.8 + 0.1 * standard-normal noise,
# with x uniform in [0, 10).
x = 10 * torch.rand(date_size, 1)
noise = torch.randn(date_size, 1) * 0.1
y_true = torch.sin(x) + 0.8 + noise

class LR1(nn.Module):
  """Two-layer regressor: Linear(1, 10) -> Sigmoid -> Linear(10, 1)."""

  def __init__(self):
    super().__init__()
    # Layers are created in this order (linear1, then linear2) so that
    # parameter initialisation draws from the RNG in the same sequence.
    self.linear1 = nn.Linear(in_features=1, out_features=10)
    self.linear2 = nn.Linear(in_features=10, out_features=1)
    self.sig = nn.Sigmoid()

  def forward(self, x):
    """Return predictions for `x`, whose last dimension must be 1."""
    hidden = self.linear1(x)
    activated = self.sig(hidden)
    return self.linear2(activated)

class LR2(nn.Module):
  """Plain linear regressor: a single Linear(1, 1) layer."""

  def __init__(self):
    super().__init__()
    self.linear = nn.Linear(in_features=1, out_features=1)

  def forward(self, x):
    """Return `linear(x)`; the last dimension of `x` must be 1."""
    return self.linear(x)

# Model, loss and optimiser.
# To compare against plain SGD, use optim.SGD(model.parameters(), lr=lr).
model = LR1()
criteria = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

loss_v = []  # one loss value per step

for step in range(iter_n):
  # Random mini-batch: every row is kept independently with prob. sample_rate.
  mask = torch.rand(date_size) < sample_rate
  batch_x, batch_y = x[mask], y_true[mask]

  # Forward pass and loss.
  y_pre = model(batch_x)
  loss = criteria(y_pre, batch_y)
  loss_v.append(loss.item())

  # Backward pass, parameter update, then clear the accumulated gradients.
  loss.backward()
  optimizer.step()
  optimizer.zero_grad()

# To inspect the learned parameters, iterate over model.named_parameters()
# and print each (name, param.data) pair.
print(loss.item())

# Predict over the full data set with the module switched to eval mode.
model.eval()
y_pre = model(x)

# Left panel: predictions (red) over the noisy samples.
fit_ax = plt.subplot(1, 2, 1)
fit_ax.scatter(x.numpy(), y_pre.detach().numpy(), color='r', s=2)
fit_ax.scatter(x.numpy(), y_true.numpy(), s=2)

# Right panel: loss recorded at every training step.
loss_ax = plt.subplot(1, 2, 2)
loss_ax.plot(range(iter_n), loss_v)

plt.show()

# basic operation

In [17]:
import torch
x = torch.randn(3, 4)
y = x.new_zeros(x.shape)

# In-place updates: slice assignment and the augmented operator both write
# into the existing tensor, so `y` stays bound to the same Python object and
# the three ids printed below are identical.
print(id(y))
y[:] = x + y
print(id(y))
y += x
print(id(y))

# Not an in-place operation: the sum is a brand-new tensor and `y` is rebound
# to it, so the id printed here differs from the ones above.
y = torch.add(y, x)
print(id(y))

tensor([[-0.4504, -1.8309,  0.5252,  0.8609],
        [ 0.2168, -0.9063,  0.0332,  0.8122],
        [-1.3639, -0.4885,  0.0419, -0.1577]])
140635443850272


# Load from csv

In [38]:
import torch
import os
import numpy as np
import pandas as pd
## create data
# Write a tiny CSV in which the literal string "NA" marks a missing value.
os.makedirs('data', exist_ok=True)
data_file = os.path.join('data', 'house_tiny.csv')
with open(data_file, 'w') as f:
    f.write('NumRooms,Alley,Price\n')
    f.write('NA,Pave,127500\n')
    f.write('2,NA,106000\n')
    f.write('4,NA,178100\n')
    f.write('NA,NA,140000\n')

## load data
# pandas parses "NA" as NaN by default.
data = pd.read_csv(data_file)

# First two columns are features, the third is the target.
# (Named `inputs`/`outputs` so the builtin `input()` is not shadowed for the
# rest of the kernel session.)
inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]

# Fill missing numeric values with the column mean. numeric_only=True skips
# the string column `Alley`; without it pandas >= 2.0 raises a TypeError.
inputs = inputs.fillna(inputs.mean(numeric_only=True))

# One-hot encode the categorical column, with NaN as its own category.
# dtype=int keeps the indicators as 0/1 integers; the pandas >= 2.0 default is
# bool, which would turn `.values` into an object array that torch.tensor
# cannot convert.
inputs = pd.get_dummies(inputs, dummy_na=True, dtype=int)
print(inputs)
print(outputs)

## fill in torch
# Mixed float/int columns are upcast to float64 by `.values`.
X, y = torch.tensor(inputs.values), torch.tensor(outputs.values)
X, y

   NumRooms  Alley_Pave  Alley_nan
0       3.0           1          0
1       2.0           0          1
2       4.0           0          1
3       3.0           0          1
0    127500
1    106000
2    178100
3    140000
Name: Price, dtype: int64


(tensor([[3., 1., 0.],
         [2., 0., 1.],
         [4., 0., 1.],
         [3., 0., 1.]], dtype=torch.float64),
 tensor([127500, 106000, 178100, 140000]))

In [60]:
import torch

## Scaling a matrix: a 0-D tensor and a one-element 1-D tensor broadcast to
## the same result.
##   torch.tensor(2.0)   -> scalar (0-D)
##   torch.tensor([2.0]) -> 1-D tensor holding a single value
ones_5x5 = torch.ones(5, 5)
print(torch.tensor(2.0) * ones_5x5, torch.tensor([2.0]) * ones_5x5)

## Hadamard product: `*` multiplies element by element.
vec = torch.arange(4)
print(vec * vec)

## Dot product of two vectors: torch.dot and torch.matmul agree.
print(torch.dot(vec, vec), torch.matmul(vec, vec))

tensor([[2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.]]) tensor([[2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.]])
tensor([0, 1, 4, 9])
tensor(14) tensor(14)


# 0/1/2维tensor

In [47]:
# Rank vs. shape
#   tensor.dim() == len(tensor.size())
#
#   Shape [1, 4] is a 2-D tensor, shape [4] is a 1-D tensor:
#     torch.arange(4).view(1, 4) -> [[0, 1, 2, 3]], dim() = 2, shape = [1, 4]
#     torch.arange(4)            -> [0, 1, 2, 3],   dim() = 1, shape = [4]
#
# size() vs. shape
#   1. size() is a method, shape is a property
#   2. tensor.size(dim=k) == tensor.shape[k]
#   3. both give back a torch.Size (torch.Size([]) for a 0-D tensor)

zero_d = torch.tensor(4)
one_d = torch.arange(4)
two_d = torch.arange(4).reshape(1, -1)

print("zero d: ", zero_d, zero_d.dim(), zero_d.size())
print("one d: ", one_d, one_d.dim(), one_d.shape)
print("two d: ", two_d, two_d.dim(), two_d.size())

# Length of one specific dimension, via the method and via the property.
cols_by_method = two_d.size(dim=1)
cols_by_attr = two_d.shape[1]
print("get count on specific dims ", cols_by_method, cols_by_attr)

zero d:  tensor(4) 0 torch.Size([])
one d:  tensor([0, 1, 2, 3]) 1 torch.Size([4])
two d:  tensor([[0, 1, 2, 3]]) 2 torch.Size([1, 4])
get count on specific dims  4 4


# torch中的乘法：dot, mv, mm, @, matmul, mul, multiply, *

In [90]:
# Operands. Despite the `0d` / `1d` tags in the names, a/b are 1-D vectors of
# length 4 and c/d are 2-D matrices of shape [4, 3] and [3, 4].
a_0d_4 = torch.arange(4)
b_0d_4 = torch.arange(4)
c_1d_43 = torch.arange(12).reshape(4, 3)
d_1d_34 = torch.arange(12).reshape(3, 4)

################################
# Element-wise multiplication is the simplest case.
################################
# Hadamard product: torch.mul, `*` and torch.multiply are interchangeable.
hadamard = (
    torch.mul(a_0d_4, b_0d_4),
    a_0d_4 * b_0d_4,
    torch.multiply(a_0d_4, b_0d_4),
)
print(*hadamard)

################################
# torch.dot / torch.mv / torch.mm each accept one fixed combination of 1-D and
# 2-D operands. `@` and torch.matmul are the same operation and cover all three
# cases; they also accept higher-dimensional tensors.
# ATTENTION: both operands must have the same dtype.
#
# torch.dot(1d, 1d) => 0d : [n],    [n]    => []      e.g. [4], [4] => [];  [0,1,2,3] . [0,1,2,3] = tensor(14)
# torch.mv (2d, 1d) => 1d : [m, n], [n]    => [m]     e.g. [3, 4], [4] => [3]
# torch.mm (2d, 2d) => 2d : [m, n], [n, x] => [m, x]  e.g. [3, 4], [4, 3] => [3, 3]
################################
# Dot product of two vectors.
dot_results = (
    torch.dot(a_0d_4, b_0d_4),
    a_0d_4 @ b_0d_4,
    torch.matmul(a_0d_4, b_0d_4),
)
print(*dot_results)

# Matrix * vector: a dot product of every matrix row with the vector.
mv_results = (
    torch.mv(d_1d_34, a_0d_4),
    d_1d_34 @ a_0d_4,
    torch.matmul(d_1d_34, a_0d_4),
)
print(*mv_results)

# Matrix * matrix: ordinary matrix multiplication ([4, 3] x [3, 4] => [4, 4]).
mm_results = (
    torch.mm(c_1d_43, d_1d_34),
    c_1d_43 @ d_1d_34,
    torch.matmul(c_1d_43, d_1d_34),
)
print(*mm_results)

tensor([0, 1, 4, 9]) tensor([0, 1, 4, 9]) tensor([0, 1, 4, 9])
tensor(14) tensor(14) tensor(14)
tensor([14, 38, 62]) tensor([14, 38, 62]) tensor([14, 38, 62])
tensor([[ 20,  23,  26,  29],
        [ 56,  68,  80,  92],
        [ 92, 113, 134, 155],
        [128, 158, 188, 218]]) tensor([[ 20,  23,  26,  29],
        [ 56,  68,  80,  92],
        [ 92, 113, 134, 155],
        [128, 158, 188, 218]]) tensor([[ 20,  23,  26,  29],
        [ 56,  68,  80,  92],
        [ 92, 113, 134, 155],
        [128, 158, 188, 218]])


# aggregate function: sum/mean/max/min and L1,L2 norm

In [1]:
import torch

x1 = torch.arange(12, dtype=torch.float32).reshape(3, 4)
print(x1)

# Reductions over every element return 0-D tensors.
print(x1.sum(), x1.mean(), x1.max(), x1.min())

# keepdim=True keeps the reduced dimension with length 1 ([4] vs. [1, 4]);
# `axis` is accepted as an alias of `dim`.
print(x1.sum(dim=0), x1.sum(dim=0, keepdim=True))
print(x1.sum(axis=0), x1.sum(axis=0, keepdim=True))

# Reducing along one dimension removes it from the result:
#   dim=0 (down the columns): [3, 4] => [4]
#   dim=1 (across the rows):  [3, 4] => [3]
# max/min along a dimension return a (values, indices) pair.
for reduce_dim in (0, 1):
    print(
        x1.sum(dim=reduce_dim),
        x1.mean(dim=reduce_dim),
        x1.max(dim=reduce_dim),
        x1.min(dim=reduce_dim),
    )


y1 = torch.arange(12, dtype=torch.float32)
y2 = torch.arange(12, dtype=torch.float32).reshape(3, 4)
# Vector: L2 norm and L1 norm (sum of absolute values).
print(torch.norm(y1), y1.abs().sum())
# Matrix: Frobenius norm and the entrywise L1 norm (sum of absolute values of
# all entries).
print(torch.norm(y2), y2.abs().sum())

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])
tensor(66.) tensor(5.5000) tensor(11.) tensor(0.)
tensor([12., 15., 18., 21.]) tensor([[12., 15., 18., 21.]])
tensor([12., 15., 18., 21.]) tensor([[12., 15., 18., 21.]])
tensor([12., 15., 18., 21.]) tensor([4., 5., 6., 7.]) torch.return_types.max(
values=tensor([ 8.,  9., 10., 11.]),
indices=tensor([2, 2, 2, 2])) torch.return_types.min(
values=tensor([0., 1., 2., 3.]),
indices=tensor([0, 0, 0, 0]))
tensor([ 6., 22., 38.]) tensor([1.5000, 5.5000, 9.5000]) torch.return_types.max(
values=tensor([ 3.,  7., 11.]),
indices=tensor([3, 3, 3])) torch.return_types.min(
values=tensor([0., 4., 8.]),
indices=tensor([0, 0, 0]))
tensor(22.4944) tensor(66.)
tensor(22.4944) tensor(66.)


# freeze parameter

In [40]:
x = torch.arange(4, dtype=torch.float32, requires_grad=True)
y = torch.mul(x, x)     # element-wise square, tracked by autograd
u = y.detach()          # same values as y, but requires_grad=False
z = torch.mul(u, x)     # u acts as a constant here

# With y frozen through detach(), d(sum(u * x))/dx is just u.
torch.sum(z).backward()
print(x.grad == u)

# Through the un-detached y, d(sum(x * x))/dx is 2x. Gradients accumulate,
# so reset x.grad before the second backward pass.
x.grad.zero_()
torch.sum(y).backward()
print(x.grad == 2 * x)

tensor([True, True, True, True])
tensor([True, True, True, True])


# 交换维度
permute, transpose

In [2]:
import torch
# A 3-D tensor of shape [2, 3, 4] holding the integers 0..23.
x = torch.arange(24).view(2, 3, 4)
print(x.size())

torch.Size([2, 3, 4])


In [4]:
# permute reorders all dimensions at once; transpose swaps exactly two.
permuted = x.permute(2, 1, 0)    # [2, 3, 4] -> [4, 3, 2]
swapped = x.transpose(1, 2)      # [2, 3, 4] -> [2, 4, 3]
permuted.size(), swapped.size()

(torch.Size([4, 3, 2]), torch.Size([2, 4, 3]))