In [2]:
%matplotlib inline
import numpy as np
import torch
import torch.optim as optim

# Compact tensor printing: show 2 items at each edge of a summarized
# dimension and break printed lines at 75 characters.
torch.set_printoptions(edgeitems=2, linewidth=75)

In [3]:
# Paired temperature readings. The plot at the end of this notebook labels
# t_u as Fahrenheit and t_c as Celsius.
# unsqueeze(1) turns each flat vector of 11 values into an (11, 1) tensor:
# one row per sample, one feature per row.
t_c = torch.tensor(
    [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
).unsqueeze(1)
t_u = torch.tensor(
    [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
).unsqueeze(1)

t_u.shape

torch.Size([11, 1])

In [4]:
# Hold out roughly 20% of the samples (rounded down) for validation.
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)
n_train = n_samples - n_val

# Random permutation of the sample indices. No seed is set in this cell, so
# the split differs between runs unless a seed was set earlier.
shuffled_indices = torch.randperm(n_samples)

# Split at n_train instead of slicing with -n_val: if n_val is 0 (fewer than
# 5 samples), [:-0] would be empty and [-0:] would select every sample,
# silently swapping the two sets. For n_val > 0 the result is unchanged.
train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

train_indices, val_indices

(tensor([ 3,  6,  8, 10,  5,  2,  9,  0,  1]), tensor([7, 4]))

In [5]:
# Index inputs and targets with the same index tensors so pairs stay aligned.
t_u_train, t_c_train = t_u[train_indices], t_c[train_indices]
t_u_val, t_c_val = t_u[val_indices], t_c[val_indices]

# Rescaled inputs (multiplied by 0.1); these "t_un" tensors are what the
# training cells below feed to the models.
t_un_train, t_un_val = 0.1 * t_u_train, 0.1 * t_u_val

In [6]:
import torch.nn as nn

linear_model = nn.Linear(1, 1) # number of input and output features
# Forward pass on the validation inputs with the freshly initialized
# (untrained) layer.
linear_model(t_un_val)

tensor([[-0.8803],
        [-3.4640]], grad_fn=<AddmmBackward0>)

In [7]:
# Inspect the layer's weight Parameter (a 1x1 tensor for nn.Linear(1, 1)).
linear_model.weight

Parameter containing:
tensor([[-0.7489]], requires_grad=True)

In [8]:
# Inspect the layer's bias Parameter (a single value for one output feature).
linear_model.bias

Parameter containing:
tensor([0.7522], requires_grad=True)

In [9]:
# A single sample without a batch dimension: an input of shape (1,)
# produces an output of shape (1,).
x = torch.ones(1)
linear_model(x)

tensor([0.0034], grad_fn=<AddBackward0>)

In [10]:
# A batch of 10 identical samples, shape (10, 1): the layer is applied to
# each row independently, so every output row is the same value.
x = torch.ones(10, 1)
linear_model(x)      

tensor([[0.0034],
        [0.0034],
        [0.0034],
        [0.0034],
        [0.0034],
        [0.0034],
        [0.0034],
        [0.0034],
        [0.0034],
        [0.0034]], grad_fn=<AddmmBackward0>)

In [11]:
# B*C*H*W -- presumably batch x channels x height x width, the common
# image-tensor layout; this note is not used by the code in this cell.
linear_model = nn.Linear(1, 1)

# SGD receives the model's parameters (an iterable yielding weight and bias)
# instead of hand-managed parameter tensors.
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

In [12]:
# parameters() returns a generator, so displaying it shows only the
# generator object, not the parameter values.
linear_model.parameters()

<generator object Module.parameters at 0x00000234BE0F6820>

In [13]:
# Materialize the generator into a list to see the weight and bias Parameters.
list(linear_model.parameters())

[Parameter containing:
 tensor([[-0.5979]], requires_grad=True),
 Parameter containing:
 tensor([0.2692], requires_grad=True)]

In [14]:
def training_loop(n_epochs, optimizer, model, loss_fn, t_u_train, t_u_val, t_c_train, t_c_val):
    """Train ``model`` with full-batch gradient steps, logging both losses.

    The model itself is passed in (rather than a raw parameter tensor); the
    optimizer is expected to have been built from that model's parameters.

    Args:
        n_epochs: Number of optimization steps; epochs are counted from 1.
        optimizer: Optimizer whose ``zero_grad``/``step`` drive the update.
        model: Callable mapping an input tensor to predictions.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a
            scalar tensor.
        t_u_train: Training inputs.
        t_u_val: Validation inputs.
        t_c_train: Training targets.
        t_c_val: Validation targets.

    Returns:
        None. The model is updated in place through ``optimizer.step()``;
        losses are printed at epoch 1 and every 1000th epoch.
    """
    for epoch in range(1, n_epochs + 1):
        t_p_train = model(t_u_train)
        loss_train = loss_fn(t_p_train, t_c_train)

        # The validation loss is only reported, never back-propagated, so
        # there is no reason to record an autograd graph for it.
        with torch.no_grad():
            t_p_val = model(t_u_val)
            loss_val = loss_fn(t_p_val, t_c_val)

        # Clear gradients left over from the previous step before computing
        # new ones; backward() accumulates into .grad.
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        if epoch == 1 or epoch % 1000 == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")

In [15]:
# Hand-written loss function
def loss_fn(t_p, t_c):
    """Return the mean of the squared differences between ``t_p`` and ``t_c``.

    Args:
        t_p: Predicted values.
        t_c: Target values; combined element-wise with ``t_p``.

    Returns:
        The mean squared error as produced by ``.mean()`` on the squared
        residuals.
    """
    residuals = t_p - t_c
    return (residuals ** 2).mean()

# Fresh model and optimizer so this run starts from newly initialized
# parameters.
linear_model = nn.Linear(1, 1)
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

# Train on the rescaled inputs using the hand-written loss_fn.
training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    model=linear_model,
    loss_fn=loss_fn,
    t_u_train=t_un_train,
    t_u_val=t_un_val,
    t_c_train=t_c_train,
    t_c_val=t_c_val,
)

# Blank separator line, then the learned weight and bias.
print()
print(linear_model.weight)
print(linear_model.bias)

Epoch 1, Training loss 175.7538, Validation loss 55.0326
Epoch 1000, Training loss 4.5631, Validation loss 2.7121
Epoch 2000, Training loss 2.8500, Validation loss 3.3770
Epoch 3000, Training loss 2.7084, Validation loss 4.6428

Parameter containing:
tensor([[5.5946]], requires_grad=True)
Parameter containing:
tensor([-18.4818], requires_grad=True)


In [16]:
# Same experiment, using the built-in nn.MSELoss() as the loss.
linear_model = nn.Linear(1, 1)
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    model=linear_model,
    loss_fn=nn.MSELoss(),
    t_u_train=t_un_train,
    t_u_val=t_un_val,
    t_c_train=t_c_train,
    t_c_val=t_c_val,
)

# Blank separator line, then the learned weight and bias.
print()
print(linear_model.weight)
print(linear_model.bias)

Epoch 1, Training loss 223.1891, Validation loss 73.0458
Epoch 1000, Training loss 4.8824, Validation loss 3.0087
Epoch 2000, Training loss 2.8763, Validation loss 3.2620
Epoch 3000, Training loss 2.7106, Validation loss 4.5932

Parameter containing:
tensor([[5.5883]], requires_grad=True)
Parameter containing:
tensor([-18.4452], requires_grad=True)


In [17]:
# Small network: 1 input feature -> 13 hidden units -> tanh -> 1 output.
seq_model = nn.Sequential(
    nn.Linear(in_features=1, out_features=13),
    nn.Tanh(),
    nn.Linear(in_features=13, out_features=1),
)
seq_model

Sequential(
  (0): Linear(in_features=1, out_features=13, bias=True)
  (1): Tanh()
  (2): Linear(in_features=13, out_features=1, bias=True)
)

In [18]:
[param.shape for param in seq_model.parameters()] # two layers: a weight and a bias shape for each

[torch.Size([13, 1]), torch.Size([13]), torch.Size([1, 13]), torch.Size([1])]

In [19]:
# Get the default names of the parameter tensors: in an unnamed Sequential
# each name is prefixed with the layer's position index.
for name, param in seq_model.named_parameters():
    print(name, param.shape)

0.weight torch.Size([13, 1])
0.bias torch.Size([13])
2.weight torch.Size([1, 13])
2.bias torch.Size([1])


In [20]:
from collections import OrderedDict

# Same architecture shape as before but with 8 hidden units, and with each
# layer given an explicit name. Keyword order is preserved by OrderedDict,
# so the layers are registered (and run) in the order written.
seq_model = nn.Sequential(
    OrderedDict(
        hidden_linear=nn.Linear(1, 8),
        hidden_activation=nn.Tanh(),
        output_linear=nn.Linear(8, 1),
    )
)

seq_model

Sequential(
  (hidden_linear): Linear(in_features=1, out_features=8, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=8, out_features=1, bias=True)
)

In [21]:
# With named layers, parameter names are prefixed with the layer name
# instead of a position index.
for name, param in seq_model.named_parameters():
    print(name, param.shape)

hidden_linear.weight torch.Size([8, 1])
hidden_linear.bias torch.Size([8])
output_linear.weight torch.Size([1, 8])
output_linear.bias torch.Size([1])


In [22]:
# Named layers are reachable as attributes of the Sequential.
seq_model.output_linear.bias

Parameter containing:
tensor([-0.1762], requires_grad=True)

In [23]:
# Learning rate is 1e-3 here, smaller than the 1e-2 used for the linear
# model runs above.
optimizer = optim.SGD(seq_model.parameters(), lr=1e-3)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    model=seq_model,
    loss_fn=nn.MSELoss(),
    t_u_train=t_un_train,
    t_u_val=t_un_val,
    t_c_train=t_c_train,
    t_c_val=t_c_val,
)

# Compare validation predictions with the targets, then show the gradient
# left on the hidden layer's weight by the last backward pass.
print('output', seq_model(t_un_val))
print('answer', t_c_val)
print('hidden', seq_model.hidden_linear.weight.grad)

Epoch 1, Training loss 211.8919, Validation loss 68.2542
Epoch 1000, Training loss 6.5521, Validation loss 13.5001
Epoch 2000, Training loss 3.3525, Validation loss 5.1517
Epoch 3000, Training loss 2.0467, Validation loss 5.1301
Epoch 4000, Training loss 1.9261, Validation loss 4.8428
Epoch 5000, Training loss 1.8760, Validation loss 4.8208
output tensor([[-1.3088],
        [12.5490]], grad_fn=<AddmmBackward0>)
answer tensor([[-4.],
        [11.]])
hidden tensor([[ 8.0389e-03],
        [ 4.1340e-03],
        [ 8.5762e-05],
        [-1.5454e-02],
        [-1.0769e-02],
        [ 3.0969e-03],
        [ 3.5060e-03],
        [ 6.4656e-03]])


In [None]:
from matplotlib import pyplot as plt

# Evenly spaced inputs from 20 to 89 (step 1) as a column, used to draw the
# model's curve.
t_range = torch.arange(20., 90.).unsqueeze(1)

fig, ax = plt.subplots(dpi=600)
ax.set_xlabel("Fahrenheit")
ax.set_ylabel("Celsius")

# Measured data as circle markers.
ax.plot(t_u.numpy(), t_c.numpy(), 'o')

# The model was trained on inputs scaled by 0.1, so the same factor is
# applied here. detach() is required before numpy() because the model
# outputs are attached to the autograd graph.
curve = seq_model(0.1 * t_range).detach().numpy()
ax.plot(t_range.numpy(), curve, 'c-')  # model curve: cyan solid line

predicted = seq_model(0.1 * t_u).detach().numpy()
ax.plot(t_u.numpy(), predicted, 'kx')  # predictions at the data points: black crosses