# A System of Equations that has a solution $\mathbf{x}=[x_1,x_2,x_3]=[1,1,-2]$

$x_1 + x_2 + x_3 = 0$

$2x_1 + x_2 + x_3 = 1$

$x_1 + x_2 - x_3 = 4$

## 1. Solving Without Network

The system of equations can be rewritten by a matrix form


$\begin{bmatrix*}[r]
   1 & 1 & 1\\
   2 & 1 & 1\\
   1 & 1 & -1    
\end{bmatrix*}\mathbf{x} = \begin{bmatrix*}[r]
   0\\
   1\\
   4
\end{bmatrix*}$.

$\rightarrow \mathbf{Ax}=b$ with $\mathbf{x} = [x_1~ x_2~x_3]^{T}$

In [1]:
import torch
x = torch.randn((3,1), dtype=torch.float32, requires_grad=True)
A = torch.tensor([[1,2,1],[1,1,1], [1,1,-1]], dtype=torch.float32)
b = torch.tensor([0,1,4], dtype=torch.float32).reshape((3,1))
print(x)
print(A)
print(b)

tensor([[-0.4764],
        [ 0.5709],
        [ 1.3465]], requires_grad=True)
tensor([[ 1.,  2.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1., -1.]])
tensor([[0.],
        [1.],
        [4.]])


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(torch.mm(A, x))

tensor([[ 2.0118],
        [ 1.4409],
        [-1.2520]], grad_fn=<MmBackward0>)


### Update $\mathbf{x}$ to minize the error between $\mathbf{xA}$ and $b$

In [3]:

import torch
x = torch.randn((3,1), dtype=torch.float32, requires_grad=True)
A = torch.tensor([[1,1,1],[2,1,1], [1,1,-1]], dtype=torch.float32)
b = torch.tensor([0,1,4], dtype=torch.float32).reshape((3,1))


lr = 0.1  # Learning rate
num_epochs = 5000  # Number of iterations

for epoch in range(num_epochs):
    #1. Forward Operation
    Ax = torch.mm(A, x)

    #2. Calculate loss
    avg_loss  = (Ax - b) **2 #squared error
    avg_loss = avg_loss.mean()

    #3. AutoGrad using back propagation
    avg_loss.backward() 

    #4. Update variables
    x.data.sub_(lr*x.grad) #torch.sub_ is the in place version of tensor.sub. x.sub_(y)-->x = x - y

    #5. Reset gradients of variables
    x.grad.data.zero_()


    print(f'epoch {epoch+1}, loss {avg_loss.detach().numpy()}, x:{x.t().detach().numpy()}')


epoch 1, loss 11.590909004211426, x:[[ 0.9421066 -0.4247802  0.9645488]]
epoch 2, loss 8.00048828125, x:[[ 0.94993216 -0.3220225   0.47434354]]
epoch 3, loss 5.705703258514404, x:[[ 0.9925862  -0.2092228   0.07428536]]
epoch 4, loss 4.0962090492248535, x:[[ 1.0414398  -0.10368691 -0.25896835]]
epoch 5, loss 2.9459478855133057, x:[[ 1.0870428  -0.01006889 -0.5391209 ]]
epoch 6, loss 2.1209280490875244, x:[[ 1.1267935   0.07134153 -0.77556455]]
epoch 7, loss 1.528753399848938, x:[[ 1.1604604  0.1416326 -0.9754469]]
epoch 8, loss 1.103613257408142, x:[[ 1.1885672   0.20221314 -1.1445277 ]]
epoch 9, loss 0.7983438372612, x:[[ 1.2118205   0.25445446 -1.2875786 ]]
epoch 10, loss 0.5791055560112, x:[[ 1.230915    0.29958335 -1.4086026 ]]
epoch 11, loss 0.4216137230396271, x:[[ 1.2464738   0.33866286 -1.5109763 ]]
epoch 12, loss 0.3084392547607422, x:[[ 1.2590377   0.37260237 -1.597555  ]]
epoch 13, loss 0.22707432508468628, x:[[ 1.2690693   0.40217552 -1.6707559 ]]
epoch 14, loss 0.1685416251

## 2. Solving With Network

In [12]:
#y=2x

import torch
import numpy as np

inputs = torch.tensor([[1],[2]],dtype=torch.float32)
target = torch.tensor([[2],[4]],dtype=torch.float32)

network = torch.nn.Linear(1,1,bias=False)

loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD(network.parameters(),lr=1e-2)

updates = 1000

for i in range(updates):
    #1 forward
    out = network(inputs)
    loss = loss_fn(out,target)

    #2 backward
    optimizer.zero_grad()
    loss.backward()

    #3 update parameters (weight)
    optimizer.step()

    #4 print
    for name,param in network.named_parameters():
        print(name, param.data.numpy())

weight [[0.92360413]]
weight [[0.9774239]]
weight [[1.0285528]]
weight [[1.0771251]]
weight [[1.1232688]]
weight [[1.1671054]]
weight [[1.2087501]]
weight [[1.2483126]]
weight [[1.285897]]
weight [[1.3216021]]
weight [[1.355522]]
weight [[1.387746]]
weight [[1.4183587]]
weight [[1.4474407]]
weight [[1.4750687]]
weight [[1.5013152]]
weight [[1.5262495]]
weight [[1.549937]]
weight [[1.5724401]]
weight [[1.5938182]]
weight [[1.6141273]]
weight [[1.633421]]
weight [[1.6517498]]
weight [[1.6691624]]
weight [[1.6857042]]
weight [[1.701419]]
weight [[1.716348]]
weight [[1.7305306]]
weight [[1.7440041]]
weight [[1.7568039]]
weight [[1.7689637]]
weight [[1.7805156]]
weight [[1.7914897]]
weight [[1.8019153]]
weight [[1.8118196]]
weight [[1.8212286]]
weight [[1.8301672]]
weight [[1.8386588]]
weight [[1.8467258]]
weight [[1.8543895]]
weight [[1.86167]]
weight [[1.8685865]]
weight [[1.8751572]]
weight [[1.8813994]]
weight [[1.8873295]]
weight [[1.8929629]]
weight [[1.8983148]]
weight [[1.9033991]]


In [15]:
#y=2x+3

import torch
import numpy as np

inputs = torch.tensor([[1],[2]],dtype=torch.float32)
target = torch.tensor([[5],[7]],dtype=torch.float32)

network = torch.nn.Linear(1,1,bias=True)

loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD(network.parameters(),lr=1e-1)

updates = 1000

for i in range(updates):
    #1 forward
    out = network(inputs)
    loss = loss_fn(out,target)

    #2 backward
    optimizer.zero_grad()
    loss.backward()

    #3 update parameters (weight)
    optimizer.step()

    #4 print
    for name,param in network.named_parameters():
        print(name, param.data.numpy())

weight [[1.8675231]]
bias [0.78243256]
weight [[2.5990317]]
bias [1.2656891]
weight [[2.819809]]
bias [1.4328418]
weight [[2.8800519]]
bias [1.5003307]
weight [[2.8899267]]
bias [1.536249]
weight [[2.8840885]]
bias [1.5620213]
weight [[2.873438]]
bias [1.5843904]
weight [[2.8614018]]
bias [1.6054809]
weight [[2.8490567]]
bias [1.6259642]
weight [[2.836739]]
bias [1.6460543]
weight [[2.8245533]]
bias [1.6658217]
weight [[2.81253]]
bias [1.6852914]
weight [[2.8006775]]
bias [1.7044741]
weight [[2.7889965]]
bias [1.723376]
weight [[2.7774854]]
bias [1.7420019]
weight [[2.7661421]]
bias [1.7603558]
weight [[2.7549644]]
bias [1.778442]
weight [[2.7439497]]
bias [1.7962643]
weight [[2.7330956]]
bias [1.8138266]
weight [[2.7224]]
bias [1.8311325]
weight [[2.7118602]]
bias [1.848186]
weight [[2.7014742]]
bias [1.8649907]
weight [[2.6912398]]
bias [1.8815503]
weight [[2.6811547]]
bias [1.8978683]
weight [[2.671217]]
bias [1.9139482]
weight [[2.6614242]]
bias [1.9297935]
weight [[2.6517742]]
bia

In [16]:
#y=2x+3

import torch
import numpy as np

network = torch.nn.Linear(1,1,bias=True)

loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD(network.parameters(),lr=1e-1)

updates = 1000

for i in range(updates):
    #0 dataset
    inputs = torch.randn((2,1),dtype=torch.float32)
    target = 2*inputs+3

    #1 forward
    out = network(inputs)
    loss = loss_fn(out,target)

    #2 backward
    optimizer.zero_grad()
    loss.backward()

    #3 update parameters (weight)
    optimizer.step()

    #4 print
    for name,param in network.named_parameters():
        print(name, param.data.numpy())

weight [[0.01696402]]
bias [0.31334096]
weight [[0.4926168]]
bias [1.0477021]
weight [[2.3508132]]
bias [1.5318102]
weight [[2.4002342]]
bias [1.8011354]
weight [[2.2725177]]
bias [2.0365841]
weight [[2.103989]]
bias [2.2687998]
weight [[2.236044]]
bias [2.3928618]
weight [[2.109066]]
bias [2.5510607]
weight [[2.126902]]
bias [2.6359835]
weight [[2.0983703]]
bias [2.7092388]
weight [[2.1275005]]
bias [2.7548018]
weight [[2.1342328]]
bias [2.8000374]
weight [[2.1184425]]
bias [2.8487256]
weight [[2.118808]]
bias [2.873415]
weight [[2.1017556]]
bias [2.864665]
weight [[2.0964804]]
bias [2.8930151]
weight [[2.0654452]]
bias [2.9279213]
weight [[2.0549817]]
bias [2.94428]
weight [[2.0544748]]
bias [2.9537349]
weight [[2.043132]]
bias [2.97039]
weight [[2.042316]]
bias [2.9724176]
weight [[2.041678]]
bias [2.9729338]
weight [[2.0296645]]
bias [2.9796302]
weight [[2.0212476]]
bias [2.9834886]
weight [[2.0076776]]
bias [2.9892213]
weight [[2.008198]]
bias [2.9907694]
weight [[2.0086849]]
bias

$x_1 + x_2 + x_3 = 0$

$2x_1 + x_2 + x_3 = 1$

$x_1 + x_2 - x_3 = 4$

can be rewritten by a matrix form

$\begin{bmatrix*}[r]
   1 & 1 & 1\\
   2 & 1 & 1\\
   1 & 1 & -1    
\end{bmatrix*}\mathbf{x} = \begin{bmatrix*}[r]
   0\\
   1\\
   4
\end{bmatrix*}$.

$\rightarrow \mathbf{Ax}=b$


- $\begin{bmatrix*}[r]
   1 & 1 & 1\\
   2 & 1 & 1\\
   1 & 1 & -1    
\end{bmatrix*}$ is `the input to the network`

- $\mathbf{x}$ is `not an input but network that should be trained !!!!`

- $\begin{bmatrix*}[r]
   0\\
   1\\
   4
\end{bmatrix*}$ is `the target`.

$x_1 + x_2 + x_3 = 0$

$2x_1 + x_2 + x_3 = 1$

$x_1 + x_2 - x_3 = 4$

다음 선형 방정식들은 행렬 형태로 다시 쓸 수 있습니다.

$\begin{bmatrix*}[r]
   1 & 1 & 1\\
   2 & 1 & 1\\
   1 & 1 & -1    
\end{bmatrix*}\mathbf{x} = \begin{bmatrix*}[r]
   0\\
   1\\
   4
\end{bmatrix*}$.

$\rightarrow \mathbf{Ax}=b$

- $\begin{bmatrix*}[r]
   1 & 1 & 1\\
   2 & 1 & 1\\
   1 & 1 & -1    
\end{bmatrix*}$는 "네트워크의 입력"입니다.

- $\mathbf{x}$는 "입력이 아니라 훈련되어야 하는 네트워크입니다 !!!"

- $\begin{bmatrix*}[r]
   0\\
   1\\
   4
\end{bmatrix*}$는 "목표값"입니다.

이것은 선형 시스템을 나타내는 표준 행렬 방정식 형태입니다. 이를 풀면, $\mathbf{x}$ 값을 찾는 것은 주어진 입력과 목표값으로 모델을 훈련시키는 것과 같은 의미를 갖습니다.

### Train the network $\mathbf{x}$ so that the output of the network becomes the target

Then, the parameter $\mathbf{x}$ becomes `the solution` !!!

In [4]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn

class network(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 1, bias = False)  # input and output is 1 dimension

    def forward(self, x):
        out = self.linear(x)
        return out

inputs = torch.tensor([[1,1,1],[2,1,1], [1,1,-1]], dtype=torch.float32)
target = torch.tensor([0,1,4], dtype=torch.float32).reshape((3,1))

#model = network()
model = nn.Linear(3, 1, bias = False)

criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

num_epochs = 10000
for epoch in range(num_epochs):
    # forward
    out = model(inputs)
    loss = criterion(out, target)
    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 20 == 0:
        print(f'Epoch[{epoch+1}/{num_epochs}], out: {out.data.numpy()[:,0]}, loss: {loss.item():.6f}', end=' ')
        for name, param in model.named_parameters():
            print (name, param.data.numpy(), end=' ')
        print()


Epoch[20/10000], out: [0.55411243 0.2698158  0.02831388], loss: 5.538167 weight [[-0.28104466  0.57827497  0.26036888]] 
Epoch[40/10000], out: [0.618255   0.39588046 0.19402443], loss: 5.077550 weight [[-0.21944396  0.63104224  0.20956856]] 
Epoch[60/10000], out: [0.6716123  0.5050883  0.34937817], loss: 4.674347 weight [[-0.16387808  0.6793352   0.15856552]] 
Epoch[80/10000], out: [0.71556616 0.5995109  0.49532828], loss: 4.318384 weight [[-0.11366188  0.7236289   0.10757246]] 
Epoch[100/10000], out: [0.75133044 0.6809701  0.6327227 ], loss: 4.001612 weight [[-0.06819101  0.76434356  0.05677081]] 
Epoch[120/10000], out: [0.77997106 0.7510683  0.7623163 ], loss: 3.717639 weight [[-0.02693242  0.8018509   0.00631488]] 
Epoch[140/10000], out: [0.80242383 0.81121445 0.8847817 ], loss: 3.461370 weight [[ 0.01058423  0.83647984 -0.04366484]] 
Epoch[160/10000], out: [0.81951004 0.8626478  1.0007187 ], loss: 3.228717 weight [[ 0.04477404  0.86852145 -0.09305864]] 
Epoch[180/10000], out: [0.83

# `Lab: Solve the System of Equations with the following answers using custom dataset and dataloader`
$\mathbf{x} = [0, 0.5, 1, 1, -2]^{T}$

In [3]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

import numpy as np
solution = np.array([[0, 0.5, 1,1,-2]]).T
num_samples = 20
inputs = np.random.randn(num_samples, 5)
targets = np.matmul (inputs, solution)


inputs = torch.FloatTensor(inputs)
targets = torch.FloatTensor(targets)

dataset = TensorDataset(inputs, targets) #tuple of inputs and targets
batch_size = 3
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last= True)


In [2]:
class network(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(5, 1, bias = False)  # input and output is 1 dimension

    def forward(self, x):
        out = self.linear(x)
        return out
    
model = network()

criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)


num_epochs = 2000

for epoch in range(num_epochs):    
    for input, target in train_loader:
        pred = model(input)
        loss = criterion(pred, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch[{epoch+1}/{num_epochs}]')
    print(f'    out: {np.around(pred.data.numpy()[:,0],3)}')
    print(f'    target: {np.around(target.reshape(batch_size).data.numpy(), 3)}')
    print(f'    loss: {loss.item():.6f}')
    for name, param in model.named_parameters():
        print (f'    {name}: {np.around(param.data.numpy(),3)}')
    print(f'solution:{solution.T[0]}')

Epoch[1/2000]
    out: [0.198 0.506 0.079]
    target: [1.302 0.92  1.834]
    loss: 1.489969
    linear.weight: [[ 0.18   0.37   0.279  0.231 -0.318]]
solution:[ 0.   0.5  1.   1.  -2. ]
Epoch[2/2000]
    out: [-0.961  0.185  0.134]
    target: [ 0.649  0.65  -2.001]
    loss: 2.455014
    linear.weight: [[ 0.18   0.369  0.285  0.232 -0.328]]
solution:[ 0.   0.5  1.   1.  -2. ]
Epoch[3/2000]
    out: [-0.33  -0.166  0.115]
    target: [-0.642  0.297 -2.001]
    loss: 1.596410
    linear.weight: [[ 0.179  0.367  0.29   0.231 -0.339]]
solution:[ 0.   0.5  1.   1.  -2. ]
Epoch[4/2000]
    out: [ 0.061  0.099 -0.178]
    target: [-1.064  1.834  0.16 ]
    loss: 1.463933
    linear.weight: [[ 0.177  0.365  0.294  0.232 -0.35 ]]
solution:[ 0.   0.5  1.   1.  -2. ]
Epoch[5/2000]
    out: [ 0.888 -1.012  0.086]
    target: [ 3.683 -1.852 -2.001]
    loss: 4.289647
    linear.weight: [[ 0.176  0.363  0.299  0.232 -0.36 ]]
solution:[ 0.   0.5  1.   1.  -2. ]
Epoch[6/2000]
    out: [-0.84  -0.17

In [None]:
class network(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(5, 1, bias=False)  # 입력과 출력이 각각 1 차원입니다.

    def forward(self, x):
        out = self.linear(x)
        return out

model = network()  # 선형 회귀 모델을 초기화합니다.

criterion = nn.MSELoss()  # 평균 제곱 오차 손실 함수를 설정합니다.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)  # 확률적 경사 하강법을 사용하여 최적화합니다.

num_epochs = 2000  # 에포크 수를 설정합니다.

for epoch in range(num_epochs):
    for input, target in train_loader:  # 데이터로더에서 입력과 목표값을 가져옵니다.
        pred = model(input)  # 모델을 사용하여 예측을 생성합니다.
        loss = criterion(pred, target)  # 손실을 계산합니다.
        optimizer.zero_grad()  # 그래디언트 초기화
        loss.backward()  # 역전파 수행
        optimizer.step()  # 모델 파라미터 업데이트

    print(f'Epoch[{epoch+1}/{num_epochs}]')
    print(f'    out: {np.around(pred.data.numpy()[:,0],3)}')  # 예측 출력
    print(f'    target: {np.around(target.reshape(batch_size).data.numpy(), 3)}')  # 목표값
    print(f'    loss: {loss.item():.6f}')  # 손실
    for name, param in model.named_parameters():
        print (f'    {name}: {np.around(param.data.numpy(),3)}')  # 모델 파라미터
    print(f'solution:{solution.T[0]}')  # 실제 솔루션


In [8]:
target

tensor([[ 1.4847],
        [-1.4357]])