In [1]:
import torch
from torch import nn
import numpy as np

In [2]:
!nvidia-smi

Thu Apr 14 19:04:45 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 3090    Off  | 00000000:01:00.0 Off |                  N/A |
| 36%   30C    P8    19W / 350W |  17155MiB / 24265MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

## Tensor의 저장 및 불러오기

In [3]:
x = torch.tensor([1,2,4,8])
y = torch.tensor([0,3,6,9,12])
x,y

(tensor([1, 2, 4, 8]), tensor([ 0,  3,  6,  9, 12]))

In [4]:
my_dict = {'d1':x, 'd2':y}
torch.save(my_dict,'my_data')

In [5]:
d = torch.load('my_data')
print("d:",d)
print("d['d2']:",d['d2'])

d: {'d1': tensor([1, 2, 4, 8]), 'd2': tensor([ 0,  3,  6,  9, 12])}
d['d2']: tensor([ 0,  3,  6,  9, 12])


## Network 파라미터 확인 및 직접 변경

In [6]:
net1 = nn.Sequential(nn.Linear(3,3,bias=False),nn.Softmax())
net1

Sequential(
  (0): Linear(in_features=3, out_features=3, bias=False)
  (1): Softmax(dim=None)
)

In [7]:
net1[0].weight.data

tensor([[-0.1694,  0.0433, -0.4909],
        [-0.5456,  0.0588,  0.2438],
        [-0.0405,  0.2435,  0.0562]])

In [8]:
net1.state_dict()

OrderedDict([('0.weight',
              tensor([[-0.1694,  0.0433, -0.4909],
                      [-0.5456,  0.0588,  0.2438],
                      [-0.0405,  0.2435,  0.0562]]))])

In [9]:
x = torch.tensor([3.,3.,3.])

In [10]:
y = torch.matmul(net1[0].weight.data,x).softmax(0)
print(y)

tensor([0.0558, 0.1713, 0.7729])


In [11]:
y = net1(x)
print(y)

tensor([0.0558, 0.1713, 0.7729], grad_fn=<SoftmaxBackward0>)


  input = module(input)


In [12]:
net1[0].weight.data = torch.eye(3)
net1[0].weight.data

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [13]:
y = net1(x)
print(y)

tensor([0.3333, 0.3333, 0.3333], grad_fn=<SoftmaxBackward0>)


## Network 파라미터들의 저장 및 불러오기

In [14]:
net1.state_dict()

OrderedDict([('0.weight',
              tensor([[1., 0., 0.],
                      [0., 1., 0.],
                      [0., 0., 1.]]))])

In [15]:
torch.save(net1.state_dict(),'net1.params')

In [16]:
net2 = nn.Sequential(nn.Linear(3,3,bias=False),nn.Softmax())
net2.state_dict()

OrderedDict([('0.weight',
              tensor([[ 0.4436,  0.4721, -0.4767],
                      [ 0.5730, -0.1541,  0.1248],
                      [-0.3381, -0.3861, -0.1668]]))])

In [17]:
y = net2(x)
print(y)

tensor([0.4189, 0.5734, 0.0077], grad_fn=<SoftmaxBackward0>)


In [18]:
net1_params = torch.load('net1.params')
net1_params

OrderedDict([('0.weight',
              tensor([[1., 0., 0.],
                      [0., 1., 0.],
                      [0., 0., 1.]]))])

In [19]:
net2.load_state_dict(net1_params)
net2.state_dict()

OrderedDict([('0.weight',
              tensor([[1., 0., 0.],
                      [0., 1., 0.],
                      [0., 0., 1.]]))])

In [20]:
y = net2(x)
print(y)

tensor([0.3333, 0.3333, 0.3333], grad_fn=<SoftmaxBackward0>)


## GPU활용하기

In [21]:
print("활용가능 NVIDIA gpu 개수는?",torch.cuda.device_count())

활용가능 NVIDIA gpu 개수는? 1


In [22]:
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
print("device:",device)

device: cuda:0


In [23]:
print(net1[0].weight.data.device)
print(x.device)

cpu
cpu


In [24]:
x_gpu = x.to(device)
net1(x_gpu)

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat2 in method wrapper_mm)

In [26]:
net1.to("cuda:0")
net1(x_gpu)

tensor([0.3333, 0.3333, 0.3333], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [27]:
net1(x)

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat2 in method wrapper_mm)

In [28]:
net1(x.to(device))

tensor([0.3333, 0.3333, 0.3333], device='cuda:0', grad_fn=<SoftmaxBackward0>)

## GPU를 통한 deep network 학습

In [29]:
import time

In [30]:
deepnet = nn.Sequential(nn.Linear(1024,1024),nn.ReLU(),
                       nn.Linear(1024,2048),nn.ReLU(),
                       nn.Linear(2048,4096),nn.ReLU(),
                       nn.Linear(4096,4096),nn.ReLU(),                        
                       nn.Linear(4096,2048),nn.ReLU(),
                       nn.Linear(2048,1024),nn.ReLU(),
                        nn.Linear(1024,2))
deepnet

Sequential(
  (0): Linear(in_features=1024, out_features=1024, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1024, out_features=2048, bias=True)
  (3): ReLU()
  (4): Linear(in_features=2048, out_features=4096, bias=True)
  (5): ReLU()
  (6): Linear(in_features=4096, out_features=4096, bias=True)
  (7): ReLU()
  (8): Linear(in_features=4096, out_features=2048, bias=True)
  (9): ReLU()
  (10): Linear(in_features=2048, out_features=1024, bias=True)
  (11): ReLU()
  (12): Linear(in_features=1024, out_features=2, bias=True)
)

In [31]:
loss = nn.CrossEntropyLoss(reduction='mean')
alg = torch.optim.SGD(deepnet.parameters(),0.01, weight_decay=0.01)

In [32]:
num_epochs = 16
s1 = time.time()
deepnet.to("cpu")
for epoch in range(num_epochs):
    X = torch.randn((4096,1024))
    y = torch.randint(0,2,(4096,))
    y_pred = deepnet(X)
    l = loss(y_pred,y)
    l.backward()
    alg.step()
    alg.zero_grad()
s2 = time.time()

print(f'calculation time: {np.round(s2-s1)} seconds')

calculation time: 40.0 seconds


In [33]:
print(device)

cuda:0


In [34]:
num_epochs = 16
s1 = time.time()
deepnet.to(device)
for epoch in range(num_epochs):
    X = torch.randn((4096,1024)).to(device)
    y = torch.randint(0,2,(4096,)).to("cuda:0")
    y_pred = deepnet(X)
    l = loss(y_pred,y)
    l.backward()
    alg.step()
    alg.zero_grad()
s2 = time.time()

print(f'calculation time: {np.round(s2-s1)} seconds')

calculation time: 1.0 seconds
