<a href="https://colab.research.google.com/github/jhlee508/nlp-with-pytorch/blob/master/nlp_with_pytorch_week1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 파이토치 기초

### 텐서 만들기

In [None]:
import torch
import numpy as np
# Seed every random number generator this notebook draws from (NumPy and PyTorch)
# so that the random tensors are reproducible after Restart & Run All.
SEED = 2021
np.random.seed(SEED)
# Kept as the last expression so the cell still displays the torch Generator.
torch.manual_seed(SEED)

<torch._C.Generator at 0x7fda541119d0>

In [None]:
def describe(x):
    """Print a short summary of ``x``: its type string, its shape and its values.

    Args:
        x: Object exposing ``type()`` and ``shape`` (a torch tensor in this notebook).
    """
    kind = x.type()
    size = x.shape
    print(f"타입: {kind}")
    print(f"크기: {size}")
    print(f"값: \n{x}")

In [None]:
# Legacy constructor called with sizes only: the 2x3 float tensor is allocated
# but not initialized, so the printed values are arbitrary leftovers from memory.
describe(torch.Tensor(2, 3))

타입: torch.FloatTensor
크기: torch.Size([2, 3])
값: 
tensor([[-3.8786e+12,  3.0831e-41,  3.7835e-44],
        [ 0.0000e+00,         nan,  3.0831e-41]])


In [None]:
describe(torch.rand(2, 3))  # uniform distribution on [0, 1)
describe(torch.randn(2, 3))  # standard normal distribution

타입: torch.FloatTensor
크기: torch.Size([2, 3])
값: 
tensor([[0.1304, 0.5134, 0.7426],
        [0.7159, 0.5705, 0.1653]])
타입: torch.FloatTensor
크기: torch.Size([2, 3])
값: 
tensor([[-0.5708,  0.7409, -1.2991],
        [ 0.1929,  0.4697, -0.0257]])


In [None]:
describe(torch.zeros(2, 3))  # all zeros
describe(torch.ones(2, 3))  # all ones
x = torch.ones(2, 3)
# fill_ (trailing underscore = in-place) overwrites every element of x with 5.
describe(x.fill_(5))

타입: torch.FloatTensor
크기: torch.Size([2, 3])
값: 
tensor([[0., 0., 0.],
        [0., 0., 0.]])
타입: torch.FloatTensor
크기: torch.Size([2, 3])
값: 
tensor([[1., 1., 1.],
        [1., 1., 1.]])
타입: torch.FloatTensor
크기: torch.Size([2, 3])
값: 
tensor([[5., 5., 5.],
        [5., 5., 5.]])


In [None]:
# Build a tensor from a nested Python list; torch.Tensor stores the integers
# as floats (the result is a FloatTensor).
x = torch.Tensor([[1, 2, 3], [4, 5, 6]])
describe(x)

타입: torch.FloatTensor
크기: torch.Size([2, 3])
값: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])


In [None]:
# NumPy's float64 dtype carries over: the converted tensor is a DoubleTensor,
# not the FloatTensor that torch.rand produces.
npy = np.random.rand(2, 3)
describe(torch.from_numpy(npy))

타입: torch.DoubleTensor
크기: torch.Size([2, 3])
값: 
tensor([[0.0800, 0.2433, 0.3978],
        [0.9422, 0.0528, 0.8170]], dtype=torch.float64)


In [None]:
# Explicitly typed constructor: float values.
x = torch.FloatTensor([[1, 2, 3],  
                       [4, 5, 6]])
describe(x)

# .long() converts the data to int64 (LongTensor).
x = x.long()
describe(x)

# Same int64 result via torch.tensor with an explicit dtype.
x = torch.tensor([[1, 2, 3], 
                  [4, 5, 6]], dtype=torch.int64)
describe(x)

# .float() converts back to a FloatTensor.
x = x.float() 
describe(x)

타입: torch.FloatTensor
크기: torch.Size([2, 3])
값: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])
타입: torch.LongTensor
크기: torch.Size([2, 3])
값: 
tensor([[1, 2, 3],
        [4, 5, 6]])
타입: torch.LongTensor
크기: torch.Size([2, 3])
값: 
tensor([[1, 2, 3],
        [4, 5, 6]])
타입: torch.FloatTensor
크기: torch.Size([2, 3])
값: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])


In [None]:
# Random float tensor used as the operand in the addition examples.
x = torch.randn(2, 3)
describe(x)

타입: torch.FloatTensor
크기: torch.Size([2, 3])
값: 
tensor([[-0.1157,  1.0702,  2.8246],
        [-0.8402, -0.2610, -1.0075]])


In [None]:
# Element-wise addition, function form.
describe(torch.add(x, x))

타입: torch.FloatTensor
크기: torch.Size([2, 3])
값: 
tensor([[-0.2314,  2.1403,  5.6492],
        [-1.6804, -0.5220, -2.0150]])


In [None]:
# Element-wise addition via the + operator (equivalent to torch.add(x, x)).
describe(x + x)

타입: torch.FloatTensor
크기: torch.Size([2, 3])
값: 
tensor([[-0.2314,  2.1403,  5.6492],
        [-1.6804, -0.5220, -2.0150]])


In [None]:
# arange(6) yields the 1-D integer tensor [0, 1, 2, 3, 4, 5].
x = torch.arange(6)
describe(x)

# view reinterprets the same six elements as a 2x3 tensor, keeping their order.
x = x.view(2, 3)
describe(x)

타입: torch.LongTensor
크기: torch.Size([6])
값: 
tensor([0, 1, 2, 3, 4, 5])
타입: torch.LongTensor
크기: torch.Size([2, 3])
값: 
tensor([[0, 1, 2],
        [3, 4, 5]])


In [None]:
describe(torch.sum(x, dim=0))  # sum down the rows (dim 0): one total per column
describe(torch.sum(x, dim=1))  # sum across the columns (dim 1): one total per row

타입: torch.LongTensor
크기: torch.Size([3])
값: 
tensor([3, 5, 7])
타입: torch.LongTensor
크기: torch.Size([2])
값: 
tensor([ 3, 12])


In [None]:
# Swap dimensions 0 and 1: the 2x3 tensor becomes 3x2.
describe(torch.transpose(x, 0, 1))

타입: torch.LongTensor
크기: torch.Size([3, 2])
값: 
tensor([[0, 3],
        [1, 4],
        [2, 5]])


In [None]:
x = torch.arange(6).view(2, 3)
describe(x)
describe(x[:1, :2])  # slicing keeps both dimensions: shape [1, 2]
describe(x[0, 1])  # integer indexing yields a 0-dimensional tensor

타입: torch.LongTensor
크기: torch.Size([2, 3])
값: 
tensor([[0, 1, 2],
        [3, 4, 5]])
타입: torch.LongTensor
크기: torch.Size([1, 2])
값: 
tensor([[0, 1]])
타입: torch.LongTensor
크기: torch.Size([])
값: 
1


In [None]:
indices = torch.LongTensor([0, 2])
describe(torch.index_select(x, dim=1, index=indices))  # pick columns 0 and 2

타입: torch.LongTensor
크기: torch.Size([2, 2])
값: 
tensor([[0, 2],
        [3, 5]])


In [None]:
# An index may be repeated: row 0 is selected twice.
indices = torch.LongTensor([0, 0])
describe(torch.index_select(x, dim=0, index=indices))

타입: torch.LongTensor
크기: torch.Size([2, 3])
값: 
tensor([[0, 1, 2],
        [0, 1, 2]])


In [None]:
# Paired index tensors pick individual elements: positions (0, 0) and (1, 1).
row_indices = torch.arange(2).long()
col_indices = torch.LongTensor([0, 1])
describe(x[row_indices, col_indices])

타입: torch.LongTensor
크기: torch.Size([2])
값: 
tensor([0, 4])


In [None]:
x = torch.arange(6).view(2,3)
describe(x)
describe(torch.cat([x, x], dim=0))  # concatenate along rows -> shape [4, 3]
describe(torch.cat([x, x], dim=1))  # concatenate along columns -> shape [2, 6]
describe(torch.stack([x, x]))  # stack along a new leading dimension -> shape [2, 2, 3]

타입: torch.LongTensor
크기: torch.Size([2, 3])
값: 
tensor([[0, 1, 2],
        [3, 4, 5]])
타입: torch.LongTensor
크기: torch.Size([4, 3])
값: 
tensor([[0, 1, 2],
        [3, 4, 5],
        [0, 1, 2],
        [3, 4, 5]])
타입: torch.LongTensor
크기: torch.Size([2, 6])
값: 
tensor([[0, 1, 2, 0, 1, 2],
        [3, 4, 5, 3, 4, 5]])
타입: torch.LongTensor
크기: torch.Size([2, 2, 3])
값: 
tensor([[[0, 1, 2],
         [3, 4, 5]],

        [[0, 1, 2],
         [3, 4, 5]]])


In [None]:
# Left operand: 2x3 float matrix holding 0..5.
x1 = torch.arange(6).view(2, 3).float()
describe(x1)

# Right operand: 3x2 matrix of ones whose second column is bumped to 2 in place.
x2 = torch.ones(3, 2)
x2[:, 1] += 1
describe(x2)

describe(torch.mm(x1, x2))  # matrix multiplication: (2x3) @ (3x2) -> 2x2

타입: torch.FloatTensor
크기: torch.Size([2, 3])
값: 
tensor([[0., 1., 2.],
        [3., 4., 5.]])
타입: torch.FloatTensor
크기: torch.Size([3, 2])
값: 
tensor([[1., 2.],
        [1., 2.],
        [1., 2.]])
타입: torch.FloatTensor
크기: torch.Size([2, 2])
값: 
tensor([[ 3.,  6.],
        [12., 24.]])


In [None]:
# Report whether PyTorch can see a CUDA device.
print(torch.cuda.is_available())

True


In [None]:
# No device is given, so this is a plain CPU tensor (torch.FloatTensor).
x = torch.rand(3,3)
describe(x)

타입: torch.FloatTensor
크기: torch.Size([3, 3])
값: 
tensor([[0.4724, 0.5814, 0.1358],
        [0.5071, 0.0114, 0.7892],
        [0.1523, 0.1646, 0.9006]])


In [None]:
# Device-agnostic setup: use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [None]:
# Create a tensor and move it to the device selected earlier.
x = torch.rand(3, 3).to(device)
describe(x)
print(x.device)

타입: torch.cuda.FloatTensor
크기: torch.Size([3, 3])
값: 
tensor([[0.3421, 0.1729, 0.0969],
        [0.7246, 0.0545, 0.8811],
        [0.6582, 0.9686, 0.3049]], device='cuda:0')
cuda:0


In [None]:
# Explicit CPU device handle, used to move tensors back to the CPU.
cpu_device = torch.device("cpu")

In [None]:
# No error here: x and y live on the same device, so the addition succeeds.
# (Adding a GPU tensor to a CPU tensor is the case that raises a RuntimeError.)
# .to(device) is used instead of .cuda() so the cell also runs on CPU-only machines.
y = torch.rand(3, 3).to(device)
print(y.device)
x + y

cuda:0


tensor([[0.4079, 0.6250, 0.9952],
        [0.9865, 0.0917, 1.7677],
        [1.5116, 1.8746, 1.1260]], device='cuda:0')

In [None]:
# Move both operands to the CPU, then add them there.
y = y.to(cpu_device)
x = x.to(cpu_device)
x + y

tensor([[0.4079, 0.6250, 0.9952],
        [0.9865, 0.0917, 1.7677],
        [1.5116, 1.8746, 1.1260]])

In [None]:
if torch.cuda.is_available(): # run only when a GPU is available
    a = torch.rand(3,3).to(device='cuda:0') # CUDA tensor
    print(a)
    
    b = torch.rand(3,3).cuda()
    print(b)

    print(a + b)

    a = a.cuda() # no error: a is already on the GPU, so the sum below is unchanged
    print(a + b)

tensor([[0.4788, 0.6704, 0.4302],
        [0.8689, 0.1824, 0.5595],
        [0.7591, 0.8993, 0.5691]], device='cuda:0')
tensor([[0.5340, 0.9363, 0.1453],
        [0.2639, 0.2078, 0.9786],
        [0.5227, 0.0941, 0.5573]], device='cuda:0')
tensor([[1.0128, 1.6066, 0.5755],
        [1.1327, 0.3902, 1.5381],
        [1.2819, 0.9934, 1.1264]], device='cuda:0')
tensor([[1.0128, 1.6066, 0.5755],
        [1.1327, 0.3902, 1.5381],
        [1.2819, 0.9934, 1.1264]], device='cuda:0')


## Ch.2 spaCy 기초

In [None]:
import spacy
from spacy import displacy

In [None]:
# The 'en' shortcut link was removed in spaCy v3; load the English pipeline by
# its full package name (the same one the visualization cells use).
nlp = spacy.load("en_core_web_sm")

In [None]:
# Run the pipeline on a sample sentence; the result is a spaCy Doc.
doc = nlp("Mary slapped the green witch.")

In [None]:
# Sample sentence with several named entities. The typo "war" is corrected to
# "was" so the sentence parses as intended; the Python 2 `u` prefix is dropped
# because every str is already Unicode on Python 3.
doc2 = nlp("John was born in Chicken, Alaska, and studies at Cranberry Lemon University.")

In [None]:
# Confirm that the pipeline returned a spaCy Doc object.
type(doc)

spacy.tokens.doc.Doc

In [None]:
# Confirm that the second result is a spaCy Doc object as well.
type(doc2)

spacy.tokens.doc.Doc

In [None]:
import spacy
from spacy import displacy

text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."

nlp = spacy.load("en_core_web_sm")
doc = nlp(text)
# displacy.serve() starts a blocking web server: it hangs the kernel until it is
# interrupted and is not reachable from hosted notebooks such as Colab.
# render(..., jupyter=True) draws the named-entity visualization inline instead.
displacy.render(doc, style="ent", jupyter=True)


Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [None]:
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("This is a sentence.")
# displacy.serve() would block the kernel behind a local web server; render the
# dependency parse inline in the notebook instead.
displacy.render(doc, style="dep", jupyter=True)


Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

