In [None]:
import torch
from torch import nn
from torch.nn import LSTM
from transformers import BertPreTrainedModel, BertConfig, BertModel
import torch.nn.functional as F

def init_weights(m):
    if isinstance(m, nn.Linear):    # m이 Linear 라면
        torch.nn.init.xavier_uniform_(m.weight) # Linear 객체의 weight를 사비에르 초기화 시키고
        m.bias.data.fill_(7)    # 편향은 7로 초기화 시킨다.
        
class Model_A(BertPreTrainedModel):
    def __init__(self, bert_model_config:BertConfig):
        super(Model_A, self).__init__(bert_model_config)
        self.bert = BertModel(bert_model_config)
        self.dropout = nn.Dropout(p=bert_model_config.hidden_dropout_prop)
        self.lstm = LSTM(bert_model_config.hidden_size, bert_model_config.hidden_size)
        self.mlp = nn.Sequential(
            nn.Dropout(p=bert_model_config.hidden_dropout_prop),
            nn.Linear(bert_model_config.hidden_size,1)
        )
        self.w_omega = nn.Parameter(torch.Tensor(bert_model_config.hidden_size,bert_model_config.hidden_size))
        self.b_omega = nn.Parameter(torch.Tensor(1,bert_model_config.hidden_size))
        self.u_omega = nn.Parameter(torch.Tensor(bert_model_config.hidden_size,1))
        
        nn.init.uniform_(self.w_omega, -0.1,0.1)    # nn.Parameter시 맞춰준다. -0.1,0.1 사이로 맞춘다.
        nn.init.uniform_(self.u_omega,-0.1,0.1)
        nn.init.uniform_(self.b_omega,-0,1,0.1)
        self.mlp.apply(init_weights)
        
    def forward(self, document_batch:torch.Tensor, device='cpu', bert_batch_size=0):
        bert_output = torch.zeros(size=(document_batch.shape[0],
                                        min(document_batch.shape[1],
                                            bert_batch_size),
                                        self.bert.config.hidden_size), dtype=torch.float, device=device)
        for doc_id in range(document_batch.shape[0]):
            all_bert_output_info = self.bert(document_batch[doc_id][:self.bert_batch_size,0],
                                             token_type_ids=document_batch[doc_id][:self.bert_batch_size,1],
                                             attention_mask = document_batch[doc_id][:self.bert_batch_size,2])
            bert_token_max = torch.max(all_bert_output_info[0],1)   # max pooling
            bert_output[doc_id][:self.bert_batch_size] = torch.cat((bert_token_max.values, all_bert_output_info[1]),1)
        
        
            

In [5]:
import torch
import torch.nn as nn
w = torch.empty(3,5)
nn.init.uniform_(w, 1,3)    # torch변수명, lower value, upper value
w

tensor([[1.5289, 1.4437, 2.6601, 1.7284, 2.0371],
        [2.9423, 1.0405, 1.7881, 1.5383, 1.3288],
        [1.3950, 1.9025, 2.2378, 1.1102, 2.3316]])

In [8]:
# CNN 모델 필사
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # 특징 추출 계층
        self.layer = nn.Sequential(
            nn.Conv2d(1,16,3,padding=1),
            nn.ReLU(),
            nn.Conv2d(16,32,3,padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Conv2d(32,64,3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,2)
        )
        # 분류 계층
        self.fc_layer = nn.Sequential(
            nn.Linear(64*7*7,100),
            nn.ReLU(),
            nn.Linear(100,10)
        )
        
        for m in self.modules():
            if isinstance(m,nn.Conv2d):
                m.weight.data.normal_(0.0,0.02) # 평균 0, 편차 0.02 
                m.bias.data.fill_(0) # 0
            
                nn.init.xavier_normal_(m.weight.data)    # 사비에르 노멀 초기화
                m.bias.data.fill_(0) # 0
                
                nn.init.kaiming_normal_(m.weight.data)   # 카밍 노멀 초기화
                m.bias.data.fill_(0) # 0
                
            if isinstance(m, nn.Linear):
                m.weight.data.normal_(0.0,0.02) # 평균 0, 표준편차 0.02
                m.bias.data.fill_(0)
                
                nn.init.xavier_normal_(m.weight.data)
                m.bias.data.fill_(0)
                
                nn.init.kaiming_normal_(m.weight.data)
                m.bias.data.fill_(0)
    
    def forward(self,x):
        x = self.layer(x)
        x = x.view(batch_size, -1)
        x = self.fc_layer(x)
        return x

In [11]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
learning_rate= 0.01
model = CNN().to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [12]:
class RNN(nn.Module):
    def __init__(self, data_size, hidden_size, output_size):
        super(RNN,self).__init__()
        
        self.hidden_size = hidden_size
        input_size = data_size + hidden_size
        
        self.i2h = nn.Linear(input_size,hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)
        
    def forward(self, data, last_hidden):
        input = torch.cat((data, last_hidden),1)
        hidden = self.i2h(input)
        output = self.h2o(hidden)
        return hidden, output

In [16]:
loss_fn = nn.MSELoss()
rnn = RNN(50,20,10)

batch_size = 10
Timesteps = 10

batch = torch.randn(batch_size, 50)
hidden = torch.zeros(batch_size, 20)
target = torch.zeros(batch_size, 10)

total_loss = 0
for t in range(Timesteps):
    hidden, output = rnn(batch,hidden)
    loss = loss_fn(output,target)
    print(loss)
    total_loss += loss 
total_loss.backward()

tensor(0.0893, grad_fn=<MseLossBackward0>)
tensor(0.1122, grad_fn=<MseLossBackward0>)
tensor(0.1161, grad_fn=<MseLossBackward0>)
tensor(0.1161, grad_fn=<MseLossBackward0>)
tensor(0.1159, grad_fn=<MseLossBackward0>)
tensor(0.1159, grad_fn=<MseLossBackward0>)
tensor(0.1159, grad_fn=<MseLossBackward0>)
tensor(0.1159, grad_fn=<MseLossBackward0>)
tensor(0.1159, grad_fn=<MseLossBackward0>)
tensor(0.1159, grad_fn=<MseLossBackward0>)


In [17]:
class RNN(nn.Module):
    def __init__(self,input_size,hidden_size,output_size):
        super(RNN,self).__init__()
        
        self.hidden_size = hidden_size
        self.i2o = nn.Linear(input_size+hidden_size,output_size)
        self.i2h = nn.Linear(input_size+hidden_size,hidden_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self,input,hidden):
        combined = torch.cat((input,hidden),1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden
    
    def initHidden(self):
        return torch.zeros(1,self.hidden_size)

In [19]:
n_hidden = 128
n_letters = 100
n_categories = 10
rnn = RNN(n_letters,n_hidden,n_categories)

In [23]:
import torch.optim as optim

lr = 0.005
criterion = nn.NLLLoss()    # LogSoftmax이므로 NLL 손실함수가 적합하다고 한다.
optimizer = optim.SGD(rnn.parameters(), lr=lr)  # 어떤 파라미터들을 어떤 학습률만큼 갱신시킬 것인가?

def train(category_tensor, line_tensor):    # category_tensor:정답 , line_tensor:입력 데이터
    hidden = rnn.initHidden()
    
    rnn.zero_grad()
    
    for i in range(line_tensor.size()[0]):  
        output, hidden = rnn(line_tensor[i],hidden)
    loss = criterion(output,category_tensor)
    loss.backward()
    optimizer.step()    
    
    # for p in rnn.parameters():
        # SGD 갱신
        # p.data.add_(p.grad.data, alpha= -lr)    # 파라미터값에 기울기에 alpha만큼 곱한 값을 더해준다.
        
    return output, loss.item()

In [None]:
import torch.nn as nn
class RNN(nn.Module):
    def __init__(self,data_size, hidden_size,output_size):
        super(RNN, self).__init__()
        
        
        input_size = data_size + hidden_size
        layer1 = nn.Linear(input_size,hidden_size)
        layer2 = nn.Linear(input_size,output_size)
        
    
    def forward(self,data,last_hidden):
        input = torch.cat((data,last_hidden),1)
        hidden_layer = torch.zeros(hidden_size,hidden_size)
    
        

In [5]:
import torch.nn as nn
class RNN(nn.Module):
    def __init__(self, data_size, hidden_size, output_size):
        super(RNN,self).__init__()
        
        self.hidden_size = hidden_size
        input_size = data_size + hidden_size
        
        self.i2h = nn.Linear(input_size,hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)
        
    def forward(self, data, last_hidden):
        input = torch.cat((data, last_hidden),1)
        hidden = self.i2h(input)
        output = self.h2o(hidden)
        return hidden, output

In [35]:
a= torch.tensor([1,2,5])
b = torch.tensor([3,4,5])
torch.nn.functional.margin_ranking_loss(a,b,target=a.sign())

tensor(1.3333)

In [45]:
# margin ranking loss
y = torch.tensor([3,4,2,5])
yhat = torch.tensor([3,5,1,5])
print((y-y[1]).sign(), (yhat-yhat[1]).sign())
mask = (y-y[1]).sign() != (yhat-yhat[1]).sign()        # 다른 것만
mask


tensor([-1,  0, -1,  1]) tensor([-1,  0, -1,  0])


tensor([False, False, False,  True])

In [32]:
a= torch.tensor([0,0,2],dtype=torch.float)
b = torch.tensor([3,4,5],dtype=torch.float)
torch.nn.functional.mse_loss(a,b)

tensor(11.3333)

In [26]:
a= torch.tensor([1, -0.1, 0.5,0])
a.sign()

tensor([ 1., -1.,  1.,  0.])

In [18]:
a= torch.tensor([[1,2,3],[1,2,3]])
a.shape[0]

2

In [34]:
a= torch.tensor([0,0,2],dtype=torch.float)
b = torch.tensor([3,4,5],dtype=torch.float)
torch.nn.CosineEmbeddingLoss(a,b)

RuntimeError: Boolean value of Tensor with more than one value is ambiguous