In [1]:
#원조

In [9]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets

'''
STEP 1: LOADING DATASET
'''
train_dataset = dsets.MNIST(root='./data', 
                            train=True, 
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data', 
                           train=False, 
                           transform=transforms.ToTensor())

'''
STEP 2: MAKING DATASET ITERABLE
'''

batch_size = 100
n_iters = 3000
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)

'''
STEP 3: CREATE MODEL CLASS
'''

class RNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim
        
        # Number of hidden layers
        self.layer_dim = layer_dim
        
        # Building your RNN
        # batch_first=True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, feature_dim)
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='tanh')
        
        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_()
        
        # One time step
        # We need to detach the hidden state to prevent exploding/vanishing gradients
        # This is part of truncated backpropagation through time (BPTT)
        
        print('x.shape')
        print(x.shape)
        
        out, hn = self.rnn(x, h0.detach())
        
        # Index hidden state of last time step
        # out.size() --> 100, 28, 100
        # out[:, -1, :] --> 100, 100 --> just want last time step hidden states! 
        out = self.fc(out[:, -1, :]) 
        # out.size() --> 100, 10
        return out

'''
STEP 4: INSTANTIATE MODEL CLASS
'''

input_dim = 28
hidden_dim = 100
layer_dim = 2  # ONLY CHANGE IS HERE FROM ONE LAYER TO TWO LAYER
output_dim = 10

model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)

print('RNNMODEL: input, hidden, layer, output')
print(input_dim, hidden_dim, layer_dim, output_dim)

# JUST PRINTING MODEL & PARAMETERS 
# print(model)
# print(len(list(model.parameters())))
# for i in range(len(list(model.parameters()))):
#     print(list(model.parameters())[i].size())

'''
STEP 5: INSTANTIATE LOSS CLASS
'''
criterion = nn.CrossEntropyLoss()

'''
STEP 6: INSTANTIATE OPTIMIZER CLASS
'''
learning_rate = 0.1

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)  

'''
STEP 7: TRAIN THE MODEL
'''

# Number of steps to unroll
seq_dim = 28  

iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        # Load images as tensors with gradient accumulation abilities
        images = images.view(-1, seq_dim, input_dim).requires_grad_()
        
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        
        # Forward pass to get output/logits
        # outputs.size() --> 100, 10
        
        print('input.shape')
        print(images.shape)
        
        print('output.shape')
        print(labels.shape)
        
        outputs = model(images)
        break
        
        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)
        
        # Getting gradients w.r.t. parameters
        loss.backward()
        
        # Updating parameters
        optimizer.step()
        
#         break
        
        iter += 1
        
        if iter % 500 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                # Resize images
                images = images.view(-1, seq_dim, input_dim)
                
                # Forward pass only to get logits/output
                outputs = model(images)
                
                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)
                
                # Total number of labels
                total += labels.size(0)
                
                # Total correct predictions
                correct += (predicted == labels).sum()
            
            accuracy = 100 * correct / total
            
            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))
            
           
            
            ####제일 잘됨

            
#RNNModel(input_dim, hidden_dim, layer_dim, output_dim)
#RNNModel(28, 100, 2, 10)
#input : 배치크기100 * Number of steps to unroll 28 * 인풋크기 28
#output: 배치크기100


RNNMODEL: input, hidden, layer, output
28 100 2 10
input.shape
torch.Size([100, 28, 28])
output.shape
torch.Size([100])
x.shape
torch.Size([100, 28, 28])
input.shape
torch.Size([100, 28, 28])
output.shape
torch.Size([100])
x.shape
torch.Size([100, 28, 28])
input.shape
torch.Size([100, 28, 28])
output.shape
torch.Size([100])
x.shape
torch.Size([100, 28, 28])
input.shape
torch.Size([100, 28, 28])
output.shape
torch.Size([100])
x.shape
torch.Size([100, 28, 28])
input.shape
torch.Size([100, 28, 28])
output.shape
torch.Size([100])
x.shape
torch.Size([100, 28, 28])


In [6]:
import pickle
x_train=pickle.load(open('x_train.pickle','rb'))
y_train=pickle.load(open('y_train.pickle','rb'))
x_test=pickle.load(open('x_test.pickle','rb'))
y_test=pickle.load(open('y_test.pickle','rb'))
x_train

tensor([[337156,   4169,  33656,  ...,  17724,  75793, 110849],
        [  8926,   1967,  64902,  ...,   2538,   3290,  64920],
        [352286, 352287,  60784,  ..., 352294,  14465, 352295],
        ...,
        [   182,      0,      0,  ...,      0,      0,      0],
        [ 37794,      0,      0,  ...,      0,      0,      0],
        [303159,      0,      0,  ...,      0,      0,      0]])

In [10]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets

'''
STEP 1: LOADING DATASET
'''
# train_dataset = dsets.MNIST(root='./data', 
#                             train=True, 
#                             transform=transforms.ToTensor(),
#                             download=True)

# test_dataset = dsets.MNIST(root='./data', 
#                            train=False, 
#                            transform=transforms.ToTensor())

train_dataset=list(zip(x_train,y_train))
test_dataset=list(zip(x_test,y_test))


for i in train_dataset:
    print('train_dataset[0]')
    print('\ntype')
    print(type(i))
    print('\nlen(train_dataset[0])')
    print(len(i))
    print('\ntrain_dataset[0][0]')
    print(i[0].shape)
    print('\ntrain_dataset[0][1]')
    print(i[1])
    break

'''
STEP 2: MAKING DATASET ITERABLE
'''

batch_size = 100
n_iters = 3000
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)
print('\nepoch개수')
print(num_epochs)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)


for i in train_loader:
    print('\ntrain_loader[0]')
    print('\ntrain_loader[0][0]')
    print(i[0].shape)
    print('\ntrain_loader[0][1]')
    print(i[1].shape)
    break
        

train_dataset[0]

type
<class 'tuple'>

len(train_dataset[0])
2

train_dataset[0][0]
torch.Size([30])

train_dataset[0][1]
tensor(0)

epoch개수
2

train_loader[0]

train_loader[0][0]
torch.Size([100, 30])

train_loader[0][1]
torch.Size([100])


In [17]:

'''
STEP 3: CREATE MODEL CLASS
'''

class RNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        # Hidden dimensions
        self.hidden_dim = hidden_dim
        
        # Number of hidden layers
        self.layer_dim = layer_dim
        
        # Building your RNN
        # batch_first=True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, feature_dim)
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='tanh')
        
        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        
    
        print('x.shape')
        print(x.shape)
        
        out, hn = self.rnn(x)
        
        # Index hidden state of last time step
        # out.size() --> 100, 28, 100
        # out[:, -1, :] --> 100, 100 --> just want last time step hidden states! 
        out = self.fc(out[:, -1, :]) 
        # out.size() --> 100, 10
        return out

'''
STEP 4: INSTANTIATE MODEL CLASS
'''

input_dim = 30
hidden_dim = 100
layer_dim = 2  # ONLY CHANGE IS HERE FROM ONE LAYER TO TWO LAYER
output_dim = 10

model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)

print('RNNMODEL: input, hidden, layer, output')
print(input_dim, hidden_dim, layer_dim, output_dim)

# JUST PRINTING MODEL & PARAMETERS 
# print(model)
# print(len(list(model.parameters())))
# for i in range(len(list(model.parameters()))):
#     print(list(model.parameters())[i].size())

'''
STEP 5: INSTANTIATE LOSS CLASS
'''
criterion = nn.CrossEntropyLoss()

'''
STEP 6: INSTANTIATE OPTIMIZER CLASS
'''
learning_rate = 0.1

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)  

'''
STEP 7: TRAIN THE MODEL
'''

# Number of steps to unroll
seq_dim = 10  

iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        # Load images as tensors with gradient accumulation abilities
        images = images.view(-1, seq_dim, input_dim)
        
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        
        # Forward pass to get output/logits
        # outputs.size() --> 100, 10
        
        print('input.shape')
        print(images.shape)
        
        print('output.shape')
        print(labels.shape)
        
        outputs = model(images.long())
        break
        
        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)
        
        # Getting gradients w.r.t. parameters
        loss.backward()
        
        # Updating parameters
        optimizer.step()
        
#         break
        
        iter += 1
        
        if iter % 500 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                # Resize images
                images = images.view(-1, seq_dim, input_dim)
                
                # Forward pass only to get logits/output
                outputs = model(images)
                
                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)
                
                # Total number of labels
                total += labels.size(0)
                
                # Total correct predictions
                correct += (predicted == labels).sum()
            
            accuracy = 100 * correct / total
            
            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))
            
           
            
            ####제일 잘됨

            
#RNNModel(input_dim, hidden_dim, layer_dim, output_dim)
#RNNModel(28, 100, 2, 10)
#input : 배치크기100 * Number of steps to unroll 28 * 인풋크기 28
#output: 배치크기100


RNNMODEL: input, hidden, layer, output
30 100 2 10
input.shape
torch.Size([10, 10, 30])
output.shape
torch.Size([100])
x.shape
torch.Size([10, 10, 30])


RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 'mat2' in call to _th_mm

In [8]:
'''
STEP 3: CREATE MODEL CLASS
'''

class RNNModel(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        
        super(RNNModel, self).__init__()
        
        self.input_dim=input_dim
        self.embedding_dim=embedding_dim
        self.hidden_dim=hidden_dim
        self.output_dim=output_dim
        
        self.embedding=nn.Embedding(input_dim,embedding_dim)
        
        self.rnn=nn.RNN(embedding_dim,hidden_dim)
        
        self.fc=nn.Linear(hidden_dim,output_dim)
        

    def forward(self, x):
        
        print('\ninput')
        print(self.input_dim)
        print(x.shape)
        
        embedded=self.embedding(x)
        print('\nemb')
        print(self.embedding_dim)
        print(embedded.shape)
        
        output,hidden=self.rnn(embedded)
        
        print('oh')
        print(output.shape)
        print(hidden.shape)
        
        output=self.fc(hidden.squeeze(0))
        
        print('\noutput')
        print(output.shape)
        
        return output
        
    
    def forward(self, text):

        #text = [sent len, batch size]
        
        embedded = self.embedding(text)
        
        #embedded = [sent len, batch size, emb dim]
        
        output, hidden = self.rnn(embedded)
        
        #output = [sent len, batch size, hid dim]
        #hidden = [1, batch size, hid dim]
        
        assert torch.equal(output[-1,:,:], hidden.squeeze(0))
        
        return self.fc(hidden.squeeze(0))

    
'''
STEP 4: INSTANTIATE MODEL CLASS
'''
input_dim = 30
hidden_dim = 100
layer_dim = 2  # ONLY CHANGE IS HERE FROM ONE LAYER TO TWO LAYER
output_dim = 10

model = RNNModel(input_dim, hidden_dim, layer_dim, output_dim)

# JUST PRINTING MODEL & PARAMETERS 

# for i in range(len(list(model.parameters()))):
#     print(list(model.parameters())[i].size())

'''
STEP 5: INSTANTIATE LOSS CLASS
'''
criterion = nn.CrossEntropyLoss()

'''
STEP 6: INSTANTIATE OPTIMIZER CLASS
'''
learning_rate = 0.1

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)  

'''
STEP 7: TRAIN THE MODEL
'''

# Number of steps to unroll
seq_dim = 28  

iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        print('train_loader')
        print(images.shape)
        print(labels.shape)
        break
        
        
        # Load images as tensors with gradient accumulation abilities
#         images = images.requires_grad_()
        
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        
        # Forward pass to get output/logits
        # outputs.size() --> 100, 10
        outputs = model(images)
        
        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)
        
        # Getting gradients w.r.t. parameters
        loss.backward()
        
        # Updating parameters
        optimizer.step()
        
        iter += 1
        
        if iter % 500 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                # Resize images
                images = images
                
                # Forward pass only to get logits/output
                outputs = model(images)
                
                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)
                
                # Total number of labels
                total += labels.size(0)
                
                # Total correct predictions
                correct += (predicted == labels).sum()
            
            accuracy = 100 * correct / total
            
            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))
            
            
            ####제일 잘됨

train_loader
torch.Size([100, 30])
torch.Size([100])
train_loader
torch.Size([100, 30])
torch.Size([100])


In [24]:
x_train.shape
y_train.shape


torch.Size([149995])

In [9]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum() / len(correct)
    return acc

def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for batch in iterator:
        
        optimizer.zero_grad()
        
        print('batch[0].shape')
        print(batch[0].shape)
        
        predictions = model(batch[0])
        print(predictions)
        predictions=predictions.squeeze(1)
        
        loss = criterion(predictions, batch.label)
        
        acc = binary_accuracy(predictions, batch.label)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:

            predictions = model(batch).squeeze(1)
            
            loss = criterion(predictions, batch.label)
            
            acc = binary_accuracy(predictions, batch.label)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs


    
import torch.nn as nn

class RNNModel(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        
        super().__init__()
        
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        
        self.rnn = nn.RNN(embedding_dim, hidden_dim)
        
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, text):

        #text = [sent len, batch size]
        
        embedded = self.embedding(text)
        
        #embedded = [sent len, batch size, emb dim]
        
        output, hidden = self.rnn(embedded)
        
        #output = [sent len, batch size, hid dim]
        #hidden = [1, batch size, hid dim]
        
        assert torch.equal(output[-1,:,:], hidden.squeeze(0))
        
        return self.fc(hidden.squeeze(0))
    
    
    

INPUT_DIM = 30
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1

model = RNNModel(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)

import torch.optim as optim

optimizer = optim.SGD(model.parameters(), lr=1e-3)

criterion = nn.BCEWithLogitsLoss()

N_EPOCHS = 5

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_loader, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, test_loader, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

batch[0].shape
torch.Size([100, 30])


RuntimeError: index out of range: Tried to access index 138114 out of table with 29 rows. at /pytorch/aten/src/TH/generic/THTensorEvenMoreMath.cpp:418

In [25]:
class RNN(nn.Module):
    def __init__(self, vocab_size, embed_size, pad_index, hid_size, n_layers, dropout, n_category):
        super(RNN, self).__init__()
        self.vocab_size = vocab_size             # 고유 토큰의 갯수
        self.embed_size = embed_size             # 임베딩 차원의 크기
        self.pad_index = pad_index               # 패딩 토큰 (dummy)
        
        self.embed = nn.Embedding(
            num_embeddings=vocab_size, 
            embedding_dim=embed_size, 
            padding_idx=self.pad_index
        )
        
        self.hid_size = hid_size           # RNN layer의 뉴런의 갯수
        self.n_layers = n_layers           # RNN layer의 수
        self.drouput = dropout             # 드롭아웃 비율
        self.n_category = n_category       # 카테고리 갯수
        
        self.rnn = nn.RNN(embed_size, hid_size, n_layers, batch_first=True)
        self.lin = nn.Linear(input_dim, n_category)

        
        self.outputs = []
        
    def init_hidden(self, batch_size):
        # the weights are of the form (nb_layers, batch_size, hid_size(n_neuron))
        hidden = Variable(torch.randn(self.n_layers, batch_size, self.hid_size))
        return hidden    
    
    def forward(self, x, x_sequence_length):
        # init h randomly
        batch_size = x.size(0)
        self.h = self.init_hidden(batch_size)
        
        # embedding
        x = self.embed(x) # sequence_length(max_len), batch_size, embed_size
        
        # packing for rnn
        x = torch.nn.utils.rnn.pack_padded_sequence(x, x_sequence_length, batch_first=True)
        
        # RNN
        output, self.h = self.rnn(x, self.h)
        
        # unpack
        x, _ = torch.nn.utils.rnn.pad_packed_sequence(output, batch_first=True)
        
        # cbow
        x = x.sum(dim = 1)  # flat하게 펼쳐서 fully-connet하는 것도 가능
        
        # fully-connect
        logit = self.lin(x)
        return logit