In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchtext import data, datasets
import dataloader as dl

In [2]:
# 하이퍼파라미터
BATCH_SIZE = 64
lr = 0.001
EPOCHS = 10
USE_CUDA = torch.cuda.is_available()
#DEVICE = torch.device("cpu")
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")
print("다음 기기로 학습합니다:", DEVICE)

다음 기기로 학습합니다: cuda


In [None]:
train_loader=dl.load_trainset()
test_loader=dl.load_testset()

In [None]:
class LSTM(nn.Module):
    #def __init__(self, n_layers, hidden_dim, n_vocab, embed_dim, n_classes, dropout_p=0.2):
    def __init__(self, n_layers, hidden_dim, input_shape, n_classes, dropout_p=0.2):
        super(LSTM, self).__init__()
        print("Building Basic LSTM model...")
        self.n_layers = n_layers 
        #self.embed = nn.Embedding(n_vocab, embed_dim)
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(dropout_p)
        self.lstm = nn.LSTM(input_shape, self.hidden_dim,
                          num_layers=self.n_layers,
                          batch_first=True)
        self.out = nn.Linear(self.hidden_dim, n_classes)

    def forward(self, x):
        #x = self.embed(x)
        h_0 = self._init_state(batch_size=x.size(0))
        c_0 = self._init_state(batch_size=x.size(0))
        x, _ = self.lstm(x, (h_0, c_0))  # [i, b, h]
        h_t = x[:,-1,:]
        self.dropout(h_t)
        logit = self.out(h_t)  # [b, h] -> [b, o]
        return logit
    
    def _init_state(self, batch_size=1):
        weight = next(self.parameters()).data
        return weight.new(self.n_layers, batch_size, self.hidden_dim).zero_()

In [None]:
def train(model, optimizer, train_iter):
    model.train()
    for batch in train_iter:
        x, y = np.array(batch[0][0]), batch[1]
        
        X=[]
        for i in range(10):
            X.append(x[np.random.randint(i*10,(1+i)*10)].reshape(-1))
        
        X=torch.tensor(np.array(X)).unsqueeze(0).to(DEVICE)
        y=y.to(DEVICE)
        
        optimizer.zero_grad()

        logit = model(X)
        loss = F.cross_entropy(logit, y)
        loss.backward()
        optimizer.step()

In [None]:
    """
def train(model, optimizer, data_idx):
    model.train()
    for idx, i in enumerate(data_idx): #suppose no batch
        sample=data[i][...,0].transpose(1,0,2) #shape 300,3,25
        #print(sample.shape)
        
        x=[] #Devide 300frame into 10 sectors and choose 1 frame from each sector
        
        for i in range(10):
            x.append(sample[np.random.randint(i*10,(1+i)*10)].reshape(-1))
        
        X=torch.tensor(np.array(x)).unsqueeze(0).to(DEVICE)
        y=torch.tensor(y_label[idx]).to(DEVICE)
        optimizer.zero_grad()
        logit = model(X)
        loss = F.cross_entropy(logit, y)
        loss.backward()
        optimizer.step()
    """ 
    

In [None]:
def evaluate(model, val_iter):
    #evaluate model
    model.eval()
    corrects, total_loss = 0, 0
    for batch in val_iter:
        x, y = np.array(batch[0][0]), batch[1]
        
        X=[]
        for i in range(10):
            X.append(x[np.random.randint(i*10,(1+i)*10)].reshape(-1))
        
        X=torch.tensor(np.array(X)).unsqueeze(0).to(DEVICE)
        y=y.to(DEVICE)
        
        logit = model(X)
        loss = F.cross_entropy(logit, y, reduction='sum')
        total_loss += loss.item()
        corrects += (logit.max(1)[1].view(y.size()).data == y.data).sum()
    size = len(val_iter.dataset)
    avg_loss = total_loss / size
    avg_accuracy = 100.0 * corrects / size
    return avg_loss, avg_accuracy

In [None]:
    """
def evaluate(model, val_iter):
    #evaluate model
    model.eval()
    corrects, total_loss = 0, 0
    for idx, i in enumerate(test_data_idx): #suppose no batch
        sample=test_data[i][...,0].transpose(1,0,2) #shape 300,3,25
        #print(sample.shape)
        
        x=[] #Devide 300frame into 10 sectors and choose 1 frame from each sector
        
        for i in range(10):
            x.append(sample[np.random.randint(i*10,(1+i)*10)].reshape(-1))
        
        X=torch.tensor(np.array(x)).unsqueeze(0).to(DEVICE)
        y=torch.tensor(test_y_label[idx]).to(DEVICE)
    
        logit = model(X)
        loss = F.cross_entropy(logit, y, reduction='sum')
        total_loss += loss.item()
        corrects += (logit.max(1)[1].view(y.size()).data == y.data).sum()
    size = len(val_iter.dataset)
    avg_loss = total_loss / size
    avg_accuracy = 100.0 * corrects / size
    return avg_loss, avg_accuracy
    """"

In [None]:
model = LSTM(1, 256, 75, 2, 0.2).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [None]:
best_val_loss = None
for e in range(1, EPOCHS+1):
    train(model, optimizer, train_loader)
    val_loss, val_accuracy = evaluate(model, test_loader)

    print("[이폭: %d] 검증 오차:%5.2f | 검증 정확도:%5.2f" % (e, val_loss, val_accuracy))
    
    # 검증 오차가 가장 적은 최적의 모델을 저장
    if not best_val_loss or val_loss < best_val_loss:
        if not os.path.isdir("snapshot"):
            os.makedirs("snapshot")
        torch.save(model.state_dict(), './snapshot/actionrecognition.pt')
        best_val_loss = val_loss

In [None]:
#model.load_state_dict(torch.load('./snapshot/txtclassification.pt'))
test_loss, test_acc = evaluate(model, test_loader)
print('테스트 오차: %5.2f | 테스트 정확도: %5.2f' % (test_loss, test_acc))

## Generator

1 * 30-> 1 * 30 * 75

In [3]:
class Discriminator(nn.Module):
    #def __init__(self, n_layers, hidden_dim, n_vocab, embed_dim, n_classes, dropout_p=0.2):
    def __init__(self, n_layers, hidden_dim, input_shape, n_classes, dropout_p=0.2):
        super(Discriminator, self).__init__()
        print("Building Basic Discriminator model...")
        self.n_layers = n_layers 
        #self.embed = nn.Embedding(n_vocab, embed_dim)
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(dropout_p)
        self.lstm = nn.LSTM(input_shape, self.hidden_dim,
                          num_layers=self.n_layers,
                          batch_first=True)
        self.fc = nn.Linear(self.hidden_dim, n_classes)
        self.out = nn.Softmax()
    def forward(self, x):
        #x = self.embed(x)
        h_0 = self._init_state(batch_size=x.size(0))
        c_0 = self._init_state(batch_size=x.size(0))
        x, _ = self.lstm(x, (h_0, c_0))  # [i, b, h]
        h_t = x[:,-1,:]
        self.dropout(h_t)
        logit = self.fc(h_t)  # [b, h] -> [b, o]
        logit = self.out(logit)
        return logit
    
    def _init_state(self, batch_size=1):
        weight = next(self.parameters()).data
        return weight.new(self.n_layers, batch_size, self.hidden_dim).zero_()

In [4]:
class Generator(nn.Module):
    #def __init__(self, n_layers, hidden_dim, n_vocab, embed_dim, n_classes, dropout_p=0.2):
    def __init__(self, n_layers, hidden_dim, input_shape, dropout_p=0.2):
        super(Generator, self).__init__()
        print("Building Basic generator model...")
        self.n_layers = n_layers 
        #self.embed = nn.Embedding(n_vocab, embed_dim)
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(dropout_p)
        self.lstm = nn.LSTM(input_shape, self.hidden_dim,
                          num_layers=self.n_layers,
                          batch_first=True)
        #self.out = nn.Linear(self.hidden_dim, n_classes)

    def forward(self, x):
        #x = self.embed(x)
        h_0 = self._init_state(batch_size=x.size(0))
        c_0 = self._init_state(batch_size=x.size(0))
        x, (h, c) = self.lstm(x, (h_0, c_0))  # [i, b, h]
        
        return x
    
    def _init_state(self, batch_size=1):
        weight = next(self.parameters()).data
        return weight.new(self.n_layers, batch_size, self.hidden_dim).zero_()

In [11]:
D = Discriminator(1, 256, 75, 2, 0.2).to(DEVICE)
G = Generator(1, 75, 1, 0.2).to(DEVICE)

d_optimizer = torch.optim.Adam(D.parameters(), lr=lr)
g_optimizer = torch.optim.Adam(G.parameters(), lr=lr)

Building Basic Discriminator model...
Building Basic generator model...


In [6]:
criterion=nn.BCELoss()

In [7]:
input_data=torch.randn(30,1)
input_data=torch.tensor(np.array(input_data)).unsqueeze(0).to(DEVICE)

In [8]:
data_loader=dl.GAN_dataloader()
EPOCHS=25

In [13]:
total_step = len(data_loader)
for epoch in range(EPOCHS):
    for i, data in enumerate(data_loader):
        x = np.array(data[0][0])
        
        X=[]
        for i in range(10):
            X.append(x[np.random.randint(i*10,(1+i)*10)].reshape(-1))
        
        X=torch.tensor(np.array(X)).unsqueeze(0).to(DEVICE)
        
        # '진짜'와 '가짜' 레이블 생성
        real_labels = torch.tensor([[0,1]]).to(torch.float32).to(DEVICE)
        fake_labels = torch.tensor([[1,0]]).to(torch.float32).to(DEVICE)
        
        # 판별자가 진짜 이미지를 진짜로 인식하는 오차를 예산
        outputs = D(X)
        
        d_loss_real = criterion(outputs, real_labels)
        real_score = outputs
        #print(real_score)
        # 무작위 텐서로 가짜 이미지 생성
        z = torch.randn(30,1)
        z = torch.tensor(np.array(z)).unsqueeze(0).to(DEVICE)
        fake_images = G(z)
        
        # 판별자가 가짜 이미지를 가짜로 인식하는 오차를 계산
        outputs = D(fake_images)
        d_loss_fake = criterion(outputs, fake_labels)
        fake_score = outputs
        
        # 진짜와 가짜 이미지를 갖고 낸 오차를 더해서 판별자의 오차 계산
        d_loss = d_loss_real + d_loss_fake

        # 역전파 알고리즘으로 판별자 모델의 학습을 진행
        d_optimizer.zero_grad()
        g_optimizer.zero_grad()
        d_loss.backward()
        d_optimizer.step()
        
        # 생성자가 판별자를 속였는지에 대한 오차를 계산
        fake_images = G(z)
        outputs = D(fake_images)
        g_loss = criterion(outputs, real_labels)
        
        # 역전파 알고리즘으로 생성자 모델의 학습을 진행
        d_optimizer.zero_grad()
        g_optimizer.zero_grad()
        g_loss.backward()
        g_optimizer.step()
        
    # 학습 진행 알아보기
    print('Epoch [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, D(G(z)): {:.2f}' 
          .format(epoch, EPOCHS, d_loss.item(), g_loss.item(), 
                  real_score.mean().item(), fake_score.mean().item()))
    if(epoch%25==0):
        torch.save(G.state_dict(), './snapshot/gan_'+str(epoch)+'.pt')

  logit = self.out(logit)


Epoch [0/500], d_loss: 0.0172, g_loss: 6.8150, D(x): 0.50, D(G(z)): 0.50
Epoch [1/500], d_loss: 0.0028, g_loss: 7.3366, D(x): 0.50, D(G(z)): 0.50
Epoch [2/500], d_loss: 0.0034, g_loss: 5.9938, D(x): 0.50, D(G(z)): 0.50
Epoch [3/500], d_loss: 0.0125, g_loss: 4.5129, D(x): 0.50, D(G(z)): 0.50
Epoch [4/500], d_loss: 0.0016, g_loss: 7.2300, D(x): 0.50, D(G(z)): 0.50
Epoch [5/500], d_loss: 0.0480, g_loss: 4.3716, D(x): 0.50, D(G(z)): 0.50
Epoch [6/500], d_loss: 0.0180, g_loss: 4.0489, D(x): 0.50, D(G(z)): 0.50
Epoch [7/500], d_loss: 0.0060, g_loss: 5.9748, D(x): 0.50, D(G(z)): 0.50
Epoch [8/500], d_loss: 0.0005, g_loss: 7.6790, D(x): 0.50, D(G(z)): 0.50
Epoch [9/500], d_loss: 0.0007, g_loss: 8.4321, D(x): 0.50, D(G(z)): 0.50
Epoch [10/500], d_loss: 0.0101, g_loss: 6.0328, D(x): 0.50, D(G(z)): 0.50
Epoch [11/500], d_loss: 0.0052, g_loss: 5.3993, D(x): 0.50, D(G(z)): 0.50
Epoch [12/500], d_loss: 0.0011, g_loss: 6.8971, D(x): 0.50, D(G(z)): 0.50
Epoch [13/500], d_loss: 0.0003, g_loss: 8.3151, 

Epoch [110/500], d_loss: 0.0000, g_loss: 13.5409, D(x): 0.50, D(G(z)): 0.50
Epoch [111/500], d_loss: 0.0000, g_loss: 13.3917, D(x): 0.50, D(G(z)): 0.50
Epoch [112/500], d_loss: 0.0000, g_loss: 14.1644, D(x): 0.50, D(G(z)): 0.50
Epoch [113/500], d_loss: 0.0000, g_loss: 14.5559, D(x): 0.50, D(G(z)): 0.50
Epoch [114/500], d_loss: 0.0000, g_loss: 14.9056, D(x): 0.50, D(G(z)): 0.50
Epoch [115/500], d_loss: 0.0000, g_loss: 13.8424, D(x): 0.50, D(G(z)): 0.50
Epoch [116/500], d_loss: 0.0000, g_loss: 15.9921, D(x): 0.50, D(G(z)): 0.50
Epoch [117/500], d_loss: 0.0000, g_loss: 16.1995, D(x): 0.50, D(G(z)): 0.50
Epoch [118/500], d_loss: 0.0000, g_loss: 16.0746, D(x): 0.50, D(G(z)): 0.50
Epoch [119/500], d_loss: 0.0000, g_loss: 69.8111, D(x): 0.50, D(G(z)): 0.50
Epoch [120/500], d_loss: 0.0000, g_loss: 69.6978, D(x): 0.50, D(G(z)): 0.50
Epoch [121/500], d_loss: 0.0000, g_loss: 69.8941, D(x): 0.50, D(G(z)): 0.50
Epoch [122/500], d_loss: 0.0000, g_loss: 69.8372, D(x): 0.50, D(G(z)): 0.50
Epoch [123/5

KeyboardInterrupt: 