# Warming Up - Combined Multiple Models


이 노트북은 모델 1과 모델 2를 독립적으로 생성하고, 두 모델의 추론 결과를 하나의 통합된 모델에서 한 번에 얻는 예제입니다. 이를 위해 다음과 같은 작업을 합니다.

- 첫 번째 Fashion-MNIST 모델 생성 및 훈련
- 두 번째 Fashion-MNIST 모델 생성 및 훈련
- 첫 번째, 두 번째 모델을 한 개의 모델로 통합
- 통합된 모델에서 각각 모델의 추론 결과를 얻음


---

### 참조: 
- 딥러닝으로 리뷰에서 제품 속성 정보 추출하기
    * http://blog.hwahae.co.kr/all/tech/tech-tech/5967/
- PyTorch Quick Start
    - https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html

# 0. 환경 셋업

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

# 1. 데이터 준비

In [2]:
# Download training data from open datasets.
# Files are stored under ./data; each sample is converted by ToTensor().
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
# Same arguments as above except train=False.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [3]:
# Batch sizes for the training and test loaders; test_batch_size is also
# passed to the predict helpers later in the notebook.
train_batch_size = 64
test_batch_size = 100

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=train_batch_size)
test_dataloader = DataLoader(test_data, batch_size=test_batch_size)

# Peek at the first test batch only (note the break) to show its shapes.
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([100, 1, 28, 28])
Shape of y: torch.Size([100]) torch.int64


# 2. 첫번째 및 두번째 모델  준비

## 2.1 두개의 모델 정의 및 생성

In [4]:
# Get cpu or gpu device for training.
# `device` is a module-level name read by the train/test/predict helpers.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier producing 10 logits per sample.

    The input is flattened (everything after the batch dimension) to 784
    features, e.g. a 1x28x28 image, and passed through three linear layers
    (784 -> 512 -> 512 -> 10) with a ReLU after the first two.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Attribute name and layer order are kept so state_dict keys
        # ("linear_relu_stack.<idx>.*") stay compatible with saved checkpoints.
        layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` and return the raw logits of the linear stack."""
        return self.linear_relu_stack(self.flatten(x))

# First model: a freshly initialized network placed on the selected device.
model_01 = NeuralNetwork().to(device)
print(model_01)

Using cuda device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
# Second model: same architecture as model_01, created as a separate instance.
model_02 = NeuralNetwork().to(device)
print(model_02)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


## 2.2. 훈련 준비 작업
- Loss() 정의
- 옵티마이저 정의
- 훈련, 테스트 함수 정의

In [7]:
def create_loss_optimizer(model, lr=1e-3):
    """Build the loss function and optimizer used to train ``model``.

    Args:
        model: Module whose parameters the optimizer will update.
        lr: Learning rate for SGD. Defaults to 1e-3, the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        A ``(loss_fn, optimizer)`` tuple: an ``nn.CrossEntropyLoss`` instance
        and a ``torch.optim.SGD`` over ``model.parameters()``.
    """
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    return loss_fn, optimizer


In [8]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one training pass over ``dataloader`` and return the model.

    The model is moved to the module-level ``device`` and switched to
    training mode. For each batch the loss is computed, gradients are
    cleared, back-propagated and applied. The current loss is printed
    every 100 batches.
    """
    size = len(dataloader.dataset)
    model.to(device)
    model.train()

    for step, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass and prediction error.
        batch_loss = loss_fn(model(inputs), targets)

        # Backpropagation.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Progress report on every 100th batch only.
        if step % 100 != 0:
            continue
        loss = batch_loss.item()
        current = step * len(inputs)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

    return model

In [9]:
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    Runs in eval mode without gradient tracking, on the module-level
    ``device``. The loss is averaged over the number of batches and the
    accuracy over the number of samples in ``dataloader.dataset``.
    """
    n_samples = len(dataloader.dataset)
    n_batches = len(dataloader)
    model.eval()

    loss_sum = 0
    hits = 0
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            logits = model(inputs)
            loss_sum += loss_fn(logits, targets).item()
            matches = logits.argmax(1) == targets
            hits += matches.type(torch.float).sum().item()

    test_loss = loss_sum / n_batches
    correct = hits / n_samples
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

## 2.3. 첫번째 모델 훈련 및 예측

In [10]:
# Loss and optimizer bound to model_01's parameters.
loss_fn , optimizer = create_loss_optimizer(model_01)

# Train model_01 for one epoch, evaluating on the test loader after each epoch.
epochs = 1
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    model_01 = train(train_dataloader, model_01, loss_fn, optimizer)
    test(test_dataloader, model_01, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.302132  [    0/60000]
loss: 2.289945  [ 6400/60000]
loss: 2.266256  [12800/60000]
loss: 2.263689  [19200/60000]
loss: 2.238970  [25600/60000]
loss: 2.215589  [32000/60000]
loss: 2.228728  [38400/60000]
loss: 2.196063  [44800/60000]
loss: 2.193371  [51200/60000]
loss: 2.154253  [57600/60000]
Test Error: 
 Accuracy: 38.5%, Avg loss: 2.151891 

Done!


### 2.3.1 모델 저장

In [11]:
def save_model(model, i):
    """Write the state dict of ``model`` to ``model_0{i}.pth`` and report it.

    Args:
        model: Module whose ``state_dict()`` is saved.
        i: Model number used to build the file name.
    """
    weights = model.state_dict()
    torch.save(weights, f"model_0{i}.pth")
    print(f"Saved PyTorch Model State to model_0{i}.pth")
    
# Persist the trained first model as model_01.pth.
save_model(model_01, i=1)

Saved PyTorch Model State to model_01.pth


### 2.3.2 모델 로딩

In [12]:
def load_model(model_path,i):
    """Rebuild a ``NeuralNetwork`` from a saved state dict.

    Args:
        model_path: Path of the ``.pth`` file holding the state dict.
        i: Unused; kept so existing calls such as ``load_model(path, i=1)``
            keep working.

    Returns:
        A ``NeuralNetwork`` with the loaded weights, placed on the
        module-level ``device``.
    """
    model = NeuralNetwork()
    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint saved on a GPU can still be loaded on a CPU-only machine.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)

    return model
    
# Replace the in-memory model_01 with the weights just saved to disk.
model_path = "model_01.pth"
model_01 = load_model(model_path,i=1)

### 2.3.3. 예측

In [13]:
# Label names; the list is indexed by the integer class id
# (see classes[y] and classes[...argmax...] in predict).
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]


def predict(model):
    """Classify the first test sample and print predicted vs. actual label.

    Reads the module-level ``test_data``, ``device`` and ``classes``.
    """
    model.eval()
    sample, label = test_data[0]
    sample = sample.to(device)
    with torch.no_grad():
        scores = model(sample)
    # Row 0 holds the class scores; their argmax is the predicted class id.
    predicted = classes[scores[0].argmax(0)]
    actual = classes[label]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')
        


In [14]:
# Sanity-check the reloaded first model on the first test sample.
predict(model_01)

Predicted: "Ankle boot", Actual: "Ankle boot"


## 2.4 두 번째 모델 훈련 및 예측

In [15]:
# Loss and optimizer bound to model_02's parameters (rebinds the globals
# previously used for model_01).
loss_fn , optimizer = create_loss_optimizer(model_02)

# Train model_02 for three epochs, evaluating after each one.
epochs = 3
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    model_02 = train(train_dataloader, model_02, loss_fn, optimizer)
    test(test_dataloader, model_02, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.298704  [    0/60000]
loss: 2.292320  [ 6400/60000]
loss: 2.266799  [12800/60000]
loss: 2.265103  [19200/60000]
loss: 2.254033  [25600/60000]
loss: 2.208532  [32000/60000]
loss: 2.227392  [38400/60000]
loss: 2.183111  [44800/60000]
loss: 2.189458  [51200/60000]
loss: 2.147223  [57600/60000]
Test Error: 
 Accuracy: 36.2%, Avg loss: 2.141000 

Epoch 2
-------------------------------
loss: 2.154752  [    0/60000]
loss: 2.148218  [ 6400/60000]
loss: 2.082286  [12800/60000]
loss: 2.097595  [19200/60000]
loss: 2.059641  [25600/60000]
loss: 1.988723  [32000/60000]
loss: 2.019038  [38400/60000]
loss: 1.933569  [44800/60000]
loss: 1.944044  [51200/60000]
loss: 1.859702  [57600/60000]
Test Error: 
 Accuracy: 56.4%, Avg loss: 1.858879 

Epoch 3
-------------------------------
loss: 1.898810  [    0/60000]
loss: 1.870333  [ 6400/60000]
loss: 1.743841  [12800/60000]
loss: 1.780634  [19200/60000]
loss: 1.692805  [25600/60000]
loss: 1.639441  [32000/600

In [16]:
# Save model_02 to model_02.pth, then reload it from that file.
save_model(model_02, i=2)
model_path = "model_02.pth"
model_02 = load_model(model_path,i=2)

Saved PyTorch Model State to model_02.pth


# 3. 두 개의 모델을 한개의 모델로 통합

## 3.1. 두 개의 모델 통합 및 네트워크 보기

In [17]:
# Define model
class CombinedNeuralNetwork(nn.Module):
    """Wrap two models and run both on the same flattened input.

    ``forward`` returns a pair ``(base_model output, add_model output)``.
    Because ``base_model`` may itself be a ``CombinedNeuralNetwork``, the
    first element of that pair can be a nested tuple.
    """

    def __init__(self, base_model, add_model):
        super().__init__()
        self.flatten = nn.Flatten()
        self.base_model = base_model
        self.add_model = add_model

    def forward(self, x):
        """Flatten ``x`` once and feed it to both wrapped models."""
        flat = self.flatten(x)
        return self.base_model(flat), self.add_model(flat)

# Combine the two trained models; the wrapper holds references to model_01
# and model_02 rather than copies.
CombinedModel = CombinedNeuralNetwork(model_01, model_02).to(device)
print(CombinedModel)

CombinedNeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (base_model): NeuralNetwork(
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear_relu_stack): Sequential(
      (0): Linear(in_features=784, out_features=512, bias=True)
      (1): ReLU()
      (2): Linear(in_features=512, out_features=512, bias=True)
      (3): ReLU()
      (4): Linear(in_features=512, out_features=10, bias=True)
    )
  )
  (add_model): NeuralNetwork(
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear_relu_stack): Sequential(
      (0): Linear(in_features=784, out_features=512, bias=True)
      (1): ReLU()
      (2): Linear(in_features=512, out_features=512, bias=True)
      (3): ReLU()
      (4): Linear(in_features=512, out_features=10, bias=True)
    )
  )
)


## 3.2. 통합된 모델을 예측 해보기

In [18]:
import numpy as np
def predict(dataloader, model, i, test_batch_size):
    '''
    Evaluate a single model on the first batch of ``dataloader`` only.

    Prints the ground-truth labels, the predicted labels and the accuracy
    on that batch. Inputs are moved to the module-level ``device``.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        model: Model returning one score row per sample.
        i: Model number used in the printed labels (``model_0{i}``).
        test_batch_size: Nominal batch size; used as the accuracy
            denominator only when the dataloader yields no batch.
    '''
    model.eval()
    correct = 0
    n_samples = 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)

            ground_truth = y
            print("Ground_Truth: \n", ground_truth, "\n")

            pred = model(X)
            # Use ``i`` as given so this header matches the accuracy line
            # below (it previously printed i+1, mislabelling the model).
            print(f"From model_0{i} - Predicted Label:")
            print(pred.argmax(1))

            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            n_samples = len(y)

            # Only the first batch is evaluated.
            break

    # Divide by the number of samples actually evaluated so the accuracy is
    # right even if the batch is smaller than test_batch_size.
    correct /= n_samples or test_batch_size
    print(f"From model_0{i} Accuracy: {(100*correct):>0.2f}% \n")



def predict_c(dataloader, model, test_batch_size):
    """Evaluate a two-model combined network on the first batch only.

    ``model`` is expected to return a flat tuple with one output tensor per
    wrapped model. For each output the predicted labels and the accuracy on
    the batch are printed. Inputs are moved to the module-level ``device``.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        model: Combined model returning a tuple of per-model outputs.
        test_batch_size: Nominal batch size; used as the accuracy
            denominator only if the batch reports zero samples.
    """
    model.eval()
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            print("X : ", X.shape)
            # Despite the name, these are the raw model outputs (logits).
            probs = model(X)

            # Report the number of per-model outputs directly instead of
            # going through np.array(), which needs a device-to-host
            # conversion of every tensor and can fail for CUDA tensors.
            print("Combined Model - probs shape: ", (len(probs),))

            ground_truth = y
            print("Ground_Truth: \n", ground_truth, "\n")

            n_samples = len(y) or test_batch_size
            for i, pred in enumerate(probs):
                labels = pred.argmax(1)
                print(f"From model_0{i+1} - Predicted Label:")
                print(labels)

                # Accuracy is computed per model on this batch.
                hits = (labels == y).type(torch.float).sum().item()
                correct = hits / n_samples
                print(f"From model_0{i+1} Accuracy: {(100*correct):>0.2f}% \n")

            # Only the first batch is evaluated.
            break
                
# Run both wrapped models on the first test batch through the combined model.
predict_c(test_dataloader, CombinedModel, test_batch_size)

X :  torch.Size([100, 1, 28, 28])
Combined Model - probs shape:  (2,)
Ground_Truth: 
 tensor([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 7, 3, 4, 1, 2, 4, 8, 0, 2, 5, 7, 9,
        1, 4, 6, 0, 9, 3, 8, 8, 3, 3, 8, 0, 7, 5, 7, 9, 6, 1, 3, 7, 6, 7, 2, 1,
        2, 2, 4, 4, 5, 8, 2, 2, 8, 4, 8, 0, 7, 7, 8, 5, 1, 1, 2, 3, 9, 8, 7, 0,
        2, 6, 2, 3, 1, 2, 8, 4, 1, 8, 5, 9, 5, 0, 3, 2, 0, 6, 5, 3, 6, 7, 1, 8,
        0, 1, 4, 2], device='cuda:0') 

From model_01 - Predicted Label:
tensor([9, 4, 1, 1, 4, 4, 4, 4, 8, 7, 4, 9, 9, 3, 4, 1, 4, 4, 8, 0, 4, 9, 9, 9,
        1, 4, 4, 0, 9, 4, 8, 4, 1, 4, 4, 4, 8, 8, 8, 9, 0, 1, 4, 9, 4, 9, 4, 1,
        4, 4, 4, 4, 8, 4, 4, 4, 8, 4, 8, 0, 8, 9, 9, 9, 1, 1, 4, 0, 9, 8, 9, 4,
        4, 4, 4, 4, 1, 4, 9, 4, 1, 8, 9, 9, 9, 0, 4, 4, 0, 4, 9, 4, 4, 9, 1, 9,
        4, 1, 4, 4], device='cuda:0')
From model_01 Accuracy: 41.00% 

From model_02 - Predicted Label:
tensor([9, 2, 1, 1, 2, 1, 4, 4, 7, 7, 4, 9, 7, 3, 4, 1, 2, 2, 8, 0, 2, 7, 7, 9,
        1, 4, 4, 

In [19]:
# Evaluate model_02 on its own on the first test batch, for comparison with
# the combined model's second output.
predict(test_dataloader, model_02, 2, test_batch_size)

Ground_Truth: 
 tensor([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 7, 3, 4, 1, 2, 4, 8, 0, 2, 5, 7, 9,
        1, 4, 6, 0, 9, 3, 8, 8, 3, 3, 8, 0, 7, 5, 7, 9, 6, 1, 3, 7, 6, 7, 2, 1,
        2, 2, 4, 4, 5, 8, 2, 2, 8, 4, 8, 0, 7, 7, 8, 5, 1, 1, 2, 3, 9, 8, 7, 0,
        2, 6, 2, 3, 1, 2, 8, 4, 1, 8, 5, 9, 5, 0, 3, 2, 0, 6, 5, 3, 6, 7, 1, 8,
        0, 1, 4, 2], device='cuda:0') 

From model_03 - Predicted Label:
tensor([9, 2, 1, 1, 2, 1, 4, 4, 7, 7, 4, 9, 7, 3, 4, 1, 2, 2, 8, 0, 2, 7, 7, 9,
        1, 4, 4, 3, 9, 0, 8, 0, 3, 0, 8, 0, 7, 7, 7, 9, 0, 1, 0, 9, 4, 9, 2, 1,
        4, 4, 2, 2, 7, 2, 4, 2, 8, 4, 8, 0, 7, 7, 8, 7, 1, 1, 3, 1, 9, 8, 7, 0,
        2, 0, 4, 1, 1, 2, 8, 4, 1, 8, 9, 9, 7, 0, 3, 4, 0, 2, 7, 3, 4, 7, 1, 8,
        0, 1, 4, 2], device='cuda:0')
From model_02 Accuracy: 64.00% 



# 4. 세 번째 모델을 추가 하기

## 4.1. 세번째 모델 생성 및 훈련

In [20]:
# Third model: same architecture, new independent instance.
model_03 = NeuralNetwork().to(device)
print(model_03)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [21]:
# Loss and optimizer bound to model_03's parameters.
loss_fn , optimizer = create_loss_optimizer(model_03)

# Train model_03 for one epoch, evaluating on the test loader afterwards.
epochs = 1
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    model_03 = train(train_dataloader, model_03, loss_fn, optimizer)
    test(test_dataloader, model_03, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.305949  [    0/60000]
loss: 2.298587  [ 6400/60000]
loss: 2.277089  [12800/60000]
loss: 2.269028  [19200/60000]
loss: 2.261537  [25600/60000]
loss: 2.230896  [32000/60000]
loss: 2.242368  [38400/60000]
loss: 2.206961  [44800/60000]
loss: 2.200840  [51200/60000]
loss: 2.171412  [57600/60000]
Test Error: 
 Accuracy: 42.0%, Avg loss: 2.165270 

Done!


## 4.2. 기존의 CombinedModel 에 세번째 모델 추가 하기
- 모델 네트워크가 아래와 같이 구성 됨
    - x = Flatten(x) (x 는 입력)
    - x_base_model = CombinedNeuralNetwork(x)
        - model_01
        - model_02
    - x_add_model = NeuralNetwork(x)
        - model_03
    - return x_base_model, x_add_model

In [22]:
# Nest the existing two-model wrapper as base_model and add model_03, so the
# forward pass returns ((model_01 out, model_02 out), model_03 out).
CombinedModel_3 = CombinedNeuralNetwork(CombinedModel, model_03).to(device)
print(CombinedModel_3)

CombinedNeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (base_model): CombinedNeuralNetwork(
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (base_model): NeuralNetwork(
      (flatten): Flatten(start_dim=1, end_dim=-1)
      (linear_relu_stack): Sequential(
        (0): Linear(in_features=784, out_features=512, bias=True)
        (1): ReLU()
        (2): Linear(in_features=512, out_features=512, bias=True)
        (3): ReLU()
        (4): Linear(in_features=512, out_features=10, bias=True)
      )
    )
    (add_model): NeuralNetwork(
      (flatten): Flatten(start_dim=1, end_dim=-1)
      (linear_relu_stack): Sequential(
        (0): Linear(in_features=784, out_features=512, bias=True)
        (1): ReLU()
        (2): Linear(in_features=512, out_features=512, bias=True)
        (3): ReLU()
        (4): Linear(in_features=512, out_features=10, bias=True)
      )
    )
  )
  (add_model): NeuralNetwork(
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear_relu_

## 4.3. 통합된 모델 (세 개의 모델들) 의 추론 테스트

In [23]:

def predict_c2(dataloader, model, test_batch_size):
    """Evaluate a (possibly nested) combined network on the first batch only.

    ``model`` may return a single tensor or an arbitrarily nested tuple/list
    of tensors, e.g. ``((out_01, out_02), out_03)`` for a combined model that
    wraps another combined model. The outputs are flattened left to right,
    and for each one the predicted labels and batch accuracy are printed.
    Inputs are moved to the module-level ``device``.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        model: Combined model whose output is a tensor or nested tuple/list.
        test_batch_size: Nominal batch size; used as the accuracy
            denominator only if the batch reports zero samples.
    """

    def flatten_outputs(outputs):
        """Return the leaves of a nested tuple/list in left-to-right order."""
        if not isinstance(outputs, (list, tuple)):
            return [outputs]
        flat = []
        for item in outputs:
            flat.extend(flatten_outputs(item))
        return flat

    model.eval()
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            print("X : ", X.shape)
            # Despite the name, these are the raw model outputs (logits).
            probs = model(X)

            # Report the top-level structure without np.array(): the nested
            # tuple is ragged, which NumPy cannot turn into a regular array.
            if isinstance(probs, (list, tuple)):
                top_level_shape = (len(probs),)
            else:
                top_level_shape = tuple(probs.shape)
            print("Combined Model - probs shape: ", top_level_shape)

            ground_truth = y
            print("Ground_Truth: \n", ground_truth, "\n")

            # One entry per wrapped model, in the order they were combined.
            probs_list = flatten_outputs(probs)

            n_samples = len(y) or test_batch_size
            for i, pred in enumerate(probs_list):
                labels = pred.argmax(1)
                print(f"From model_0{i+1} - Predicted Label:")
                print(labels)

                # Accuracy is computed per model on this batch.
                hits = (labels == y).type(torch.float).sum().item()
                correct = hits / n_samples
                print(f"From model_0{i+1} Accuracy: {(100*correct):>0.2f}% \n")

            # Only the first batch is evaluated.
            break



In [24]:
# Run all three wrapped models on the first test batch.
predict_c2(test_dataloader, CombinedModel_3, test_batch_size)

X :  torch.Size([100, 1, 28, 28])
Combined Model - probs shape:  (2,)
Ground_Truth: 
 tensor([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 7, 3, 4, 1, 2, 4, 8, 0, 2, 5, 7, 9,
        1, 4, 6, 0, 9, 3, 8, 8, 3, 3, 8, 0, 7, 5, 7, 9, 6, 1, 3, 7, 6, 7, 2, 1,
        2, 2, 4, 4, 5, 8, 2, 2, 8, 4, 8, 0, 7, 7, 8, 5, 1, 1, 2, 3, 9, 8, 7, 0,
        2, 6, 2, 3, 1, 2, 8, 4, 1, 8, 5, 9, 5, 0, 3, 2, 0, 6, 5, 3, 6, 7, 1, 8,
        0, 1, 4, 2], device='cuda:0') 

From model_01 - Predicted Label:
tensor([9, 4, 1, 1, 4, 4, 4, 4, 8, 7, 4, 9, 9, 3, 4, 1, 4, 4, 8, 0, 4, 9, 9, 9,
        1, 4, 4, 0, 9, 4, 8, 4, 1, 4, 4, 4, 8, 8, 8, 9, 0, 1, 4, 9, 4, 9, 4, 1,
        4, 4, 4, 4, 8, 4, 4, 4, 8, 4, 8, 0, 8, 9, 9, 9, 1, 1, 4, 0, 9, 8, 9, 4,
        4, 4, 4, 4, 1, 4, 9, 4, 1, 8, 9, 9, 9, 0, 4, 4, 0, 4, 9, 4, 4, 9, 1, 9,
        4, 1, 4, 4], device='cuda:0')
From model_01 Accuracy: 41.00% 

From model_02 - Predicted Label:
tensor([9, 2, 1, 1, 2, 1, 4, 4, 7, 7, 4, 9, 7, 3, 4, 1, 2, 2, 8, 0, 2, 7, 7, 9,
        1, 4, 4, 

# 커널 리스타팅

In [28]:
import IPython

# Shut down the current kernel; the True argument requests a restart.
# NOTE(review): all in-memory models and variables are lost after this runs.
IPython.Application.instance().kernel.do_shutdown(True)