# PyTorch model to ONNX

In [1]:
import os
import time
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.models as models
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
# from torchvision.datasets.utils import download_url
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torchvision.utils import make_grid
from sklearn.metrics import *

## Training dataset load

In [None]:
# Convert images to tensors (channel x height x width) and normalize each RGB
# channel with the per-channel mean / std given below.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4911, 0.4820, 0.4467), (0.2022, 0.1993, 0.2009)),
])

# Download (if not already present) the CIFAR-100 training and test splits into ./data
train = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
test = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Fraction of the training split held out for validation, and the training batch size
validation_ratio = 0.2
batch_size = 32

# Derive the training length from the validation length so the two always sum to
# len(train). Truncating both parts independently with int() can leave a sample
# unassigned for some ratios, which makes random_split raise.
n_validation = int(validation_ratio * len(train))
n_train = len(train) - n_validation

# A seeded generator keeps the train/validation membership identical across re-runs
train_data, validation_data = torch.utils.data.random_split(
    train, [n_train, n_validation], generator=torch.Generator().manual_seed(0)
)
print(len(train_data))
print(len(validation_data))

# Training batches are reshuffled every epoch; validation and test order is left fixed
train_loader = DataLoader(train_data, batch_size, shuffle=True, pin_memory=True)
val_loader = DataLoader(validation_data, batch_size, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test, batch_size=100, shuffle=False, pin_memory=True)

## Device check and load model

In [None]:
# Check if your system has cuda gpu or only cpu

def check_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    # is_available must be *called*: the bare function object is always truthy,
    # which would select 'cuda' even on machines without a usable GPU.
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')

# Function to transfer from CPU to GPU
def move_device(tensor, device):
    """Send ``tensor`` to ``device``; lists and tuples are handled element by element.

    A list or tuple input comes back as a list (nested containers are
    processed recursively). Anything else is moved with its ``.to`` method
    using ``non_blocking=True``.
    """
    if not isinstance(tensor, (list, tuple)):
        return tensor.to(device, non_blocking=True)

    moved = []
    for item in tensor:
        moved.append(move_device(item, device))
    return moved

# Execute transfer from CPU to GPU for each device
class DeviceDataLoader():
    """Wrap a dataloader so that every batch it yields is first passed through move_device."""

    def __init__(self, dataloader, device):
        # Remember the wrapped loader and the target device for later iteration
        self.device = device
        self.dl = dataloader

    def __len__(self):
        # The number of batches is whatever the wrapped loader reports
        return len(self.dl)

    def __iter__(self):
        # Lazily move each batch to the target device as it is requested
        yield from (move_device(batch, self.device) for batch in self.dl)


# Pick the compute device once, then wrap every loader so that the batches it
# yields are moved to that device.
device = check_device()
train_dl, valid_dl, test_dl = (
    DeviceDataLoader(loader, device)
    for loader in (train_loader, val_loader, test_loader)
)

In [None]:
class convnet_no_dropout(nn.Module):
    """Four-stage convolutional classifier without dropout.

    Each convolutional stage is ``Conv2d(3x3, stride 1, padding 1) -> ReLU ->
    MaxPool2d(2)``: the convolution keeps the spatial size, the pooling halves
    it, and the channel count grows 3 -> 16 -> 32 -> 64 -> 128.

    The flattened features feed two fully connected layers
    (128*2*2 -> 206 -> 100). The 128*2*2 input width of the first linear layer
    corresponds to 3x32x32 images after the four 2x poolings.
    """

    def __init__(self):
        super(convnet_no_dropout, self).__init__()

        self.conv_layer_1 = torch.nn.Sequential(
            # Convolutional layer
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),
            # Activation function
            nn.ReLU(),
            # Max pooling layer
            torch.nn.MaxPool2d(kernel_size=2)
        )

        self.conv_layer_2 = torch.nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2)
        )

        self.conv_layer_3 = torch.nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2)
        )

        self.conv_layer_4 = torch.nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2)
        )

        # Fully connected layers
        self.hidden_layer = nn.Linear(128*2*2, 206)
        self.output_layer = nn.Linear(206, 100)

    def forward(self, ip):
        """Run a batch ``ip`` through the network and return a (batch, 100) output."""
        # Convolutional feature extractor
        output = self.conv_layer_1(ip)
        output = self.conv_layer_2(output)
        output = self.conv_layer_3(output)
        output = self.conv_layer_4(output)

        # Flatten per sample. Unlike view(-1, 128*2*2), this keeps the batch
        # dimension fixed, so an input with the wrong spatial size fails in the
        # linear layer instead of being silently folded into extra batch rows.
        output = torch.flatten(output, start_dim=1)

        # NOTE(review): there is no activation between the two linear layers, so
        # together they act as a single linear map -- confirm this is intended.
        output = self.hidden_layer(output)
        output = self.output_layer(output)

        return output

# Instantiate the network and print its layer structure
model1 = convnet_no_dropout()
print(model1)

In [None]:
def accuracy(predicted,labels):
    """Fraction of rows in ``predicted`` whose highest-scoring column equals the label.

    The result is returned as a 0-dim tensor built from a Python float.
    """
    predclassid = torch.max(predicted, dim=1)[1]
    n_correct = torch.sum(predclassid == labels).item()
    return torch.tensor(n_correct / len(predicted))


def evaluate(model1,dl,loss_func):
    """Compute the mean loss and mean accuracy of ``model1`` over the batches in ``dl``.

    Args:
        model1: module to evaluate; it is switched to eval mode and left there.
        dl: iterable yielding ``(images, labels)`` batches.
        loss_func: callable ``loss_func(predicted, labels)`` returning a scalar tensor.

    Returns:
        Tuple ``(avg_loss, avg_acc)`` of Python floats, each the unweighted
        mean of the per-batch values.
    """
    model1.eval()
    batch_losses, batch_accs = [], []
    # Gradients are not needed while evaluating
    with torch.no_grad():
        # Iterate the loader that was passed in. Previously the global valid_dl
        # was used here, so the dl argument was silently ignored.
        for images, labels in dl:
            predicted = model1(images)
            batch_losses.append(loss_func(predicted, labels))
            batch_accs.append(accuracy(predicted, labels))
    epoch_avg_loss = torch.stack(batch_losses).mean().item()
    epoch_avg_acc = torch.stack(batch_accs).mean().item()
    return epoch_avg_loss, epoch_avg_acc

def train(model1,train_dl,valid_dl,epochs, max_lr, loss_func,optim):
    """Train ``model1`` with a one-cycle learning-rate schedule, validating after every epoch.

    Args:
        model1: module to optimize.
        train_dl: iterable of ``(images, labels)`` training batches; must support ``len()``.
        valid_dl: validation batches, forwarded to ``evaluate``.
        epochs: number of passes over ``train_dl``.
        max_lr: peak learning rate given to the optimizer and to OneCycleLR.
        loss_func: callable ``loss_func(predicted, labels)`` returning a scalar tensor.
        optim: optimizer class. It is constructed with ``momentum`` and
            ``weight_decay`` keyword arguments, so it must accept both (e.g. SGD).

    Returns:
        A list with one dict per epoch holding 'avg_valid_loss', 'avg_val_acc',
        'avg_train_loss', 'avg_train_acc' and 'lrs' (the learning rate used for
        each training batch of that epoch).
    """
    # SGD-style construction with momentum and L2 regularization (weight decay)
    optimizer = optim(model1.parameters(), max_lr, momentum=0.9, weight_decay=1e-5)

    # The schedule spans epochs * len(train_dl) steps, i.e. one step per batch
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs*len(train_dl))

    results = []
    for epoch in range(epochs):
        model1.train()
        train_losses = []
        train_batch_accs = []
        lrs = []

        for images, labels in train_dl:
            predicted = model1(images)
            loss = loss_func(predicted, labels)
            # Keep only the value; a detached tensor does not hold on to the graph
            train_losses.append(loss.detach())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            # Keep track of the learning rate that was used for this batch
            lrs.append(optimizer.param_groups[0]['lr'])
            train_batch_accs.append(accuracy(predicted, labels))

            # Advance the schedule once per batch. Stepping once per epoch would
            # consume only `epochs` of the epochs*len(train_dl) scheduled steps,
            # leaving the learning rate near its initial value.
            scheduler.step()

        epoch_train_acc = torch.stack(train_batch_accs).mean().item()
        epoch_train_loss = torch.stack(train_losses).mean().item()
        epoch_avg_loss, epoch_avg_acc = evaluate(model1, valid_dl, loss_func)

        results.append({'avg_valid_loss': epoch_avg_loss,
                        'avg_val_acc': epoch_avg_acc,
                        'avg_train_loss': epoch_train_loss,
                        'avg_train_acc': epoch_train_acc,
                        'lrs': lrs})

        print('Number of epochs:', epoch,'|', 
              'Validation loss :',epoch_avg_loss, ' |','Training loss :'
              ,epoch_train_loss,' |  '
              ,'Training accuracy:', epoch_train_acc
              , 'validation accuracy :',epoch_avg_acc)
    return results


# Put the model on the selected device before training
model1 = move_device(model1, device)
epochs = 5

# Peak learning rates to try
max_lr1, max_lr2, max_lr3, max_lr4 = 1e-1, 1e-2, 1e-3, 1e-4

# Loss function handed to train()
loss_func = nn.functional.cross_entropy

# Optimizer class handed to train()
optim = torch.optim.SGD

# Call train() once per learning rate, in order.
# NOTE: the same model1 instance is passed every time and is not re-initialised
# between runs, so each run starts from the weights left by the previous one.
results1, results2, results3, results4 = [
    train(model1, train_dl, valid_dl, epochs, peak_lr, loss_func, optim)
    for peak_lr in (max_lr1, max_lr2, max_lr3, max_lr4)
]

In [None]:
# Show the model as the cell output
model1

In [None]:
# Alias model1 under the name used by the export cells
torch_model = model1

In [None]:
# Switch the model to evaluation mode
torch_model.eval()

In [None]:
# Serialize the whole model object to disk.
# NOTE(review): the target is an absolute, machine-specific path -- consider a
# configurable output directory so the notebook runs elsewhere.
torch.save(model1, "E:/RESEARCH/torch_cifar100")

## Convert the model into onnx format

In [None]:
import io
import torch.onnx

In [None]:
# Example input used to trace the model for ONNX export. The network's first
# linear layer expects 128*2*2 features, i.e. 3x32x32 images after four 2x
# poolings; a 3x3 image cannot pass through the pooling stack. The tensor is
# created on the same device as the model's parameters so tracing does not hit
# a CPU/GPU mismatch.
x = torch.randn(16, 3, 32, 32, device=next(torch_model.parameters()).device, requires_grad=True)

In [None]:
# Show the shape of the example input
x.shape

In [None]:
# Show the training DataLoader object as the cell output
train_loader

In [None]:
# Export the trained model to an ONNX file, tracing it with the example input x
torch.onnx.export(torch_model, 
                  x,                          # model input (a tuple of several inputs is also possible)
                  f = "E:/RESEARCH/torch_onnx.onnx", # output path of the exported model file
                  export_params = True,       # whether to store the trained weights inside the model file
                  opset_version = 10,         # ONNX opset version to use for the conversion
                  do_constant_folding = True, # whether to apply constant folding as an optimization
                  input_names = ['input'],    # name that refers to the model's input
                  output_names = ['output'],  # name that refers to the model's output
                  dynamic_axes = {'input' : {0 : 'batch_size'}, 'output' : {0 : 'batch_size'}}  # axis 0 of input and output is a variable-length batch axis
                 )

# Is this really working?

In [None]:
import torch
import onnx
# import onnx_tf
import torchvision

In [None]:
# Choose the device at run time instead of hard-coding 'cuda', so this cell
# does not fail on a machine without a usable GPU.
alexnet_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Example input batch and pretrained AlexNet, both placed on the same device
dummy_input = torch.randn(10, 3, 224, 224, device=alexnet_device)
model = torchvision.models.alexnet(pretrained=True).to(alexnet_device)

In [None]:
# Show the AlexNet model as the cell output
model

In [None]:
# Show the shape of the example input batch
dummy_input.shape

In [None]:
# Names for the exported graph: one for the actual input, followed by
# "learned_0" ... "learned_15", and a single output name.
input_names = ["actual_input_1"]
input_names.extend("learned_{}".format(idx) for idx in range(16))
output_names = ["output1"]

In [None]:
# Export AlexNet to an ONNX file using dummy_input as the example input;
# verbose=True requests a printed description of the exported graph.
torch.onnx.export(model, dummy_input, "E:/RESEARCH/alexnet.onnx", verbose=True, input_names=input_names, output_names=output_names)

In [None]:
# Load the ONNX model
# onnx_model = onnx.load("E:/RESEARCH/alexnet.onnx")
# NOTE(review): YOLOtiny.onnx is not written by any cell visible in this
# notebook; the file must already exist at this path -- confirm this is the
# model that is meant to be checked here.
onnx_model = onnx.load("E:/RESEARCH/YOLOtiny.onnx")

# Check that the IR is well formed
onnx.checker.check_model(onnx_model)

# Print a human readable representation of the graph. printable_graph returns
# a string; leaving it as the bare last expression shows its repr with escaped
# newlines, so it is printed explicitly.
print(onnx.helper.printable_graph(onnx_model.graph))

## Onnx to torch?

In [None]:
from onnx2torch import convert

# Implementation Issues

* ML model을 바로 tflite 모델로 추출하려면 tflite-model-maker가 필요한데, 사실상 사용이 불가능함.
* 결국 우회루트 및 확장성을 위해서 ML model(Tensorflow, PyTorch) -> onnx -> tflite 로 거쳐서 가야함.
* 우선 TensorFlow Keras model에서 ONNX로 변환하는 "keras2onnx" 라이브러리는 지원 환경이 매우 제한적임. (python 3.5-3.8, tensorflow 1.x/2.0-2.2 에서만 가능)
* 시도하다가 결국 PyTorch model -> onnx -> tflite 로 가는 방법을 선택.
* PyTorch model 짜고 학습하고 onnx 파일로 추출하는것 까지는 얼추 가능함.
* 근데 또 onnx -> tflite 로 가려고 하니 onnx-tf 를 사용해야 하기에 tensorflow 설치 (참고로 tensorflow>=2.8.0 가능)
* 이번에는 protobuf 를 3.20.0 이하로 낮추라고 함. 그래서 protobuf==3.19.0 으로 낮춰서 설치.
* 그랬더니 이번에는 onnx 가 protobuf>=3.20.2 환경을 요구한다고 함. 게다가 tensorboard 2.9.1 도 버전이 맞지 않는다고 함.
* 검색해보니 pytorch 버전을 낮추면 될 수도 있다는 글도 있음...