In [35]:
import torch
import torch.nn as nn

#input_size = 66  # 66첫 번째 LSTM 층의 입력 크기
sequence_length = 10  # 입력 시퀀스 길이
batch_size = 30  # 배치 크기
lstm_depth = 2  # LSTM 층의 깊이
model_dimension = 75  # 모델의 hidden state 차원

# hidden은 사실 초기 은닉 상태(hidden state)와 초기 셀 상태(cell state)로 구성된 튜플
# bidirectional LSTM을 사용하므로, hidden state와 cell state의 차원은 lstm_depth * 2

In [36]:
# random data for input
inputs = torch.randn(sequence_length,batch_size,model_dimension)

hidden = (torch.randn(lstm_depth,batch_size,model_dimension), torch.randn(lstm_depth,batch_size,model_dimension))
print(inputs[0].shape, hidden[0].shape) 

torch.Size([30, 75]) torch.Size([2, 30, 75])


In [37]:
class Model3(nn.Module):
    def __init__(self):
        super(Model3, self).__init__()
        self.recurrent_layer = nn.LSTM(hidden_size=100, input_size=75, bidirectional=True)
        self.nonLin = nn.BatchNorm1d(30)
        self.recurrent_layer2 = nn.LSTM(hidden_size=100, input_size=200, bidirectional=True) # biLSTM이라 input 2배로 늘림
        self.nonLin2 = nn.BatchNorm1d(30)
        self.conv = nn.Conv1d(30, 36, 7, 1)
        self.activation = nn.ReLU()  # or Leaky ReLU activation
        #self.dropout = nn.Dropout(0.5)
        self.classify_layer = nn.Linear(194, 5) # LSTM 출력 차원: 100, 두 번째 nn.BatchNorm1d 출력 차원: 35, nn.Conv1d 출력 차원: 36, : 100 + 35 + 36 = 171

    def forward(self, input, h_t_1=None, c_t_1=None):
        rnn_outputs, (hn, cn) = self.recurrent_layer(input)
        lin1 = self.nonLin(rnn_outputs)
        rnn_outputs2, (hn2, cn2) = self.recurrent_layer2(lin1)
        lin2 = self.nonLin2(rnn_outputs2)
        conv = self.conv(lin2)
        activation = self.activation(conv)

        logits = self.classify_layer(activation[:,-1])
        return logits


In [24]:
# fusion 시도할 경우
model_fp32 = Model3()

# model must be set to eval for fusion to work
model_fp32.eval()

model_fp32.qconfig = torch.ao.quantization.get_default_qat_qconfig('x86')

# fuse the activations to preceding layers, where applicable
# this needs to be done manually depending on the model architecture
model_fp32_fused = torch.ao.quantization.fuse_modules(model_fp32,
    [['conv', 'activation']])
print(model_fp32_fused)


Model3(
  (recurrent_layer): LSTM(75, 100, bidirectional=True)
  (nonLin): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (recurrent_layer2): LSTM(200, 100, bidirectional=True)
  (nonLin2): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv): ConvReLU1d(
    (0): Conv1d(30, 36, kernel_size=(7,), stride=(1,))
    (1): ReLU()
  )
  (activation): Identity()
  (classify_layer): Linear(in_features=194, out_features=5, bias=True)
)


In [38]:
filepath = r'D:\2023\2023_1_1\2023-RnE\save_by_loss\goodmodel3.pth'

float_lstm = torch.load(filepath)

quantized_lstm = torch.quantization.quantize_dynamic(
    float_lstm, {nn.Linear}, dtype=torch.qint8
)

print('Here is the floating point version of this module:')
print(float_lstm)
print('')
print('and now the quantized version:')
print(quantized_lstm)


Here is the floating point version of this module:
Model3(
  (recurrent_layer): LSTM(75, 100, bidirectional=True)
  (nonLin): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (recurrent_layer2): LSTM(200, 100, bidirectional=True)
  (nonLin2): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv): Conv1d(30, 36, kernel_size=(7,), stride=(1,))
  (activation): ReLU()
  (classify_layer): Linear(in_features=194, out_features=5, bias=True)
)

and now the quantized version:
Model3(
  (recurrent_layer): LSTM(75, 100, bidirectional=True)
  (nonLin): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (recurrent_layer2): LSTM(200, 100, bidirectional=True)
  (nonLin2): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv): Conv1d(30, 36, kernel_size=(7,), stride=(1,))
  (activation): ReLU()
  (classify_layer): DynamicQuantizedLinear(in_features=194, out_features=5, 

In [39]:
import os

def print_size_of_model(model, label=""):
    torch.save(model.state_dict(), "temp.p")
    size=os.path.getsize("temp.p")
    print("model: ",label,' \t','Size (KB):', size/1e3)
    os.remove('temp.p')
    return size

# 규모 비교하기
f=print_size_of_model(float_lstm,"fp32")
q=print_size_of_model(quantized_lstm,"int8")
print("{0:.2f} times smaller".format(f/q))

model:  fp32  	 Size (KB): 1575.766
model:  int8  	 Size (KB): 1573.706
1.00 times smaller


In [25]:
input_params = next(float_lstm.parameters())
print(input_params.size())

torch.Size([400, 75])


In [40]:
# 응답시간 살펴보기
# 성능 비교하기
#print("Floating point FP32")
#%timeit float_lstm.forward(inputs, hidden)

print("Quantized INT8")
%timeit quantized_lstm.forward(inputs,hidden)

Quantized INT8
7.17 ms ± 302 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [41]:
# 정확도 살펴보기
# float 모델
logits1 = float_lstm(inputs, hidden)
mag1 = torch.mean(abs(logits1)).item()
print('mean absolute value of output tensor values in the FP32 model is {0:.5f} '.format(mag1))

# 양자화된 모델
logits2 = quantized_lstm(inputs, hidden)
mag2 = torch.mean(abs(logits2)).item()
print('mean absolute value of output tensor values in the INT8 model is {0:.5f}'.format(mag2))

# 결과 비교하기
mag3 = torch.mean(abs(logits1-logits2)).item()
print('mean absolute value of the difference between the output tensors is {0:.5f} or {1:.2f} percent'.format(mag3,mag3/mag1*100))

mean absolute value of output tensor values in the FP32 model is 1.34337 
mean absolute value of output tensor values in the INT8 model is 1.34120
mean absolute value of the difference between the output tensors is 0.02205 or 1.64 percent
