# Test loading RNN weights from file

In [1]:
%load_ext autoreload
%autoreload 2

In [6]:
import sys, os
ROOT = "../"
sys.path.append(ROOT)

import numpy as np
import matplotlib.pyplot as plt
import scipy.constants as const
import scipy
import types
import torch

from scipy.io import wavfile
import librosa.display

from IPython.core.display import HTML
import IPython.display as ipd

# Import all my libraries
import utils.lib_io as lib_io
import utils.lib_commons as lib_commons
import utils.lib_datasets as lib_datasets
import utils.lib_augment as lib_augment
import utils.lib_ml as lib_ml
import utils.lib_rnn as lib_rnn
from utils.lib_rnn import RNN


In [8]:
# Set arguments
args = types.SimpleNamespace()

args.input_size = 12
args.hidden_size = 64
args.num_layers = 5
args.classes_txt = "../config/classes_kaggle.names" 
args.classes_txt = "../config/classes.names" 
args.num_classes = len(lib_io.read_list(args.classes_txt)) # read from "config/classes.names"
args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

load_model_from="../models/kaggle_accu_914/model_025.ckpt"

In [9]:
def load_weights(model, weights, PRINT=False):
    # Load weights into model.
    # If param's name is different, raise error.
    # If param's size is different, skip this param.
    # see: https://discuss.pytorch.org/t/how-to-load-part-of-pre-trained-model/1113/2
    
    for i, (name, param) in enumerate(weights.items()):
        model_state = model.state_dict()
        
        if name not in model_state:
            assert 0, "Wrong weights file."
            
        model_shape = model_state[name].shape
        if model_shape != param.shape:
            if PRINT:
                print(f"\nSize of {name} layer is different between model and weights. Not copy parameters.")
                print(f"\tModel shape = {model_shape}, weights' shape = {param.shape}.")
            continue

        model_state[name].copy_(param)

In [10]:
# Create model
# model = lib_rnn.create_RNN_model(args, load_model_from)

device = args.device
args.num_classes = len(lib_io.read_list(args.classes_txt)) # read from "config/classes.names"

model = RNN(args.input_size, args.hidden_size, args.num_layers, args.num_classes, device).to(device)
print("Create a RNN model")

if load_model_from: # load model
    print("Load parameters from", load_model_from)
    weights = torch.load(load_model_from)
    load_weights(model, weights, PRINT=True)

Create a RNN model
Load parameters from ../models/kaggle_accu_914/model_025.ckpt

Size of fc.weight layer is different between model and weights. Not copy parameters.
	Model shape = torch.Size([11, 64]), weights' shape = torch.Size([35, 64]).

Size of fc.bias layer is different between model and weights. Not copy parameters.
	Model shape = torch.Size([11]), weights' shape = torch.Size([35]).


# Fix weights

In [14]:
model.state_dict()["lstm.weight_ih_l0"]

tensor([[ 0.0229, -1.1894,  0.3005,  ..., -1.2080, -0.2036, -0.5264],
        [ 0.2233,  0.7289,  0.8038,  ...,  0.1909, -0.1121,  0.1137],
        [-0.2408,  0.7406,  0.6178,  ...,  1.5159,  0.2505,  0.4425],
        ...,
        [-0.0213, -0.3206, -0.8492,  ..., -0.1701, -0.9032, -0.3206],
        [-0.3085, -0.5769,  0.4461,  ..., -0.5440, -0.5557,  0.3436],
        [ 0.1111,  0.0483,  0.3182,  ..., -0.4189,  0.5426,  1.1522]],
       device='cuda:0')

In [19]:
def fix_weights_except_fc(model):
    not_fix = "fc"
    for name, param in model.state_dict().items():
        if not_fix in name:
            continue
        else:
            print(f"Fix {name} layer", end='. ')
            param.requires_grad = False
fix_weights_except_fc(model)

Fix lstm.weight_ih_l0 layer. Fix lstm.weight_hh_l0 layer. Fix lstm.bias_ih_l0 layer. Fix lstm.bias_hh_l0 layer. Fix lstm.weight_ih_l1 layer. Fix lstm.weight_hh_l1 layer. Fix lstm.bias_ih_l1 layer. Fix lstm.bias_hh_l1 layer. Fix lstm.weight_ih_l2 layer. Fix lstm.weight_hh_l2 layer. Fix lstm.bias_ih_l2 layer. Fix lstm.bias_hh_l2 layer. Fix lstm.weight_ih_l3 layer. Fix lstm.weight_hh_l3 layer. Fix lstm.bias_ih_l3 layer. Fix lstm.bias_hh_l3 layer. Fix lstm.weight_ih_l4 layer. Fix lstm.weight_hh_l4 layer. Fix lstm.bias_ih_l4 layer. Fix lstm.bias_hh_l4 layer. 

# ====================================
# Below are just some useless tests
# ====================================

In [48]:
weights.__dict__

{'_metadata': OrderedDict([('', {'version': 1}),
              ('lstm', {'version': 1}),
              ('fc', {'version': 1})])}

In [47]:
for i, (key, val) in enumerate(weights.items()):
    print("-"*50 + "\n", i)
    print(key)
    print(val)

--------------------------------------------------
 0
lstm.weight_ih_l0
tensor([[ 0.0229, -1.1894,  0.3005,  ..., -1.2080, -0.2036, -0.5264],
        [ 0.2233,  0.7289,  0.8038,  ...,  0.1909, -0.1121,  0.1137],
        [-0.2408,  0.7406,  0.6178,  ...,  1.5159,  0.2505,  0.4425],
        ...,
        [-0.0213, -0.3206, -0.8492,  ..., -0.1701, -0.9032, -0.3206],
        [-0.3085, -0.5769,  0.4461,  ..., -0.5440, -0.5557,  0.3436],
        [ 0.1111,  0.0483,  0.3182,  ..., -0.4189,  0.5426,  1.1522]],
       device='cuda:0')
--------------------------------------------------
 1
lstm.weight_hh_l0
tensor([[-0.0963, -0.3485,  0.6226,  ..., -0.6704, -0.4524,  0.4905],
        [-1.0680,  1.3284,  0.4781,  ...,  0.1699, -0.7938, -0.1436],
        [ 0.1416, -0.3915,  0.7447,  ...,  0.9445, -1.0173,  0.9754],
        ...,
        [ 0.0808, -0.5832,  0.9872,  ..., -4.1253,  1.0131,  2.3520],
        [ 0.9701, -0.8446,  0.2547,  ..., -0.6916,  4.7517,  0.0098],
        [-0.8375, -0.2105, -1.6835,

tensor([[ 0.3887, -0.6564,  0.6058,  ...,  0.6053, -0.1579, -0.2254],
        [-0.5989, -0.6285,  0.1214,  ...,  0.5595, -0.0049, -0.1335],
        [-0.2501, -0.3673,  0.9601,  ..., -0.0523, -0.7829,  1.2004],
        ...,
        [ 0.1069, -0.3885, -0.4293,  ...,  0.4016, -0.4378, -0.2725],
        [ 1.1534,  0.9917, -0.0386,  ..., -0.0152, -0.8645,  0.0708],
        [ 0.6898,  0.9147, -0.0252,  ..., -0.8500, -0.0472,  0.4088]],
       device='cuda:0')
--------------------------------------------------
 17
lstm.weight_hh_l4
tensor([[ 0.9058, -0.5414, -0.3242,  ..., -0.6569, -0.6093, -0.1598],
        [-0.5461,  1.1679, -0.1704,  ...,  0.0041,  0.5566, -0.1176],
        [-0.0500,  0.4696, -0.5150,  ...,  0.6835, -0.5937, -0.3722],
        ...,
        [-0.2687,  1.3555, -0.3711,  ...,  0.2973,  0.2800,  1.5414],
        [-0.7413, -0.2379, -0.0416,  ..., -0.4020, -1.2143,  0.1360],
        [ 0.7380,  0.4861,  0.8859,  ...,  0.2879,  0.5197,  0.8912]],
       device='cuda:0')
-----------

In [26]:
model.__dict__

{'_backend': <torch.nn.backends.thnn.THNNFunctionBackend at 0x7fe2049b2c88>,
 '_parameters': OrderedDict(),
 '_buffers': OrderedDict(),
 '_backward_hooks': OrderedDict(),
 '_forward_hooks': OrderedDict(),
 '_forward_pre_hooks': OrderedDict(),
 '_state_dict_hooks': OrderedDict(),
 '_load_state_dict_pre_hooks': OrderedDict(),
 '_modules': OrderedDict([('lstm',
               LSTM(12, 64, num_layers=5, batch_first=True)),
              ('fc', Linear(in_features=64, out_features=35, bias=True))]),
 'training': True,
 'hidden_size': 64,
 'num_layers': 5,
 'device': device(type='cuda')}

In [35]:
for i, param in enumerate(model.parameters()):
    print("-"*50 + "\n", i)
    print(param)

--------------------------------------------------
 0
Parameter containing:
tensor([[ 0.0229, -1.1894,  0.3005,  ..., -1.2080, -0.2036, -0.5264],
        [ 0.2233,  0.7289,  0.8038,  ...,  0.1909, -0.1121,  0.1137],
        [-0.2408,  0.7406,  0.6178,  ...,  1.5159,  0.2505,  0.4425],
        ...,
        [-0.0213, -0.3206, -0.8492,  ..., -0.1701, -0.9032, -0.3206],
        [-0.3085, -0.5769,  0.4461,  ..., -0.5440, -0.5557,  0.3436],
        [ 0.1111,  0.0483,  0.3182,  ..., -0.4189,  0.5426,  1.1522]],
       device='cuda:0', requires_grad=True)
--------------------------------------------------
 1
Parameter containing:
tensor([[-0.0963, -0.3485,  0.6226,  ..., -0.6704, -0.4524,  0.4905],
        [-1.0680,  1.3284,  0.4781,  ...,  0.1699, -0.7938, -0.1436],
        [ 0.1416, -0.3915,  0.7447,  ...,  0.9445, -1.0173,  0.9754],
        ...,
        [ 0.0808, -0.5832,  0.9872,  ..., -4.1253,  1.0131,  2.3520],
        [ 0.9701, -0.8446,  0.2547,  ..., -0.6916,  4.7517,  0.0098],
       

Parameter containing:
tensor([-1.4036e+00, -8.0730e-01, -8.9033e-01, -8.4927e-01, -7.9848e-01,
        -8.9178e-01, -6.8741e-01, -3.9165e-02, -9.1357e-01, -6.5255e-01,
        -1.0518e+00, -5.4136e-01, -1.1358e+00, -9.3001e-01, -1.3138e+00,
        -1.1759e-01, -7.7650e-01, -4.6344e-01, -1.0076e+00, -7.5908e-01,
        -1.1624e+00, -7.2642e-01, -1.0133e+00, -1.0226e+00, -1.0103e+00,
        -1.1126e+00, -3.5276e-01, -1.4306e+00, -6.3949e-01, -5.8352e-01,
        -7.4974e-01, -8.0107e-01, -6.9581e-01, -3.8654e-01, -1.3534e+00,
        -8.0288e-01, -9.2721e-01, -1.4602e+00, -9.7990e-01, -1.0431e+00,
        -1.3470e+00, -1.2189e+00, -6.3063e-01, -7.6913e-01, -5.7102e-01,
        -1.0410e+00, -5.3561e-01, -6.5471e-01, -3.8621e-01, -1.0440e+00,
        -6.6196e-01, -8.0437e-01, -7.8597e-01, -9.6304e-01, -1.0199e+00,
        -4.6034e-01, -7.4373e-01, -4.1180e-01, -8.2998e-01, -6.8159e-01,
        -3.7736e-01, -8.3226e-01, -7.1511e-01, -7.1517e-01,  1.7404e-01,
         3.3879e-01,  5.3672e

In [36]:
for i, (name, param) in enumerate(model.state_dict().items()):
    print("-"*50 + "\n", i)
    print(name)
    print(param)

--------------------------------------------------
 0
lstm.weight_ih_l0
tensor([[ 0.0229, -1.1894,  0.3005,  ..., -1.2080, -0.2036, -0.5264],
        [ 0.2233,  0.7289,  0.8038,  ...,  0.1909, -0.1121,  0.1137],
        [-0.2408,  0.7406,  0.6178,  ...,  1.5159,  0.2505,  0.4425],
        ...,
        [-0.0213, -0.3206, -0.8492,  ..., -0.1701, -0.9032, -0.3206],
        [-0.3085, -0.5769,  0.4461,  ..., -0.5440, -0.5557,  0.3436],
        [ 0.1111,  0.0483,  0.3182,  ..., -0.4189,  0.5426,  1.1522]],
       device='cuda:0')
--------------------------------------------------
 1
lstm.weight_hh_l0
tensor([[-0.0963, -0.3485,  0.6226,  ..., -0.6704, -0.4524,  0.4905],
        [-1.0680,  1.3284,  0.4781,  ...,  0.1699, -0.7938, -0.1436],
        [ 0.1416, -0.3915,  0.7447,  ...,  0.9445, -1.0173,  0.9754],
        ...,
        [ 0.0808, -0.5832,  0.9872,  ..., -4.1253,  1.0131,  2.3520],
        [ 0.9701, -0.8446,  0.2547,  ..., -0.6916,  4.7517,  0.0098],
        [-0.8375, -0.2105, -1.6835,

tensor([-1.1732e+00, -8.2551e-01, -8.8102e-01, -9.2034e-01, -7.6208e-01,
        -7.7599e-01, -8.4967e-01, -1.0730e-01, -8.8052e-01, -5.5725e-01,
        -8.8728e-01, -4.4731e-01, -1.1396e+00, -9.1379e-01, -1.2458e+00,
         1.6156e-02, -8.5868e-01, -4.5690e-01, -8.3813e-01, -7.0821e-01,
        -1.0714e+00, -6.6743e-01, -9.0860e-01, -8.1299e-01, -1.0540e+00,
        -1.2694e+00, -4.8467e-01, -1.3121e+00, -6.8701e-01, -6.3823e-01,
        -6.0205e-01, -8.0362e-01, -5.7186e-01, -3.0955e-01, -1.4456e+00,
        -8.9129e-01, -9.7123e-01, -1.5411e+00, -9.9499e-01, -1.1152e+00,
        -1.4705e+00, -1.2352e+00, -5.5130e-01, -5.8703e-01, -4.9930e-01,
        -1.1018e+00, -4.0352e-01, -6.1960e-01, -3.2275e-01, -1.0361e+00,
        -4.8369e-01, -8.8504e-01, -6.8344e-01, -8.0760e-01, -1.1218e+00,
        -5.5243e-01, -7.6086e-01, -4.0259e-01, -8.8261e-01, -7.2731e-01,
        -4.0182e-01, -1.0411e+00, -8.5853e-01, -6.3110e-01,  2.2445e-01,
         1.7492e-01,  5.7675e-01, -1.5448e-01, -2.1

In [29]:
for child in model.children():
    print(child)

LSTM(12, 64, num_layers=5, batch_first=True)
Linear(in_features=64, out_features=35, bias=True)


In [30]:
for module in model.modules():
    print(module)

RNN(
  (lstm): LSTM(12, 64, num_layers=5, batch_first=True)
  (fc): Linear(in_features=64, out_features=35, bias=True)
)
LSTM(12, 64, num_layers=5, batch_first=True)
Linear(in_features=64, out_features=35, bias=True)
