# Copyright Netherlands eScience Center <br>
** Function     : Initialize BayesConvLSTM with known ConvLSTM weight** <br>
** Author       : Yang Liu ** <br>
** First Built  : 2020.03.27 ** <br>
** Last Update  : 2020.03.27 ** <br>
** Library      : PyTorch, NumPy, netCDF4, os, iris, cartopy, dlacs, matplotlib **<br>
Description     : This notebook serves to test the initialization of BayesConvLSTM with ConvLSTM weight. <br>
<br>

Return Values   : pkl model <br>

In [2]:
%matplotlib inline

import sys
import warnings
import numbers

# for data loading
import os
from netCDF4 import Dataset
# for pre-processing and machine learning
import numpy as np
import sklearn
#import scipy
import torch
import torch.nn.functional

#sys.path.append(os.path.join('C:','Users','nosta','ML4Climate','Scripts','DLACs'))
sys.path.append("C:\\Users\\nosta\\ML4Climate\\Scripts\\DLACs")
sys.path.append("C:\\Users\\nosta\\ML4Climate\\Scripts\\DeepClim")
import dlacs
import dlacs.BayesConvLSTM
import dlacs.preprocess
import dlacs.function
import dlacs.saveNetCDF
# legacy
import deepclim
import deepclim.deepArray

# for visualization
import dlacs.visual
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import iris # also helps with regriding
import cartopy
import cartopy.crs as ccrs

# Silence warnings of every category (simplefilter("ignore") is not limited to
# the DeprecationWarnings raised by pytorch), but only when the interpreter
# was started without explicit -W warning options.
if not sys.warnoptions:
    warnings.simplefilter("ignore")

The testing device is a Dell Inspiron 5680 with an Intel Core i7-8700 x64 CPU and an Nvidia GTX 1060 6GB GPU.<br>
Here is a benchmark comparing a CPU with the GTX 1060: <br>
https://www.analyticsindiamag.com/deep-learning-tensorflow-benchmark-intel-i5-4210u-vs-geforce-nvidia-1060-6gb/

In [3]:
# ------------------------------- model paths --------------------------------
# Both directories sit under the same PredictArctic project folder.
_predict_arctic_dir = 'C:\\Users\\nosta\\ML4Climate\\PredictArctic'
# directory holding the model used as initialization weight
init_path = _predict_arctic_dir + '\\Maps'
# directory for the output
output_path = _predict_arctic_dir + '\\BayesMaps'

In [4]:
if __name__ == "__main__":
    print('*******************  create basic dimensions for network  *********************')
    # --- specifications of the neural network ---
    input_channels = 3
    # One entry per hidden layer; the channel count of the last layer is also
    # the channel count of the output.
    # (alternatives that were tried: [3, 3, 3, 3, 2] and [2])
    hidden_channels = [3, 2, 1]
    kernel_size = 3
    # --- remaining settings (not referenced by the cells visible in this part) ---
    batch_size = 1
    learning_rate = 0.01
    num_epochs = 1500

*******************  create basic dimensions for network  *********************


In [10]:
    print ('*******************  load exsited LSTM model  *********************')
    # Alternative kept for reference: build a BayesConvLSTM and restore only its parameters
    #model = dlacs.BayesConvLSTM.BayesConvLSTM(input_channels, hidden_channels, kernel_size).to(device)
    #model.load_state_dict(torch.load(os.path.join(output_path, 'map_BayesConvLSTM_sic_ohc_Barents_hl_3_kernel_3_lr_0.01_epoch_1500_validSIC.pkl'),
    #                                 map_location=device))
    # Load the entire pretrained ConvLSTM model.
    # NOTE(security): torch.load unpickles the file, which can execute arbitrary code;
    # only load checkpoints from a trusted source.
    # map_location remaps the stored tensors (the printed weights live on cuda:0) so the
    # checkpoint also loads on a machine without CUDA instead of raising at load time.
    init_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model_init = torch.load(
        os.path.join(init_path, 'convlstm_era_sic_oras_ohc_Barents_hl_3_kernel_3_lr_0.005_epoch_1500_validSIC.pkl'),
        map_location=init_device,
    )
    print(model_init)

*******************  load exsited LSTM model  *********************
ConvLSTM(
  (cell0): ConvLSTMCell(
    (Wxi): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Whi): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (Wxf): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Whf): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (Wxc): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Whc): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (Wxo): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Who): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  )
  (cell1): ConvLSTMCell(
    (Wxi): Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Whi): Conv2d(2, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (Wxf): Conv2d(3, 2, kernel_size=(3, 3), stride=(1, 1),

In [12]:
    print ('*******************  extract the weight from ConvLSTM  *********************')
    # map every parameter name of the loaded model to its underlying tensor
    weight_model_init = {
        param_name: param_obj.data
        for param_name, param_obj in model_init.named_parameters()
    }

    print ('*******************  check the weight from ConvLSTM  *********************')
    # print each entry as: name, tensor, separator line
    for weight_name, weight_tensor in weight_model_init.items():
        print(weight_name)
        print('{}'.format(weight_tensor))
        print ("=========================")

*******************  extract the weight from ConvLSTM  *********************
*******************  check the weight from ConvLSTM  *********************
cell0.Wxi.weight
tensor([[[[-0.1253, -0.5814, -0.4379],
          [-0.2001, -0.2750, -0.1768],
          [-0.3347, -0.4417, -0.2793]],

         [[ 0.2093,  0.1484,  0.1220],
          [ 0.1573,  0.0237,  0.0926],
          [-0.0204,  0.0380,  0.2156]],

         [[-0.0252,  0.1174,  0.0601],
          [ 0.0353, -0.0247, -0.1563],
          [ 0.0043, -0.1410,  0.0755]]],


        [[[-0.3883,  0.1526,  0.9933],
          [ 0.0629,  0.8945,  0.7987],
          [-0.0420,  1.1681,  0.1278]],

         [[ 0.2115, -0.0215,  0.1117],
          [ 0.0290, -0.0173,  0.1298],
          [-0.1255,  0.0223, -0.0725]],

         [[ 0.0404,  0.0021,  0.0034],
          [ 0.2141,  0.0219, -0.0687],
          [-0.0953, -0.0275, -0.0201]]],


        [[[ 0.6254,  0.4591,  0.7186],
          [ 0.4563,  0.2877,  0.3925],
          [ 0.3539,  0.5635,  0.630

          [-0.0669,  0.2698,  0.1323]]]], device='cuda:0')
cell0.Wxo.bias
tensor([0.1341, 0.0072, 0.2107], device='cuda:0')
cell0.Who.weight
tensor([[[[ 4.2960e-02, -1.8782e-02,  3.4331e-02],
          [-3.7617e-01, -2.6454e-01, -3.5489e-01],
          [-2.6716e-01,  2.9074e-02, -1.0423e-01]],

         [[-4.6629e-01, -4.4109e-01,  1.7605e-01],
          [-4.1722e-01, -3.2188e-01, -2.3315e-01],
          [-3.6009e-01,  2.8799e-01,  5.8393e-01]],

         [[-1.3392e+00,  2.5167e-01,  6.6726e-01],
          [-8.2628e-01,  1.4399e-01, -6.3494e-01],
          [-1.6277e-01, -4.5857e-01,  6.9736e-01]]],


        [[[-2.0841e-02, -1.8111e-01, -2.4765e-02],
          [ 1.4310e-02, -9.9407e-02, -2.0886e-01],
          [-1.8029e-01, -2.0063e-01, -2.1434e-01]],

         [[-3.2561e-02, -4.7475e-02, -1.6084e-01],
          [-1.1912e-01, -4.5133e-02, -8.2083e-02],
          [-2.4669e-01, -3.4841e-02, -2.6680e-01]],

         [[-2.4766e-01,  1.8113e-01,  3.4192e-02],
          [-3.7504e-01, -2.2193

          [-0.2933,  0.1623,  0.3783]]]], device='cuda:0')
cell2.Wxc.weight
tensor([[[[-0.0833, -0.2570,  0.1175],
          [-0.5897, -0.3605, -0.2642],
          [ 0.0161,  0.1338, -0.0336]],

         [[-0.0422, -0.1120,  0.0646],
          [ 0.1847,  0.9165, -0.1244],
          [-0.0860, -0.0492, -0.1583]]]], device='cuda:0')
cell2.Wxc.bias
tensor([0.0313], device='cuda:0')
cell2.Whc.weight
tensor([[[[-0.0504, -0.0648, -0.1043],
          [-0.0942, -0.1594, -0.0390],
          [ 0.0510, -0.0974, -0.0302]]]], device='cuda:0')
cell2.Wxo.weight
tensor([[[[-0.7113, -0.3272, -0.2529],
          [-0.9133, -0.6301, -0.4213],
          [ 0.0056,  0.2432, -0.0953]],

         [[-0.2234, -0.0975,  0.0768],
          [ 0.2590,  0.9676, -0.1638],
          [ 0.0367,  0.2603, -0.0561]]]], device='cuda:0')
cell2.Wxo.bias
tensor([-0.0644], device='cuda:0')
cell2.Who.weight
tensor([[[[-0.3425,  0.1325, -0.1561],
          [ 0.3030,  0.1554,  0.3414],
          [ 0.4759, -0.1258, -0.3464]]]], devic

In [5]:
    print ('*******************  check the environment  *********************')
    print ("Pytorch version {}".format(torch.__version__))
    # query CUDA once; the answer is reused for the report and for the device choice
    use_cuda = torch.cuda.is_available()
    print("Is CUDA available? {}".format(use_cuda))
    # CUDA settings: torch.__version__ must be > 0.4
    # !!! This is important for the model !!! The GPU is the first option, the CPU the fallback
    if use_cuda:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

*******************  check the environment  *********************
Pytorch version 1.1.0
Is CUDA available? True


In [13]:
    print ('*******************  initialize BayesConvLSTM with given weight  *********************')
    # Build the reduced-size BayesConvLSTM, handing it the weights extracted from the
    # ConvLSTM through weight_dict, and move the network to the selected device.
    model = dlacs.BayesConvLSTM.BayesConvLSTM(
        input_channels,
        hidden_channels,
        kernel_size,
        cell_type='reduced',
        weight_dict=weight_model_init,
    ).to(device)

*******************  initialize BayesConvLSTM with given weight  *********************
!@#$% The network will be built with reduced size BayesConvLSTM cell. !@#$%


In [14]:
    print ('*******************  check wieght matrix of BayesConvLSTM  *********************')
    # dump name, values and shape of every parameter that requires a gradient
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        print (name)
        print (param.data)
        print (param.size())
        print ("=========================")

*******************  check wieght matrix of BayesConvLSTM  *********************
cell0.Wxi_mu
tensor([[[[-0.1253, -0.5814, -0.4379],
          [-0.2001, -0.2750, -0.1768],
          [-0.3347, -0.4417, -0.2793]],

         [[ 0.2093,  0.1484,  0.1220],
          [ 0.1573,  0.0237,  0.0926],
          [-0.0204,  0.0380,  0.2156]],

         [[-0.0252,  0.1174,  0.0601],
          [ 0.0353, -0.0247, -0.1563],
          [ 0.0043, -0.1410,  0.0755]]],


        [[[-0.3883,  0.1526,  0.9933],
          [ 0.0629,  0.8945,  0.7987],
          [-0.0420,  1.1681,  0.1278]],

         [[ 0.2115, -0.0215,  0.1117],
          [ 0.0290, -0.0173,  0.1298],
          [-0.1255,  0.0223, -0.0725]],

         [[ 0.0404,  0.0021,  0.0034],
          [ 0.2141,  0.0219, -0.0687],
          [-0.0953, -0.0275, -0.0201]]],


        [[[ 0.6254,  0.4591,  0.7186],
          [ 0.4563,  0.2877,  0.3925],
          [ 0.3539,  0.5635,  0.6305]],

         [[ 0.3182,  0.1757,  0.2821],
          [ 0.1969,  0.2908,  

          [-0.0669,  0.2698,  0.1323]]]], device='cuda:0')
torch.Size([3, 3, 3, 3])
cell0.Who_mu
tensor([[[[ 4.2960e-02, -1.8782e-02,  3.4331e-02],
          [-3.7617e-01, -2.6454e-01, -3.5489e-01],
          [-2.6716e-01,  2.9074e-02, -1.0423e-01]],

         [[-4.6629e-01, -4.4109e-01,  1.7605e-01],
          [-4.1722e-01, -3.2188e-01, -2.3315e-01],
          [-3.6009e-01,  2.8799e-01,  5.8393e-01]],

         [[-1.3392e+00,  2.5167e-01,  6.6726e-01],
          [-8.2628e-01,  1.4399e-01, -6.3494e-01],
          [-1.6277e-01, -4.5857e-01,  6.9736e-01]]],


        [[[-2.0841e-02, -1.8111e-01, -2.4765e-02],
          [ 1.4310e-02, -9.9407e-02, -2.0886e-01],
          [-1.8029e-01, -2.0063e-01, -2.1434e-01]],

         [[-3.2561e-02, -4.7475e-02, -1.6084e-01],
          [-1.1912e-01, -4.5133e-02, -8.2083e-02],
          [-2.4669e-01, -3.4841e-02, -2.6680e-01]],

         [[-2.4766e-01,  1.8113e-01,  3.4192e-02],
          [-3.7504e-01, -2.2193e-01, -1.3561e-01],
          [-2.7016e-01, 

torch.Size([2, 2, 3, 3])
cell1.Wxi_bias
tensor([0.1796, 0.3558], device='cuda:0')
torch.Size([2])
cell1.Wxf_bias
tensor([-0.1626, -0.0935], device='cuda:0')
torch.Size([2])
cell1.Wxc_bias
tensor([-0.1765,  0.1234], device='cuda:0')
torch.Size([2])
cell1.Wxo_bias
tensor([ 0.3058, -0.0363], device='cuda:0')
torch.Size([2])
cell1.Wxi_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Whi_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Wxf_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Whf_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Wxc_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Whc_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Wxo_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Who_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell2.Wxi_mu
tensor([[[[-0.6423, -0.3906, -0.1379],
          [-1.1632, -0.6641, -0.7773],
          

In [7]:
    print ('*******************  initialize BayesConvLSTM without given weight  *********************')
    # Build the reduced-size BayesConvLSTM without passing a weight_dict
    # and move the network to the selected device.
    model = dlacs.BayesConvLSTM.BayesConvLSTM(
        input_channels,
        hidden_channels,
        kernel_size,
        cell_type='reduced',
    ).to(device)

*******************  initialize BayesConvLSTM without given weight  *********************
!@#$% The network will be built with reduced size BayesConvLSTM cell. !@#$%


In [8]:
    print ('*******************  check wieght matrix of BayesConvLSTM  *********************')
    # dump name, values and shape of every parameter that requires a gradient
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        print (name)
        print (param.data)
        print (param.size())
        print ("=========================")

*******************  check wieght matrix of BayesConvLSTM  *********************
cell0.Wxi_mu
tensor([[[[-0.1158,  0.0129,  0.0481],
          [ 0.0608,  0.0715, -0.0874],
          [-0.1035, -0.1617, -0.0581]],

         [[ 0.0543, -0.0250,  0.1098],
          [-0.1377,  0.0978,  0.0605],
          [-0.0510, -0.1285, -0.0766]],

         [[-0.0895,  0.0918,  0.0301],
          [-0.0995, -0.1500,  0.0362],
          [ 0.0097, -0.1787, -0.1635]]],


        [[[-0.0810, -0.0577, -0.1446],
          [-0.0884,  0.0724, -0.0906],
          [-0.1643,  0.0764, -0.1874]],

         [[ 0.0952,  0.1273,  0.1606],
          [-0.1033, -0.1635, -0.0726],
          [-0.0171,  0.1902, -0.0164]],

         [[ 0.1215,  0.0172, -0.0715],
          [-0.0475,  0.1481,  0.0047],
          [-0.0927,  0.0123, -0.1857]]],


        [[[-0.0844,  0.0875, -0.1109],
          [ 0.0819, -0.0614,  0.1107],
          [ 0.0112, -0.1909,  0.1020]],

         [[-0.1142, -0.1211,  0.1153],
          [ 0.1138, -0.1216,  

tensor([[[[ 0.1462, -0.0596,  0.1304],
          [-0.1829, -0.0371,  0.0721],
          [ 0.1151, -0.0044, -0.0196]],

         [[ 0.1118,  0.1432,  0.1801],
          [-0.1617,  0.1520,  0.0354],
          [-0.0399, -0.0475, -0.1526]],

         [[-0.1864,  0.0102,  0.0997],
          [-0.1683,  0.0095,  0.1147],
          [-0.0404,  0.0273,  0.1837]]],


        [[[-0.0359, -0.1813, -0.1639],
          [ 0.0576,  0.0400, -0.0090],
          [ 0.0822, -0.1179, -0.0170]],

         [[ 0.1498,  0.0260, -0.1795],
          [-0.1244, -0.0594, -0.0917],
          [-0.1016,  0.1586, -0.1525]],

         [[-0.1539, -0.1305, -0.1040],
          [-0.1914, -0.0598, -0.1557],
          [ 0.0599,  0.1333, -0.1027]]],


        [[[-0.1562,  0.1048,  0.1839],
          [-0.0204,  0.0091, -0.1186],
          [-0.0302, -0.0018,  0.1656]],

         [[-0.1892, -0.0673,  0.1475],
          [ 0.1444,  0.0676,  0.0438],
          [ 0.0177,  0.1792,  0.0814]],

         [[-0.0884,  0.1047, -0.1301],
     

cell1.Wxi_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Whi_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Wxf_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Whf_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Wxc_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Whc_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Wxo_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell1.Who_log_alpha
tensor([[-3.]], device='cuda:0')
torch.Size([1, 1])
cell2.Wxi_mu
tensor([[[[-0.2281, -0.0217,  0.1464],
          [ 0.1174, -0.2113, -0.0480],
          [ 0.0385,  0.1023, -0.1963]],

         [[-0.1539,  0.0242, -0.1635],
          [-0.1631, -0.0034,  0.1180],
          [ 0.2111, -0.1483, -0.2328]]]], device='cuda:0')
torch.Size([1, 2, 3, 3])
cell2.Whi_mu
tensor([[[[ 0.1964, -0.0346, -0.0650],
          [-0.1397, -0.1415, -0.0377],
          [-0.0421,  0.1149,  0.1946]]]