In [1]:
import os
import h5py
import pathlib
import logging
import numpy as np
import pandas as pd

import tensorflow
if int(tensorflow.__version__.split(".")[0]) >= 2:
    import tensorflow.compat.v1 as tf
else:
    import tensorflow as tf

from datetime import datetime
import multiprocessing
import argparse
import json
import pathlib
# from tqdm import tqdm
import sys
# sys.path.append(pathlib.Path(__file__).parent.absolute())
from commons import input_iterator

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
from commons import cnn_bi_lstm_model

os.chdir('/home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021_pytorch/')
import pickle
from torch import nn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
parser = argparse.ArgumentParser(description='Argument parser for generating model predictions.')
optional_arguments = parser._action_groups.pop()
required_arguments = parser.add_argument_group('required arguments')
required_arguments.add_argument('--pre-processed-dir', help='Pre-processed data directory', default='/home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021_pytorch/preprocessed_new', required=False)

optional_arguments.add_argument('--model', help='Pre-trained prediction model name (default: CHAP_ALL_ADULTS)', default='CHAP_ALL_ADULTS',
    required=False, choices=['CHAP_A', 'CHAP_B', 'CHAP_C', 'CHAP', 'CHAP_ALL_ADULTS', 'CHAP_CHILDREN', 'CHAP_AUSDIAB'])
optional_arguments.add_argument('--predictions-dir', help='Predictions output directory (default: ./predictions)', default='./predictions', required=False) 
optional_arguments.add_argument('--no-segment', help='Do not output segment number', default=False, required=False, action='store_true')
optional_arguments.add_argument('--output-label', help='Whether to output the actual label', default=False, required=False, action='store_true')

optional_arguments.add_argument('--model-checkpoint-path', help='Path where the custom trained model checkpoint is located', default=None, required=False)
optional_arguments.add_argument('--cnn-window-size', help='CNN window size of the model in seconds on which the predictions to be made (default: 10).', default=10, type=int, required=False)
optional_arguments.add_argument('--bi-lstm-window-size', help='BiLSTM window size in minutes (default: 7).', default=None, required=False, type=int)
optional_arguments.add_argument('--down-sample-frequency', help='Downsample frequency in Hz for GT3X data (default: 10).', default=10, type=int, required=False)   
optional_arguments.add_argument('--gt3x-frequency', help='GT3X device frequency in Hz (default: 30)', default=30, type=int, required=False)
optional_arguments.add_argument('--activpal-label-map', help='ActivPal label vocabulary (default: {"sitting": 0, "not-sitting": 1, "no-label": -1})', default='{"sitting": 0, "not-sitting": 1, "no-label": -1}', required=False)
optional_arguments.add_argument('--silent', help='Whether to hide info messages', default=False, required=False, action='store_true')
optional_arguments.add_argument(
    '--padding',
    type=str,
    help='Padding scheme for the last part of data that does not fill a whole lstm window (default: %(default)s)',
    default='drop',
    choices=('drop', 'zero', 'wrap')
)
parser._action_groups.append(optional_arguments)
args = parser.parse_known_args()
args = args[0]

if not os.path.exists(args.predictions_dir):
    os.makedirs(args.predictions_dir)

label_map = json.loads(args.activpal_label_map)
label_map = {label_map[k]:k for k in label_map}

bi_lstm_window_sizes = {"CHAP_A": 9, "CHAP_B": 9, "CHAP_C": 7, "CHAP_ALL_ADULTS": 7, "CHAP_CHILDREN": 3, "CHAP_AUSDIAB": 9}
if args.bi_lstm_window_size is None:
    bi_lstm_window_sizes['CUSTOM_MODEL'] = 7
else:
    bi_lstm_window_sizes[args.model] = args.bi_lstm_window_size


    bi_lstm_window_sizes['CUSTOM_MODEL'] = args.bi_lstm_window_size

if args.model_checkpoint_path is not None:
    if not args.silent:
        print('Loading custom model from checkpoint path: {}'.format(args.model_checkpoint_path))
    args.model = 'CUSTOM_MODEL'
else:
    args.model_checkpoint_path =  '/home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021_pytorch/pre-trained-models'

pre_processed_data_dir = args.pre_processed_dir
output_dir=args.predictions_dir
model=args.model
segment=not args.no_segment
output_label=True

label_map=label_map
downsample_window=1.0/args.down_sample_frequency
cnn_window_size=args.cnn_window_size
gt3x_frequency=args.gt3x_frequency
model_ckpt_path=args.model_checkpoint_path
padding=args.padding

In [3]:
model = model.strip()
if model not in ['CHAP', 'CHAP_A', 'CHAP_B', 'CHAP_C', 'CHAP_ALL_ADULTS', 'CHAP_CHILDREN', 'CHAP_AUSDIAB', 'CUSTOM_MODEL']:
    raise Exception('model should be one of: CHAP, CHAP_A, CHAP_B, CHAP_C, CHAP_ALL_ADULTS, CHAP_CHILDREN, CHAP_AUSDIAB or CUSTOM_MODEL')

subject_ids = [fname.split('.')[0] for fname in os.listdir(pre_processed_data_dir) if not fname.startswith('.')]


perform_ensemble = False
if model == 'CHAP':
    models = ['CHAP_A', 'CHAP_B', 'CHAP_C']
    perform_ensemble = True
else:
    models = [model]

print("models", models, model_ckpt_path)

for model in models:
    if not os.path.exists(os.path.join(output_dir, '{}'.format(model))):
        os.makedirs(os.path.join(output_dir, '{}'.format(model)))

    tf.reset_default_graph()
    p = max(1, multiprocessing.cpu_count()//2)
    sess = tf.Session(config=tf.ConfigProto(inter_op_parallelism_threads=p, intra_op_parallelism_threads=p))
    tf.saved_model.loader.load(sess, ["serve"], "/home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021/model-checkpoint/CUSTOM_MODEL")


    for subject_id in subject_ids:
        if not args.silent:
            logger.info('Starting prediction generation for the subject {}'.format(subject_id))
        data = list(input_iterator(pre_processed_data_dir, subject_id))
        x, timestamps, labels = [d[0].reshape(-1, int(1/downsample_window * cnn_window_size),
                                        int(gt3x_frequency*downsample_window), 1) for d in data], [d[1] for d in data], [d[2] for d in data]
        fout = open(os.path.join(output_dir, "{}".format(model), "{}.csv".format(subject_id)), 'w')

        if segment:
            fout.write('segment,')
        fout.write('timestamp')
        if output_label:
            fout.write(',label')

        fout.write(',prediction\n')

        for n in range(len(x)):
            wanna_be = bi_lstm_window_sizes[model] * \
                        int(60*downsample_window)
            border = x[n].shape[0] % wanna_be
            wrapped = False
            zeroed = False
            if border != 0:
                if padding == "drop":
                    x[n] = x[n][:-border]
                    timestamps[n] = timestamps[n][:-border]
                    labels[n] = labels[n][:-border]
                    # deficit = wanna_be - border
                    # print("Dropped: {} sec".format((42 - deficit) * 10))


                else:
                    
                    deficit = wanna_be - border
                    increment = int(1 / downsample_window)
                    labels_padded = np.full(deficit, -1)

                    if padding == "zero":
                        x_padded = np.zeros(
                            [deficit] + list(x[n].shape[1:]))
                        timestamps_padded = np.full(
                            deficit,
                            timestamps[n][-1]
                        ) + np.array(
                            [increment * (i + 1) for i in range(deficit)])
                        x[n] = np.vstack((x[n], x_padded))
                        timestamps[n] = np.hstack(
                            (timestamps[n], timestamps_padded))
                        zeroed = True
                        
                    if padding == "wrap":
                        x_last_p1 = x[n][:-border]
                        x_last_p2 = x[n][-wanna_be:] 
                        x[n] = np.vstack((x_last_p1, x_last_p2))
                        wrapped = True
                    labels[n] = np.hstack((labels[n], labels_padded))

            y_pred = []
            for k in range(0, x[n].shape[0], bi_lstm_window_sizes[model] * int(60*downsample_window)):
                temp = x[n][k:k + bi_lstm_window_sizes[model] * int(60*downsample_window)]
                temp_output = sess.run('output:0', feed_dict={'input:0': temp})
                flattened_output = temp_output.flatten()
                print(np.array(temp_output).shape, np.array(flattened_output).shape, np.array(temp_output), np.array(flattened_output))
                y_pred.append(flattened_output)
                break
            y_pred = np.array(y_pred).flatten()
            
            if padding == "wrap" and wrapped:
                y_pred = np.hstack((y_pred[:-wanna_be], y_pred[-border:]))
            elif padding == "zero" and zeroed:
                y_pred = y_pred[:-deficit]


            for t, l, pred in zip(timestamps[n], labels[n], y_pred):
                formatstr = ""
                if segment:
                    formatstr += "{},{}"
                    values = [n, datetime.fromtimestamp(t).strftime("%Y-%m-%d %H:%M:%S")]
                else:
                    formatstr += "{}"
                    values = [datetime.fromtimestamp(t).strftime("%Y-%m-%d %H:%M:%S")]

                if output_label:
                    formatstr += ",{}"
                    values.append(label_map[int(l)])    

                formatstr += ",{}\n"
                values.append(label_map[int(pred)])

                fout.write(formatstr.format(*values))

        fout.close()
        if not args.silent:
            logger.info('Completed prediction generation for the subject {}'.format(subject_id))

if perform_ensemble:
    if not os.path.exists(os.path.join(output_dir, 'CHAP')):
        os.makedirs(os.path.join(output_dir, 'CHAP'))

    for subject_id in subject_ids:
        if not args.silent:
            logger.info('Starting enseble model prediction generation for the subject {}'.format(subject_id))

        df_1 = pd.read_csv(os.path.join(output_dir, "CHAP_A/{}.csv".format(subject_id)))
        df_2 = pd.read_csv(os.path.join(output_dir, "CHAP_B/{}.csv".format(subject_id)))
        df_3 = pd.read_csv(os.path.join(output_dir, "CHAP_C/{}.csv".format(subject_id)))

        modfied_dfs = []
        if segment:
            max_segment = max([df['segment'].max() for df in [df_1, df_2, df_3]])
            for seg in range(max_segment+1):
                v_1 = df_1[df_1['segment'] == seg].sort_values('timestamp')
                v_2 = df_2[df_2['segment'] == seg].sort_values('timestamp')
                v_3 = df_3[df_3['segment'] == seg].sort_values('timestamp')
            
                min_len = min(min(v_1.prediction.count(), v_2.prediction.count()), v_3.prediction.count())

                if min_len > 0:
                    v_1 = v_1[:min_len]
                    v_2 = v_2[:min_len]
                    v_3 = v_3[:min_len]

                    v = v_3.copy()
                    v['predictions_A'] = v_1.prediction.values.tolist()
                    v['predictions_B'] = v_2.prediction.values.tolist()
                    v['predictions_C'] = v_3.prediction.values.tolist()

                    v.prediction = v[['predictions_A', 'predictions_B', 'predictions_C']].mode(axis='columns')
                    modfied_dfs.append(v)
        else:
            min_len = min(min(df_1.prediction.count(), df_2.prediction.count()), df_3.prediction.count())

            v_1 = df_1[:min_len]
            v_2 = df_2[:min_len]
            v_3 = df_3[:min_len]

            v = v_3.copy()
            v['predictions_A'] = v_1.prediction.values.tolist()
            v['predictions_B'] = v_2.prediction.values.tolist()
            v['predictions_C'] = v_3.prediction.values.tolist()

            v.prediction = (v['predictions_A'] + v['predictions_B'] + v['predictions_C']) / 3
            v.prediction = v.prediction.map(lambda x: round(x))
            modfied_dfs.append(v)


        if len(modfied_dfs) > 0:
            user_df = pd.concat(modfied_dfs)
            user_df.to_csv(os.path.join(output_dir, "CHAP/{}.csv".format(subject_id)), index=False)
        
        if not args.silent:
            logger.info('Completed enseble model prediction generation for the subject {}'.format(subject_id))

models ['CHAP_ALL_ADULTS'] /home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021_pytorch/pre-trained-models
















2024-05-08 13:38:20.174871: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2024-05-08 13:38:20.204888: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2799925000 Hz
2024-05-08 13:38:20.205405: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5593d8d4af50 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2024-05-08 13:38:20.205441: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2024-05-08 13:38:20.205581: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1


Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.


2024-05-08 13:38:22.127208: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2024-05-08 13:38:22.127253: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (animeshkumar-msi): /proc/driver/nvidia/version does not exist
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.


INFO:tensorflow:Restoring parameters from /home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021/model-checkpoint/CUSTOM_MODEL/variables/variables


INFO:tensorflow:Restoring parameters from /home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021/model-checkpoint/CUSTOM_MODEL/variables/variables
INFO:__main__:Starting prediction generation for the subject 202838


(42,) (42,) [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1] [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1]
(42,) (42,) [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1] [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1]
(42,) (42,) [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1] [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1]
(42,) (42,) [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1] [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1]
(42,) (42,) [1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1] [1 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1]
(42,) (42,) [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

INFO:__main__:Completed prediction generation for the subject 202838


(42,) (42,) [0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 0 0] [0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 0 0]


In [18]:
from commons import Conv2dSame, CNNBiLSTMModel
import torch
amp_factor = 2

input_shape = (672, 100, 3, 1)

# Create the constant tensor
random_array = np.random.rand(672, 100, 3, 1)
x = tf.convert_to_tensor(random_array, dtype=tf.float32)

conv1 = tf.layers.conv2d(x, filters=32*amp_factor, kernel_size=[5, 3],
                                 data_format='channels_last', padding= "same",
                                 strides=(2, 1),
                                 )

m = Conv2dSame(in_channels=1, out_channels=32*amp_factor, kernel_size=(5, 3), stride=(2, 1))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output  = sess.run(conv1)
    output_transposed = np.transpose(output, (0, 3, 1, 2))

    vars = sess.graph.get_collection('trainable_variables')
    weights = {}
   
    for v in vars[-2:]:
        weights[v.name] = sess.run(v)

    # print(weights)

    with torch.no_grad():
        for (name, param), (tf_name, tf_param) in zip(m.named_parameters(), weights.items()):
            # convert NHWC to NCHW format and copy to change memory layout
            tf_param = np.transpose(tf_param, (3, 2, 0, 1)).copy() if len(tf_param.shape) == 4 else tf_param
            assert tf_param.shape == param.detach().numpy().shape, name

            # https://discuss.pytorch.org/t/how-to-assign-an-arbitrary-tensor-to-models-parameter/44082/3
            param.copy_(torch.tensor(tf_param, requires_grad=True, dtype=param.dtype))
    x_list = x.eval()
    x_np= np.transpose(x_list, (0, 3, 1, 2)).copy()
    x_pt = torch.tensor(x_np, dtype=torch.float32)
    output_pt = m(x_pt)
    output_pt_np = output_pt.detach().numpy()
    print(np.abs(output_transposed-output_pt_np).max())

    print(np.allclose(output_transposed, output_pt_np))


Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.


Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.


Instructions for updating:
Please use `layer.__call__` method instead.


Instructions for updating:
Please use `layer.__call__` method instead.








0.0
True


In [4]:
tf.reset_default_graph()
p = max(1, multiprocessing.cpu_count()//2)
sess = tf.Session(config=tf.ConfigProto(inter_op_parallelism_threads=p, intra_op_parallelism_threads=p))
tf.saved_model.loader.load(sess, ["serve"], "/home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021/model-checkpoint/CUSTOM_MODEL")

INFO:tensorflow:Restoring parameters from /home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021/model-checkpoint/CUSTOM_MODEL/variables/variables


INFO:tensorflow:Restoring parameters from /home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021/model-checkpoint/CUSTOM_MODEL/variables/variables


meta_info_def {
  stripped_op_list {
    op {
      name: "Add"
      input_arg {
        name: "x"
        type_attr: "T"
      }
      input_arg {
        name: "y"
        type_attr: "T"
      }
      output_arg {
        name: "z"
        type_attr: "T"
      }
      attr {
        name: "T"
        type: "type"
        allowed_values {
          list {
            type: DT_BFLOAT16
            type: DT_HALF
            type: DT_FLOAT
            type: DT_DOUBLE
            type: DT_UINT8
            type: DT_INT8
            type: DT_INT16
            type: DT_INT32
            type: DT_INT64
            type: DT_COMPLEX64
            type: DT_COMPLEX128
            type: DT_STRING
          }
        }
      }
    }
    op {
      name: "AddN"
      input_arg {
        name: "inputs"
        type_attr: "T"
        number_attr: "N"
      }
      output_arg {
        name: "sum"
        type_attr: "T"
      }
      attr {
        name: "N"
        type: "int"
        has_minimum: t

In [1]:
tf.reset_default_graph()
p = max(1, multiprocessing.cpu_count()//2)
sess = tf.Session(config=tf.ConfigProto(inter_op_parallelism_threads=p, intra_op_parallelism_threads=p))
# tf.saved_model.loader.load(sess, ["serve"], os.path.join(model_ckpt_path, "{}".format(model)))
tf.saved_model.loader.load(sess, ["serve"], "/home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021_pytorch/pre-trained-models/CHAP_A")

NameError: name 'tf' is not defined

In [10]:
vars = sess.graph.get_collection('trainable_variables')
weights = {}
for v in vars:
    weights[v.name] = sess.run(v)  # retrieve the value from the tf backend
weights_list = [(k, v) for k, v in weights.items()]
with open('/home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021_pytorch/model_weights/chapA_weights_tuple.pickle', 'wb') as handle:
    pickle.dump(weights_list, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [5]:

# file_path = "/home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021_pytorch/chap_all_adults_weights/weights.pickle"

# # Open the pickle file for reading in binary mode
# with open(file_path, 'rb') as f:
#     # Load the data from the pickle file
#     weights_dict = pickle.load(f)


In [5]:
file_path = "/home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021_pytorch/model_weights/custom_weights_tuple.pickle"

# Open the pickle file for reading in binary mode
with open(file_path, 'rb') as f:
    # Load the data from the pickle file
    custom_weights_tuple = pickle.load(f)


In [8]:
for ele in custom_weights_tuple:
    print(ele[0], ele[1].shape)

model/conv2d/kernel:0 (5, 3, 1, 64)
model/conv2d/bias:0 (64,)
model/conv2d_1/kernel:0 (5, 1, 64, 128)
model/conv2d_1/bias:0 (128,)
model/conv2d_2/kernel:0 (5, 1, 128, 256)
model/conv2d_2/bias:0 (256,)
model/conv2d_3/kernel:0 (5, 1, 256, 512)
model/conv2d_3/bias:0 (512,)
model/conv2d_4/kernel:0 (5, 1, 512, 512)
model/conv2d_4/bias:0 (512,)
model/dense/kernel:0 (6144, 512)
model/dense/bias:0 (512,)
bidirectional_rnn/fw/lstm_cell/kernel:0 (640, 512)
bidirectional_rnn/fw/lstm_cell/bias:0 (512,)
bidirectional_rnn/bw/lstm_cell/kernel:0 (640, 512)
bidirectional_rnn/bw/lstm_cell/bias:0 (512,)
dense/kernel:0 (256, 2)
dense/bias:0 (2,)


In [16]:
file_path = "/home/animeshkumar/workspace/adalab/workspace/DeepPostures/MSSE-2021_pytorch/model_weights/chapA_weights_tuple.pickle"

# Open the pickle file for reading in binary mode
with open(file_path, 'rb') as f:
    # Load the data from the pickle file
    custom_weights_tuple = pickle.load(f)

for ele in custom_weights_tuple:
    print(ele[0], ele[1].shape)


model/conv2d/kernel:0 (5, 3, 1, 64)
model/conv2d/bias:0 (64,)
model/conv2d_1/kernel:0 (5, 1, 64, 128)
model/conv2d_1/bias:0 (128,)
model/conv2d_2/kernel:0 (5, 1, 128, 256)
model/conv2d_2/bias:0 (256,)
model/conv2d_3/kernel:0 (5, 1, 256, 512)
model/conv2d_3/bias:0 (512,)
model/conv2d_4/kernel:0 (5, 1, 512, 512)
model/conv2d_4/bias:0 (512,)
model/dense/kernel:0 (6144, 512)
model/dense/bias:0 (512,)
bidirectional_rnn/fw/lstm_cell/kernel:0 (640, 512)
bidirectional_rnn/fw/lstm_cell/bias:0 (512,)
bidirectional_rnn/bw/lstm_cell/kernel:0 (640, 512)
bidirectional_rnn/bw/lstm_cell/bias:0 (512,)
dense/kernel:0 (256, 1)
dense/bias:0 (1,)


In [20]:
amp_factor =2
bi_lstm_window_size = bi_lstm_window_sizes[args.model]
num_classes = 2

In [21]:
model = CNNBiLSTMModel(amp_factor=2, bi_lstm_win_size=bi_lstm_window_size, num_classes=1)

In [22]:
for name, param in list(model.named_parameters()):
    print(name, param.shape)

cnn_model.conv1.weight torch.Size([64, 1, 5, 3])
cnn_model.conv1.bias torch.Size([64])
cnn_model.conv2.weight torch.Size([128, 64, 5, 1])
cnn_model.conv2.bias torch.Size([128])
cnn_model.conv3.weight torch.Size([256, 128, 5, 1])
cnn_model.conv3.bias torch.Size([256])
cnn_model.conv4.weight torch.Size([512, 256, 5, 1])
cnn_model.conv4.bias torch.Size([512])
cnn_model.conv5.weight torch.Size([512, 512, 5, 1])
cnn_model.conv5.bias torch.Size([512])
cnn_model.fc.weight torch.Size([512, 6144])
cnn_model.fc.bias torch.Size([512])
bil_lstm.weight_ih_l0 torch.Size([512, 512])
bil_lstm.weight_hh_l0 torch.Size([512, 128])
bil_lstm.bias_ih_l0 torch.Size([512])
bil_lstm.bias_hh_l0 torch.Size([512])
bil_lstm.weight_ih_l0_reverse torch.Size([512, 512])
bil_lstm.weight_hh_l0_reverse torch.Size([512, 128])
bil_lstm.bias_ih_l0_reverse torch.Size([512])
bil_lstm.bias_hh_l0_reverse torch.Size([512])
fc_bilstm.weight torch.Size([1, 256])
fc_bilstm.bias torch.Size([1])


In [8]:
list(model.named_parameters())

[('cnn_model.conv1.weight',
  Parameter containing:
  tensor([[[[ 0.1834,  0.1202, -0.0359],
            [-0.1185, -0.2338, -0.0799],
            [-0.0221, -0.0429, -0.0407],
            [-0.2148,  0.1968, -0.1905],
            [ 0.2508, -0.1486,  0.1733]]],
  
  
          [[[ 0.0891,  0.0498,  0.1014],
            [-0.0633, -0.1351,  0.0027],
            [ 0.0040,  0.0346, -0.1437],
            [ 0.1395,  0.2408, -0.1495],
            [-0.0864,  0.1562, -0.0005]]],
  
  
          [[[-0.0657,  0.0646, -0.2312],
            [-0.1596,  0.1184, -0.1232],
            [-0.2175, -0.1855, -0.2000],
            [ 0.2366, -0.1643,  0.0857],
            [-0.1408, -0.0918, -0.1313]]],
  
  
          [[[ 0.1094,  0.0286,  0.1070],
            [ 0.2302,  0.2120, -0.2308],
            [ 0.0358,  0.1371,  0.0580],
            [-0.1692,  0.2413,  0.0053],
            [-0.0863,  0.1455, -0.2055]]],
  
  
          [[[ 0.1281, -0.1892, -0.2160],
            [ 0.0779,  0.1240, -0.1077],
            [-

In [9]:
for k, v in weights_dict.items():
    print(k, v.shape)

model/conv2d/kernel:0 (5, 3, 1, 64)
model/conv2d/bias:0 (64,)
model/conv2d_1/kernel:0 (5, 1, 64, 128)
model/conv2d_1/bias:0 (128,)
model/conv2d_2/kernel:0 (5, 1, 128, 256)
model/conv2d_2/bias:0 (256,)
model/conv2d_3/kernel:0 (5, 1, 256, 512)
model/conv2d_3/bias:0 (512,)
model/conv2d_4/kernel:0 (5, 1, 512, 512)
model/conv2d_4/bias:0 (512,)
model/dense/kernel:0 (6144, 512)
model/dense/bias:0 (512,)
bidirectional_rnn/fw/lstm_cell/kernel:0 (640, 512)
bidirectional_rnn/fw/lstm_cell/bias:0 (512,)
bidirectional_rnn/bw/lstm_cell/kernel:0 (640, 512)
bidirectional_rnn/bw/lstm_cell/bias:0 (512,)
dense/kernel:0 (256, 1)
dense/bias:0 (1,)


In [10]:
# [input_depth + h_depth, 4 * self._num_units]
# 512+128, 4*128

In [11]:
print(model)

CNNBiLSTMModel(
  (cnn_model): CNNModel(
    (conv1): Conv2dSame(1, 64, kernel_size=(5, 3), stride=(2, 1))
    (conv2): Conv2dSame(64, 128, kernel_size=(5, 1), stride=(2, 1))
    (conv3): Conv2dSame(128, 256, kernel_size=(5, 1), stride=(2, 1))
    (conv4): Conv2dSame(256, 512, kernel_size=(5, 1), stride=(2, 1))
    (conv5): Conv2dSame(512, 512, kernel_size=(5, 1), stride=(2, 1))
    (fc): Linear(in_features=6144, out_features=512, bias=True)
  )
  (bil_lstm): LSTM(512, 128, batch_first=True, bidirectional=True)
  (fc_bilstm): Linear(in_features=256, out_features=2, bias=True)
)


In [12]:
para = list(model.named_parameters())
for p in para:
    print(p[0], p[1].shape)

cnn_model.conv1.weight torch.Size([64, 1, 5, 3])
cnn_model.conv1.bias torch.Size([64])
cnn_model.conv2.weight torch.Size([128, 64, 5, 1])
cnn_model.conv2.bias torch.Size([128])
cnn_model.conv3.weight torch.Size([256, 128, 5, 1])
cnn_model.conv3.bias torch.Size([256])
cnn_model.conv4.weight torch.Size([512, 256, 5, 1])
cnn_model.conv4.bias torch.Size([512])
cnn_model.conv5.weight torch.Size([512, 512, 5, 1])
cnn_model.conv5.bias torch.Size([512])
cnn_model.fc.weight torch.Size([512, 6144])
cnn_model.fc.bias torch.Size([512])
bil_lstm.weight_ih_l0 torch.Size([512, 512])
bil_lstm.weight_hh_l0 torch.Size([512, 128])
bil_lstm.bias_ih_l0 torch.Size([512])
bil_lstm.bias_hh_l0 torch.Size([512])
bil_lstm.weight_ih_l0_reverse torch.Size([512, 512])
bil_lstm.weight_hh_l0_reverse torch.Size([512, 128])
bil_lstm.bias_ih_l0_reverse torch.Size([512])
bil_lstm.bias_hh_l0_reverse torch.Size([512])
fc_bilstm.weight torch.Size([2, 256])
fc_bilstm.bias torch.Size([2])


In [31]:
weights_list = [(k, v) for k, v in weights_dict.items()]

In [47]:
def get_pytorch_lstm_weights_from_tensorflow(kernel, bias, INPUT_SIZE, HIDDEN_SIZE):
    def reorder_lstm_weights(w):
        return np.transpose(w)

    w_ih = nn.Parameter(reorder_lstm_weights(torch.tensor((kernel[:INPUT_SIZE]))))
    w_hh = nn.Parameter(reorder_lstm_weights(torch.tensor((kernel[INPUT_SIZE:]))))
    if bias is not None:
        alpha = 0.5
        b_ih = nn.Parameter(reorder_lstm_weights(torch.tensor((alpha * bias[:INPUT_SIZE]))))
        b_hh = nn.Parameter(reorder_lstm_weights(torch.tensor((1-alpha) * bias[:INPUT_SIZE])))
    else:
        b_ih = nn.Parameter(torch.zeros(4 * HIDDEN_SIZE))
        b_hh = nn.Parameter(torch.zeros(4 * HIDDEN_SIZE))

    return w_ih, w_hh, b_ih, b_hh

In [48]:
with torch.no_grad():
      pt_model_params = list(model.named_parameters())
      n_tf = 0
      n_pt = 0
      while n_tf<len(weights_list):
            pt_name, pt_param = pt_model_params[n_pt]
            tf_name, tf_param = weights_list[n_tf]
            # lstm weights for tensorflow are Kernel = [input_depth + h_depth, 4 * self._num_units] Bias = [4 * self._num_units]
            # lstm weights for Pytorch are Kernel Input = (4*hidden_size, input_size)
            #                              Kernel Hidden = (4*hidden_size, hidden_size)
            #                              Bias Input = (4*hidden_size)
            #                              Bias Hidden = (4*hidden_size)
            if "lstm" in tf_name:
                  kernel = tf_param
                  _, bias = weights_list[n_tf+1]
                  # from input kernel get the second dimension of shape
                  input_size = pt_model_params[n_pt][1].size()[1]
                   # from hidden kernel get the second dimension of shape
                  hidden_size = pt_model_params[n_pt+1][1].size()[1]
                  w_ih, w_hh, b_ih, b_hh = get_pytorch_lstm_weights_from_tensorflow(tf_param, bias, input_size, hidden_size)
                  print(w_ih.shape, w_hh.shape, b_ih.shape, b_hh.shape)
                  pt_model_params[n_pt][1].copy_(torch.tensor(w_ih, requires_grad=True, dtype=param.dtype))
                  pt_model_params[n_pt+1][1].copy_(torch.tensor(w_hh, requires_grad=True, dtype=param.dtype))
                  pt_model_params[n_pt+2][1].copy_(torch.tensor(b_ih, requires_grad=True, dtype=param.dtype))
                  pt_model_params[n_pt+3][1].copy_(torch.tensor(b_hh, requires_grad=True, dtype=param.dtype))
                  n_pt=n_pt+4
                  n_tf=n_tf+2
                  continue
            # conv weights are in order NHWC for TF and NCHW for PyTorch
            if "conv" in tf_name and len(tf_param.shape) == 4:
                  tf_param = np.transpose(tf_param, (3, 2, 0, 1)).copy()  
            # dense weights are in order KN for TF and NK for PyTorch
            elif "dense" in tf_name and len(tf_param.shape) == 2:
                  tf_param = np.transpose(tf_param).copy()
            n_tf+=1
            n_pt+=1
                  
            if not tf_param.shape == pt_param.detach().numpy().shape:
                  print("Shape error", "TF", tf_name, tf_param.shape, "\tPT", name, param.detach().numpy().shape)

torch.Size([512, 512]) torch.Size([512, 128]) torch.Size([512]) torch.Size([512])
torch.Size([512, 512]) torch.Size([512, 128]) torch.Size([512]) torch.Size([512])
Shape error TF dense/kernel:0 (1, 256) 	PT bil_lstm.weight_hh_l0_reverse (512, 128)
Shape error TF dense/bias:0 (1,) 	PT bil_lstm.weight_hh_l0_reverse (512, 128)




In [30]:

def get_pytorch_lstm_weights_from_tensorflow(kernel, bias, INPUT_SIZE, HIDDEN_SIZE):

    w_ih = nn.Parameter(torch.tensor((kernel[:INPUT_SIZE])))
    w_hh = nn.Parameter(torch.tensor((kernel[INPUT_SIZE:])))
    if bias is not None:
        alpha = 0.5
    
        b_ih = nn.Parameter(torch.tensor((alpha * bias[:INPUT_SIZE])))
        b_hh = nn.Parameter(torch.tensor(((1-alpha) * bias[INPUT_SIZE:])))
    else:
        b_ih = nn.Parameter(torch.zeros(4 * HIDDEN_SIZE))
        b_hh = nn.Parameter(torch.zeros(4 * HIDDEN_SIZE))

    return w_ih, w_hh, b_ih, b_hh


# Example
INPUT_SIZE = 512
HIDDEN_SIZE = 128
rnn = nn.LSTMCell(INPUT_SIZE, INPUT_SIZE)
rnn.weight_ih, rnn.weight_hh, rnn.bias_ih, rnn.bias_hh = get_pytorch_lstm_weights_from_tensorflow(weights_dict['bidirectional_rnn/fw/lstm_cell/kernel:0'], weights_dict['bidirectional_rnn/fw/lstm_cell/bias:0'], INPUT_SIZE, HIDDEN_SIZE)
# h, c = rnn(inp)



<class 'numpy.ndarray'>
