## Load input & outputs at each layer

In [1]:
import arrayfire as af

# in order
keys = [
    "model_input",
    "conv_output", "misc_padMask",
] 
for i in range(36):
    keys.append(f"trf_input_{i}")
    keys.append(f"trf_output_{i}")

keys = keys + [
    "ctc_head_input",
    "ctc_head_output"
]
    
layer_lookup = {}
for key in keys:
    arr = af.array.read_array("OUTPUT.arr", key=key).to_ndarray()
    layer_lookup[key] = arr


## Load Each Step of input -> features


In [5]:
# not all these are possible (due to branching process in the code)
ft_keys_possib = ["input_raw", "input_raw_2", "input_raw_3", 
    "input_transpose2d", 
    "output_POW_SPECTRUM", "output_MFSC", "output_MFCC", "output_RAW",
    "output_T_then_transpose", 
    "output_localNormalize", "output_normalize",
    "output_finalOutput"
]  
ft_lookup = {}
for ftkey in ft_keys_possib:
    try:
        arr = af.array.read_array("FEATURES.arr", key=ftkey).to_ndarray()
        ft_lookup[ftkey] = arr
    except:
        pass

In [6]:
ft_lookup.keys()

dict_keys(['input_raw', 'input_raw_2', 'input_raw_3', 'output_MFSC', 'output_T_then_transpose', 'output_normalize', 'output_finalOutput'])

In [84]:
import torch
import numpy as np
from transformers import MCTCForCTC, MCTCProcessor, MCTCConfig
from datasets import load_dataset, load_metric, Audio, Dataset

config = MCTCConfig()
model = MCTCForCTC(config)
model = model.load_state_dict(torch.load("./ported_pytorch_model.bin"))
# processor = MCTCProcessor.from_pretrained("cwkeam/mctc-large")

In [None]:
feature_inputs = torch.Tensor(ft_lookup["output_finalOutput"])
model_inputs = torch.Tensor(layer_lookup["model_input"])
conv_output = layer_lookup["conv_output"]

In [None]:
print(model_inputs.sum(), feature_inputs.sum())

In [79]:
model.eval()

first_layer_norm = model.mctc.encoder.layer_norm.eval()
first_layer_conv = model.mctc.encoder.conv.eval()
print("first_layer_norm", first_layer_norm)
print("first_layer_conv", first_layer_conv)

first_layer_norm LayerNorm((80,), eps=1e-05, elementwise_affine=True)
first_layer_conv Conv1dSubsampler(
  (dropout): Dropout(p=0.3, inplace=False)
  (conv_layers): ModuleList(
    (0): Conv1d(80, 3072, kernel_size=(7,), stride=(3,), padding=valid)
  )
)


In [82]:
feature_inputs_batch = feature_inputs.unsqueeze(0)
print("feature_inputs_batch", feature_inputs_batch.shape)
feature_inputs_batch = torch.nn.functional.pad(feature_inputs_batch, (0,0,3,3), "constant", 0)
print("feature_inputs_batch", feature_inputs_batch.shape)
my_conv_output = first_layer_norm(feature_inputs_batch)
print("my_conv_output", my_conv_output.shape)
my_conv_output = first_layer_conv(my_conv_output)
my_conv_output = my_conv_output[0].transpose(0,1)
print("my_conv_output", my_conv_output.shape, my_conv_output.sum())
print("ft_conv_output", conv_output.shape, conv_output.sum())

feature_inputs_batch torch.Size([1, 961, 80])
feature_inputs_batch torch.Size([1, 967, 80])
my_conv_output torch.Size([1, 967, 80])
my_conv_output torch.Size([1536, 321]) tensor(-386.5894, grad_fn=<SumBackward0>)
ft_conv_output (1536, 321) 330.57333


In [83]:
for x, y in list(zip(my_conv_output.flatten(), conv_output.flatten()))[1000:1010]:
    print(x,y)

tensor(0.2657, grad_fn=<UnbindBackward0>) 0.07035182
tensor(0.1323, grad_fn=<UnbindBackward0>) 0.008384018
tensor(-0.2081, grad_fn=<UnbindBackward0>) -0.11530824
tensor(0.0091, grad_fn=<UnbindBackward0>) -0.033328082
tensor(0.0477, grad_fn=<UnbindBackward0>) -0.012838049
tensor(-0.1239, grad_fn=<UnbindBackward0>) -0.093559034
tensor(0.2770, grad_fn=<UnbindBackward0>) 0.10090502
tensor(0.1052, grad_fn=<UnbindBackward0>) 0.0055681327
tensor(0.1420, grad_fn=<UnbindBackward0>) -0.07093749
tensor(-0.1696, grad_fn=<UnbindBackward0>) -0.04261394


tensor(564.0670, grad_fn=<SumBackward0>)
330.57333


In [37]:
print(layer_lookup.keys())
ctc_head_output = layer_lookup["ctc_head_output"]

dict_keys(['model_input', 'conv_output', 'misc_padMask', 'trf_input_0', 'trf_output_0', 'trf_input_1', 'trf_output_1', 'trf_input_2', 'trf_output_2', 'trf_input_3', 'trf_output_3', 'trf_input_4', 'trf_output_4', 'trf_input_5', 'trf_output_5', 'trf_input_6', 'trf_output_6', 'trf_input_7', 'trf_output_7', 'trf_input_8', 'trf_output_8', 'trf_input_9', 'trf_output_9', 'trf_input_10', 'trf_output_10', 'trf_input_11', 'trf_output_11', 'trf_input_12', 'trf_output_12', 'trf_input_13', 'trf_output_13', 'trf_input_14', 'trf_output_14', 'trf_input_15', 'trf_output_15', 'trf_input_16', 'trf_output_16', 'trf_input_17', 'trf_output_17', 'trf_input_18', 'trf_output_18', 'trf_input_19', 'trf_output_19', 'trf_input_20', 'trf_output_20', 'trf_input_21', 'trf_output_21', 'trf_input_22', 'trf_output_22', 'trf_input_23', 'trf_output_23', 'trf_input_24', 'trf_output_24', 'trf_input_25', 'trf_output_25', 'trf_input_26', 'trf_output_26', 'trf_input_27', 'trf_output_27', 'trf_input_28', 'trf_output_28', 'trf_i

In [45]:
print("feature_inputs", feature_inputs.shape)
my_ctc_out = model(feature_inputs.unsqueeze(0))

feature_inputs torch.Size([961, 80])


In [41]:
my_ctc_out.logits.shape

torch.Size([1, 319, 8065])

In [42]:
ctc_head_output.shape

(8065, 321)

In [44]:
print(ctc_head_output.sum())
print(my_ctc_out.logits.sum())


8723291.0
tensor(24259920., grad_fn=<SumBackward0>)


In [58]:
trf_input_0 = torch.Tensor(layer_lookup["trf_input_0"]).unsqueeze(0)
trf_output_0 = torch.Tensor(layer_lookup["trf_output_0"])

print("trf_input_0", trf_input_0.shape)
print("trf_output_0", trf_output_0.shape)
# print(model.mctc.encoder.layers[0])
my_output = model.mctc.encoder.layers[0](trf_input_0)

trf_input_0 torch.Size([1, 1536, 321])
trf_output_0 torch.Size([1536, 321])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1536x321 and 1536x1536)