In [1]:
# INIT
%reset -s -f
# %matplotlib ipympl
# %matplotlib inline 
# notebook
%load_ext autoreload
%autoreload 2

# standard modules    
import os
import numpy as np

# third-party packages
import torch

# local packages
from utils_jgm.machine_compatibility_utils import MachineCompatibilityUtils
import ecog2txt
import ecog2txt.trainers as e2t_trainers
from ecog2txt import makin_lab_data_generators

MCUs = MachineCompatibilityUtils()

2023-12-14 11:53:06.862281: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-14 11:53:06.862314: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-14 11:53:06.863231: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-14 11:53:06.867917: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




In [2]:
# CREATE A NEW MODEL
experiment = 'mocha-1'
token_type = 'word_sequence'
project = 'EFC'
subject_ids = [401]

experiment_manifest_name = os.path.join(
    project,
    '_'.join([experiment, token_type]) + '.yaml'
)
trainer = e2t_trainers.MultiSubjectTrainer(
    experiment_manifest_name=experiment_manifest_name,
    subject_ids=subject_ids,
    SN_kwargs={
        # 'Nepochs': 20,
        # 'training_GPUs': [1],
        'layer_sizes': {
            'decoder_embedding': [150],
            'decoder_projection': [],
            'decoder_rnn': [800],
            'encoder_embedding': [100],
            'encoder_rnn': [400, 400, 400],
            # 'encoder_0_projection': [225],
            'encoder_1_projection': [225],
        },
        'coupling': 'attention',
        'RNN_type': 'GRU',
    },
    DG_kwargs={
        'REFERENCE_BIPOLAR': True,
        'prefix': project,
        'subdir': 'with_audio',
        'num_cepstral_coeffs': 13,
        # 'max_seconds': 3,
        'tf_record_partial_path': os.path.join(
            MCUs.get_path('data'),
            'ecog2txt', token_type, 'tf_records_w2v_sil_rem', # 200 Hz, 429 channels
            # 'ecog2txt', token_type, 'tf_records_BY_raw_filtered', # 200 Hz, 238 channels
            project + repr(subject_ids[-1]) + '_B{0}.tfrecord', # 101 Hz
        ),
        'REFERENCE_BIPOLAR': False,
        'good_electrodes': list(range(512)),
        'grid_size': [512, 1],
        'sampling_rate': 101.7
    },
    ES_kwargs = {
        'data_mapping': {
            'encoder_inputs': 'ecog_sequence',
            'encoder_1_targets': 'phoneme_sequence',
            # 'encoder_1_targets': 'audio_sequence',
            'decoder_targets': 'text_sequence',
        },
    },
)

Setting feature_list for encoder_1_targets to training-intersection/validation-union


2023-12-14 11:53:09.177796: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-14 11:53:09.178657: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-14 11:53:09.178793: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

.
.
....

2023-12-14 11:53:09.512525: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-14 11:53:09.512723: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-14 11:53:09.512853: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

....
All tf_records have been written...
Setting feature_list for decoder_targets to vocab list stored in /home/bayuan/Documents/fall23/ecog2vec/packages/ecog2txt/ecog2txt/auxiliary/vocab.mocha-timit.1806
USING GRU in the RNNs
COUPLING ENCODER TO DECODER WITH ATTENTION




In [3]:
print('Before:')
print(trainer.ecog_subjects[-1].block_ids)

trainer.ecog_subjects[-1].block_ids = {
    'validation': {83},
    'testing': {},
    'training': {4, 41, 57, 61, 66, 69, 73, 77, 87}
}

# ['testing'] = set()
# trainer.ecog_subjects[-1].block_ids['training'] = {41, 57, 61, 66, 69, 73, 77, 83, 87}
# trainer.ecog_subjects[-1].block_ids['validation'] = {4}

print('After:')
print(trainer.ecog_subjects[-1].block_ids)

Before:
{'validation': {83}, 'testing': {87}, 'training': {66, 4, 69, 41, 73, 77, 57, 61}}
After:
{'validation': {83}, 'testing': {}, 'training': {66, 4, 69, 73, 41, 77, 87, 57, 61}}


In [4]:
import torch
from machine_learning.neural_networks.torch_sequence_networks import (
    Sequence2Sequence, SequenceTrainer
)

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = 'cpu'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
manifest = trainer.experiment_manifest[trainer.ecog_subjects[-1].subnet_id]

torch_trainer = SequenceTrainer(manifest, trainer.ecog_subjects)

In [5]:
torch_trainer.train_and_assess(400, trainer.net, device)


[46mdon't do charlie dirty dishes                                don't do charlie dirty dishes[0m
[46mhelp celebrate your brother success                          help celebrate your brother success[0m
[46mcatastrophic economic cutbacks neglect the poor              catastrophic economic cutbacks neglect the poor[0m
[46mmum strongly dislikes appetizers                             mum strongly dislikes appetizers[0m
[46ma roll of wire lay near the wall                             a roll of wire lay near the wall[0m
[46mwill robin wear a yellow lily                                will robin wear a yellow lily[0m
[46maddition and subtraction are learned skills                  addition and subtraction are learned skills[0m
[46mthose thieves stole thirty jewels                            those thieves stole thirty jewels[0m
[46mswing your arm as high as you can                            swing your arm as high as you can[0m
[46mwhere were you while we were away         

In [8]:
from scipy.stats import ranksums

wer_orig = [0.242, 0.241, 0.025, 0.022, 0.020, 0.017, 0.080, 0.043, 0.072, 0.021, 
            0.060, 0.021, 0.071, 0.020, 0.000, 0.034, 0.054, 0.053, 0.096, 0.107]
wer_w2v_sil_inc = [0.590, 0.701, 0.105, 0.068, 0.000, 0.042, 0.043, 0.041, 0.080, 0.094, 
                   0.027, 0.030, 0.017, 0.060, 0.035, 0.018, 0.020, 0.057, 0.073, 0.075]
wer_w2v_sil_rem = [0.549, 0.594, 0.123, 0.090, 0.058, 0.057, 0.043, 0.023, 0.074, 0.105,
                   0.037, 0.058, 0.093, 0.094, 0.036, 0.062, 0.040, 0.000, 0.066, 0.089]

statistic, p = ranksums(wer_orig, wer_w2v_sil_rem, alternative='two-sided')
p

0.10458826282895559