In [1]:
import torch

In [1]:
import os
import sys
import numpy as np
import h5py
# Make the parent of the current working directory importable
# (presumably the repository root that holds the project-local modules — TODO confirm).
root = os.path.dirname(os.path.abspath(os.curdir))
sys.path.append(root)

import torch
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
from torch.utils.data import Subset

from predify.utils.training import train_pcoders, eval_pcoders

from networks_2022 import BranchedNetwork
from data.CleanSoundsDataset import CleanSoundsDataset, TrainCleanSoundsDataset, PsychophysicsCleanSoundsDataset

# Specify Network to train
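**TODO:** Convert this notebook into a script that accepts an argument selecting which network to train. Until then, run only one of the three selection cells below; each one reassigns `PNetClass` and `pnet_name`, so if all are run in order the last one (`all`) takes effect.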

In [2]:
# Network selection: 'a1' variant.
# PNetClass is instantiated in the "Load network and optimizer" section and
# pnet_name names the checkpoint / tensorboard sub-directories.
# NOTE: the later selection cells reassign both names if they are run.
from pbranchednetwork_a1 import PBranchedNetwork_A1SeparateHP
PNetClass = PBranchedNetwork_A1SeparateHP
pnet_name = 'a1'

In [3]:
# Network selection: 'conv1' variant.
# Running this cell overrides any PNetClass / pnet_name chosen by an earlier
# selection cell; the following selection cell overrides it again if run.
from pbranchednetwork_conv1 import PBranchedNetwork_Conv1SeparateHP
PNetClass = PBranchedNetwork_Conv1SeparateHP
pnet_name = 'conv1'

In [4]:
# Network selection: 'all' variant.
# This is the last selection cell, so when the notebook is run top to bottom
# these are the values of PNetClass / pnet_name used by the rest of the notebook.
from pbranchednetwork_all import PBranchedNetwork_AllSeparateHP
PNetClass = PBranchedNetwork_AllSeparateHP
pnet_name = 'all'

# Parameters

In [5]:
# --- Compute device: use the GPU when one is visible, otherwise the CPU ---
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print(f'Device: {DEVICE}')

# --- Training-loop and DataLoader settings ---
NUM_EPOCHS = 50
BATCH_SIZE = 50
NUM_WORKERS = 2
PIN_MEMORY = True

# --- Optimizer: learning rate given to every pcoder parameter group ---
lr = 1e-4

# --- Filesystem locations ---
# engram_dir keeps its trailing '/' because file paths are built from it by
# plain string interpolation.
engram_dir = '/mnt/smb/locker/issa-locker/users/Erica/'
train_datafile = f'{engram_dir}training_dataset_random_order.hdf5'
checkpoints_dir = '/mnt/smb/locker/abbott-locker/hcnn/checkpoints/'
tensorboard_dir = '/mnt/smb/locker/abbott-locker/hcnn/tensorboard/'

Device: cuda


In [6]:
# Echo the resolved training-data path as a quick sanity check.
print(train_datafile)

/mnt/smb/locker/issa-locker/users/Erica/training_dataset_random_order.hdf5


In [7]:
# Shell escape: report GPU / driver status before the model is loaded.
!nvidia-smi

Sat Aug  6 16:56:37 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.48.07    Driver Version: 515.48.07    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:1D:00.0 Off |                  N/A |
| 55%   48C    P8    22W / 250W |      3MiB / 11264MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

# Load network and optimizer

In [8]:
# Build the backbone and load its saved weights.
# map_location='cpu' lets the checkpoint deserialize on machines without a GPU
# (DEVICE falls back to CPU in that case). load_state_dict copies the values
# into the freshly built module, and the model is moved to DEVICE later via
# pnet.to(DEVICE), so GPU runs are unaffected.
net = BranchedNetwork()
net.load_state_dict(
    torch.load(f'{engram_dir}networks_2022_weights.pt', map_location='cpu')
)



<All keys matched successfully>

In [9]:
# Wrap the weight-loaded backbone in the selected PNetClass.
# build_graph=True is forwarded to the constructor; its effect is defined in
# the pbranchednetwork_* modules, which are not shown in this notebook.
pnet = PNetClass(net, build_graph=True)

In [10]:
# Optional (disabled): restore pnet from a previously saved checkpoint
# instead of starting from the freshly built network. Uncomment to resume.
#pnet.load_state_dict(torch.load(
#    f"{checkpoints_dir}all/all-25-regular.pth",
#    map_location='cpu'
#    ))

In [11]:
# Put the network in evaluation mode. Because this is the last expression of
# the cell, the returned module is displayed, giving a printout of the architecture.
pnet.eval()

PBranchedNetwork_AllSeparateHP(
  (backbone): BranchedNetwork(
    (speech_branch): Sequential(
      (conv1): ConvLayer(
        (block): Sequential(
          (0): Conv2d(1, 96, kernel_size=(6, 14), stride=(3, 3), padding=(2, 6))
          (1): ReLU()
        )
      )
      (rnorm1): LRNorm(
        (block): LocalResponseNorm(5, alpha=0.005, beta=0.75, k=1.0)
      )
      (pool1): PoolLayer(
        (block): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
      )
      (conv2): ConvLayer(
        (block): Sequential(
          (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(2, 2), padding=(1, 2))
          (1): ReLU()
        )
      )
      (rnorm2): LRNorm(
        (block): LocalResponseNorm(5, alpha=0.005, beta=0.75, k=1.0)
      )
      (pool2): PoolLayer(
        (block): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
      )
      (conv3): ConvLayer(
        (block): Sequential(
          (0): Conv2d(256, 512, kernel_size=

In [12]:



# Move the whole model onto the selected device before building the optimizer.
pnet.to(DEVICE)

# Only the `pmodule` of each pcoder (pcoder1 .. pcoderN) is handed to Adam,
# one parameter group per pcoder, all sharing the same learning rate.
pcoder_pmodules = (
    getattr(pnet, f"pcoder{i}").pmodule
    for i in range(1, pnet.number_of_pcoders + 1)
)
pcoder_param_groups = [
    {'params': pmodule.parameters(), 'lr': lr} for pmodule in pcoder_pmodules
]
optimizer = torch.optim.Adam(pcoder_param_groups, weight_decay=5e-4)

# Set up TrainSoundsDataset

In [13]:
import os
import torch
import numpy as np
import h5py
from torch.utils.data import Dataset, DataLoader

In [14]:
 
class CleanSoundsDataset(Dataset):
    """
    Clean sounds dataset from WSJ, but excludes the psychophysics.

    Reads the ``'data'`` and ``'labels'`` datasets of an HDF5 file and can
    expose either the leading (train) or the trailing (held-out) contiguous
    slice of the samples.

    NOTE(review): a class with the same name is imported from
    ``data.CleanSoundsDataset`` at the top of the notebook; this definition
    shadows it once the cell is run.

    Parameters
    ----------
    hdf_file : str
        Path of the HDF5 file; it is opened read-only and kept open in ``self.f``.
    subset : float, optional
        Fraction (0..1) of the samples assigned to the training split.
        ``None`` exposes every sample regardless of ``train``.
    train : bool
        ``True`` exposes the first ``int(N * subset)`` samples; ``False``
        exposes the remaining ``N - int(N * subset)`` samples.

    Attributes
    ----------
    n_data : int
        Number of samples exposed by this dataset (its ``len``).
    start_ind : int
        Index in the file of the first exposed sample (0 for the train split).
    """

    def __init__(self, hdf_file, subset = None, train = True):
        self.hdf_file = hdf_file
        self.train = train
        self.f = h5py.File(hdf_file, 'r')
        # 'data' is unpacked as a 2-D array: (number of samples, flattened features).
        n_total, __ = np.shape(self.f['data'])
        self.start_ind, self.n_data = self._split_bounds(n_total, subset, train)

    @staticmethod
    def _split_bounds(n_total, subset, train):
        """Return ``(start_index, length)`` of the requested split.

        The boundary is always derived from the *total* sample count, so the
        train and held-out slices are disjoint and together cover every
        sample. (Previously the held-out offset was computed from the already
        shrunken held-out length, which placed the held-out slice inside the
        training range.)

        Raises
        ------
        ValueError
            If ``subset`` is given but lies outside ``[0, 1]``.
        """
        if subset is None:
            return 0, n_total
        if not 0 <= subset <= 1:
            raise ValueError(f"subset must be between 0 and 1, got {subset!r}")
        n_train = int(n_total * subset)
        if train:
            return 0, n_train
        # Exact complement: avoids losing a sample to float rounding of (1 - subset).
        return n_train, n_total - n_train

    def __len__(self):
        return self.n_data

    def __getitem__(self, idx):
        """Return ``(data, label)`` tensors for the sample(s) at ``idx``.

        The data is reshaped to ``(-1, 164, 400)`` before being wrapped in a tensor.
        """
        if not self.train:
            idx = idx + self.start_ind  # shift into the held-out slice

        if torch.is_tensor(idx):
            idx = idx.tolist()

        data = np.array(self.f['data'][idx]).reshape((-1, 164, 400))
        return torch.tensor(data), torch.tensor(self.f['labels'][idx])
    
        

In [15]:
# Both datasets read the same HDF5 file with subset=0.9; the `train` flag
# selects which side of the split each one exposes.
train_dataset = CleanSoundsDataset(train_datafile, subset=0.9, train=True)
test_dataset = CleanSoundsDataset(train_datafile, subset=0.9, train=False)

In [16]:

# Settings shared by both loaders; samples are served in dataset order (no shuffling).
loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)
train_loader = DataLoader(train_dataset, **loader_kwargs)
eval_loader = DataLoader(test_dataset, **loader_kwargs)

# Set up checkpoints and tensorboards

In [17]:
# Checkpoints go to <checkpoints_dir>/<pnet_name>/<pnet_name>-<epoch>-<type>.pth.
# exist_ok=True creates the directory only when missing, without the
# check-then-create race of a separate os.path.exists() test.
checkpoint_dir = os.path.join(checkpoints_dir, pnet_name)
os.makedirs(checkpoint_dir, exist_ok=True)
# Filename template; filled in with .format(epoch=..., type=...) when saving.
checkpoint_path = os.path.join(checkpoint_dir, pnet_name + '-{epoch}-{type}.pth')

# summarywriter: tensorboard event files go to <tensorboard_dir>/<pnet_name>/
from torch.utils.tensorboard import SummaryWriter
tensorboard_path = os.path.join(tensorboard_dir, pnet_name)
os.makedirs(tensorboard_path, exist_ok=True)
sumwriter = SummaryWriter(tensorboard_path, filename_suffix='')

# Train

In [None]:
# Mean-squared-error loss object handed to both predify helpers.
loss_function = torch.nn.MSELoss()

for epoch in range(1, NUM_EPOCHS + 1):
    # One pass over the training loader, then one over the evaluation loader;
    # both helpers receive the summary writer.
    train_pcoders(pnet, optimizer, loss_function, epoch, train_loader, DEVICE, sumwriter)
    eval_pcoders(pnet, loss_function, epoch, eval_loader, DEVICE, sumwriter)

    # save checkpoints every 5 epochs
    if epoch % 5 != 0:
        continue
    regular_checkpoint_file = checkpoint_path.format(epoch=epoch, type='regular')
    torch.save(pnet.state_dict(), regular_checkpoint_file)

  "The default behavior for interpolate/upsample with float scale_factor changed "


Training Epoch: 1 [50/49669]	Loss: 0.6606
Training Epoch: 1 [100/49669]	Loss: 0.4528
Training Epoch: 1 [150/49669]	Loss: 0.4229
Training Epoch: 1 [200/49669]	Loss: 0.1998
Training Epoch: 1 [250/49669]	Loss: 0.1939
Training Epoch: 1 [300/49669]	Loss: 0.2723
Training Epoch: 1 [350/49669]	Loss: 0.2518
Training Epoch: 1 [400/49669]	Loss: 0.1381
Training Epoch: 1 [450/49669]	Loss: 0.1299
Training Epoch: 1 [500/49669]	Loss: 0.1874
Training Epoch: 1 [550/49669]	Loss: 0.1854
Training Epoch: 1 [600/49669]	Loss: 0.1295
Training Epoch: 1 [650/49669]	Loss: 0.0914
Training Epoch: 1 [700/49669]	Loss: 0.1107
Training Epoch: 1 [750/49669]	Loss: 0.1349
Training Epoch: 1 [800/49669]	Loss: 0.1148
Training Epoch: 1 [850/49669]	Loss: 0.0888
Training Epoch: 1 [900/49669]	Loss: 0.0717
Training Epoch: 1 [950/49669]	Loss: 0.0886
Training Epoch: 1 [1000/49669]	Loss: 0.1003
Training Epoch: 1 [1050/49669]	Loss: 0.0826
Training Epoch: 1 [1100/49669]	Loss: 0.0616
Training Epoch: 1 [1150/49669]	Loss: 0.0612
Training

Training Epoch: 1 [9400/49669]	Loss: 0.0113
Training Epoch: 1 [9450/49669]	Loss: 0.0114
Training Epoch: 1 [9500/49669]	Loss: 0.0110
Training Epoch: 1 [9550/49669]	Loss: 0.0106
Training Epoch: 1 [9600/49669]	Loss: 0.0111
Training Epoch: 1 [9650/49669]	Loss: 0.0109
Training Epoch: 1 [9700/49669]	Loss: 0.0112
Training Epoch: 1 [9750/49669]	Loss: 0.0109
Training Epoch: 1 [9800/49669]	Loss: 0.0110
Training Epoch: 1 [9850/49669]	Loss: 0.0106
Training Epoch: 1 [9900/49669]	Loss: 0.0104
Training Epoch: 1 [9950/49669]	Loss: 0.0109
Training Epoch: 1 [10000/49669]	Loss: 0.0109
Training Epoch: 1 [10050/49669]	Loss: 0.0114
Training Epoch: 1 [10100/49669]	Loss: 0.0111
Training Epoch: 1 [10150/49669]	Loss: 0.0105
Training Epoch: 1 [10200/49669]	Loss: 0.0106
Training Epoch: 1 [10250/49669]	Loss: 0.0109
Training Epoch: 1 [10300/49669]	Loss: 0.0108
Training Epoch: 1 [10350/49669]	Loss: 0.0103
Training Epoch: 1 [10400/49669]	Loss: 0.0112
Training Epoch: 1 [10450/49669]	Loss: 0.0106
Training Epoch: 1 [105