In [1]:
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from datasets import EchoNetDataset
from models import EchoNetClassifier

In [2]:

memory = torch.rand(10, 32, 512)
tgt = torch.rand(20, 32, 512)
out = transformer_decoder(tgt, memory)


In [3]:
out.shape

torch.Size([20, 32, 512])

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
batch_size = 8

In [3]:
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Grayscale(num_output_channels=1)
])

trainset = EchoNetDataset(
    root_dir='/home/tienyu/data/EchoNet-Dynamic/Videos',
    target_csv='/home/tienyu/data/EchoNet-Dynamic/FileList.csv',
    split='train',
    transform=transform)

valset = EchoNetDataset(
    root_dir='/home/tienyu/data/EchoNet-Dynamic/Videos',
    target_csv='/home/tienyu/data/EchoNet-Dynamic/FileList.csv',
    split='val',
    transform=transform)

trainloader = DataLoader(trainset, batch_size=batch_size)
valloader = DataLoader(valset, batch_size=batch_size)
dataloaders = {'train': trainloader, 'val': valloader}

In [4]:
model = EchoNetClassifier()
model = model.to(device)

In [5]:
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

In [6]:
num_epochs = 10
n_iter = 100

In [7]:
best_val_loss = torch.finfo(torch.float32).max
since = time.time()

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)

    iterations = 0
    running_loss = 0.0

    for phase in ('train', 'val'):
        if phase == 'train':
            model.train()
        else:
            model.eval()
        for video_tensor, labels, nofs in dataloaders[phase]:
            iterations += 1
            optimizer.zero_grad()

            ef, esv, edv = [l.float().to(device) for l in labels]
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(video_tensor)
                esv_pred, edv_pred = outputs[nofs - 1, range(len(nofs)), :].T

                loss_esv = criterion(esv_pred, esv)
                loss_edv = criterion(edv_pred, edv)
                ef_pred = torch.clip(100 * (edv_pred - esv_pred) / (edv_pred + 1e-5), min=0., max=100.)
                loss_ef = criterion(ef_pred, ef)

                total_loss = loss_esv + loss_edv + loss_ef
                if phase == 'train':
                    total_loss.backward()
                    optimizer.step()

            running_loss += total_loss.item() * video_tensor.size(0)

            if not iterations % n_iter and phase == 'train':
                print(
                    f"Iteration: {iterations:.0f} \t {phase.title()} Loss: {running_loss/(iterations*batch_size):.5f}"
                )
        if phase == 'val':
            val_loss = running_loss/len(dataloaders[phase].dataset)
            print(
                f"Iteration: {iterations:.0f} \t {phase.title()} Loss: {val_loss:.5f}"
            )
            if val_loss < best_val_loss:
                torch.save(model.state_dict(), 'checkpoints/best_checkpoint.pt')
            best_val_loss = val_loss
    print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

Epoch 0/9
----------
Iteration: 2 	 Train Loss: 12218.51660
Iteration: 4 	 Train Loss: 46643.68530
Iteration: 6 	 Train Loss: 35244.73438
Iteration: 8 	 Train Loss: 32137.28113
Iteration: 10 	 Train Loss: 33280.14111
Iteration: 12 	 Train Loss: 28611.50863
Iteration: 14 	 Train Loss: 25236.39701
Iteration: 16 	 Train Loss: 22833.48429
Iteration: 18 	 Train Loss: 20782.60288
Iteration: 20 	 Train Loss: 18980.65771
Iteration: 22 	 Train Loss: 18912.84528
Iteration: 24 	 Train Loss: 17686.24466
Iteration: 26 	 Train Loss: 16590.17180
Iteration: 28 	 Train Loss: 15765.97881
Iteration: 30 	 Train Loss: 14851.45490
Iteration: 32 	 Train Loss: 14714.24643
Iteration: 34 	 Train Loss: 14060.70674
Iteration: 36 	 Train Loss: 13498.31698
Iteration: 38 	 Train Loss: 12923.44711
Iteration: 40 	 Train Loss: 12458.52292
Iteration: 42 	 Train Loss: 12101.52401
Iteration: 44 	 Train Loss: 11693.11684
Iteration: 46 	 Train Loss: 11300.14861
Iteration: 48 	 Train Loss: 10902.97961
Iteration: 50 	 Train L

Iteration: 412 	 Train Loss: 4885.09608
Iteration: 414 	 Train Loss: 4876.68102
Iteration: 416 	 Train Loss: 4870.40767
Iteration: 418 	 Train Loss: 4854.71860
Iteration: 420 	 Train Loss: 4843.16856
Iteration: 422 	 Train Loss: 4830.41221
Iteration: 424 	 Train Loss: 4819.28146
Iteration: 426 	 Train Loss: 4809.22326
Iteration: 428 	 Train Loss: 4797.05126
Iteration: 430 	 Train Loss: 4786.92799
Iteration: 432 	 Train Loss: 4779.35573
Iteration: 434 	 Train Loss: 4770.23658
Iteration: 436 	 Train Loss: 4758.51272
Iteration: 438 	 Train Loss: 4742.13389
Iteration: 440 	 Train Loss: 4768.48353
Iteration: 442 	 Train Loss: 4762.12008
Iteration: 444 	 Train Loss: 4766.53521
Iteration: 446 	 Train Loss: 4755.03962
Iteration: 448 	 Train Loss: 4742.83724
Iteration: 450 	 Train Loss: 4728.68711
Iteration: 452 	 Train Loss: 4734.15988
Iteration: 454 	 Train Loss: 4720.81038
Iteration: 456 	 Train Loss: 4714.45873
Iteration: 458 	 Train Loss: 4699.27715
Iteration: 460 	 Train Loss: 4691.77195


Iteration: 822 	 Train Loss: 4175.83640
Iteration: 824 	 Train Loss: 4184.36739
Iteration: 826 	 Train Loss: 4179.91276
Iteration: 828 	 Train Loss: 4180.57901
Iteration: 830 	 Train Loss: 4175.46537
Iteration: 832 	 Train Loss: 4169.51487
Iteration: 834 	 Train Loss: 4174.07842
Iteration: 836 	 Train Loss: 4170.49321
Iteration: 838 	 Train Loss: 4170.39189
Iteration: 840 	 Train Loss: 4165.79539
Iteration: 842 	 Train Loss: 4159.20643
Iteration: 844 	 Train Loss: 4161.91489
Iteration: 846 	 Train Loss: 4165.43678
Iteration: 848 	 Train Loss: 4159.74951
Iteration: 850 	 Train Loss: 4153.46288
Iteration: 852 	 Train Loss: 4155.94159
Iteration: 854 	 Train Loss: 4150.68469
Iteration: 856 	 Train Loss: 4149.28458
Iteration: 858 	 Train Loss: 4156.91098
Iteration: 860 	 Train Loss: 4153.49954
Iteration: 862 	 Train Loss: 4170.67049
Iteration: 864 	 Train Loss: 4170.97145
Iteration: 866 	 Train Loss: 4165.28885
Iteration: 868 	 Train Loss: 4161.23976
Iteration: 870 	 Train Loss: 4164.44651


NameError: name 'loaders' is not defined

In [9]:

import os

import av
import pandas as pd
import torch
import torchvision
from torch.utils.data import Dataset
import numpy as np


class EchoNetDataset(Dataset):
    """Face Landmarks dataset."""
    def __init__(self,
                 target_csv,
                 root_dir,
                 split="train",
                 transform=None,
                 frame_size=112,
                 sampling_frequency=4,
                 clip_length=16):
        """
        Args:
            target_csv (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        assert split in (
            "train",
            "val",
            "test",
        ), "Please validate the split specification (train, val or test)"
        self.split = split.upper()
        self.sampling_frequency = sampling_frequency
        self.clip_length = clip_length
        self.min_num_frames = self.clip_length * self.sampling_frequency
        self.df = pd.read_csv(target_csv)
        self.df = self.df.loc[(self.df["Split"] == self.split) & (self.df['NumberOfFrames'] > self.min_num_frames)]
        self.max_num_frames = self.df.NumberOfFrames.max()
        self.root_dir = root_dir
        self.transform = transform
        self.frame_size = frame_size
        

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        video_name = os.path.join(self.root_dir, self.df.iloc[idx, 0]) + ".avi"
        container = av.open(video_name)

        frames = []
        for frame in container.decode(video=0):
            f = frame.to_image()
            if self.transform:
                f = self.transform(f)
            frames.append(f)
        video_tensor = torch.stack(frames)
        nof = self.df.NumberOfFrames.iloc[idx]
        
        candidates = nof - self.min_num_frames
        start_frame = np.random.choice(candidates)
        end_frame = start_frame + self.min_num_frames
        sampled_index = np.arange(nof)[start_frame:end_frame:4]
        sampled_video_tensor = video_tensor[sampled_index]

        ef, esv, edv = self.df.iloc[idx, 1:4]

        return sampled_video_tensor, (ef, esv, edv), self.clip_length


In [10]:
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.transforms import InterpolationMode

transform = transforms.Compose([
    transforms.Resize((112, 112), interpolation=InterpolationMode.BICUBIC),
    transforms.ToTensor(),
    transforms.Grayscale(num_output_channels=1)
])

batch_size = 2

trainset = EchoNetDataset(
    root_dir='/home/tienyu/data/EchoNet-Dynamic/Videos',
    target_csv='/home/tienyu/data/EchoNet-Dynamic/FileList.csv',
    split='train',
    transform=transform)
trainloader = DataLoader(trainset, batch_size=batch_size)

In [11]:
for video_tensor, labels, nofs in trainloader:
    print(video_tensor.shape)
    break

torch.Size([2, 16, 1, 112, 112])


In [10]:
import pandas as pd
df = pd.read_csv('/home/tienyu/data/EchoNet-Dynamic/FileList.csv')

In [61]:
train_df = df[df['Split'] == 'TRAIN']
validation_df = df[df['Split'] == 'VAL']
test_df = df[(df['Split'] == 'TEST') & (df['NumberOfFrames'] > clip_length * sampling_frequency)]

In [60]:
test_df.shape

(1266, 9)

In [28]:
nof = test_df.NumberOfFrames.sample().item()
nof

277

In [29]:
clip_length = 16
sampling_frequency = 4

In [34]:

candidates

213

array([108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152, 156,
       160, 164, 168])