In [None]:
from google.colab import drive

# Directory inside the Colab VM under which Google Drive is exposed.
DRIVE_MOUNT_POINT = '/content/drive'

# Mounting is what makes the dataset folder referenced by later cells reachable.
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
import pandas as pd
import skimage.draw
from tqdm import tqdm
import echonet

In [None]:
# Upload the zipped `echonet` helper package through Colab's file picker.
# The captured output below shows it being saved as echonet.zip; the next
# cell unpacks that archive.
from google.colab import files
# `uploaded` keeps whatever files.upload() returns for the selected files.
uploaded = files.upload()

Saving echonet.zip to echonet.zip


In [None]:
!unzip echonet.zip

Archive:  echonet.zip
   creating: echonet/
  inflating: echonet/config.py       
   creating: echonet/datasets/
  inflating: echonet/datasets/echo.py  
  inflating: echonet/datasets/__init__.py  
   creating: echonet/datasets/__pycache__/
  inflating: echonet/datasets/__pycache__/echo.cpython-311.pyc  
  inflating: echonet/datasets/__pycache__/__init__.cpython-311.pyc  
   creating: echonet/utils/
  inflating: echonet/utils/segmentation.py  
  inflating: echonet/utils/video.py  
  inflating: echonet/utils/__init__.py  
   creating: echonet/utils/__pycache__/
  inflating: echonet/utils/__pycache__/segmentation.cpython-311.pyc  
  inflating: echonet/utils/__pycache__/video.cpython-311.pyc  
  inflating: echonet/utils/__pycache__/__init__.cpython-311.pyc  
  inflating: echonet/__init__.py     
  inflating: echonet/__main__.py     
   creating: echonet/__pycache__/
  inflating: echonet/__pycache__/config.cpython-311.pyc  
  inflating: echonet/__pycache__/__init__.cpython-311.pyc  
  infla

In [None]:
import echonet


In [None]:
import torch
import torchvision

# Record the library builds this notebook ran against:
# first line is the PyTorch version followed by its CUDA toolkit version,
# second line is the torchvision version.
print(f"{torch.__version__} {torch.version.cuda}")
print(torchvision.__version__)


2.5.1+cu124 12.4
0.20.1+cu124


In [None]:
import torch

# Summarize the CUDA devices visible to this runtime.
gpu_ready = torch.cuda.is_available()
print(gpu_ready)  # True if a GPU is available
print(torch.cuda.device_count())  # Number of available GPUs
if gpu_ready:
    print(torch.cuda.get_device_name(0))
else:
    # Asking for the name of device 0 raises when no GPU is attached, which
    # would make this diagnostic cell fail exactly when it is most useful.
    print('No CUDA device available')


True
1
Tesla T4


In [None]:
# Root folder of the EchoNet-Dynamic dataset on the mounted Drive.
# Later cells read FileList.csv and VolumeTracings.csv directly from this
# folder and load videos from its 'Videos' sub-folder, so adjust this path
# if your copy lives elsewhere.
data_dir = '/content/drive/MyDrive/EchoNet-Dynamic/EchoNet-Dynamic'



In [None]:
import pandas as pd

# Per-video metadata table (the preview below shows EF/ESV/EDV, frame
# geometry and the TRAIN/VAL split label).
file_list_path = os.path.join(data_dir, 'FileList.csv')
file_list = pd.read_csv(file_list_path)

# Tracing table: one (X1, Y1, X2, Y2) segment per row, keyed by file and frame.
volume_tracings_path = os.path.join(data_dir, 'VolumeTracings.csv')
volume_tracings = pd.read_csv(volume_tracings_path)

# Preview the first rows of each table.
for table in (file_list, volume_tracings):
    print(table.head())

             FileName         EF         ESV         EDV  FrameHeight  \
0  0X100009310A3BD7FC  78.498406   14.881368   69.210534          112   
1  0X1002E8FBACD08477  59.101988   40.383876   98.742884          112   
2  0X1005D03EED19C65B  62.363798   14.267784   37.909734          112   
3  0X10075961BC11C88E  54.545097   33.143084   72.914210          112   
4  0X10094BA0A028EAC3  24.887742  127.581945  169.855024          112   

   FrameWidth  FPS  NumberOfFrames  Split  
0         112   50             174    VAL  
1         112   50             215  TRAIN  
2         112   50             104  TRAIN  
3         112   55             122  TRAIN  
4         112   52             207    VAL  
                 FileName         X1         Y1         X2         Y2  Frame
0  0X100009310A3BD7FC.avi  51.260417  15.348958  64.932292  69.125000     46
1  0X100009310A3BD7FC.avi  50.037611  17.167841  53.367222  16.321330     46
2  0X100009310A3BD7FC.avi  49.157378  20.407629  57.090549  18.390

In [None]:
class EchoDataset(torch.utils.data.Dataset):
    """Per-video EchoNet-Dynamic dataset yielding a clip and its tracing masks.

    Each item is a ``(video, masks)`` pair:

    * ``video`` -- float32 tensor with the axis order of the array returned by
      ``echonet.utils.loadvideo`` (this class treats it as
      (channels, frames, height, width)), standardized to zero mean and unit
      variance over the whole clip.
    * ``masks`` -- float32 tensor of shape (num_masks, height, width) holding
      one binary mask per traced frame, in the order the frames first appear
      in the tracings table. A single all-zero mask is returned when the
      video has no tracings.

    Args:
        data_dir: Dataset root; videos are read from ``<data_dir>/Videos``.
        split: Value of the ``Split`` column to keep (upper-cased before
            comparison).
        transform: Optional callable applied to every frame and every mask
            *before* standardization. It receives a (C, H, W) tensor of raw
            pixel values for frames and a (1, H, W) uint8 tensor with values
            0/255 for masks. It may return a tensor or anything ``np.array``
            can turn into an HxW or HxWxC array (for example a PIL image).

    NOTE(review): clips differ in frame count and in number of traced frames,
    so the default DataLoader collation cannot stack items when
    ``batch_size > 1``; a custom ``collate_fn`` or per-frame sampling is
    needed for batched training.
    """

    def __init__(self, data_dir, split='train', transform=None):
        self.data_dir = data_dir
        self.split = split
        self.transform = transform

        # Keep only the videos assigned to the requested split.
        self.file_list = file_list[file_list['Split'] == split.upper()]
        self.fnames = self.file_list['FileName'].tolist()

        # FileList.csv stores names without an extension while
        # VolumeTracings.csv stores them with ".avi". Comparing the raw
        # strings never matches (every mask would come back empty), so match
        # on the extension-less stem instead.
        wanted = {self._stem(name) for name in self.fnames}
        stems = volume_tracings['FileName'].map(self._stem)
        self.volume_tracings = volume_tracings[stems.isin(wanted)]

        # Index the tracings once so __getitem__ does not rescan the table.
        # sort=False keeps frames in order of first appearance.
        self._tracings = {}
        grouped = self.volume_tracings.groupby(['FileName', 'Frame'], sort=False)
        for (name, frame), rows in grouped:
            coords = rows[['X1', 'Y1', 'X2', 'Y2']].to_numpy()
            self._tracings.setdefault(self._stem(name), []).append((frame, coords))

    def __len__(self):
        return len(self.fnames)

    @staticmethod
    def _stem(name):
        """Return ``name`` without its file extension."""
        return os.path.splitext(str(name))[0]

    @staticmethod
    def _polygon_mask(coords, shape):
        """Rasterize one frame's tracing rows into a binary float32 mask."""
        x1, y1, x2, y2 = coords.T
        # The first row is skipped, as in the original code -- presumably it
        # is the long-axis line rather than a contour segment; TODO confirm.
        x = np.concatenate((x1[1:], np.flip(x2[1:])))
        y = np.concatenate((y1[1:], np.flip(y2[1:])))

        mask = np.zeros(shape, dtype=np.float32)
        rr, cc = skimage.draw.polygon(np.rint(y).astype(int), np.rint(x).astype(int), shape)
        mask[rr, cc] = 1
        return mask

    @staticmethod
    def _as_chw(image):
        """Convert a transform result into a (C, H, W) tensor."""
        if not isinstance(image, torch.Tensor):
            # PIL images and arrays arrive as HxW or HxWxC.
            array = np.array(image)
            array = array[None] if array.ndim == 2 else array.transpose(2, 0, 1)
            return torch.from_numpy(np.ascontiguousarray(array))
        return image.unsqueeze(0) if image.ndim == 2 else image

    def _apply_transform(self, images):
        """Run the transform on each (C, H, W) image and stack the results."""
        return torch.stack([self._as_chw(self.transform(image)) for image in images])

    def __getitem__(self, idx):
        fname = self.fnames[idx]
        stem = self._stem(fname)
        # Always ends in exactly one ".avi", whether or not FileList has it.
        video_path = os.path.join(self.data_dir, 'Videos', stem + '.avi')
        raw = echonet.utils.loadvideo(video_path)
        height, width = raw.shape[2], raw.shape[3]

        # One mask per traced frame, or a single empty mask if none exist.
        traced = self._tracings.get(stem, [])
        if traced:
            masks = np.stack([self._polygon_mask(coords, (height, width)) for _, coords in traced])
        else:
            masks = np.zeros((1, height, width), dtype=np.float32)

        if self.transform:
            # Transform raw pixel values, not standardized floats: image
            # transforms such as ToPILImage rescale and truncate float input.
            frames = torch.from_numpy(np.ascontiguousarray(raw)).permute(1, 0, 2, 3)
            video = self._apply_transform(frames).permute(1, 0, 2, 3).float().contiguous()

            # Masks travel as 0/255 images and are re-binarized afterwards so
            # interpolation cannot leave fractional labels.
            mask_images = torch.from_numpy((masks * 255).astype(np.uint8)).unsqueeze(1)
            resized = self._apply_transform(mask_images)[:, 0]
            if not resized.is_floating_point():
                resized = resized.float() / 255
            masks = (resized >= 0.5).float()
        else:
            video = torch.from_numpy(raw.astype(np.float32))
            masks = torch.from_numpy(masks)

        # Standardize over the whole clip; a constant clip is only centered
        # so it does not turn into NaN/inf.
        std = video.std(unbiased=False)
        video = (video - video.mean()) / (std if std > 0 else 1.0)

        return video, masks

In [None]:
from torchvision import transforms
from torchvision.transforms.functional import to_pil_image

# Spatial size (height, width) every frame and mask is resized to.
IMAGE_SIZE = (112, 112)

# Resize works directly on tensors. The previous ToPILImage + Resize pipeline
# returned PIL images, which the dataset cannot torch.stack
# ("expected Tensor as element 0 ... but got Image").
_resize = transforms.Resize(IMAGE_SIZE, antialias=True)


def transform(image):
    """Resize one frame or mask tensor to IMAGE_SIZE and return a tensor.

    Accepts (C, H, W) frames as well as plain (H, W) masks; a 2D input gets a
    temporary channel axis so it can be resized like an image.
    """
    if image.ndim == 2:
        return _resize(image.unsqueeze(0)).squeeze(0)
    return _resize(image)


# Create datasets
train_dataset = EchoDataset(data_dir, split='train', transform=transform)
val_dataset = EchoDataset(data_dir, split='val', transform=transform)
test_dataset = EchoDataset(data_dir, split='test', transform=transform)

# Create dataloaders
# NOTE(review): each item is a whole clip plus its masks, and clips differ in
# frame count; default collation may be unable to stack them at batch_size=8.
# Confirm, and supply a collate_fn or sample single frames if so.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

In [None]:
class TransUNet(nn.Module):
    """Patch-embedding transformer encoder with a transposed-conv decoder.

    The input image is cut into ``patch_size`` x ``patch_size`` patches by a
    strided convolution, the patch tokens pass through a transformer encoder,
    and three stride-2 transposed convolutions plus a 1x1 convolution produce
    per-pixel logits. The logits are finally resized to the input resolution,
    so the output is (batch, out_channels, H, W) for a (batch, in_channels,
    H, W) input.

    Args:
        in_channels: Channels of the input image.
        out_channels: Channels of the output logit map.
        img_size: Nominal input side length; only used to compute
            ``num_patches``.
        patch_size: Side length of each patch (kernel and stride of the
            patch-embedding convolution).
        embed_dim: Token dimension of the transformer.
        num_heads: Attention heads per encoder layer.
        num_layers: Number of encoder layers.

    NOTE(review): ``num_patches`` is stored but never used and no positional
    encoding is added to the tokens; the decoder has no skip connections.
    Confirm whether either was intended.
    """

    def __init__(self, in_channels=3, out_channels=1, img_size=112, patch_size=16, embed_dim=768, num_heads=12, num_layers=12):
        super().__init__()
        self.patch_size = patch_size
        self.embed_dim = embed_dim
        self.num_patches = (img_size // patch_size) ** 2

        # Patch embedding
        self.patch_embed = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)

        # Transformer encoder. forward() feeds (batch, patches, embed_dim)
        # tokens, so batch_first=True is required; with the default layout the
        # batch axis is taken as the sequence and attention mixes information
        # between different samples.
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, batch_first=True),
            num_layers=num_layers
        )

        # Decoder: three 2x upsampling stages followed by a 1x1 projection.
        self.up1 = nn.ConvTranspose2d(embed_dim, embed_dim // 2, kernel_size=2, stride=2)
        self.up2 = nn.ConvTranspose2d(embed_dim // 2, embed_dim // 4, kernel_size=2, stride=2)
        self.up3 = nn.ConvTranspose2d(embed_dim // 4, embed_dim // 8, kernel_size=2, stride=2)
        self.final = nn.Conv2d(embed_dim // 8, out_channels, kernel_size=1)

    def forward(self, x):
        """Return logits with the same spatial size as ``x``."""
        in_h, in_w = x.shape[-2:]

        # Patch embedding -> (batch, patches, embed_dim) tokens
        x = self.patch_embed(x)
        b, c, h, w = x.shape
        x = x.flatten(2).transpose(1, 2)

        # Transformer encoder, then back to a (batch, embed_dim, h, w) map
        x = self.transformer(x)
        x = x.transpose(1, 2).reshape(b, c, h, w)

        # Decoder
        x = self.up1(x)
        x = self.up2(x)
        x = self.up3(x)
        x = self.final(x)

        # The decoder upsamples 8x while the patch embedding downsamples by
        # patch_size (16x by default), so resize the logits to the input
        # resolution; otherwise they cannot be compared with the masks.
        if x.shape[-2:] != (in_h, in_w):
            x = nn.functional.interpolate(x, size=(in_h, in_w), mode='bilinear', align_corners=False)

        return x

# Build the network with its default hyper-parameters, then move its
# parameters to the GPU.
model = TransUNet()
model = model.to('cuda')

In [None]:
def train(model, dataloader, criterion, optimizer, device='cuda'):
    """Run one optimization pass over ``dataloader``.

    Puts the model in training mode, takes one optimizer step per batch and
    returns the mean of the per-batch loss values.
    """
    model.train()
    batch_losses = []
    for batch_inputs, batch_targets in tqdm(dataloader):
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_targets)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())

    return sum(batch_losses) / len(dataloader)

def validate(model, dataloader, criterion, device='cuda'):
    """Return the mean per-batch loss of ``model`` over ``dataloader``.

    The model is switched to eval mode and no gradients are recorded.
    """
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for batch_inputs, batch_targets in tqdm(dataloader):
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)
            predictions = model(batch_inputs)
            batch_losses.append(criterion(predictions, batch_targets).item())

    return sum(batch_losses) / len(dataloader)

def test(model, dataloader, criterion, device='cuda'):
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for videos, masks in tqdm(dataloader):
            videos, masks = videos.to(device), masks.to(device)
            outputs = model(videos)
            loss = criterion(outputs, masks)
            running_loss += loss.item()

    return running_loss / len(dataloader)

In [None]:
# Number of full passes over the training set.
num_epochs = 20

# The model emits raw logits, so use the loss that applies the sigmoid itself.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# One training pass followed by one validation pass per epoch.
for epoch in range(num_epochs):
    train_loss = train(model=model, dataloader=train_loader, criterion=criterion, optimizer=optimizer)
    val_loss = validate(model=model, dataloader=val_loader, criterion=criterion)
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

  0%|          | 0/934 [00:00<?, ?it/s]


TypeError: expected Tensor as element 0 in argument 0, but got Image