In [1]:
from utils.utils import *
from scripts.config import DefaultArgsNamespace
import torch
import torch.nn as nn
import torchvision.models as models
from datautils.ems import *
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
import wandb

import warnings
warnings.filterwarnings("ignore", message="Accurate seek is not implemented for pyav backend")




# Provisional device choice; it is replaced below by the device that
# init_model() returns.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

args = DefaultArgsNamespace()

train_loader, val_loader, test_loader, train_class_stats, val_class_stats = eee_get_dataloaders(args)

# Attach the per-split class statistics to the config before building the model
# (presumably read by init_model for class weighting -- TODO confirm).
args.dataloader_params["train_class_stats"] = train_class_stats
args.dataloader_params["val_class_stats"] = val_class_stats

# Build model / optimizer / loss from the config and move the model to the
# device reported by init_model.
model, optimizer, criterion, device = init_model(args)
model = model.to(device)
scheduler = StepLR(optimizer, step_size=args.learning_params["lr_drop"], gamma=0.1)  # adjust parameters as needed

# Smoke test: push one random sequence through the model to check output shapes.
dummy_seq_len = 120
dummy_input = torch.randn(1, dummy_seq_len, 1024)  # batch, seq_len, feature_dim

# Use the same `device` the model was moved to (not args.device) so the input
# and the model can never end up on different devices.
dummy_input = dummy_input.to(device)

# Shape check only -- no gradients are needed.
with torch.no_grad():
    dummy_output = model(dummy_input)

print(dummy_output.shape)
# repeat the output to match sequence length
dummy_output = dummy_output.repeat(1, dummy_seq_len, 1)

print(dummy_output.shape)




Loading dataloader for Segmentation task
Splitting data to windows
Class stats: {'check_responsiveness': 28, 'check_pulse': 30, 'chest_compressions': 449, 'no_action': 165, 'turn_on_aed': 47, 'attach_defib_pads': 121, 'clear_for_analysis': 113, 'clear_for_shock': 50, 'administer_shock_aed': 42, 'open_airway': 1, 'ventilate_patient': 154, 'approach_patient': 3, 'request_aed': 5, 'place_bvm': 85, 'check_breathing': 26, 'request_assistance': 6}
Total windowed clips: 1364
Splitting data to windows
Class stats: {'check_responsiveness': 11, 'check_breathing': 11, 'chest_compressions': 146, 'request_aed': 2, 'no_action': 40, 'turn_on_aed': 10, 'attach_defib_pads': 38, 'clear_for_analysis': 33, 'clear_for_shock': 11, 'place_bvm': 24, 'ventilate_patient': 39, 'check_pulse': 9, 'administer_shock_aed': 11, 'open_airway': 1}
Total windowed clips: 397
Splitting data to windows
Class stats: {'approach_patient': 3, 'check_responsiveness': 14, 'check_pulse': 19, 'chest_compressions': 219, 'request_ass



Class counts:  tensor([ 28.,  30., 449., 165.,  47., 121., 113.,  50.,  42.,   1., 154.,   3.,
          5.,  85.,  26.,   6.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,
          1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.]) 35
torch.Size([1, 16])
torch.Size([1, 120, 16])


In [2]:
# The number of output classes is the number of keysteps listed in the
# dataloader configuration.
keysteps = args.dataloader_params['keysteps']
out_classes = len(keysteps)
print(out_classes)

35


In [3]:
modality = args.dataloader_params['modality']
print("Modality: ", modality)

# Find the feature dimension by preprocessing a single training batch.
# Named `sample_input` rather than `input` so the Python builtin is not shadowed
# in the notebook namespace.
sample_input, feature_size, sample_label = preprocess(next(iter(train_loader)), modality, model, device=device)
print("Feature size: ", feature_size)

# print(sample_input[:,0,-3:])

# Size the transformer from the measured feature dimension and the number of
# keystep classes (`out_classes` is computed in the previous cell).
args.transformer_params['input_dim'] = feature_size
args.transformer_params['output_dim'] = out_classes

# Re-create model, optimizer, loss and scheduler so they pick up the updated
# transformer dimensions.
model, optimizer, criterion, device = init_model(args)
model = model.to(device)
scheduler = StepLR(optimizer, step_size=args.learning_params["lr_drop"], gamma=0.1)  # adjust parameters as needed

wandb_logger = wandb.init(
    # set the wandb project where this run will be logged
    project="EgoExoEMS",
    group="Keystep Recognition",
    mode="disabled",  # logging is switched off for this run
    name="Testing on EgoExoEMS with I3D RGB Features - ICRA Model",
    notes="initial attempt ICRA model with I3D RGB features",
    config={
    "args": args,
    }
)



Modality:  ['smartwatch']
getting clip 740 with frame 1800 to 1920 of length (120)
sw_acc: ['-5.310339450836182,-7.879317760467529,2.7437551021575928', '-5.382165431976318,-7.8529815673828125,2.801215887069702', '-5.305551052093506,-7.927201747894287,2.765302896499634', '-5.30076265335083,-7.864952564239502,2.7317841053009038', '-5.319916248321533,-7.872135162353516,2.6671407222747803', '-5.331887245178223,-7.917624950408935,2.6886885166168213', '-5.281609058380127,-7.790732383728027,2.7126305103302', '-5.25527286529541,-7.879317760467529,2.7246015071868896', '-5.295974254608154,-7.869740962982178,2.736572504043579', '-5.367800235748291,-7.91283655166626,2.695871114730835', '-5.34625244140625,-7.85537576675415,2.6862943172454834', '-5.310339450836182,-7.819462776184082,2.786850690841675', '-5.2672438621521,-7.886500358581543,2.722207307815552', '-5.317522048950195,-7.888894557952881,2.655169725418091', '-5.257667064666748,-7.800309181213379,2.6599581241607666', '-5.279214859008789,-7.9

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Class counts:  tensor([ 28.,  30., 449., 165.,  47., 121., 113.,  50.,  42.,   1., 154.,   3.,
          5.,  85.,  26.,   6.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,
          1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.]) 35


In [4]:
# Inspection pass: run every training batch through the model and print the
# prediction, the ground truth and the loss. No parameters are updated here.

# Nothing is back-propagated in this cell, so skip building the autograd graph.
with torch.no_grad():
    for i, batch in enumerate(train_loader):
        # NOTE(review): the 'smartwatch' key is hard-coded; this matches the
        # modality printed above but will need updating if the modality changes.
        print(batch['smartwatch'].shape)
        # `model_input` rather than `input`, to avoid shadowing the builtin.
        model_input, feature_size, label = preprocess(batch, args.dataloader_params['modality'], model, device=device, task='segmentation')
        print("Model input:", model_input.shape)
        output = model(model_input)

        # print("Model output:",output.shape)
        # print("Label:",label.shape)

        print("Model prediction:", output.argmax(dim=1))
        # .item() only works for a single-element tensor; fall back to a list
        # so the cell also works when a batch carries more than one label.
        print("Ground truth:", label.item() if label.numel() == 1 else label.tolist())
        loss = criterion(output, label)

        print("Loss:", loss.item())
        # break

# results_dir = './results'
# # # # Train the model
# for epoch in range(args.learning_params["epochs"]):
#     train_loss = train_one_epoch(model, train_loader, criterion, optimizer, device, wandb_logger, modality=modality)
#     val_loss = validate(model, val_loader, criterion, device, wandb_logger, modality=modality)
#     scheduler.step()
#     print(f"Epoch: {epoch}, Train Loss: {train_loss}, Val Loss: {val_loss}")
#     results = test_model(model, test_loader, criterion, device, wandb_logger, modality=modality, epoch=epoch, results_dir=results_dir)
#     print(results)
#     break

# dummy_input = torch.randn(1, 3, 1024) # batch, seq_len, feature_dim
# dummy_input = dummy_input.to(args.device)
# pred = model(dummy_input)
# print(pred.shape)


getting clip 1255 with frame 3120 to 3240 of length (120)
sw_acc: ['8.128314971923828,-5.4013190269470215,1.965639591217041', '8.154650688171387,-5.432443618774414,2.2242133617401123', '8.327033042907715,-5.25527286529541,2.542642116546631', '8.243236541748047,-5.073313236236572,2.451662540435791', '8.293514251708984,-5.238513469696045,2.492363929748535', '9.287108421325684,-5.157110691070557,3.2369606494903564', '7.295132637023926,-6.095637321472168,2.255337953567505', '9.191340446472168,-5.853823184967041,3.5051112174987797', '10.278307914733888,-5.044582843780518,4.108449935913086', '10.90798282623291,-4.185064792633057,0.9169792532920836', '10.711658477783203,-3.2537200450897217,0.3064578175544739', '9.708487510681152,0.4070142805576324,2.360682964324951', '8.681375503540039,-2.7772738933563232,3.914519786834717', '6.919242858886719,0.0119710080325603,3.4620156288146973', '6.739677906036377,-2.217030763626098,4.927267074584961', '4.261679172515869,-3.969586372375488,6.5361704826354

In [5]:
import torch
import torch.nn as nn

class CNN_Encoder(nn.Module):
    """1-D convolutional encoder over a feature sequence.

    The encoder is a stack of ``Conv1d -> ReLU -> MaxPool1d(2, 2)`` stages, one
    per entry in ``hidden_channels`` plus a final stage producing
    ``out_channels``. With an odd ``kernel_size`` the convolutions keep the
    temporal length unchanged, while every pooling layer halves it (rounding
    down). The default three stages therefore turn ``seq_len`` into
    ``seq_len // 2 // 2 // 2`` (e.g. 30 -> 15 -> 7 -> 3).

    Args:
        feature_dim: Number of features per time step of the input.
        out_channels: Number of channels produced by the last stage.
        kernel_size: Convolution kernel size; padding is ``kernel_size // 2``.
        hidden_channels: Output widths of the intermediate stages. The default
            ``(512, 256)`` reproduces the original fixed architecture,
            including its layer indices inside ``self.encoder``.
    """

    def __init__(self, feature_dim, out_channels, kernel_size=3, hidden_channels=(512, 256)):
        super().__init__()
        channel_sizes = [feature_dim, *hidden_channels, out_channels]
        layers = []
        for in_ch, out_ch in zip(channel_sizes[:-1], channel_sizes[1:]):
            layers.append(
                nn.Conv1d(in_channels=in_ch, out_channels=out_ch, kernel_size=kernel_size, stride=1, padding=kernel_size // 2)
            )
            layers.append(nn.ReLU())
            # Halves the temporal dimension (floor division).
            layers.append(nn.MaxPool1d(kernel_size=2, stride=2))
        self.encoder = nn.Sequential(*layers)

    def forward(self, x):
        """Encode ``x`` of shape [batch, seq_len, feature_dim].

        Returns:
            Tensor of shape [batch, out_channels, reduced_len], where
            ``reduced_len`` is ``seq_len`` halved once per pooling stage.
        """
        # Conv1d expects channels first: [batch, feature_dim, seq_len]
        x = x.permute(0, 2, 1)

        x = self.encoder(x)
        # x = torch.mean(x, dim=-1)  # Global average pooling
        return x

# NOTE(review): this overwrites the `feature_size` measured from the dataloader
# in an earlier cell; re-run that cell before reusing the measured value.
feature_size = 1024
hidden_dim = 256
cnn_encoder = CNN_Encoder(feature_size, hidden_dim)

# Smoke-test the encoder on random data.
seq_len = 30
dummy_input = torch.randn(1, seq_len, feature_size)  # batch, seq_len, feature_dim

pred = cnn_encoder(dummy_input)

# The encoder has three stride-2 poolings, each halving the length (rounded down).
assert tuple(pred.shape) == (1, hidden_dim, seq_len // 2 // 2 // 2)
print(pred.shape)  # [batch, hidden_dim, reduced_len] -> torch.Size([1, 256, 3])




torch.Size([1, 256, 3])
