In [1]:
import sys
sys.path.append('../')
from modeling.dataloader import VideoDataModule
import torch

# Smoke test for the data pipeline: build the data module, then print the
# shapes and a few leading values of one dataset record and one batch.
data_module = VideoDataModule(data_dir="../preprocessing", batch_size=4)
data_module.setup()

# --- One record, fetched directly from the training dataset ---
train_dataset = data_module.train_dataset
single_record = train_dataset[0]

print("Single Record:")
# Each field is looked up right before its first use and then reused below.
record_embeddings = single_record['sentence_embeddings']
print(f"Sentence Embedding Shape: {record_embeddings.shape}")
record_indicators = single_record['segment_indicators']
print(f"Segment Indices Shape: {record_indicators.shape}")
print(f"Sample of Sentence Embedding: {record_embeddings[:5]}")
print(f"Sample of Segment Indices: {record_indicators[:10]}")

# --- One batch, pulled from the training dataloader ---
train_dataloader = data_module.train_dataloader()
batch = next(iter(train_dataloader))

print("\nBatch:")
batch_embeddings = batch['sentence_embeddings']
print(f"Batch Size: {len(batch_embeddings)}")
print(f"Sentence Embeddings Shape: {batch_embeddings.shape}")
batch_indicators = batch['segment_indicators']
print(f"Segment Indices Shape: {batch_indicators.shape}")

# Per-item shapes: walk the embeddings along their first dimension and look up
# the matching indicators entry by position.
for i, item_embeddings in enumerate(batch_embeddings):
    print(f"\nItem {i} in batch:")
    print(f"  Sentence Embedding Shape: {item_embeddings.shape}")
    print(f"  Segment Indices Shape: {batch_indicators[i].shape}")

# Leading values of the first batch item, for a quick visual sanity check.
print("\nSample of first item in batch:")
print(f"Sentence Embedding: {batch_embeddings[0][:5]}")
print(f"Segment Indices: {batch_indicators[0][:10]}")

  from .autonotebook import tqdm as notebook_tqdm


Single Record:
Sentence Embedding Shape: torch.Size([256, 384])
Segment Indices Shape: torch.Size([256])
Sample of Sentence Embedding: tensor([[-0.0305,  0.0120,  0.0273,  ..., -0.0209,  0.0422, -0.0173],
        [-0.0393, -0.0090,  0.0028,  ..., -0.0202,  0.0755, -0.0272],
        [-0.0515,  0.0250, -0.0282,  ..., -0.0357,  0.1296, -0.0534],
        [-0.0348, -0.0384,  0.0201,  ...,  0.0702, -0.0472, -0.0051],
        [-0.0823,  0.0004,  0.0841,  ..., -0.1221,  0.0531,  0.0019]])
Sample of Segment Indices: tensor([1, 0, 0, 0, 0, 0, 0, 0, 0, 0])

Batch:
Batch Size: 4
Sentence Embeddings Shape: torch.Size([4, 256, 384])
Segment Indices Shape: torch.Size([4, 256])

Item 0 in batch:
  Sentence Embedding Shape: torch.Size([256, 384])
  Segment Indices Shape: torch.Size([256])

Item 1 in batch:
  Sentence Embedding Shape: torch.Size([256, 384])
  Segment Indices Shape: torch.Size([256])

Item 2 in batch:
  Sentence Embedding Shape: torch.Size([256, 384])
  Segment Indices Shape: torch.Size(