In [13]:
import numpy as np
import torch

from dataset import create_wall_dataloader
from normalizer import StateNormalizer

In [2]:
def calculate_array_statistics(url, chunk_size=None):
    """
    Calculate running mean and standard deviation for a large memory-mapped array
    using chunked processing to manage memory usage.
    
    Parameters:
    -----------
    url : str
        Path to the .npy file
    chunk_size : int, optional
        Number of temporal samples to process at once. If None, will use 1000 * temporal_dimension
        
    Returns:
    --------
    tuple
        (running_mean, running_std) arrays of shape (channels,)
    """
    # Open memory-mapped array
    npy_array = np.lib.format.open_memmap(url, mode="r")
    total_samples, temporal, channels, height, width = npy_array.shape
    
    # Calculate effective batch size
    merged_batch_size = total_samples * temporal
    
    # Set default chunk size if not provided
    if chunk_size is None:
        chunk_size = 1000 * temporal
    
    # Initialize statistics trackers with explicit dtype
    running_mean = np.zeros(channels, dtype=np.float64)
    running_var = np.zeros(channels, dtype=np.float64)
    total_elements = 0
    
    # Process data in chunks
    for start in range(0, merged_batch_size, chunk_size):
        end = min(start + chunk_size, merged_batch_size)
        
        # Convert indices to sample and temporal ranges
        sample_start = start // temporal
        sample_end = (end - 1) // temporal + 1
        temporal_offset_start = start % temporal
        temporal_offset_end = end % temporal if end % temporal != 0 else temporal
        
        # Load and slice chunk
        chunk = npy_array[sample_start:sample_end]
        if sample_start == sample_end - 1:
            chunk = chunk[:, temporal_offset_start:temporal_offset_end]
        
        # Reshape for efficient computation
        reshaped = chunk.reshape(-1, channels, height, width).transpose(1, 0, 2, 3).reshape(channels, -1)
        
        # Update statistics using Welford's online algorithm with explicit type conversion
        chunk_mean = np.asarray(reshaped.mean(axis=1), dtype=np.float64)
        chunk_var = np.asarray(reshaped.var(axis=1), dtype=np.float64)
        chunk_elements = float(reshaped.shape[1])  # Convert to float for division
        
        
        total_elements += chunk_elements
        delta = chunk_mean - running_mean
        running_mean += delta * (chunk_elements / total_elements)
        running_var += (chunk_var * (chunk_elements / total_elements) + 
                        (delta**2) * (chunk_elements * (total_elements - chunk_elements)) / 
                        total_elements**2)
    
    running_std = np.sqrt(running_var)
    return running_mean, running_std

In [3]:
def print_statistics(url, chunk_size=None):
    """
    Calculate and print channel-wise statistics with proper formatting.
    """
    means, stds = calculate_array_statistics(url, chunk_size)
    
    print("\nChannel Statistics:")
    print("-" * 50)
    print(f"{'Channel':<10} {'Mean':>15} {'Std Dev':>15}")
    print("-" * 50)
    for i, (mean, std) in enumerate(zip(means, stds)):
        print(f"{i:<10} {mean:>15.6f} {std:>15.6f}")
    print("-" * 50)

In [4]:
url = r"C:\Users\Andrew Deur\Documents\NYU\DS-GA 1008 Deep Learning\1008-Final-Proj\data\states.npy"
print_statistics(url, chunk_size=1000)


Channel Statistics:
--------------------------------------------------
Channel               Mean         Std Dev
--------------------------------------------------
0                 0.000237        0.009656
1                 0.009316        0.081523
--------------------------------------------------


In [26]:
print_statistics(url, chunk_size=10000)


Channel Statistics:
--------------------------------------------------
Channel               Mean         Std Dev
--------------------------------------------------
0                 0.000237        0.008225
1                 0.009316        0.069441
--------------------------------------------------


In [None]:
from models import BarlowTwins, ViTBackbone

enc_path = r"C:\Users\Andrew Deur\Documents\NYU\DS-GA 1008 Deep Learning\1008-Final-Proj\models\encoder.pth"
state_dict = torch.load(enc_path)['model_state_dict']

# Define the ViT Backbone
backbone = ViTBackbone(
    image_size=65,
    patch_size=5,
    in_channels=2,
    embed_dim=256,
    num_heads=4,
    mlp_dim=1024,
    num_layers=2,
    dropout=0.1,
)


model = BarlowTwins(backbone=backbone, batch_size=64, repr_dim=256)
model.load_state_dict(state_dict, strict=True)
model.eval()

  state_dict = torch.load(enc_url)['model_state_dict']


BarlowTwins(
  (backbone): ViTBackbone(
    (patch_embedding): PatchEmbedding(
      (conv_proj): Conv2d(2, 256, kernel_size=(5, 5), stride=(5, 5))
    )
    (transformer_blocks): ModuleList(
      (0-1): 2 x TransformerBlock(
        (layer_norm_1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (layer_norm_2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (attention): MultiHeadSelfAttention(
          (qkv): Linear(in_features=256, out_features=768, bias=True)
          (q_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (k_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (projection): Linear(in_features=256, out_features=256, bias=True)
          (projection_dropout): Dropout(p=0.1, inplace=False)
        )
        (ffn): Sequential(
          (0): Linear(in_features=256, out_features=1024, bias=True)
          (1): Dropout(p=0.1, inplace=False)
          (2): GELU(approximate='none')
          (3): Linear(in_f

In [None]:
def load_train_data(device, batch_size):
    path = r'C:/Users/Andrew Deur/Documents/NYU/DS-GA 1008 Deep Learning/1008-Final-Proj/data'

    train_ds = create_wall_dataloader(
        data_path=path,
        device=device,
        probing=False,
        train=True,
        batch_size=batch_size
    )
    return train_ds

def get_device():
    """Check for GPU availability."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)
    return device

In [7]:
device = get_device()
data = load_train_data(device, batch_size=64)

Using device: cuda


In [20]:
normalizer = StateNormalizer()
model.to(device)

for batch in data:
    
    states = batch.states.to(device)
    states = normalizer.normalize_state(states)

    batch_size, num_frames, channels, height, width = states.shape
    states = states.view(batch_size * num_frames, channels, height, width)
    
    embeddings = model(states)
    print(embeddings)


tensor([[-2.4743,  1.1201, -0.7877,  ..., -0.7649,  4.3110,  2.9583],
        [-1.3010,  1.3352,  0.1237,  ...,  0.0302,  4.8428,  2.5011],
        [ 0.3922,  1.7335,  0.9176,  ...,  1.2595,  4.5633,  1.8186],
        ...,
        [-1.4449,  0.5167,  0.8463,  ...,  0.5638,  2.1960,  0.5111],
        [-2.5982,  0.9308,  0.5206,  ...,  0.3864,  2.4461,  0.1552],
        [-3.3442,  1.2919,  0.4808,  ...,  0.1914,  2.7992, -0.1630]],
       device='cuda:0', grad_fn=<SliceBackward0>)
tensor([[-1.1509,  0.1699,  1.5443,  ...,  1.4417,  0.7814,  0.9722],
        [-0.5826,  0.2676,  1.8711,  ...,  2.0028,  0.4186,  0.7595],
        [ 2.5281,  0.4413,  3.4001,  ...,  3.1392,  1.0466,  0.1465],
        ...,
        [-1.8641, -0.2022,  0.4913,  ...,  0.7156,  1.7732,  0.3004],
        [-1.8641, -0.2022,  0.4913,  ...,  0.7156,  1.7732,  0.3004],
        [-1.8641, -0.2022,  0.4913,  ...,  0.7156,  1.7732,  0.3004]],
       device='cuda:0', grad_fn=<SliceBackward0>)
tensor([[ 0.3449,  0.1249,  2.19

KeyboardInterrupt: 