In [4]:
import torch

In [5]:
def rich_info(t, name="Tensor"):
    # 1. Identity & Pointers
    obj_addr = hex(id(t))                # Python metadata object address
    data_ptr = hex(t.data_ptr())          # Address of the first element
    # Storage is the raw memory block. Multiple tensors can point to one storage.
    storage = t.untyped_storage()
    storage_ptr = hex(storage.data_ptr())
    
    # 2. Memory Layout & Navigation
    # Layout refers to 'strided' vs 'sparse'. 
    # Memory Format refers to 'contiguous' vs 'channels_last' (NHWC).
    layout = t.layout
    memory_format = "channels_last" if t.is_contiguous(memory_format=torch.channels_last) else "contiguous"
    
    # 3. Size calculations
    element_size = t.element_size()      # Bytes per element (e.g., 4 for float32)
    num_elements = t.nelement()
    actual_bytes = num_elements * element_size
    storage_bytes = storage.nbytes()     # Total size of the underlying memory block
    
    print(f"[{name.upper()}]")
    print(f"  Tensor            : {t}")
    print(f"  ID (Obj Addr)   : {obj_addr}")
    print(f"  Data Pointer    : {data_ptr}")
    print(f"  Storage Pointer : {storage_ptr}")
    print(f"  Storage Size    : {storage_bytes / 1024**2:.4f} MB")
    print(f"  Tensor Size     : {actual_bytes / 1024**2:.4f} MB")
    print(f"  ---")
    print(f"  Shape           : {list(t.shape)}")
    print(f"  Stride          : {t.stride()}")
    print(f"  Offset          : {t.storage_offset()} (elements)")
    print(f"  Layout          : {layout}")
    print(f"  Memory Format   : {memory_format}")
    print(f"  Dtype / Device  : {t.dtype} / {t.device}")
    print(f"  Contiguous      : {t.is_contiguous()}")
    print(f"  Grad Required   : {t.requires_grad}")
    print(f"  Grad Function   : {t.grad_fn}")
    print(f"  Grad            : {t.grad}")   
    print("="*40)



## Viewing Tensors

In [18]:
#Transpose creates a non contiguous view tensor
t = torch.tensor(([1,2,3],[4,5,6]))
rich_info(t)
t = t.T
rich_info(t)
t = t.contiguous()
rich_info(t)

[TENSOR]
  ID (Obj Addr)   : 0x7046a9403b50
  Data Pointer    : 0x6087d03b6c80
  Storage Pointer : 0x6087d03b6c80
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[TENSOR]
  ID (Obj Addr)   : 0x7046a9403060
  Data Pointer    : 0x6087d03b6c80
  Storage Pointer : 0x6087d03b6c80
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [3, 2]
  Stride          : (1, 3)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : False
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[TENSOR]
  ID (Obj Addr)   : 0x7046a9d11df0
  Data Pointe

In [22]:
#Reshape creates a contiguous tensor first if given non-contiguous tensors.
t = torch.tensor(([1,2,3],[4,5,6]))
rich_info(t)
t = t.T
rich_info(t)
t = t.reshape((2,3))
rich_info(t)
print(t)

[TENSOR]
  ID (Obj Addr)   : 0x7046a93e83b0
  Data Pointer    : 0x6087d04d8480
  Storage Pointer : 0x6087d04d8480
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[TENSOR]
  ID (Obj Addr)   : 0x7046a8e69e40
  Data Pointer    : 0x6087d04d8480
  Storage Pointer : 0x6087d04d8480
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [3, 2]
  Stride          : (1, 3)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : False
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[TENSOR]
  ID (Obj Addr)   : 0x7046a8e6a980
  Data Pointe

In [23]:
#Reshape creates a view if given contiguous tensors.
t = torch.tensor(([1,2,3],[4,5,6]))
rich_info(t)
'''
t = t.T
rich_info(t)
'''
t = t.reshape((3,2))
rich_info(t)
print(t)

[TENSOR]
  ID (Obj Addr)   : 0x7046a93e83b0
  Data Pointer    : 0x6087d04d8480
  Storage Pointer : 0x6087d04d8480
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[TENSOR]
  ID (Obj Addr)   : 0x7046a9d2cdb0
  Data Pointer    : 0x6087d04d8480
  Storage Pointer : 0x6087d04d8480
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [3, 2]
  Stride          : (2, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
tensor([[1, 2],
        [3, 4],
        [5, 6]])


In [13]:
#Slicing a tensor creates a view. Notice how storage pointers are same but the data pointers are different. 
#Also notice the change in offset. It denote the number of elements to jump to reach this satrting point
t = torch.tensor(([1,2,3],[4,5,6]))
rich_info(t)
t = t[1:, 1:3]
rich_info(t)
print(t)

[TENSOR]
  ID (Obj Addr)   : 0x744346381800
  Data Pointer    : 0x557b5c1b6c00
  Storage Pointer : 0x557b5c1b6c00
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[TENSOR]
  ID (Obj Addr)   : 0x7442482eea70
  Data Pointer    : 0x557b5c1b6c20
  Storage Pointer : 0x557b5c1b6c00
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [1, 2]
  Stride          : (3, 1)
  Offset          : 4 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
tensor([[5, 6]])


## Moving Tensors

In [8]:
# Observe the change in address architecture. Its interesting to see the object address still have cpu architecture
t = torch.tensor([[1,2,3],[4,5,6]])
rich_info(t, name="CPU")
t = t.cuda()
rich_info(t, name="CUDA")

[CPU]
  ID (Obj Addr)   : 0x744248313a10
  Data Pointer    : 0x557b5bd1ed00
  Storage Pointer : 0x557b5bd1ed00
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[CUDA]
  ID (Obj Addr)   : 0x744360160900
  Data Pointer    : 0x503e00000
  Storage Pointer : 0x503e00000
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cuda:0
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None


## Arranging tensors

In [15]:
t1 = torch.tensor([[1,2,3],[4,5,6]])
rich_info(t1, name="T1")
t2 = torch.tensor([[7,8,9],[10,11,12]])
rich_info(t2, name="T2")

t = torch.stack([t1,t2], dim=0)
rich_info(t, name="STACK0")
print(t)

t = torch.stack([t1,t2], dim=1)
rich_info(t, name="STACK1")
print(t)

rich_info(t, name="STACK2")
t = torch.stack([t1,t2], dim=2)
print(t)

[T1]
  ID (Obj Addr)   : 0x74424836d1c0
  Data Pointer    : 0x557b5bd20b80
  Storage Pointer : 0x557b5bd20b80
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[T2]
  ID (Obj Addr)   : 0x74424836f560
  Data Pointer    : 0x557b5bd231c0
  Storage Pointer : 0x557b5bd231c0
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[STACK0]
  ID (Obj Addr)   : 0x74424836ec50
  Data Pointer    : 0x