In [None]:
import torch
#Vscode so OP

In [2]:
!nvidia-smi

Sun Jan 11 17:15:27 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 565.77.01              Driver Version: 566.36         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3050 ...    On  |   00000000:01:00.0 Off |                  N/A |
| N/A   49C    P0             12W /   46W |       0MiB /   4096MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [4]:
def rich_info(t, name="Tensor"):
    # 1. Identity & Pointers
    obj_addr = hex(id(t))                # Python metadata object address
    data_ptr = hex(t.data_ptr())          # Address of the first element
    # Storage is the raw memory block. Multiple tensors can point to one storage.
    storage = t.untyped_storage()
    storage_ptr = hex(storage.data_ptr())
    
    # 2. Memory Layout & Navigation
    # Layout refers to 'strided' vs 'sparse'. 
    # Memory Format refers to 'contiguous' vs 'channels_last' (NHWC).
    layout = t.layout
    memory_format = "channels_last" if t.is_contiguous(memory_format=torch.channels_last) else "contiguous"
    
    # 3. Size calculations
    element_size = t.element_size()      # Bytes per element (e.g., 4 for float32)
    num_elements = t.nelement()
    actual_bytes = num_elements * element_size
    storage_bytes = storage.nbytes()     # Total size of the underlying memory block
    
    print(f"[{name.upper()}]")
    print(t)
    print(f"  ID (Obj Addr)   : {obj_addr}")
    print(f"  Data Pointer    : {data_ptr}")
    print(f"  Storage Pointer : {storage_ptr}")
    print(f"  Storage Size    : {storage_bytes / 1024**2:.4f} MB")
    print(f"  Tensor Size     : {actual_bytes / 1024**2:.4f} MB")
    print(f"  ---")
    print(f"  Shape           : {list(t.shape)}")
    print(f"  Stride          : {t.stride()}")
    print(f"  Offset          : {t.storage_offset()} (elements)")
    print(f"  Layout          : {layout}")
    print(f"  Memory Format   : {memory_format}")
    print(f"  Dtype / Device  : {t.dtype} / {t.device}")
    print(f"  Contiguous      : {t.is_contiguous()}")
    print(f"  Grad Required   : {t.requires_grad}")
    print(f"  Grad Function   : {t.grad_fn}")
    print(f"  Grad            : {t.grad}")   
    print("="*40)



## Viewing Tensors

In [9]:
#Transpose creates a non contiguous view tensor
t = torch.tensor(([1,2,3],[4,5,6]))
rich_info(t)
t = t.T
rich_info(t)
t = t.contiguous()
rich_info(t)

[TENSOR]
tensor([[1, 2, 3],
        [4, 5, 6]])
  ID (Obj Addr)   : 0x7d571fb7dae0
  Data Pointer    : 0x59fa732628c0
  Storage Pointer : 0x59fa732628c0
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[TENSOR]
tensor([[1, 4],
        [2, 5],
        [3, 6]])
  ID (Obj Addr)   : 0x7d571fb7e760
  Data Pointer    : 0x59fa732628c0
  Storage Pointer : 0x59fa732628c0
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [3, 2]
  Stride          : (1, 3)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : False
  Grad Required   : False
  Grad Function   :

In [6]:
#Reshape creates a contiguous tensor first if given non-contiguous tensors.
t = torch.tensor(([1,2,3],[4,5,6]))
rich_info(t)
t = t.T
rich_info(t)
t = t.reshape((2,3))
rich_info(t)


[TENSOR]
tensor([[1, 2, 3],
        [4, 5, 6]])
  ID (Obj Addr)   : 0x7d571fb7c870
  Data Pointer    : 0x59fa732628c0
  Storage Pointer : 0x59fa732628c0
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[TENSOR]
tensor([[1, 4],
        [2, 5],
        [3, 6]])
  ID (Obj Addr)   : 0x7d571fb7d090
  Data Pointer    : 0x59fa732628c0
  Storage Pointer : 0x59fa732628c0
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [3, 2]
  Stride          : (1, 3)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : False
  Grad Required   : False
  Grad Function   :

In [15]:
#Reshape creates a view if given contiguous tensors.
t = torch.tensor(([1,2,3],[4,5,6]))
rich_info(t)
'''
t = t.T
rich_info(t)
'''
t = t.reshape((3,2))
rich_info(t)


[TENSOR]
tensor([[1, 2, 3],
        [4, 5, 6]])
  ID (Obj Addr)   : 0x74dbc61e88b0
  Data Pointer    : 0x599bcb817b40
  Storage Pointer : 0x599bcb817b40
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[TENSOR]
tensor([[1, 2],
        [3, 4],
        [5, 6]])
  ID (Obj Addr)   : 0x74dbc62020c0
  Data Pointer    : 0x599bcb817b40
  Storage Pointer : 0x599bcb817b40
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [3, 2]
  Stride          : (2, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : 

In [3]:
#Slicing a tensor creates a view. Notice how storage pointers are same but the data pointers are different. 
#Also notice the change in offset. It denote the number of elements to jump to reach this satrting point
t = torch.tensor(([1,2,3],[4,5,6]))
rich_info(t)
t = t[1:, 1:3]
rich_info(t)

[TENSOR]
tensor([[1, 2, 3],
        [4, 5, 6]])
  ID (Obj Addr)   : 0x7b202495a890
  Data Pointer    : 0x63a132a6f780
  Storage Pointer : 0x63a132a6f780
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[TENSOR]
tensor([[5, 6]])
  ID (Obj Addr)   : 0x7b202490e9d0
  Data Pointer    : 0x63a132a6f7a0
  Storage Pointer : 0x63a132a6f780
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [1, 2]
  Stride          : (3, 1)
  Offset          : 4 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None


## Moving Tensors

In [4]:
# Observe the change in address architecture. Its interesting to see the object address still have cpu architecture
t = torch.tensor([[1,2,3],[4,5,6]])
rich_info(t, name="CPU")
t = t.cuda()
rich_info(t, name="CUDA")

[CPU]
tensor([[1, 2, 3],
        [4, 5, 6]])
  ID (Obj Addr)   : 0x7b202495a070
  Data Pointer    : 0x63a132a70000
  Storage Pointer : 0x63a132a70000
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[CUDA]
tensor([[1, 2, 3],
        [4, 5, 6]], device='cuda:0')
  ID (Obj Addr)   : 0x7b2024dd0bd0
  Data Pointer    : 0x503e00000
  Storage Pointer : 0x503e00000
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cuda:0
  Contiguous      : True
  Grad Required   : False
  Grad Function   : N

## Arranging tensors

In [5]:
t1 = torch.tensor([[1,2,3],[4,5,6]])
rich_info(t1, name="T1")
t2 = torch.tensor([[7,8,9],[10,11,12]])
rich_info(t2, name="T2")

t = torch.stack([t1,t2], dim=0)
rich_info(t, name="STACK0")

t = torch.stack([t1,t2], dim=1)
rich_info(t, name="STACK1")

t = torch.stack([t1,t2], dim=2)
rich_info(t, name="STACK2")

[T1]
tensor([[1, 2, 3],
        [4, 5, 6]])
  ID (Obj Addr)   : 0x7b20249469d0
  Data Pointer    : 0x63a1337dab80
  Storage Pointer : 0x63a1337dab80
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[T2]
tensor([[ 7,  8,  9],
        [10, 11, 12]])
  ID (Obj Addr)   : 0x7b211ec26f20
  Data Pointer    : 0x63a1343b3f00
  Storage Pointer : 0x63a1343b3f00
  Storage Size    : 0.0000 MB
  Tensor Size     : 0.0000 MB
  ---
  Shape           : [2, 3]
  Stride          : (3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad 

In [19]:
t1 = torch.tensor([[[1,2,3],[4,5,6]], [[7,8,9],[10,11,12]]])
rich_info(t1, name="T1")
t2 = torch.tensor([[[13,14,15],[16,17,18]], [[19,20,21],[22,23,24]]])
rich_info(t2, name="T2")

t = torch.cat([t1,t2], dim=0)
rich_info(t, name="CAT0")

t = torch.cat([t1,t2], dim=1)
rich_info(t, name="CAT1")

t = torch.cat([t1,t2], dim=2)
rich_info(t, name="CAT2")

[T1]
tensor([[[ 1,  2,  3],
         [ 4,  5,  6]],

        [[ 7,  8,  9],
         [10, 11, 12]]])
  ID (Obj Addr)   : 0x74dbc68a1b20
  Data Pointer    : 0x599bcca28580
  Storage Pointer : 0x599bcca28580
  Storage Size    : 0.0001 MB
  Tensor Size     : 0.0001 MB
  ---
  Shape           : [2, 2, 3]
  Stride          : (6, 3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
  Dtype / Device  : torch.int64 / cpu
  Contiguous      : True
  Grad Required   : False
  Grad Function   : None
  Grad            : None
[T2]
tensor([[[13, 14, 15],
         [16, 17, 18]],

        [[19, 20, 21],
         [22, 23, 24]]])
  ID (Obj Addr)   : 0x74dbc6249d50
  Data Pointer    : 0x599bcca2f0c0
  Storage Pointer : 0x599bcca2f0c0
  Storage Size    : 0.0001 MB
  Tensor Size     : 0.0001 MB
  ---
  Shape           : [2, 2, 3]
  Stride          : (6, 3, 1)
  Offset          : 0 (elements)
  Layout          : torch.strided
  Memory Format   : contiguous
 