In [7]:
import numpy as np
import time

In [9]:
# Lets say you working with 1D data that is always going to be between 0 and 200.

# size of data
N = 10000

# Different data types with random integers between 0 and 200
a_int64 = np.random.randint(0, 201, size=N, dtype=np.int64)
a_int32 = np.random.randint(0, 201, size=N, dtype=np.int32)
a_int16 = np.random.randint(0, 201, size=N, dtype=np.int16)
a_uint8 = np.random.randint(0, 201, size=N, dtype=np.uint8)
# print(a_uint8[:259])

# Check memory usage
print(f"Memory usage for {N} elements:")
print(f"int64 : {a_int64.nbytes} Bytes")
print(f"int32 : {a_int32.nbytes} Bytes")
print(f"int16 : {a_int16.nbytes} Bytes")
print(f"uint8 : {a_uint8.nbytes} Bytes")

Memory usage for 10000 elements:
int64 : 80000 Bytes
int32 : 40000 Bytes
int16 : 20000 Bytes
uint8 : 10000 Bytes


In [None]:
### Conclusion: If you know that numbers are going to be between 0 to 200, then use np.uint8

In [10]:
# Lets perform some numerical computation with np.int16 and np.int64 and compare time

N = 100000000  # 10 million elements

# Two arrays with different dtypes
a_int64 = np.random.randint(0, 201, size=N, dtype=np.int64)
b_int64 = np.random.randint(0, 201, size=N, dtype=np.int64)

a_int16 = np.random.randint(0, 201, size=N, dtype=np.int16)
b_int16 = np.random.randint(0, 201, size=N, dtype=np.int16)

print("Memory usage for 10 million elements:")
print(f"int64 : {a_int64.nbytes / 1e6:.2f} MB")
print(f"int16 : {a_int16.nbytes / 1e6:.2f} MB")

# Heavy computation: simulate "work"
def heavy_compute(x, y):
    # dot product + elementwise math
    return np.sum((x * y) ** 2 + np.sin(x) + np.sqrt(y))


start = time.time()
result = heavy_compute(a_int64, b_int64)
end = time.time()
print(f"Computation with {a_int64.dtype}: {end - start} sec")


start = time.time()
result = heavy_compute(a_int16, b_int16)
end = time.time()
print(f"Computation with {a_int16.dtype}: {end - start} sec")

Memory usage for 10 million elements:
int64 : 800.00 MB
int16 : 200.00 MB
Computation with int64: 5.34650993347168 sec
Computation with int16: 1.6984648704528809 sec


In [11]:
# how fast
5.3/1.69

3.136094674556213

In [None]:
# Conclusion: Choosing right data type speeds up the computation

In [12]:
# Now lets compare Compound Interest with Float32 vs Float64

# Parameters
principal = 1000.0  # Initial amount
rate      = 0.05    # 5% annual interest
years     = 1000    # Simulate 1000 years for effect

# Float16 simulation
amount16 = np.float16(principal)
for _ in range(years):
    amount16 *= (1 + np.float32(rate))

# Float64 simulation
amount64 = np.float64(principal)
for _ in range(years):
    amount64 *= (1 + np.float64(rate))

# Exact theoretical value
exact = principal * (1 + rate) ** years

print(f"Compound Interest after {years} years:")
print(f"Float16 result : {amount16:.2f}")
print(f"Float64 result : {amount64:.2f}")
print(f"Exact result   : {exact:.2f}")

print("\nError:")
print(f"Float16 error  : {abs(amount16 - exact):.2f}")
print(f"Float64 error  : {abs(amount64 - exact):.2f}")

Compound Interest after 1000 years:
Float16 result : 1546320017967915164762112.00
Float64 result : 1546318920731995137900544.00
Exact result   : 1546318920731992453545984.00

Error:
Float16 error  : 1097235922711216128.00
Float64 error  : 2684354560.00
