# Need for numpy arrays for numeric computation
- Compare numpy array over python list
- NumPy outperforms regular Python lists in both memory efficiency and computation speed.
- Numpy syntax is **easy**

In [None]:
import numpy as np
import time         # check operation time
import random       # generate random  numbers
from sys import getsizeof # check the size of objects
import math

# Size

In [15]:
# size
n_elements = 100 # 100000, 100

x_list = list(range(0, n_elements)) # or [x for x in range(n_elements)]

print(x_list)
print("size of list in memory :", getsizeof(x_list), "bytes")

x_np = np.array(x_list) 
print("size of numpy in memory:", getsizeof(x_np), "bytes")

size of list in memory : 800056 bytes
size of numpy in memory: 400112 bytes


### almost 50% drop in size. 

# Performance on element-wise addition
Add a number to all elements of list and numpy-arrays

In [16]:
# add 100 to all elements in a list
n_elements = 10000000 # ten million
x_list = list(range(0, n_elements)) # 

start = time.process_time()
x_new = []
for e in x_list:
    x_new.append(e + 100)
end = time.process_time()

print(end - start)
print(x_list[:10])
print(x_new[:10])

0.71875
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[100, 101, 102, 103, 104, 105, 106, 107, 108, 109]


In [17]:
# add 100 to all elements in a numpy arrays: Easy and no for loop
n_elements = 10000000

x_list  = list(range(0, n_elements))
x_numpy = np.array(x_list)

start = time.process_time()
x_new = x_numpy + 100
end   = time.process_time()

print(end - start)

0.109375


In [18]:
# How fast ?
0.71875/0.109375

6.571428571428571

# Performance on element-wise square
Square all elements of list and numpy-arrays

In [19]:
# square all elements of list
n_elements = 10000000 # ten million
x_list = list(range(0, n_elements)) # or [x for x in range(n_elements)]

start = time.process_time()
x_new = []
for e in x_list:
    x_new.append(e ** 2)
end = time.process_time()

print(end - start)

1.5625


In [20]:
# square all elements of numpy array
n_elements = 10000000

x_list  = list(range(0, n_elements))
x_numpy = np.array(x_list)

start = time.process_time()
x_new = x_numpy ** 2
end   = time.process_time()

print(end - start)

0.140625


#### Numpy did not use for loop and its faster

In [21]:
# How fast ?
1.5625/0.140625

11.11111111111111

# Performance on addition of 2 vectors

In [22]:
# Using Python list: Complicated because it uses list comprehension and for loop
n_elements = 10000000
list1 = list(range(n_elements))
list2 = list(range(n_elements))

start = time.time()
result_list = [x + y for x, y in zip(list1, list2)]
end = time.time()

print(end - start)

1.471404790878296


In [23]:
# Using NumPy array: No for loop.
arr1 = np.arange(n_elements)
arr2 = np.arange(n_elements)

start = time.time()
result_array = arr1 + arr2  # vectorized operation
end = time.time()

print(end - start)

0.035004615783691406


#### Numpy did not use for loop and its faster

In [24]:
# How fast ?
1.471/0.0350

42.028571428571425

# Performance on matrix multiplication

In [25]:
# Matrix multiplication with Python Lists: Can take upto 8+ seconds

N = 300 # 300 took 8 secs, 400 took 18 secs
A_list = [[random.randint(1, 10) for _ in range(N)] for _ in range(N)]
B_list = [[random.randint(1, 10) for _ in range(N)] for _ in range(N)]

# Prepare empty result matrix
result_list = [[0 for _ in range(N)] for _ in range(N)]

start = time.time()
for i in range(N):
    for j in range(N):
        for k in range(N):
            result_list[i][j] += A_list[i][k] * B_list[k][j]
end = time.time()

print(end - start)

13.596438646316528


In [26]:
#  Matrix multiplication with NumPy: 

A_np = np.random.randint(1, 11, size=(N, N))
B_np = np.random.randint(1, 11, size=(N, N))

start = time.time()
result_np = A_np @ B_np   # or A_np @ B_np, np.matmul(A_np, B_np)
end = time.time()

print(end - start)

0.10050630569458008


#### Numpy did not use for loop and its faster

In [27]:
# How fast
13.59/.1005

135.22388059701493

## Statistics Operations

In [28]:
# Lets calculate mean, sum, max, standard deviation using Python list

SIZE = 10_000_000  # 10 million elements
py_list = [random.randint(1, 100) for _ in range(SIZE)]

print(f"Data size: {SIZE:,} elements\n")

start = time.time()

py_sum = sum(py_list)
py_mean = py_sum / len(py_list)
py_max = max(py_list)
# Standard deviation (manual)
mean_diff = [(x - py_mean) ** 2 for x in py_list]
py_std = math.sqrt(sum(mean_diff) / len(py_list))

end = time.time()
py_time = end - start

print("Python Results:")
print(f"Mean: {py_mean:.2f}")
print(f"Sum: {py_sum}")
print(f"Max: {py_max}")
print(f"Standard Deviation: {py_std:.2f}")
print(f"Time taken: {py_time:.5f} seconds\n")

Data size: 10,000,000 elements

Python Results:
Mean: 50.52
Sum: 505150774
Max: 100
Standard Deviation: 28.86
Time taken: 4.44461 seconds



In [29]:
# Lets calculate mean, sum, max, standard deviation using NumPy Computation

np_array = np.random.randint(1, 100, SIZE)

start = time.time()

np_mean = np.mean(np_array)
np_sum  = np.sum(np_array)
np_max  = np.max(np_array)
np_std  = np.std(np_array)

end = time.time()
np_time = end - start

print("NumPy Results:")
print(f"Mean: {np_mean:.2f}")
print(f"Sum: {np_sum}")
print(f"Max: {np_max}")
print(f"Standard Deviation: {np_std:.2f}")
print(f"Time taken: {np_time:.5f} seconds\n")

# Comparison Summary
speedup = py_time / np_time if np_time > 0 else float('inf')
print(f"NumPy is approximately {speedup:.2f}x faster than pure Python!")

NumPy Results:
Mean: 50.00
Sum: 499993178
Max: 99
Standard Deviation: 28.58
Time taken: 0.14058 seconds

NumPy is approximately 31.62x faster than pure Python!
